phpcms 模拟登录后，对已抓取到的网址执行内容采集

来源:互联网 发布:安卓秒变iphone软件 编辑:程序博客网 时间:2024/05/19 19:13
<?php
/**
 * Simulates a phpcms v9 admin login, reads the pc_hash token from the admin
 * page, and then calls the collection endpoint for every node that still has
 * uncollected URLs (status = 0 in v9_collection_content).
 *
 * NOTE(review): database and admin credentials are hard-coded below, exactly
 * as in the original script. Move them to configuration before real use.
 */

/**
 * Counts the uncollected rows (status = 0) per collection node.
 *
 * @return array Map of nodeid => number of uncollected rows; empty if none.
 */
function arr()
{
    // Newer PHP versions throw on mysqli errors by default; switch that off so
    // the explicit checks below behave the same on every version.
    mysqli_report(MYSQLI_REPORT_OFF);

    $mysqli = new mysqli('localhost', 'root', '12345', 'phpcmsv9');
    // `new mysqli(...) or die(...)` never triggers because `new` always
    // yields an object, so the connection error is inspected explicitly.
    if ($mysqli->connect_error) {
        die('数据库链接失败');
    }

    $query = $mysqli->query('select nodeid from v9_collection_content where status=0');
    if ($query === false) {
        $error = $mysqli->error;
        $mysqli->close();
        die('Query failed: ' . $error);
    }

    // Start from an empty list so array_count_values() always gets an array,
    // even when there is nothing left to collect.
    $result = array();
    while ($rs = $query->fetch_row()) {
        $result[] = $rs[0];
    }

    $query->free();
    $mysqli->close();

    return array_count_values($result);
}

/**
 * Submits the login form and stores the received cookies in a file.
 *
 * CURLOPT_RETURNTRANSFER is 0, so the response body is written straight to
 * the output, as in the original script.
 *
 * @param string $url    Login URL to POST to.
 * @param string $cookie Path of the cookie jar file to write.
 * @param array  $post   Form fields to submit.
 */
function login_post($url, $cookie, $post)
{
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 0);
    curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($post));
    curl_exec($curl);
    curl_close($curl);
}

/**
 * Fetches a page using the cookies stored by login_post().
 *
 * @param string $url    Page to fetch.
 * @param string $cookie Path of the cookie file to read.
 *
 * @return string|false Response body, or false when the request fails.
 */
function get_content($url, $cookie)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
    $rs = curl_exec($ch);
    curl_close($ch);

    return $rs;
}

/**
 * Requests a URL with the stored cookies and discards the response.
 *
 * @param string $url    URL to request.
 * @param string $cookie Path of the cookie file to read.
 */
function star($url, $cookie)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Capture the body instead of echoing it; the result is intentionally unused.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
    curl_exec($ch);
    curl_close($ch);
}

// Pending row count per node, keyed by nodeid.
$arr2 = arr();

// Login form fields.
$post = array(
    'username' => 'phpcms',
    'password' => 'phpcms',
);

// Login endpoint.
$url = "http://localhost/index.php?m=admin&c=index&a=login&dosubmit=1";

// Cookie jar location, next to this script.
$cookie = dirname(__FILE__) . '/cookie.txt';

// Admin page that embeds the pc_hash token.
$url2 = 'http://localhost/index.php?m=admin';

// Log in, then load the admin page with the session cookies.
login_post($url, $cookie, $post);
$content = get_content($url2, $cookie);

// [^']* stops at the closing quote; the former greedy (.*) could run on to a
// later quote on the same line.
$preg = '/var pc_hash = \'([^\']*)\'/i';
if (!is_string($content) || preg_match($preg, $content, $arr) !== 1) {
    // Login failed or the page layout changed: clean up and stop instead of
    // calling the collector with an empty token.
    if (is_file($cookie)) {
        unlink($cookie);
    }
    die('Could not extract pc_hash from the admin page; login may have failed.');
}
$str = $arr[1]; // pc_hash token

// Per the original author's note, each request handles two rows, so a node
// needs round(count / 2) requests (PHP rounds .5 up, covering odd counts).
// NOTE(review): page=8 is hard-coded in the original; confirm it is intended.
foreach ($arr2 as $key => $value) {
    echo $value . '<br>';
    $num = round($value / 2);
    for ($i = 0; $i < $num; $i++) {
        star(
            'http://localhost/index.php?m=collection&c=node&a=col_content&page=8&nodeid='
                . rawurlencode((string) $key) . '&pc_hash=' . rawurlencode($str),
            $cookie
        );
        echo $key . '<br>';
    }
}

// Remove the cookie file; check first rather than silencing errors with @.
if (is_file($cookie)) {
    unlink($cookie);
}
// Closing tag kept on purpose: non-PHP text follows the script in this file.
?>
0 0