PHP模拟登录

来源:互联网 发布:视唱练耳软件 编辑:程序博客网 时间:2024/05/17 01:00

PHP模拟登录抓取,不使用cookieJar文件保存cookie。这是我第一次尝试用面向对象的方式编写,在此Mark一下,留给自己看。其中有几个难点:password的加密方法、cookie的连续获取,以及巧用substr()和strpos()取值。

<?php
set_time_limit(120);
date_default_timezone_set('Asia/Shanghai');

/**
 * Logs in to a remote monitoring site with cURL and fetches the micro-blog
 * count for one keyword over one time window.
 *
 * Cookies are carried in a string property instead of a cookie-jar file.
 * Most response values are extracted by fixed offsets/lengths, so the parsing
 * only works while the server keeps its current response layout.
 */
class yingji
{
    /** Cookie header value accumulated by prelogin()/dologin(); "" means not logged in. */
    private $cookie = "";
    // NOTE(review): $username and $password are never read; dologin() posts a
    // hard-coded credential string instead. Confirm which one is intended.
    private $username = "email";
    private $password = "password";
    private $url = "https://host/login";
    private $loginaction = "https://host/loginAction";
    private $getcloneEidurl = "https://host/monitor/pad/addAttention";
    private $targeturl = "https://host/monitor/query-micro-blogs-count";
    /** Request headers as name => value; docurl() turns them into "Name: value" lines. */
    private $request_headers = array(
        'Host' => 'host',
        'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
        'Accept' => '*/*',
        'Accept-Language' => 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding' => 'gzip, deflate',
        'X-Requested-With' => 'XMLHttpRequest',
        'Connection' => 'keep-alive',
    );
    /** Value sliced out of the addAttention response; sent as the cloneEid POST field. */
    private $cloneEid;
    public $keyword;
    public $fromdate;
    public $todate;

    /**
     * @param string     $keyword  Search keyword.
     * @param int|string $fromdate Window start; callers in this file pass Unix seconds * 1000.
     * @param int|string $todate   Window end; callers in this file pass Unix seconds * 1000.
     */
    public function __construct($keyword, $fromdate, $todate)
    {
        $this->keyword = $keyword;
        $this->fromdate = $fromdate;
        $this->todate = $todate;
    }

    /**
     * Logs in if no cookie is held yet, obtains the cloneEid, then queries the count.
     *
     * The query is attempted at most three times while it yields "0".
     * NOTE(review): "0" is also what again() returns on failure, so a genuine
     * count of zero is indistinguishable from an error and is retried too.
     *
     * @return string The extracted count, or "0".
     */
    public function geturl()
    {
        if ($this->cookie == "" || $this->cookie === null) {
            $this->dologin();
        }
        $this->getcloneEid();

        $result = $this->again();
        $times = 1;
        while ($result == "0") {
            $result = $this->again();
            $times++;
            if ($times == 3) {
                break;
            }
        }
        return $result;
    }

    /**
     * Posts the count query and slices the count out of the raw response.
     *
     * @return string "0" when the status is not 200 or no "count" marker is
     *                present; otherwise the text after the marker.
     */
    private function again()
    {
        $post_data = "viewType=day&startTime=" . $this->fromdate . "&endTime=" . $this->todate . "&dt=&dtt=day&st=MICRO_BLOG_ALL&fq=%7B%22blogType%22%3A0%2C%22blogStatus%22%3A0%2C%22content%22%3A%22%22%2C%22bloggerVipType%22%3A-1%2C%22minFans%22%3A%220%22%2C%22maxFans%22%3A%22-1%22%2C%22bloggerType%22%3A0%2C%22platformType%22%3A%22MICRO_BLOG_ALL%22%7D&q=" . urlencode($this->keyword) . "&cloneEid=" . $this->cloneEid;
        $result = $this->docurl($this->targeturl, TRUE, $post_data, $this->cookie);

        // Read the status from the status line itself rather than a fixed
        // offset, so "HTTP/2 200" is handled as well as "HTTP/1.1 200".
        if (!preg_match('#^HTTP/\S+\s+(\d{3})#', $result, $status) || $status[1] != "200") {
            return "0";
        }

        $pos = strpos($result, "count");
        if ($pos === false) {
            return "0";
        }
        // Skips 8 characters from the start of "count" and drops the final 2;
        // presumably the body ends with `"count":"<n>"}` - TODO confirm.
        return substr($result, $pos + 8, -2);
    }

    /**
     * Registers the keyword via addAttention and stores the returned id, which
     * is one of the fields of the final count query.
     */
    private function getcloneEid()
    {
        $post = "at=EVENT&st=MICRO_BLOG_ALL&name=" . urlencode($this->keyword) . "&keywords=" . urlencode($this->keyword);
        $output = $this->docurl($this->getcloneEidurl, TRUE, $post, $this->cookie);

        // Takes 36 characters starting 5 past the first "id" in the response;
        // left empty when the marker is missing instead of slicing garbage.
        $pos = strpos($output, "id");
        $this->cloneEid = ($pos === false) ? "" : (string) substr($output, $pos + 5, 36);
    }

    /**
     * Accumulates cookies: runs prelogin(), posts the login form, then replaces
     * the tail of the stored cookie with the cookie set by the login response.
     */
    private function dologin()
    {
        $this->prelogin();
        // NOTE(review): credentials are hard-coded here and the password looks
        // pre-hashed; the $username/$password properties are not used.
        $post = "client_screen=1440+x+900&langCode=&username=15221197583%40139.com&password=7c2605c596c3002057999999776af6d7";
        $result = $this->docurl($this->loginaction, TRUE, $post, $this->cookie);

        $parts = explode("\r\n\r\n", $result, 2);
        $header = $parts[0];

        // 58 characters following the last "Cookie: " in the response headers.
        // Matched case-insensitively because HTTP/2 lowercases header names.
        $cookie = $this->sliceFromLast($header, "Cookie:", 8, 58, TRUE);
        if ($cookie === "") {
            return;
        }

        // Keep the first 40 characters (the 38-character route slice plus
        // "; " built by prelogin()) and append the post-login cookie.
        $this->cookie = (string) substr($this->cookie, 0, 40);
        $this->cookie .= $cookie;
    }

    /**
     * Accumulates cookies: fetches the login page and stores the "route" and
     * "JSESSIONID" cookies found in the response headers.
     *
     * The cookie is left untouched when either marker is missing.
     */
    private function prelogin()
    {
        $result = $this->docurl($this->url, FALSE, "", $this->cookie);
        $parts = explode("\r\n\r\n", $result, 2);
        $header = $parts[0];

        // Fixed-width slices starting at the last occurrence of each cookie name.
        $cookie_route = $this->sliceFromLast($header, "route", 0, 38);
        $cookie_JSESS = $this->sliceFromLast($header, "JSESSIONID", 0, 51);
        if ($cookie_route === "" || $cookie_JSESS === "") {
            return;
        }
        $this->cookie = $cookie_route . "; " . $cookie_JSESS;
    }

    /**
     * Returns $length characters of $haystack starting $skip characters past
     * the last occurrence of $needle, or "" when $needle does not occur.
     */
    private function sliceFromLast($haystack, $needle, $skip, $length, $ignore_case = FALSE)
    {
        $pos = $ignore_case ? strripos($haystack, $needle) : strrpos($haystack, $needle);
        if ($pos === false) {
            return "";
        }
        return (string) substr($haystack, $pos + $skip, $length);
    }

    /**
     * Performs one HTTP request and returns the raw response (headers + body).
     *
     * @param string $url      Target URL.
     * @param bool   $is_post  Send $postdata as a POST body when true.
     * @param string $postdata URL-encoded form body.
     * @param string $cookie   Cookie header value; omitted when "".
     * @return string Raw response, or "" when the transfer failed.
     */
    private function docurl($url, $is_post = FALSE, $postdata = "", $cookie = "")
    {
        // CURLOPT_HTTPHEADER needs a list of "Name: value" strings, not a
        // name => value map. Accept-Encoding goes through CURLOPT_ENCODING so
        // cURL also decodes the compressed body before it is parsed.
        $header_lines = array();
        $encoding = null;
        foreach ($this->request_headers as $name => $value) {
            if (strcasecmp($name, 'Accept-Encoding') === 0) {
                $encoding = $value;
                continue;
            }
            $header_lines[] = $name . ': ' . $value;
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        // NOTE(review): peer verification is disabled, so the server
        // certificate is not validated. Enable it if the host has a valid chain.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        // Include response headers in the output; the cookie and status
        // parsing depend on them.
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header_lines);
        if ($encoding !== null) {
            curl_setopt($ch, CURLOPT_ENCODING, $encoding);
        }
        if ($is_post) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postdata);
        }
        if ($cookie != "") {
            curl_setopt($ch, CURLOPT_COOKIE, $cookie);
        }
        $output = curl_exec($ch);
        curl_close($ch);

        return ($output === false) ? "" : $output;
    }
}

// Crawl tasks live in the `keywords` table so they can be added or removed
// from outside this script.
$conn = mysqli_connect('localhost', '', '');
if (!$conn) {
    // No link exists at this point, so the connect-specific error function is required.
    die("连接数据库失败" . mysqli_connect_error());
}
mysqli_select_db($conn, "");
mysqli_set_charset($conn, "utf8");

$result = mysqli_query($conn, "SELECT * FROM keywords");
if (!$result) {
    die("查询失败" . mysqli_error($conn));
}

// Prepared once and reused for every row; the values come from the database
// and from the remote site, so they must not be concatenated into the SQL.
$update_stmt = mysqli_prepare($conn, "UPDATE keywords SET tempdate='1', daycount=? WHERE words=?");
if (!$update_stmt) {
    die("查询失败" . mysqli_error($conn));
}

while ($row = mysqli_fetch_object($result)) {
    echo "正在抓取" . $row->words . "...<br/>";

    if ($row->tempdate == '1') {
        // Row already flagged tempdate='1': append the count for the previous
        // calendar day to the stored series.
        $today = strtotime(date("Y-m-d", time()));
        $todate = $today - 1;
        $fromdate = $row->Fromdate;
        if ($today != strtotime($fromdate)) {
            $daycount = $row->daycount;
            echo "fromdate:" . date("Y-m-d H:i:s", ($todate + 1 - 86400)) . " todate " . date("Y-m-d H:i:s", ($todate));
            $a = new yingji($row->words, (1000 * ($todate + 1 - 86400)), (1000 * $todate));
            $count = $a->geturl();
            $daycount .= "," . $count;
            unset($a);

            $words = $row->words;
            var_dump(array('words' => $words, 'daycount' => $daycount));
            mysqli_stmt_bind_param($update_stmt, 'ss', $daycount, $words);
            $resul = mysqli_stmt_execute($update_stmt);
        }
    } else {
        // Row not flagged yet: fetch one count per whole day from Fromdate up
        // to now. The accumulator starts fresh for every row so a failed
        // update cannot leak one keyword's counts into the next.
        $daycount1 = "begin";
        $fromdate = strtotime($row->Fromdate);
        $times = floor((time() - $fromdate) / 86400);
        for ($i = 1; $i <= $times; $i++) {
            $a = new yingji($row->words, (1000 * $fromdate), (1000 * ($fromdate + 86400 - 1)));
            $count = $a->geturl();
            echo date("Y-m-d H:i:s", $fromdate) . "  to " . date("Y-m-d H:i:s", ($fromdate + 86400 - 1)) . "<br/>";
            $fromdate += 86400;
            $daycount1 .= "," . $count;
            unset($a);

            // Push progress to the client; ob_flush() raises a notice when no
            // output buffer is active.
            if (ob_get_level() > 0) {
                ob_flush();
            }
            flush();
            sleep(1);
        }

        $words = $row->words;
        var_dump(array('words' => $words, 'daycount' => $daycount1));
        mysqli_stmt_bind_param($update_stmt, 'ss', $daycount1, $words);
        $resul = mysqli_stmt_execute($update_stmt);
    }
}

mysqli_stmt_close($update_stmt);
mysqli_free_result($result);
mysqli_close($conn);

Mark给自己看

0 0
原创粉丝点击