PHP模拟登陆
来源:互联网 发布:视唱练耳软件 编辑:程序博客网 时间:2024/05/17 01:00
PHP模拟登陆抓取,不使用cookieJar文件保存cookie。第一次尝试写面向对象,Mark一下,自己留着看。其中的几个难点:password的加密方法、cookie的接连获取,以及巧用substr()和strpos()取值。
<?php
set_time_limit(120);
date_default_timezone_set('Asia/Shanghai');

/**
 * Simulated-login scraper.
 *
 * Logs in to the target site with cURL, keeps the session cookies in memory
 * (no cookie-jar file), and posts a keyword query for a time window to the
 * "query-micro-blogs-count" endpoint.
 *
 * Usage: $count = (new yingji($keyword, $fromMs, $toMs))->geturl();
 * The callers in this file pass $fromdate/$todate as Unix timestamps
 * multiplied by 1000 (milliseconds).
 */
class yingji
{
    /** Cookie header value accumulated by prelogin()/dologin(). */
    private $cookie = "";
    /** Login name; it is URL-encoded when the login form is posted. */
    private $username = "email";
    /**
     * Login password as the site expects it on the wire.
     * NOTE(review): the original request carried a 32-character hex string,
     * so this is presumably an already-hashed value - confirm.
     */
    private $password = "password";
    private $url = "https://host/login";
    private $loginaction = "https://host/loginAction";
    private $getcloneEidurl = "https://host/monitor/pad/addAttention";
    private $targeturl = "https://host/monitor/query-micro-blogs-count";
    /**
     * Request headers as name => value; docurl() converts them to the
     * "Name: value" lines cURL requires. Accept-Encoding is deliberately not
     * listed here: docurl() sets CURLOPT_ENCODING so cURL both advertises and
     * decodes compression.
     */
    private $request_headers = array(
        'Host' => 'host',
        'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
        'Accept' => '*/*',
        'Accept-Language' => 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'X-Requested-With' => 'XMLHttpRequest',
        'Connection' => 'keep-alive',
    );
    /** Value cut out of the addAttention response and sent with the final query. */
    private $cloneEid;
    public $keyword;
    public $fromdate;
    public $todate;

    /**
     * @param string     $keyword  Search keyword.
     * @param int|string $fromdate Window start (milliseconds, as passed by the callers in this file).
     * @param int|string $todate   Window end (milliseconds, as passed by the callers in this file).
     */
    public function __construct($keyword, $fromdate, $todate)
    {
        $this->keyword = $keyword;
        $this->fromdate = $fromdate;
        $this->todate = $todate;
    }

    /**
     * Log in if no cookie is held yet, fetch the cloneEid, then run the count
     * query, making at most three attempts in total.
     *
     * @return string The extracted count, or "0" when every attempt failed.
     */
    public function geturl()
    {
        if ($this->cookie === "" || $this->cookie === null) {
            $this->dologin();
        }
        $this->getcloneEid();

        $result = $this->again();
        $times = 1;
        while ($result === "0" && $times < 3) {
            $result = $this->again();
            $times++;
        }
        return $result;
    }

    /**
     * Perform one count query.
     *
     * @return string "0" when the response status is not 200 or no "count"
     *                marker is present; otherwise the text cut out after it.
     */
    private function again()
    {
        $post_data = "viewType=day&startTime=" . $this->fromdate . "&endTime=" . $this->todate
            . "&dt=&dtt=day&st=MICRO_BLOG_ALL&fq=%7B%22blogType%22%3A0%2C%22blogStatus%22%3A0%2C%22content%22%3A%22%22%2C%22bloggerVipType%22%3A-1%2C%22minFans%22%3A%220%22%2C%22maxFans%22%3A%22-1%22%2C%22bloggerType%22%3A0%2C%22platformType%22%3A%22MICRO_BLOG_ALL%22%7D"
            . "&q=" . urlencode($this->keyword) . "&cloneEid=" . $this->cloneEid;
        $result = $this->docurl($this->targeturl, true, $post_data, $this->cookie);

        // Read the status code from the status line itself rather than from a
        // fixed byte offset, so "HTTP/2 200" is handled like "HTTP/1.1 200".
        if (!preg_match('#^HTTP/\S+ (\d{3})#', $result, $status) || $status[1] !== "200") {
            return "0";
        }

        $pos = strpos($result, "count");
        if ($pos === false) {
            return "0";
        }
        // Skips the 5-character key plus the 3 characters after it and drops
        // the last 2 characters of the response.
        // NOTE(review): presumably the body ends like `"count":"<n>"}` - confirm.
        return substr($result, $pos + 8, -2);
    }

    /**
     * Fetch one of the values the final query has to post (cloneEid).
     * Leaves $this->cloneEid empty when the response has no "id" marker.
     */
    private function getcloneEid()
    {
        $post = "at=EVENT&st=MICRO_BLOG_ALL&name=" . urlencode($this->keyword)
            . "&keywords=" . urlencode($this->keyword);
        $output = $this->docurl($this->getcloneEidurl, true, $post, $this->cookie);

        $pos = strpos($output, "id");
        if ($pos === false) {
            $this->cloneEid = "";
            return;
        }
        // 36 characters starting 5 past the first "id".
        // NOTE(review): the length suggests a UUID-style value - confirm.
        $this->cloneEid = substr($output, $pos + 5, 36);
    }

    /**
     * Accumulate cookies, step 2: post the login form and append the cookie
     * from the response to the one collected by prelogin().
     */
    private function dologin()
    {
        $this->prelogin();
        // Credentials come from the configured properties instead of being
        // hard-coded in the request string.
        $post = "client_screen=1440+x+900&langCode=&username=" . urlencode($this->username)
            . "&password=" . urlencode($this->password);
        $result = $this->docurl($this->loginaction, true, $post, $this->cookie);
        $header = $this->headerOf($result);

        // Case-insensitive so that a lowercase "set-cookie:" header also matches.
        $pos = strripos($header, "Cookie:");
        if ($pos === false) {
            // No cookie in the login response: keep what prelogin() collected.
            return;
        }
        // Fixed widths kept from the original: the first 40 characters of the
        // existing cookie are retained and 58 characters following the last
        // "Cookie: " are appended.
        // NOTE(review): these widths match one specific server's cookie
        // lengths - confirm before reuse.
        $cookie = substr($header, $pos + 8, 58);
        $this->cookie = substr($this->cookie, 0, 40) . $cookie;
    }

    /**
     * Accumulate cookies, step 1: GET the login page and build the initial
     * cookie string from the "route" and "JSESSIONID" entries in its header.
     */
    private function prelogin()
    {
        $result = $this->docurl($this->url, false, "", $this->cookie);
        $header = $this->headerOf($result);

        // Fixed widths (38 and 51) kept from the original.
        // NOTE(review): they assume this server's exact cookie lengths - confirm.
        $cookie_route = substr($header, strrpos($header, "route"), 38);
        $cookie_JSESS = substr($header, strrpos($header, "JSESSIONID"), 51);
        // The result is passed to CURLOPT_COOKIE on later requests.
        $this->cookie = $cookie_route . "; " . $cookie_JSESS;
    }

    /**
     * Return the part of a raw response before the first blank line.
     *
     * @param string $response Raw output of docurl().
     * @return string
     */
    private function headerOf($response)
    {
        $parts = explode("\r\n\r\n", (string) $response, 2);
        return $parts[0];
    }

    /**
     * Execute one HTTP request.
     *
     * @param string $url      Target URL.
     * @param bool   $is_post  Send a POST with $postdata when true, otherwise a GET.
     * @param string $postdata URL-encoded request body.
     * @param string $cookie   Cookie header value; nothing is sent when empty.
     * @return string Response headers followed by the body, or "" when the transfer failed.
     */
    private function docurl($url, $is_post = FALSE, $postdata = "", $cookie = "")
    {
        // CURLOPT_HTTPHEADER needs a list of "Name: value" strings; an
        // associative array would hand cURL the bare values only.
        $header_lines = array();
        foreach ($this->request_headers as $name => $value) {
            $header_lines[] = $name . ": " . $value;
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        // NOTE(review): peer verification is disabled, so the server
        // certificate is not checked while credentials are being sent. Kept
        // as in the original; enable it once a CA bundle is available.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header_lines);
        // Lets cURL send Accept-Encoding and decode the body itself, so the
        // string offsets used by the callers operate on plain text.
        curl_setopt($ch, CURLOPT_ENCODING, "gzip, deflate");
        if ($is_post) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postdata);
        }
        if ($cookie != "") {
            curl_setopt($ch, CURLOPT_COOKIE, $cookie);
        }
        $output = curl_exec($ch);
        curl_close($ch);

        return $output === false ? "" : $output;
    }
}

/**
 * Store the collected day counts for one keyword and mark it as crawled.
 *
 * Uses a prepared statement because both values originate outside this
 * script (database rows and scraped responses).
 *
 * @param mysqli $conn     Open connection.
 * @param string $words    Keyword identifying the row.
 * @param string $daycount Comma-separated counts to store.
 * @return bool True when the UPDATE was executed successfully.
 */
function yingji_save_daycount($conn, $words, $daycount)
{
    $stmt = mysqli_prepare($conn, "UPDATE keywords SET tempdate='1', daycount=? WHERE words=?");
    if (!$stmt) {
        return false;
    }
    mysqli_stmt_bind_param($stmt, "ss", $daycount, $words);
    $ok = mysqli_stmt_execute($stmt);
    mysqli_stmt_close($stmt);
    return $ok;
}

// The database is driven from outside the class so crawl tasks can be added
// or removed by editing the keywords table.
$conn = mysqli_connect('localhost', '', '');
if (!$conn) {
    // There is no link to query after a failed connect; use the connect-error API.
    die("连接数据库失败" . mysqli_connect_error());
}
mysqli_select_db($conn, "");
mysqli_set_charset($conn, "utf8");

$query = "SELECT * FROM keywords";
$result = mysqli_query($conn, $query);
if (!$result) {
    die("查询keywords表失败" . mysqli_error($conn));
}

while ($row = mysqli_fetch_object($result)) {
    echo "正在抓取" . $row->words . "...<br/>";

    if ($row->tempdate == '1') {
        // Keyword already crawled once: append the count for the previous day.
        $today = strtotime(date("Y-m-d", time()));
        $todate = $today - 1;               // last second of the previous day
        $daystart = $todate + 1 - 86400;    // first second of the previous day

        if ($today != strtotime($row->Fromdate)) {
            $daycount = $row->daycount;
            echo "fromdate:" . date("Y-m-d H:i:s", $daystart) . " todate " . date("Y-m-d H:i:s", $todate);
            $a = new yingji($row->words, (1000 * $daystart), (1000 * $todate));
            $daycount .= "," . $a->geturl();
            unset($a);
            var_dump($daycount);
            yingji_save_daycount($conn, $row->words, $daycount);
        }
    } else {
        // Keyword never crawled: walk day by day from Fromdate up to now.
        $fromdate = strtotime($row->Fromdate);
        $times = floor((time() - $fromdate) / 86400);
        // Start fresh for every keyword so that a failed update for one row
        // cannot leak its counts into the next row.
        $daycount1 = "begin";

        for ($i = 1; $i <= $times; $i++) {
            $a = new yingji($row->words, (1000 * $fromdate), (1000 * ($fromdate + 86400 - 1)));
            $count = $a->geturl();
            echo date("Y-m-d H:i:s", $fromdate) . " to " . date("Y-m-d H:i:s", ($fromdate + 86400 - 1)) . "<br/>";
            $fromdate += 86400;
            $daycount1 .= "," . $count;
            unset($a);
            // ob_flush() raises a notice when no output buffer is active.
            if (ob_get_level() > 0) {
                ob_flush();
            }
            flush();
            sleep(1);
        }

        var_dump($daycount1);
        yingji_save_daycount($conn, $row->words, $daycount1);
    }
}

mysqli_close($conn);
Mark给自己看
0 0
- php curl模拟登陆
- php 模拟登陆
- php实现模拟登陆
- PHP HttpClient模拟登陆
- php模拟登陆
- PHP 模拟登陆实例
- php 模拟登陆
- php curl模拟登陆
- PHP模拟post登陆
- php模拟登陆
- PHP模拟登陆
- php模拟登陆
- php模拟登陆
- PHP网站模拟登陆
- php模拟登陆,网络爬虫
- php curl 模拟登陆https
- 网站模拟登陆备忘(php + python)
- php使用curl模拟用户登陆
- User Notifications Framework in iOS 10
- 正则表达式-匹配密码验证(密码强弱等级,不能匹配中文,匹配数字 字母 字符)
- struct结构的内存对齐
- python小练习2:删除列表中重复的元素
- Android 反编译 -smali语法
- PHP模拟登陆
- ELK性能优化
- Codeforces 450B Jzzhu and Sequences(矩阵快速幂)
- Ubuntu下格式化U盘的方法(基于格式化命令)
- 自定义实现ListView左滑删除
- 动画的简单应用
- 关于字节的换算
- MITM(中间人攻击)原理及防范初探(一)
- NOIP模拟题 by天津南开中学 莫凡[tarjan][树剖][并查集]