PHP爬虫之刷博客访问量

来源:互联网 发布:python 矩阵相乘 编辑:程序博客网 时间:2024/05/20 04:14

本着学以致用的态度,现在想试试用爬虫技术来刷一刷博客访问量。

本文仅供学习交流之用,请勿滥用,以免给目标网站造成不必要的负担。

<?php
/**
 * Blog view-count crawler (for study purposes only).
 *
 * Starting from the first page of a CSDN blog's article list, the script:
 *   1. collects the links to the other list pages from the pagination bar,
 *   2. collects the link of every article found on those list pages,
 *   3. requests every article repeatedly, pausing between two requests.
 *
 * To use it, only $beginUrl has to be changed to the first page of your own
 * blog's article list.
 */

// First page of the blog's article list.
$beginUrl = "http://blog.csdn.net/qq_27988539?viewmode=contents";
// Site root; the hrefs scraped from the pages are site-relative paths.
$rooturl = "http://blog.csdn.net";
// How many times the whole list of articles is visited.
$rounds = 100;
// Pause between two article requests, in seconds.
$delaySeconds = 2;

/**
 * Downloads a page.
 *
 * @param string $url Absolute URL to request.
 *
 * @return string|null The response body, or null when the request failed.
 */
function fetchPage($url)
{
    $html = file_get_contents($url);
    if ($html === false) {
        // file_get_contents() already raised a warning; record which URL it was.
        error_log('Request failed: ' . $url);
        return null;
    }

    return $html;
}

/**
 * Extracts the href value of the first <a> element in an HTML fragment.
 *
 * The href attribute is matched by name, so attributes placed before it
 * (class, title, ...) are not mistaken for the link target.
 *
 * @param string $fragment HTML fragment to search.
 *
 * @return string|null The href value, or null when there is no non-empty href.
 */
function firstHref($fragment)
{
    $match = [];
    if (preg_match('/<a\s(?:[^>]*?\s)?href\s*=\s*"([^"]*)"/i', $fragment, $match) === 1
        && $match[1] !== ''
    ) {
        return $match[1];
    }

    return null;
}

$firstPageHtml = fetchPage($beginUrl);
if ($firstPageHtml === null) {
    error_log('Cannot download the article list, giving up.');
    exit(1);
}

// The article list may span several pages: collect the link of each page
// from the pagination bar (the id really is spelled "papelist" in the markup).
$pages = [];
$pagination = [];
if (preg_match('/<div id="papelist".*>.*<\/div>/sU', $firstPageHtml, $pagination) === 1) {
    $anchors = [];
    if (preg_match_all('/<a\b[^>]*>.*<\/a>/sU', $pagination[0], $anchors)) {
        // The last two anchors are dropped, exactly as the original script did
        // (presumably the "next page" / "last page" shortcuts - TODO confirm).
        foreach (array_slice($anchors[0], 0, -2) as $anchor) {
            $href = firstHref($anchor);
            if ($href !== null) {
                $pages[] = $href;
            }
        }
    }
}

// Every list page to scan, the start page first, without duplicates.
$blogPage = [$beginUrl];
foreach ($pages as $path) {
    $blogPage[] = $rooturl . $path;
}
$blogPage = array_values(array_unique($blogPage));

// Collect the link of every article on every list page.
$blogUrls = [];
foreach ($blogPage as $index => $listUrl) {
    // Index 0 is always $beginUrl, which has been downloaded already.
    $listHtml = ($index === 0) ? $firstPageHtml : fetchPage($listUrl);
    if ($listHtml === null) {
        // Skip a list page that could not be downloaded instead of aborting.
        continue;
    }

    $titles = [];
    if (preg_match_all('/<span class="link_title">.*<\/span>/sU', $listHtml, $titles)) {
        foreach ($titles[0] as $titleSpan) {
            $href = firstHref($titleSpan);
            if ($href !== null) {
                $blogUrls[] = $href;
            }
        }
    }
}
$blogUrls = array_values(array_unique($blogUrls));

if (empty($blogUrls)) {
    error_log('No article links found on ' . $beginUrl);
    exit(1);
}

// Visit every article link, $rounds times over.
for ($round = 0; $round < $rounds; $round++) {
    foreach ($blogUrls as $path) {
        // A failed visit is logged by fetchPage(); carry on with the next one.
        fetchPage($rooturl . $path);
        sleep($delaySeconds);
    }
}
?>