PHP 爬虫 两篇转载
来源:互联网 发布:相片变成漫画图软件 编辑:程序博客网 时间:2024/05/17 14:20
http://hi.baidu.com/xiaojiang/item/774af38966cf44ca98255ff0
<?php
/**
 * Small cURL wrapper for GET and POST requests with optional cookie-jar,
 * compression and proxy support.
 *
 * Typical use: create an instance, call set_value() once to configure it,
 * then call get() / post() as often as needed.
 */
class CurlComponent
{
    /** @var array Request header lines sent with every request. */
    public $headers = array();

    /** @var string Value sent as the User-Agent header. */
    public $user_agent;

    /** @var string Value passed to CURLOPT_ENCODING (e.g. 'gzip'). */
    public $compression;

    /** @var string Path of the file used both as cookie file and cookie jar. */
    public $cookie_file;

    /** @var string Proxy as "host:port"; an empty string disables the proxy. */
    public $proxy;

    /** @var bool Whether cookies are persisted to $cookie_file. */
    public $cookies = false;

    /**
     * @var int Value passed to CURLOPT_TIMEOUT, which cURL interprets in
     *          seconds. The default keeps the historical value of this class.
     */
    public $timeout = 30000;

    /**
     * Configure the instance.
     *
     * @param bool   $cookies     Persist cookies between requests.
     * @param string $cookie      Cookie file path (used only when $cookies is true).
     * @param string $compression Accepted content encoding.
     * @param string $proxy       Proxy as "host:port", or '' for none.
     */
    public function set_value($cookies = TRUE, $cookie = 'cookies.txt', $compression = 'gzip', $proxy = '')
    {
        // Assign instead of append so that calling set_value() again does
        // not send every default header twice.
        $this->headers = array(
            "Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg",
            "Connection: Keep-Alive",
            "Content-type: application/x-www-form-urlencoded",
        );
        $this->user_agent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0)";
        $this->compression = $compression;
        $this->proxy = $proxy;
        $this->cookies = $cookies;
        if ($this->cookies) {
            $this->cookie($cookie);
        }
    }

    /**
     * Select the cookie file, creating it when it does not exist yet.
     * Reports through error() (which terminates the script) when the file
     * cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    public function cookie($cookie_file)
    {
        if (!file_exists($cookie_file)) {
            // The warning is suppressed because the failure is reported
            // explicitly through error() below.
            $handle = @fopen($cookie_file, 'w');
            if ($handle === false) {
                $this->error("The cookie file could not be opened. Make sure this directory has the correct permissions");
            } else {
                // Close the handle we opened (not the path string).
                fclose($handle);
            }
        }
        $this->cookie_file = $cookie_file;
    }

    /**
     * Perform a GET request.
     *
     * @param string $url   Target URL.
     * @param string $refer Value for the Referer header.
     * @return string|bool  Response body, or false when curl_exec() fails.
     */
    public function get($url, $refer = '')
    {
        $process = $this->prepare_handle($url, $refer);
        $return = curl_exec($process);
        curl_close($process);
        return $return;
    }

    /**
     * Perform a POST request, following redirects.
     *
     * @param string       $url   Target URL.
     * @param string|array $data  Value passed to CURLOPT_POSTFIELDS.
     * @param string       $refer Value for the Referer header.
     * @return string|bool        Response body, or false when curl_exec() fails.
     */
    public function post($url, $data, $refer = '')
    {
        $process = $this->prepare_handle($url, $refer);
        curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($process, CURLOPT_POST, 1);
        $return = curl_exec($process);
        curl_close($process);
        return $return;
    }

    /**
     * Print an HTML error box and terminate the script.
     *
     * @param string $error Message to display.
     */
    public function error($error)
    {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }

    /**
     * Create a cURL handle carrying the options shared by get() and post().
     */
    private function prepare_handle($url, $refer)
    {
        $process = curl_init($url);
        curl_setopt($process, CURLOPT_REFERER, $refer);
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies) {
            // Same file for reading and writing so cookies persist across requests.
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, $this->timeout);
        if ($this->proxy) {
            // Apply the configured proxy to this request's own handle.
            curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        }
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        return $process;
    }
}
?>
http://www.hdj.me/get-cookie-without-cookiejar-by-curl
PHP中CURL类是一个非常牛逼的工具类,具体怎么牛逼就不啰嗦了。
对于COOKIE,CURL类也有很不错的支持,但不够灵活,并未能通过现成的方法以变量的形式获取到,而是要通过以下方法实现。
// Save cookies to cookie.txt (the same file is read from and written to)
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
先把COOKIE保存到文件,调用的时候还得读取文件,这样意味着两次的IO操作,效率如何,不用说大家都清楚了。
那么有没有办法可以绕过写读文件呢?不卖关子,直接上代码:
// Initialize cURL
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// Include the response headers in the returned data
curl_setopt($ch, CURLOPT_HEADER, 1);
// Return the raw response instead of printing it
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Execute the request and fetch the result
$content = curl_exec($ch);
// Total size of the header section(s); must be read before the handle is closed
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
// Close cURL
curl_close($ch);

$header = '';
$body = '';
$cookie = '';
if ($content !== false) {
    // Split by the reported header size rather than on the first blank line,
    // so a body that itself contains "\r\n\r\n" is not truncated.
    $header = substr($content, 0, $headerSize);
    $body = (string) substr($content, $headerSize);

    // Collect the name=value pair of every Set-Cookie header, dropping
    // attributes such as path/expires that must not be sent back.
    if (preg_match_all('/^set-cookie:\s*([^;\r\n]+)/mi', $header, $matches)) {
        $cookie = implode('; ', array_map('trim', $matches[1]));
    }
}

// $cookie can be used directly in a later cURL request:
// curl_setopt($ch, CURLOPT_COOKIE, $cookie);
打完收工!欢迎大家来喷!
0 0
- PHP 爬虫 两篇转载
- 转载爬虫
- 【php爬虫】
- php 爬虫
- 爬虫php
- 转载识别网络爬虫
- 转载的两篇qt 的技巧
- 转载两篇关于程序员的文章
- 转载两篇Gparted文章(一)
- 转载两篇关于WSGI的文章
- PHP(转载)
- php写爬虫工具
- php 简单爬虫
- PHP 爬虫记录
- PHP采集程序(爬虫)
- PHP写爬虫
- php 实现简单爬虫
- php爬虫框架crawler
- 遇到的一些Android小问题
- Windows下通过Cygwin使用OpenSSh
- STL hashmap
- Cube Stacking
- 【LeetCode】Convert Sorted List to Binary Search Tree
- PHP 爬虫 两篇转载
- 二插排序树
- hibernate demo
- 简单的vim配置文件
- eclipse django sae网站开发流程
- uva 537
- 5.3poj2964日历问题
- OrmLite动态创建表,一个实体类创建多张表的的偏招
- PL/SQL 编程(二)