curl抓取
来源:互联网 发布:怎么区分淘宝二手镜头 编辑:程序博客网 时间:2024/05/17 23:30
<?php
/**
 * Page-fetching helper built on the cURL extension.
 *
 * Both fetchers send an explicit "Accept-Encoding:gzip" request header and
 * decompress the response themselves, but only when the body really is a
 * gzip stream, so plain-text answers are returned intact.
 */
class Reptile{
    /** Seconds to wait for the connection to be established. */
    const CONNECT_TIMEOUT = 5;

    /** Proxy used when a caller passes $proxy == 1. */
    const PROXY_ADDRESS = '127.0.0.1:9090';

    /** Browser-like User-Agent sent with every request. */
    const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36';

    /** Candidate source encodings tried when converting pages to UTF-8. */
    const SOURCE_ENCODINGS = 'UTF-8,GBK,GB2312,BIG5';

    /**
     * Fetch a single URL (one blocking transfer).
     *
     * @param string $url   Address to fetch.
     * @param int    $proxy Pass 1 to route the request through PROXY_ADDRESS.
     * @return string|false Response body (gunzipped when the server sent gzip),
     *                      or false when the transfer or the decompression failed.
     */
    public function getContent($url, $proxy = 0)
    {
        $ch = $this->createHandle($url, $proxy);
        $contents = curl_exec($ch);
        curl_close($ch);

        return $this->decodeBody($contents);
    }

    /**
     * Fetch several URLs concurrently through a cURL multi handle.
     *
     * @param array $urlArr Collection of page URLs; the keys are preserved in the result.
     * @param int   $proxy  Pass 1 to route every request through PROXY_ADDRESS.
     * @return array Bodies converted to UTF-8, keyed like $urlArr. A transfer that
     *               failed (or could not be decompressed) yields an empty string.
     */
    public function getContentByMulti($urlArr, $proxy = 0)
    {
        $mh = curl_multi_init();
        $conn = array();
        foreach ($urlArr as $i => $url) {
            $conn[$i] = $this->createHandle($url, $proxy);
            curl_multi_add_handle($mh, $conn[$i]);
        }

        $active = 0;
        do {
            // Old libcurl versions ask to be called again immediately.
            do {
                $status = curl_multi_exec($mh, $active);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            // Sleep until a socket has activity instead of spinning the CPU.
            // -1 means there was nothing to wait on; back off briefly so the
            // loop still does not turn into a busy-wait.
            if ($active && $status === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
                usleep(1000);
            }
        } while ($active && $status === CURLM_OK);

        $res = array();
        foreach ($conn as $i => $handle) {
            $body = $this->decodeBody(curl_multi_getcontent($handle));
            $res[$i] = ($body === false)
                ? ''
                : mb_convert_encoding($body, 'UTF-8', self::SOURCE_ENCODINGS);

            // Detach before closing so the multi handle does not keep a reference.
            curl_multi_remove_handle($mh, $handle);
            curl_close($handle);
        }
        curl_multi_close($mh);

        return $res;
    }

    /**
     * Build a cURL easy handle with the options shared by both fetchers.
     *
     * @param string $url   Address the handle will request.
     * @param int    $proxy Pass 1 to enable the proxy.
     * @return resource|CurlHandle Configured, not yet executed handle.
     */
    private function createHandle($url, $proxy)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // Return the body from curl_exec() instead of echoing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, self::CONNECT_TIMEOUT);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Encoding:gzip'));
        curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);
        if ($proxy == 1) {
            curl_setopt($ch, CURLOPT_PROXY, self::PROXY_ADDRESS);
        }
        // Pages behind HTTP authentication additionally need:
        //   curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
        //   curl_setopt($ch, CURLOPT_USERPWD, US_NAME.":".US_PWD);

        return $ch;
    }

    /**
     * Decompress a response body when, and only when, it is a gzip stream.
     *
     * @param mixed $raw Value returned by curl_exec() / curl_multi_getcontent().
     * @return string|false Decoded body; false when $raw is not a string
     *                      (failed transfer) or gzdecode() rejects the data.
     */
    private function decodeBody($raw)
    {
        if (!is_string($raw)) {
            return false;
        }
        // Every gzip member starts with the magic bytes 0x1f 0x8b; anything
        // else was sent uncompressed and must be passed through untouched.
        if (strncmp($raw, "\x1f\x8b", 2) !== 0) {
            return $raw;
        }

        return gzdecode($raw);
    }
}//class end
/**
 * Page-fetching helper built on the cURL extension.
 *
 * Both fetchers send an explicit "Accept-Encoding:gzip" request header and
 * decompress the response themselves, but only when the body really is a
 * gzip stream, so plain-text answers are returned intact.
 */
class Reptile{
    /** Seconds to wait for the connection to be established. */
    const CONNECT_TIMEOUT = 5;

    /** Proxy used when a caller passes $proxy == 1. */
    const PROXY_ADDRESS = '127.0.0.1:9090';

    /** Browser-like User-Agent sent with every request. */
    const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36';

    /** Candidate source encodings tried when converting pages to UTF-8. */
    const SOURCE_ENCODINGS = 'UTF-8,GBK,GB2312,BIG5';

    /**
     * Fetch a single URL (one blocking transfer).
     *
     * @param string $url   Address to fetch.
     * @param int    $proxy Pass 1 to route the request through PROXY_ADDRESS.
     * @return string|false Response body (gunzipped when the server sent gzip),
     *                      or false when the transfer or the decompression failed.
     */
    public function getContent($url, $proxy = 0)
    {
        $ch = $this->createHandle($url, $proxy);
        $contents = curl_exec($ch);
        curl_close($ch);

        return $this->decodeBody($contents);
    }

    /**
     * Fetch several URLs concurrently through a cURL multi handle.
     *
     * @param array $urlArr Collection of page URLs; the keys are preserved in the result.
     * @param int   $proxy  Pass 1 to route every request through PROXY_ADDRESS.
     * @return array Bodies converted to UTF-8, keyed like $urlArr. A transfer that
     *               failed (or could not be decompressed) yields an empty string.
     */
    public function getContentByMulti($urlArr, $proxy = 0)
    {
        $mh = curl_multi_init();
        $conn = array();
        foreach ($urlArr as $i => $url) {
            $conn[$i] = $this->createHandle($url, $proxy);
            curl_multi_add_handle($mh, $conn[$i]);
        }

        $active = 0;
        do {
            // Old libcurl versions ask to be called again immediately.
            do {
                $status = curl_multi_exec($mh, $active);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            // Sleep until a socket has activity instead of spinning the CPU.
            // -1 means there was nothing to wait on; back off briefly so the
            // loop still does not turn into a busy-wait.
            if ($active && $status === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
                usleep(1000);
            }
        } while ($active && $status === CURLM_OK);

        $res = array();
        foreach ($conn as $i => $handle) {
            $body = $this->decodeBody(curl_multi_getcontent($handle));
            $res[$i] = ($body === false)
                ? ''
                : mb_convert_encoding($body, 'UTF-8', self::SOURCE_ENCODINGS);

            // Detach before closing so the multi handle does not keep a reference.
            curl_multi_remove_handle($mh, $handle);
            curl_close($handle);
        }
        curl_multi_close($mh);

        return $res;
    }

    /**
     * Build a cURL easy handle with the options shared by both fetchers.
     *
     * @param string $url   Address the handle will request.
     * @param int    $proxy Pass 1 to enable the proxy.
     * @return resource|CurlHandle Configured, not yet executed handle.
     */
    private function createHandle($url, $proxy)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // Return the body from curl_exec() instead of echoing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, self::CONNECT_TIMEOUT);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Encoding:gzip'));
        curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);
        if ($proxy == 1) {
            curl_setopt($ch, CURLOPT_PROXY, self::PROXY_ADDRESS);
        }
        // Pages behind HTTP authentication additionally need:
        //   curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
        //   curl_setopt($ch, CURLOPT_USERPWD, US_NAME.":".US_PWD);

        return $ch;
    }

    /**
     * Decompress a response body when, and only when, it is a gzip stream.
     *
     * @param mixed $raw Value returned by curl_exec() / curl_multi_getcontent().
     * @return string|false Decoded body; false when $raw is not a string
     *                      (failed transfer) or gzdecode() rejects the data.
     */
    private function decodeBody($raw)
    {
        if (!is_string($raw)) {
            return false;
        }
        // Every gzip member starts with the magic bytes 0x1f 0x8b; anything
        // else was sent uncompressed and must be passed through untouched.
        if (strncmp($raw, "\x1f\x8b", 2) !== 0) {
            return $raw;
        }

        return gzdecode($raw);
    }
}//class end
0 0
- curl抓取
- 用curl抓取数据
- 用curl抓取数据
- CURL多线程抓取网页
- curl 抓取google简体中文
- curl 抓取数据
- curl用cookie抓取
- php curl 多线程抓取
- curl多批次抓取,
- curl抓取页面Demo
- curl抓取https网页
- php curl 抓取数据
- curl多线程抓取数据
- curl 实现抓取网站内容
- curl 抓取图片(PHP)
- php curl 代理 抓取数据
- 应用curl扩展抓取网页
- PHP使用cURL抓取数据
- GC overhead limit exceeded : Spark
- 浅析Android系统属性SystemProperties
- 一个经典例子让你彻彻底底理解java回调机制
- Notification(通知)的使用简介
- 尺寸工具箱DimenUtils
- curl抓取
- CDH离线安装成功,Mark下,20151012
- iOS图片处理,截图,缩放,存储
- 教你如何调用百度编辑器ueditor的上传图片、上传文件等模块
- java 类加载机制
- thrift的使用介绍
- Android厨房的使用环境搭建以及使用
- Erlang 游戏后端性能优化总结
- JDBC(Java Database Connection) 教程