【转载+原创】curl_multi_init 简单并发下载图片问题

来源:互联网 发布:华润置地 知乎 编辑:程序博客网 时间:2024/05/29 11:10
/**
 * Downloads a batch of URLs concurrently through the curl_multi interface.
 *
 * One easy handle is created per distinct URL; results are returned keyed by
 * URL, so a URL that appears several times in the input is fetched once and
 * yields a single entry.
 */
class BatchedCurlUtil
{
    /** @var resource|object|null cURL multi handle; stays null when the constructor received a non-array. */
    private $_mh;

    /** @var int Per-transfer timeout in seconds (passed to CURLOPT_TIMEOUT). */
    private $_timeout = 30;

    /** @var array Easy handles keyed by the URL they fetch. */
    private $_handleArr = array();

    /** @var array cURL result codes (CURLE_*) keyed by URL, recorded as transfers finish. */
    private $_resultArr = array();

    /**
     * Creates the multi handle and registers one easy handle per distinct URL.
     *
     * @param array $url_arr List of URLs to fetch. Any non-array value leaves
     *                       the instance empty, so batchedGetHtml() returns array().
     */
    public function __construct($url_arr)
    {
        if (!is_array($url_arr)) {
            // The value returned by a constructor is discarded by `new`,
            // so simply leave the object without any handles.
            return;
        }

        $this->_mh = curl_multi_init();

        foreach ($url_arr as $url) {
            if (isset($this->_handleArr[$url])) {
                // Handles are keyed by URL. Registering a second handle for the
                // same URL would download it again and then lose track of the
                // first handle, which could never be read or released.
                continue;
            }

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Language:zh-CN,zh;q=0.8'));
            // Response headers are not wanted in the returned body.
            curl_setopt($ch, CURLOPT_HEADER, 0);
            // Return the body from curl_multi_getcontent() instead of printing it.
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, $this->_timeout);
            // Follow "Location:" redirects, but at most CURLOPT_MAXREDIRS of them.
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 5);

            curl_multi_add_handle($this->_mh, $ch);
            $this->_handleArr[$url] = $ch;
        }
    }

    /**
     * Detaches and releases every easy handle, then closes the multi handle.
     */
    public function __destruct()
    {
        if ($this->_mh === null) {
            // Constructor bailed out before curl_multi_init(); nothing to release.
            return;
        }

        foreach ($this->_handleArr as $ch) {
            curl_multi_remove_handle($this->_mh, $ch);
            // Only resource-based handles (PHP < 8) need an explicit close;
            // object handles are released by the engine.
            if (is_resource($ch)) {
                curl_close($ch);
            }
        }
        $this->_handleArr = array();

        curl_multi_close($this->_mh);
        $this->_mh = null;
    }

    /**
     * Runs all registered transfers to completion and collects the bodies.
     *
     * The earlier debugging dump (print_r of the handle map followed by exit)
     * is gone, so the transfers are actually performed.
     *
     * @return array Map of URL => response body (string), or false for a
     *               transfer that did not finish with CURLE_OK.
     */
    public function batchedGetHtml()
    {
        if ($this->_mh === null) {
            return array();
        }

        $running = 0;
        do {
            // Older libcurl versions ask to be called again immediately.
            do {
                $status = curl_multi_exec($this->_mh, $running);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            if ($status !== CURLM_OK) {
                break;
            }

            // Block until there is socket activity instead of spinning the CPU.
            // A return of -1 means there was nothing to wait on, so back off briefly.
            if ($running > 0 && curl_multi_select($this->_mh, 1.0) === -1) {
                usleep(1000);
            }
        } while ($running > 0);

        // Per-transfer result codes are only available through
        // curl_multi_info_read(); curl_errno() on an easy handle driven by the
        // multi interface reports 0 until these messages have been read.
        while (($info = curl_multi_info_read($this->_mh)) !== false) {
            if ($info['msg'] !== CURLMSG_DONE) {
                continue;
            }
            $key = array_search($info['handle'], $this->_handleArr, true);
            if ($key !== false) {
                $this->_resultArr[$key] = $info['result'];
            }
        }

        $dataArr = array();
        foreach ($this->_handleArr as $url => $ch) {
            $succeeded = isset($this->_resultArr[$url]) && $this->_resultArr[$url] === CURLE_OK;
            $dataArr[$url] = $succeeded ? curl_multi_getcontent($ch) : false;
        }

        return $dataArr;
    }
}
 $url = array('http://img01.taobaocdn.com/bao/uploaded/i1/19297024898783520/T1fKGoXt8iXXXXXXXX_!!0-item_pic.jpg','http://img01.taobaocdn.com/bao/uploaded/i1/1040439297/T2GfL1XdpXXXXXXXXX_!!1040439297.jpg','http://img01.taobaocdn.com/bao/uploaded/i1/15291036636308449/T1b6uHXyFcXXXXXXXX_!!0-item_pic.jpg','http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg','http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg','http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg');
以上 $url 数组中的后三个地址是重复的。
下面是 print_r($this->_handleArr); 的输出结果。可以看到,由于数组以 URL 作为键,重复的 URL 只保留了一项(暂时这么理解)。
Array ( [http://img01.taobaocdn.com/bao/uploaded/i1/19297024898783520/T1fKGoXt8iXXXXXXXX_!!0-item_pic.jpg] => Resource id #3 [http://img01.taobaocdn.com/bao/uploaded/i1/1040439297/T2GfL1XdpXXXXXXXXX_!!1040439297.jpg] => Resource id #4 [http://img01.taobaocdn.com/bao/uploaded/i1/15291036636308449/T1b6uHXyFcXXXXXXXX_!!0-item_pic.jpg] => Resource id #5 [http://img03.taobaocdn.com/bao/uploaded/i3/272715291/T2uUcmXhlXXXXXXXXX_!!272715291.jpg] => Resource id #8 ) 因此返回的结果里同一张图片只会出现一次(当然也可以在程序里先行判断去重)。需要注意的是,如果要批量替换某段文本里的图片地址,替换前后必须一一对应,所以要先去除重复的地址。
替换时可以使用 preg_replace(),或者 str_replace() 的数组形式。注意 str_replace() 的问题:它按数组顺序依次替换,后面的替换可能会作用到前面已经替换过的结果上。



原创粉丝点击