获取 HTTP 响应头及解决抓取网页乱码的方法

来源:互联网 发布:你不知道的js中文pdf版 编辑:程序博客网 时间:2024/05/08 11:45
<?php
//header('content-type:type');
// Page that the script below downloads.
$url="http://www.sohu.com/";
// The response headers show whether the body is compressed,
// e.g. [Content-Encoding] => gzip
/* Method 1: fetch the headers as an associative array
print_r(get_headers($url,1));die;
Method 2: open the URL as a stream and dump its metadata

$fp=fopen($url,"r");
print_r(stream_get_meta_data($fp));die;
*/


//ini_set('user_agent','Mozilla/4.0 (comptible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727;http://www.aptimize.com');
/*
Alternative that needs the zlib extension: read through the compress.zlib:// wrapper
 $content=file_get_contents("compress.zlib://".$url);
//mb_convert_encoding($content,'GBK','utf-8');
echo $content;*/


// Fetch the page with cURL.
// Look the Content-Encoding header up by name rather than by a fixed
// position: header order and count differ between servers and redirects.
$head = get_headers($url);
$gzip = false; // stays false when the header is absent or the lookup failed
if ($head !== false) {
    $prefix = 'Content-Encoding:';
    foreach ($head as $line) {
        // Header names are case-insensitive; the last match wins so that the
        // final response after any redirects decides.
        if (stripos($line, $prefix) === 0) {
            $gzip = trim(substr($line, strlen($prefix)));
        }
    }
}
$a = curl_get($url, $gzip);
print_r($a);
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Redirects are not followed (CURLOPT_FOLLOWLOCATION is not set).
 *
 * @param string $url  Address to request.
 * @param mixed  $gzip Any truthy value turns on decoding of a compressed body.
 * @return string|false Response body, or false when the handle could not be
 *                      created or the transfer failed.
 */
function curl_get($url, $gzip=false){
    $curl = curl_init($url);
    if ($curl === false) {
        // No handle to configure; report failure the same way curl_exec() does.
        return false;
    }
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    if ($gzip) {
        // This is the key step: an empty string makes cURL send an
        // Accept-Encoding header listing every encoding it supports and decode
        // the reply, so gzip, deflate, etc. all come back as readable text.
        curl_setopt($curl, CURLOPT_ENCODING, "");
    }
    $content = curl_exec($curl);
    curl_close($curl);
    return $content;
}

?>
0 0
原创粉丝点击