一个PHP页面抓取函数

来源:互联网 发布:华为windows平板 编辑:程序博客网 时间:2024/05/20 08:26
/**
 * Fetch the body of a page with a single HTTP/1.0 GET request over a raw socket.
 *
 * The response headers are discarded and only the body is returned. Redirects
 * are not followed and the status code is not inspected. This is a best-effort
 * helper: every failure (unparsable URL, missing host, connection error, write
 * error) yields an empty string rather than a warning or an exception.
 *
 * @param string    $url     Absolute URL to fetch; "https" URLs are fetched over TLS.
 * @param int|float $timeout Budget in seconds for connecting; whatever is left after
 *                           the connect (at least 1 second) becomes the read timeout.
 * @param string    $referer Referer header value; defaults to the root of the target site.
 *
 * @return string The response body, or "" on any failure.
 */
function fcontents( $url, $timeout = 5, $referer = "" )
{
    $bits = parse_url( (string) $url );
    // parse_url() returns false for seriously malformed URLs, and omits every
    // component that is absent, so each key must be checked before it is read.
    if ( !is_array( $bits ) || empty( $bits['host'] ) ) {
        return "";
    }

    $host        = $bits['host'];
    $scheme      = isset( $bits['scheme'] ) ? strtolower( $bits['scheme'] ) : 'http';
    $secure      = $scheme === 'https';
    $defaultPort = $secure ? 443 : 80;
    $port        = !empty( $bits['port'] ) ? intval( $bits['port'] ) : $defaultPort;
    // The Host header carries the port only when it is not the scheme's default.
    $portq       = $port === $defaultPort ? "" : ":{$port}";

    // HTTPS needs a TLS handshake; a plain TCP socket to port 443 cannot work.
    $target = $secure ? "ssl://{$host}" : $host;

    $started = time();
    // "@" is deliberate: fsockopen() raises a warning on every connection
    // failure, and this function's contract is to return "" silently instead.
    $fp = @fsockopen( $target, $port, $errno, $errstr, $timeout );
    if ( !$fp ) {
        return "";
    }

    // Give reads whatever is left of the budget, but never less than one second.
    // stream_set_timeout() takes whole seconds, hence the integer cast.
    $remaining = (int) ( $timeout - ( time() - $started ) );
    if ( $remaining < 1 ) {
        $remaining = 1;
    }
    stream_set_timeout( $fp, $remaining );

    if ( !$referer ) {
        $referer = "{$scheme}://{$host}/";
    }
    // Strip line breaks so a caller-supplied referer cannot inject extra headers.
    $referer = str_replace( array( "\r", "\n" ), '', (string) $referer );

    $path = !empty( $bits['path'] ) ? $bits['path'] : "/";
    if ( isset( $bits['query'] ) && $bits['query'] !== '' ) {
        $path .= "?" . $bits['query'];
    }

    $out  = "GET {$path} HTTP/1.0\r\n";
    $out .= "Host: {$host}{$portq}\r\n";
    $out .= "User-Agent: Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; SV1)\r\n";
    $out .= "Accept: */*\r\n";
    $out .= "Accept-Language: zh-cn\r\n";
    $out .= "Accept-Encoding: identity\r\n";
    $out .= "Referer: {$referer}\r\n";
    $out .= "Connection: Close\r\n\r\n";

    if ( @fwrite( $fp, $out ) === false ) {
        fclose( $fp );
        return "";
    }

    $data      = "";
    $inHeaders = true;
    // Compare against false explicitly: a bare truthiness test would stop at a
    // final line consisting of "0". "@" keeps abrupt TLS disconnects silent.
    while ( ( $line = @fgets( $fp, 2048 ) ) !== false ) {
        if ( $inHeaders ) {
            // The first blank line separates the headers from the body.
            if ( trim( $line ) === '' ) {
                $inHeaders = false;
            }
            continue;
        }
        $data .= $line;
    }
    fclose( $fp );

    return $data;
}

0 0
原创粉丝点击