采集/小偷程序核心功能

来源:互联网 发布:雅思阅读知乎 编辑:程序博客网 时间:2024/05/16 02:31
<?php
/**
 * Fetch the body of a URL with a hand-built HTTP/1.0 GET request over a socket.
 *
 * The response headers are discarded; everything after the first blank line
 * is returned as-is. Redirects and status codes are not interpreted.
 *
 * @param string    $url     Absolute URL to fetch (http or https).
 * @param int|float $timeout Overall time budget in seconds. It is used for the
 *                           connect; whatever is left (at least 1 second)
 *                           becomes the read timeout of the stream.
 * @param string    $fromurl Value for the Referer header. When empty, the
 *                           root of the target site is used.
 *
 * @return string Response body, or "" when the URL has no host, the
 *                connection cannot be opened or the request cannot be sent.
 */
function fcontents( $url, $timeout = 5, $fromurl = "" )
{
    if ( !is_string( $url ) )
    {
        return "";
    }

    // parse_url() returns false for seriously malformed URLs.
    $urlarray = parse_url( $url );
    if ( !is_array( $urlarray ) || empty( $urlarray['host'] ) )
    {
        return "";
    }

    $host   = $urlarray['host'];
    $scheme = isset( $urlarray['scheme'] ) ? strtolower( $urlarray['scheme'] ) : "http";

    // An explicit port wins; otherwise fall back to the scheme's default port.
    $defaultport = $scheme === "https" ? 443 : 80;
    $urlport     = empty( $urlarray['port'] ) ? $defaultport : intval( $urlarray['port'] );

    // The Host header carries the port only when it is not the scheme default.
    $toport = $urlport === $defaultport ? "" : ":".$urlport;

    // https must go through the ssl:// transport; a bare TCP socket on 443
    // would send the request in plain text and the server would reject it.
    $target = $scheme === "https" ? "ssl://".$host : $host;

    $starttime = time( );
    // "@" is kept on purpose: a failed connect is reported through the ""
    // return value instead of a PHP warning (best-effort fetch).
    $fp = @fsockopen( $target, $urlport, $errno, $errstr, $timeout );
    if ( !$fp )
    {
        return "";
    }

    // Give the time left after connecting to the read phase, minimum 1 second.
    $timeout = (int) ( $timeout - ( time( ) - $starttime ) );
    if ( $timeout < 1 )
    {
        $timeout = 1;
    }
    stream_set_timeout( $fp, $timeout );

    if ( !$fromurl )
    {
        $fromurl = $scheme."://".$host."/";
    }
    // Strip CR/LF so a caller-supplied Referer cannot inject extra headers.
    $fromurl = str_replace( array( "\r", "\n" ), "", $fromurl );

    // Request target: path (default "/") plus the query string, if any.
    $allurl = ( isset( $urlarray['path'] ) && $urlarray['path'] !== "" ) ? $urlarray['path'] : "/";
    if ( isset( $urlarray['query'] ) && $urlarray['query'] !== "" )
    {
        $allurl .= "?".$urlarray['query'];
    }

    // HTTP/1.0 + "Connection: Close" + identity encoding: the server answers
    // with an unchunked, uncompressed body and then closes the socket.
    $request  = "GET ".$allurl." HTTP/1.0\r\n";
    $request .= "Host: ".$host.$toport."\r\n";
    $request .= "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)\r\n";
    $request .= "Accept: */*\r\n";
    $request .= "Accept-Language: zh-cn\r\n";
    $request .= "Accept-Encoding: identity\r\n";
    $request .= "Referer: ".$fromurl."\r\n";
    $request .= "Connection: Close\r\n\r\n";

    if ( fwrite( $fp, $request ) === false )
    {
        fclose( $fp );
        return "";
    }

    $goodtext  = "";
    $inheaders = true; // true until the blank line that ends the headers
    // Compare against false explicitly: a final body line of "0" is falsy
    // and would otherwise end the loop early and be lost.
    while ( ( $line = fgets( $fp, 2048 ) ) !== false )
    {
        if ( $inheaders )
        {
            if ( trim( $line ) === "" )
            {
                $inheaders = false;
            }
        }
        else
        {
            $goodtext .= $line;
        }
    }
    fclose( $fp );

    return $goodtext;
}
?>
//=============关键字替换伪原创
 这个直接用 str_replace 就可以了:指定要被替换的字符串和替换成的字符串即可。比较方便的做法是把替换规则放在一个 txt 文件里,
再读成数组循环替换。
//==============正则提取指定内容
这里用 preg 系列正则函数(如 preg_match / preg_match_all)就能搞定。

原创粉丝点击