四脚猫php视频下载 检测一个网页上的链接是否都可访问(练一练)

来源:互联网 发布:淘宝网无法正常显示 编辑:程序博客网 时间:2024/05/16 12:12
思路:

1、 首先是采用curl的方式采集到这个网页的内容

2、 用正则表达式或者 HTML 解析器,把页面中所有 a 标签的 href(即链接 URL)提取出来

3、 对提取出的每一个 URL 发起请求,如果返回的 HTTP 状态码不是 2xx 或 3xx(例如 4xx、5xx,或者请求失败得到 0),就判定为异常。

打个小广告:四脚猫视频下载,四脚猫培训视频下载,四脚猫php视频下载,四脚猫php百度云视频下载,请联系本人V43599939

<?php
/**
 * Checks whether the links found on a web page are reachable.
 *
 * The page is downloaded with cURL, the href of every <a> tag is extracted,
 * and a body-less (CURLOPT_NOBODY) request is issued for each link. Links are
 * grouped by the HTTP status code cURL reports; a code of 0 means no HTTP
 * response was obtained (connection failure, timeout, unsupported scheme...).
 */
class http_stat
{
    /** @var string URL of the page scanned by the last successful call. */
    public $url;

    /** @var string Raw HTML of the page scanned by the last successful call. */
    private $document;

    /** @var array List of href values extracted from $document. */
    private $links;

    /**
     * Fetches $url and reports the HTTP status of every link on it.
     *
     * @param string $url Absolute URL of the page to scan.
     *
     * @return array|null Null when the page could not be downloaded or was
     *                    empty. Otherwise an array whose integer keys are HTTP
     *                    status codes, each mapping to the list of href values
     *                    (as written in the page, entity-decoded) that produced
     *                    that code. The extra key 'empty' holds the number of
     *                    bare "#" links and is only present when at least one
     *                    was found.
     */
    public function get_all_link_status($url)
    {
        // Download the page once and reuse the result.
        $document = $this->__get_document($url);
        if ($document === false || $document === '') {
            return null;
        }

        $this->url = $url;
        $this->document = $document;
        $this->links = $this->__strip_links($document);

        $res = array();
        // Pages often repeat the same link many times; ask for each target once.
        $status_cache = array();

        foreach ($this->links as $val) {
            if ($val === '#') {
                $res['empty'] = isset($res['empty']) ? $res['empty'] + 1 : 1;
                continue;
            }

            $target = $this->__resolve_url($this->url, $val);
            if (!isset($status_cache[$target])) {
                $status_cache[$target] = $this->__get_http_status($target);
            }
            $res[$status_cache[$target]][] = $val;
        }

        return $res;
    }

    /**
     * Turns an href found on the page into an absolute URL.
     *
     * Handles hrefs that already carry a scheme, protocol-relative hrefs
     * ("//host/path"), root-relative hrefs ("/path"), query-only and
     * fragment-only hrefs, and paths relative to the directory of the page.
     *
     * @param string $base Absolute URL of the page the href was found on.
     * @param string $href The href value, already trimmed and entity-decoded.
     *
     * @return string The URL to request.
     */
    private function __resolve_url($base, $href)
    {
        // Anything with its own scheme (http:, https:, mailto:, ...) is
        // already absolute and is passed through unchanged.
        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href)) {
            return $href;
        }

        $parts = parse_url($base);
        if ($parts === false || !isset($parts['scheme'], $parts['host'])) {
            // The base cannot be interpreted; fall back to plain concatenation.
            return $base . $href;
        }

        if (substr($href, 0, 2) === '//') {
            return $parts['scheme'] . ':' . $href;
        }

        $origin = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }
        $path = isset($parts['path']) ? $parts['path'] : '/';

        if ($href === '' || $href[0] === '#') {
            // A fragment is never sent to the server: the target is the page itself.
            return $origin . $path . (isset($parts['query']) ? '?' . $parts['query'] : '');
        }
        if ($href[0] === '/') {
            return $origin . $href;
        }
        if ($href[0] === '?') {
            return $origin . $path . $href;
        }

        // Relative path: resolve against the directory part of the page path.
        $dir = substr($path, 0, strrpos($path, '/') + 1);
        if ($dir === '') {
            $dir = '/';
        }

        return $origin . $dir . $href;
    }

    /**
     * Requests $s_url without downloading the body and returns the status code.
     *
     * Redirects are not followed, so a 3xx answer is reported as such.
     *
     * @param string $s_url Absolute URL to probe.
     *
     * @return int HTTP status code as reported by cURL, 0 when no HTTP
     *             response was received.
     */
    private function __get_http_status($s_url)
    {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $s_url);
        curl_setopt($curl, CURLOPT_HEADER, 1);
        curl_setopt($curl, CURLOPT_NOBODY, 1);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_TIMEOUT, 30);
        // Link targets come from an untrusted page: never let cURL touch
        // file://, ftp:// and friends. Such links end up with status 0.
        curl_setopt($curl, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_exec($curl);
        $rtn = curl_getinfo($curl, CURLINFO_HTTP_CODE);
        curl_close($curl);

        return $rtn;
    }

    /**
     * Downloads the page at $url.
     *
     * On a cURL error the error message is printed and false is returned.
     *
     * @param string $url Absolute URL of the page.
     *
     * @return string|false The response body, or false on a cURL error.
     */
    private function __get_document($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Same limit as the per-link probe, so a stalled server cannot hang the scan.
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        $response = curl_exec($ch);
        if (curl_errno($ch)) {
            print curl_error($ch);
            $response = false;
        }
        // Release the handle on the error path as well.
        curl_close($ch);

        return $response;
    }

    /**
     * Extracts the href value of every <a> tag in $document.
     *
     * Both single- and double-quoted attribute values are recognised, tags may
     * span several lines, and HTML entities in the value (e.g. "&amp;") are
     * decoded. Empty hrefs are dropped.
     *
     * @param string $document HTML source to scan.
     *
     * @return array List of non-empty href strings, in document order.
     */
    private function __strip_links($document)
    {
        $match = array();

        // "<a" + whitespace, optionally other attributes, then a whitespace-
        // delimited href attribute whose value is closed by the quote that opened it.
        $pattern = '/<a\s(?:[^>]*?\s)?href\s*=\s*(["\'])(.*?)\1/is';
        if (!preg_match_all($pattern, $document, $found)) {
            return $match;
        }

        foreach ($found[2] as $val) {
            $val = trim(html_entity_decode($val, ENT_QUOTES, 'UTF-8'));
            if ($val !== '') {
                $match[] = $val;
            }
        }

        return $match;
    }
}

// Demo: scan the Sina home page and dump the links grouped by status code.
$t = new http_stat();
$res = $t->get_all_link_status("http://www.sina.com.cn");
var_dump($res);


阅读全文
1 0
原创粉丝点击