网页抓取url替换程序Demo

来源:互联网 发布:2016交通事故数据 编辑:程序博客网 时间:2024/05/22 12:32
// Site being mirrored, without and with the trailing slash.
define("URL", "http://www.hao123.com");
define("URLI", "http://www.hao123.com/");

/**
 * Home page: fetch the mirrored site's front page, absolutize its links,
 * then rewrite them to go through this proxy and print the result.
 */
function index()
{
    $str = $this->curl(URL);
    $str = $this->url_add(URL, $str);
    $this->url_handle($str);
}

/**
 * URL "encryption": rewrite matched attribute URLs so they point at this
 * application's index/url_bd action with the original URL base64-encoded.
 *
 * @param array  $arr preg_match_all() result (pattern order) for a pattern of the
 *                    form (attr=")(value)("): [0] full matches, [1] prefix,
 *                    [2] URL value, [3] closing quote.
 * @param string $str HTML in which the URLs are replaced.
 * @return string The rewritten HTML; $str unchanged when there is nothing to do.
 */
function url_be($arr, $str)
{
    if (!$arr || empty($arr[2])) {
        return $str;
    }

    // Same static-asset test as before: any URL containing one of these
    // substrings is left pointing at the origin server.
    $preg_static = '/css|js|png|jpg|gif/i';
    $prefix = site_url() . '/index/url_bd/';
    $pairs = array();

    foreach ($arr[2] as $k => $i) {
        if ($i === '' || $i === URL || $i === URLI || preg_match($preg_static, $i)) {
            continue;
        }
        // Only absolute http(s) URLs can be fetched by url_bd(); leave
        // javascript:, mailto:, anchors and protocol-relative URLs alone.
        if (!preg_match('#^https?://#i', $i)) {
            continue;
        }

        // Attribute values are HTML-escaped (&amp;); decode before encoding the real URL.
        $target = html_entity_decode($i, ENT_QUOTES, 'UTF-8');
        // URL-safe base64 without padding: plain base64 may contain '/' and '+',
        // which would corrupt the path segment.
        $be = rtrim(strtr(base64_encode($target), '+/', '-_'), '=');

        if (isset($arr[0][$k], $arr[1][$k], $arr[3][$k])) {
            // Replace the whole attr="value" match so a URL that is a prefix of
            // another URL cannot corrupt the longer one.
            $pairs[$arr[0][$k]] = $arr[1][$k] . $prefix . $be . $arr[3][$k];
        } else {
            // Fallback for callers that only supply the URL list.
            $pairs[$i] = $prefix . $be;
        }
    }

    // strtr() with an array works in a single pass, longest key first, and never
    // re-scans text it has already replaced.
    return $pairs ? strtr($str, $pairs) : $str;
}

/**
 * URL handling: rewrite every href="..." and then every src="..." in the
 * HTML through url_be() and print the result.
 *
 * @param string $str HTML to process.
 */
function url_handle($str)
{
    $preg_s = '/(href=\")(.*?)(\")/is';
    preg_match_all($preg_s, $str, $arr);
    $str = $this->url_be($arr, $str);

    $preg_s = '/(src=\")(.*?)(\")/is';
    preg_match_all($preg_s, $str, $arr);
    echo $this->url_be($arr, $str);
}

/**
 * URL "decryption": decode a token produced by url_be(), fetch that page,
 * and output it with its own links rewritten.
 *
 * NOTE(review): $be comes from the request, so this fetches a caller-chosen
 * URL. Only the scheme is validated here; internal hosts are still reachable
 * (SSRF). Add a host allow-list if this is ever exposed publicly.
 *
 * @param string $be Base64 token (URL-safe or standard alphabet, padding optional).
 */
function url_bd($be)
{
    if (!$be) {
        return;
    }

    // Accept both alphabets and restore the padding url_be() strips.
    $b64 = strtr($be, '-_', '+/');
    $rem = strlen($b64) % 4;
    if ($rem) {
        $b64 .= str_repeat('=', 4 - $rem);
    }
    $url = base64_decode($b64, true);
    if ($url === false || $url === '') {
        return;
    }

    $parts = parse_url($url);
    if (!$parts || !isset($parts['scheme'], $parts['host'])
        || !in_array(strtolower($parts['scheme']), array('http', 'https'), true)) {
        return;
    }

    // Relative links on the fetched page belong to that page's host, which is
    // not necessarily URL.
    $base = $parts['scheme'] . '://' . $parts['host']
        . (isset($parts['port']) ? ':' . $parts['port'] : '');

    $str = $this->curl($url);
    $str = $this->url_add($base, $str);
    $this->url_handle($str);
}

/**
 * Prepend the main domain to relative href/src values.
 *
 * Root-relative values ("/x") become "$url/x"; other relative values ("x")
 * become "$url/x" as well (resolved against the site root, not the page's
 * directory). Values that already carry a scheme (http:, javascript:,
 * mailto:, data:, ...), protocol-relative values ("//host/x") and fragment
 * links ("#x") are left untouched.
 *
 * @param string $url Site origin, e.g. "http://www.hao123.com".
 * @param string $str HTML to process.
 * @return string HTML with absolutized href/src attributes.
 */
function url_add($url, $str)
{
    $base = rtrim($url, '/');

    $out = preg_replace_callback(
        '/(href|src)=\"(.*?)\"/is',
        function ($m) use ($base) {
            $val = $m[2];
            if (strncmp($val, '//', 2) === 0            // protocol-relative
                || strncmp($val, '#', 1) === 0           // in-page anchor
                || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $val)) { // has a scheme
                return $m[0];
            }
            // Keep the attribute name as written (the old code turned src into href).
            return $m[1] . '="' . $base . '/' . ltrim($val, '/') . '"';
        },
        $str
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the input.
    return $out === null ? $str : $out;
}

/**
 * Fetch a page.
 *
 * @param string $url URL to download.
 * @return string|false Response body, or false on failure.
 */
function curl($url)
{
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_HEADER, false);
    // Without this option curl_exec() prints the response directly instead of returning it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Never let the proxy speak file://, gopher://, etc.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    $out = curl_exec($ch);
    curl_close($ch);
    return $out;
}
0 0
原创粉丝点击