php生成百度换量 XML文件

来源:互联网 发布:淘宝低价高配主机 编辑:程序博客网 时间:2024/05/16 01:05

百度开放平台提交资源的方式主要有两种,一种是将数据生成xml文件提交,另一种是使用开放平台的所见即所得方式直接提交。

下面介绍如何用 PHP 生成符合开放平台规范要求的 XML 文件。

  1. 首先看下我们都需要提交的东西,在百度的分布情况

这里写图片描述

  2. 然后再看具体的xml文件格式

这里写图片描述

  3. 然后我们进行php程序的书写
<?php
// Generates "baidu_open.xml": for every configured keyword/URL pair the page is
// fetched, its <title> and meta description are extracted, and an <item> entry
// is appended to the XML document. When the page itself yields nothing, the
// Baidu search result for that URL is used as a fallback source.

date_default_timezone_set('Etc/GMT-8'); // POSIX-style name: the sign is inverted, this is UTC+8
error_reporting(E_ERROR | E_PARSE);
set_time_limit(0);

/**
 * Fetch a URL with cURL and return the response body.
 *
 * Despite its name the request is sent as GET (CURLOPT_POST is disabled).
 *
 * @param string $sUrl Absolute URL to fetch.
 * @return string|false Response body, or false when the transfer failed.
 */
function post($sUrl)
{
    $oCurl = curl_init();
    $header = array("Content-type: application/x-www-form-urlencoded");
    $user_agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.146 Safari/537.36";
    curl_setopt($oCurl, CURLOPT_URL, $sUrl);
    curl_setopt($oCurl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($oCurl, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($oCurl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($oCurl, CURLOPT_POST, false);
    // The script runs without a PHP time limit, so bound each transfer
    // explicitly; otherwise one stalled host would block the whole run.
    curl_setopt($oCurl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($oCurl, CURLOPT_TIMEOUT, 30);
    $sContent = curl_exec($oCurl);
    curl_close($oCurl);
    return $sContent;
}

/**
 * Escape a value for use as XML character data.
 *
 * @param mixed $value      Value to embed (cast to string).
 * @param bool  $decodeHtml Set to true for text scraped from HTML so that
 *                          entities such as &nbsp; are turned into real
 *                          characters first (they are not defined in XML).
 * @return string XML-safe text.
 */
function xmlEscape($value, $decodeHtml = false)
{
    $text = (string) $value;
    if ($decodeHtml) {
        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
    // ENT_SUBSTITUTE keeps the rest of the text when the scraped page contains
    // invalid UTF-8 instead of returning an empty string.
    return htmlspecialchars($text, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Reduce a Baidu snapshot link to its "m=<value>" query parameter.
 *
 * @param string $cacheUrl The href of the snapshot link.
 * @return string "m=<value>" when the parameter is present, otherwise the
 *                unchanged input.
 */
function extractSnapshotParam($cacheUrl)
{
    $result = $cacheUrl;
    $urlParts = explode("?", $cacheUrl);
    if (!isset($urlParts[1])) {
        return $result; // no query string at all
    }
    foreach (explode("&", $urlParts[1]) as $pair) {
        $kv = explode("=", $pair);
        if ($kv[0] == "m" && isset($kv[1])) {
            $result = "m=" . $kv[1];
        }
    }
    return $result;
}

// Start of the XML document.
$xml = <<<isEOF
<?xml version="1.0" encoding="utf-8" ?>
<DOCUMENT>\r\n
isEOF;

// One row per keyword. Each row must be its own array: repeating the keys
// 'name'/'url' inside a single array would keep only the last pair.
$lines = array(
    array('name'=>'日喀则地区装修公司排名','url'=>'http://xizang.edeng.cn/73/rikaze/73/zhuangxiugongsi/'),
    array('name'=>'日喀则地区装修设计公司','url'=>'http://xizang.edeng.cn/73/rikaze/73/zhuangxiugongsi/'),
    array('name'=>'日喀则地区装饰','url'=>'http://xizang.edeng.cn/73/rikaze/73/zhuangshi/'),
    array('name'=>'日喀则地区装饰公司','url'=>'http://xizang.edeng.cn/73/rikaze/73/zhuangxiugongsi/'),
);

foreach ($lines as $loop) {
    $key = $loop['name'];
    $thisurl = $loop['url'];

    // Reset everything per iteration so nothing leaks from the previous URL.
    $edengTitle = "";
    $edengDesc = "";
    $baiduTitle = "";
    $baiduContent = "";
    $baiduCache = "";

    // Fetch the page; a failed transfer (false) is treated as an empty document.
    $edengStr = (string) post($thisurl);
    $edengPreg = '#<head>[\s\S.]*?<title>(.*?)</title>[\s\S.]*?<meta name="Description" content="(.*?)" />';
    $edengPreg .= '[\s\S.]*?</head>#i';

    // Only the first match is needed.
    if (preg_match($edengPreg, $edengStr, $edengMat) === 1) {
        $edengTitle = $edengMat[1];
        $edengDesc = $edengMat[2];
    } else {
        // The page gave us nothing: fall back to the Baidu search result for the URL.
        $baiduUrl = "http://www.baidu.com/s?wd=" . urlencode($thisurl);
        $baiduUrl .= "&rsv_spt=1&issp=1&rsv_bp=0&ie=utf-8&tn=baiduhome_pg&rsv_n=2&rsv_sug3=1&rsv_sug4=251&inputT=3150";
        $sContent = (string) post($baiduUrl);

        // Groups: 1 = title, 2 = abstract, 3 = displayed URL, 4 = date, 5 = snapshot href.
        $infoPreg = '#<h3 class="t"><a[^>]*?>(.*?)</a>.*?</h3>.*?<div class=\"c-abstract\">(.*?)</div>';
        $infoPreg .= '<div class="f13">.*?<span class="g">(.*?)&nbsp;(\d{4}-\d{1,2}-\d{1,2})&nbsp;</span>';
        $infoPreg .= '.*?<a data-nolog href="([^>]*?)"[^>]*?>百度快照</a>#i';

        if (preg_match($infoPreg, $sContent, $matches) === 1) {
            $baiduTitle = strip_tags($matches[1]);
            $baiduContent = strip_tags($matches[2]);
            $baiduCache = extractSnapshotParam($matches[5]);
        }
    }

    // Use the Baidu values wherever the page itself provided only blanks.
    if (trim($edengTitle) === "") {
        $edengTitle = $baiduTitle;
    }
    if (trim($edengDesc) === "") {
        $edengDesc = $baiduContent;
    }
    $baiduDate = date("Y-m-d");

    // Escape every value: scraped text and URLs routinely contain "&" or "<",
    // which would otherwise make the generated document malformed.
    $keyXml = xmlEscape($key);
    $urlXml = xmlEscape($thisurl);
    $titleXml = xmlEscape($edengTitle, true);
    $descXml = xmlEscape($edengDesc, true);
    $cacheXml = xmlEscape($baiduCache);

    $xml .= <<<isEOF
        <item>
            <key>{$keyXml}</key>
            <display>
                <url>{$urlXml}</url>
                <title>{$titleXml}</title>
                <content>{$descXml}</content>
                <showurl>{$urlXml}</showurl>
                <date>{$baiduDate}</date>
                <capture>{$cacheXml}</capture>
            </display>
        </item>

isEOF;

    sleep(1); // throttle: at most one page per second
}

$xml .= <<<isEOF
</DOCUMENT>
isEOF;

if (file_put_contents("baidu_open.xml", $xml) === false) {
    error_log("Could not write baidu_open.xml");
    exit(1);
}
0 0