插件73:读取wiki页面

来源:互联网 发布:同志软件有哪些 编辑:程序博客网 时间:2024/06/05 09:39
<?php // Plug-in 73: Fetch Wiki Page

/*
 * Plug-in description:
 *   Fetch a Wikipedia page.
 *   The plug-in accepts the title of a Wikipedia article and returns the
 *   text of that article as simple HTML. It returns false if the page
 *   cannot be fetched.
 *   It requires the following argument:
 *     $entry  The title of the Wikipedia article
 */

// This is an executable example with additional code supplied
// To obtain just the plug-ins please click on the Download link

echo '<html><head><meta http-equiv="Content-Type" ' .
     'content="text/html; charset=utf-8" /></head><body>';
echo '<font face="Verdana" size="2">';
echo PIPHP_FetchWikiPage('Climate Change');

function PIPHP_FetchWikiPage($entry)
{
   // Plug-in 73: Fetch Wiki Page
   //
   // This plug-in fetches the XML of a Wikipedia entry for the
   // term $entry (via Special:Export) and returns a string
   // containing the salient details as simple HTML, followed by
   // a "Source" link. It returns false when the entry is empty,
   // the page cannot be fetched or parsed, the article has no
   // text, or a redirect chain cannot be resolved.
   // It requires the following argument:
   //
   //    $entry: The entry to fetch (eg: 'bread')

   $agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; ' .
            'rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR ' .
            '3.5.30729)';

   // Nothing sensible can be fetched for an empty title
   $entry = trim((string) $entry);
   if ($entry === '') return false;

   // Upper bound on redirect hops so a redirect cycle cannot
   // keep this function requesting pages forever
   $maxRedirects = 5;
   $resolved     = false;
   $title        = '';
   $text         = '';

   for ($hop = 0 ; $hop <= $maxRedirects ; ++$hop)
   {
      // Encode into a separate value: $entry itself must stay
      // unencoded so that it is never encoded twice
      $url  = 'http://en.wikipedia.org/wiki/Special:Export/' .
              rawurlencode($entry);
      $page = PIPHP_CurlGetContents($url, $agent);
      if ($page === false || $page === '') return false;

      // Collect libxml parse errors internally instead of
      // emitting warnings for malformed XML
      $previous = libxml_use_internal_errors(true);
      $xml      = simplexml_load_string($page);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      // A missing <page>/<revision>/<text> means there is no
      // such article
      if ($xml === false || !isset($xml->page->revision->text))
         return false;

      // Cast the SimpleXML nodes to plain strings before using
      // string functions on them
      $title = (string) $xml->page->title;
      $text  = (string) $xml->page->revision->text;
      if ($text === '') return false;

      // Redirect keyword is matched case-insensitively
      if (strncasecmp($text, '#REDIRECT', 9) !== 0)
      {
         $resolved = true;
         break;
      }

      // Follow the first [[target]], ignoring any |label or
      // #section suffix; give up if there is no target at all
      if (!preg_match('/\[\[([^\]\|#]+)/', $text, $matches))
         return false;

      $entry = trim($matches[1]);
      if ($entry === '') return false;
   }

   if (!$resolved) return false;

   $text = PIPHP_WikiMarkupToHtml($text);

   // The title comes from a remote document, so escape it
   // before placing it inside the attribute and the link text
   $safe  = htmlspecialchars($title, ENT_QUOTES, 'UTF-8');
   $url   = "http://en.wikipedia.org/wiki/$safe";
   $text .= "<p>Source: <a href='$url'>Wikipedia ($safe)</a>";

   return trim($text);
}

function PIPHP_WikiMarkupToHtml($text)
{
   // Helper for plug-in 73: converts raw wiki markup to the
   // small HTML subset used by PIPHP_FetchWikiPage().
   //
   // It cuts the text at the first trailing section heading
   // (References, See also, ...), applies the regular
   // expression rewrites below in order, then strips every tag
   // except headings, <p>, <br>, <b> and <i>.
   //
   //    $text: The wiki markup to convert
   //
   // Returns the converted string.

   $sections = array('References', 'See also', 'External links',
      'Notes', 'Further reading');

   foreach ($sections as $section)
   {
      foreach (array("==$section==", "== $section ==") as $heading)
      {
         // stripos() returns false when not found; offset 0 is
         // a valid match and must not be treated as a miss
         $ptr = stripos($text, $heading);
         if ($ptr !== false) $text = substr($text, 0, $ptr);
      }
   }

   // Pattern => replacement pairs, applied in this order
   $rules = array(
      array('\[{2}Imag(\[{2})*.*(\]{2})*\]{2}', ''),
      array('\[{2}File(\[{2})*.*(\]{2})*\]{2}', ''),
      array('\[{2}Cate(\[{2})*.*(\]{2})*\]{2}', ''),
      array('\{{2}([^\{\}]+|(?R))*\}{2}',       ''),
      array('\'{3}(.*?)\'{3}',                  '<b>$1</b>'),
      array('\'{2}(.*?)\'{2}',                  '<i>$1</i>'),
      array('\[{2}[^\|\]]+\|([^\]]*)\]{2}',     '$1'),
      array('\[{2}(.*?)\]{2}',                  '$1'),
      array('\[(http[^\]]+)\]',                 ' '),
      array('\n(\*|#)+',                        '<br /> ● '),
      array('\n:.*?\n',                         ''),
      array('\n\{[^\}]+\}',                     ''),
      array('\n={7}([^=]+)={7}',                '<h7>$1</h7>'),
      array('\n={6}([^=]+)={6}',                '<h6>$1</h6>'),
      array('\n={5}([^=]+)={5}',                '<h5>$1</h5>'),
      array('\n={4}([^=]+)={4}',                '<h4>$1</h4>'),
      array('\n={3}([^=]+)={3}',                '<h3>$1</h3>'),
      array('\n={2}([^=]+)={2}',                '<h2>$1</h2>'),
      array('\n={1}([^=]+)={1}',                '<h1>$1</h1>'),
      array('\n{2}',                            '<p>'),
      array('<gallery>([^<]+?)<\/gallery>',     ''),
      array('<ref>([^<]+?)<\/ref>',             ''),
      array('<ref [^>]+>',                      '')
   );

   foreach ($rules as $rule)
   {
      // preg_replace() returns null on a PCRE error (such as
      // the backtrack limit being hit); keep the text from the
      // previous step rather than wiping the whole article
      $result = preg_replace('/' . $rule[0] . '/', $rule[1], $text);
      if ($result !== null) $text = $result;
   }

   return strip_tags($text, '<h1><h2><h3><h4><h5><h6><h7>' .
                            '<p><br><b><i>');
}

function PIPHP_CurlGetContents($url, $agent)
{
   // Plug-in 72: Curl Get Contents
   //
   // This plug-in fetches a page that may otherwise be
   // forbidden using the file_get_contents() function.
   // It returns the response body as a string, or false if the
   // request could not be made or failed.
   // It requires the following arguments:
   //
   //    $url:   The URL of the page to fetch
   //    $agent: A typical browser User Agent string

   if (!function_exists('curl_init'))
   {
      trigger_error('PIPHP_CurlGetContents requires the cURL ' .
                    'extension', E_USER_WARNING);
      return false;
   }

   $ch = curl_init();
   if ($ch === false) return false;

   curl_setopt($ch, CURLOPT_URL,            $url);
   curl_setopt($ch, CURLOPT_USERAGENT,      $agent);
   curl_setopt($ch, CURLOPT_HEADER,         0);
   curl_setopt($ch, CURLOPT_ENCODING,       "gzip");
   curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
   curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
   // Make HTTP status codes >= 400 come back as false
   curl_setopt($ch, CURLOPT_FAILONERROR,    1);
   curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 8);
   curl_setopt($ch, CURLOPT_TIMEOUT,        8);

   $result = curl_exec($ch);
   curl_close($ch);

   return $result;
}