插件23：检查链接地址

来源：互联网　发布：finale　软件编辑：程序博客网　时间：2024/06/05 23:43

<?php
// Plug-in 23: Check Links
// This is an executable example with additional code supplied
// To obtain just the plug-ins please click on the Download link

$page = "http://alexa.com";
echo "Checking $page<br />\n";

// ob_flush() raises a notice when no output buffer is active, so only call
// it when there is one; flush() pushes the system buffer either way.
if (ob_get_level() > 0) ob_flush();
flush();

$result = PIPHP_CheckLinks($page, 2, 180);

if ($result[0] > 0)
{
   echo "<br />$result[0] failed URLs:<br /><ul>\n";

   for ($j = 0 ; $j < $result[0] ; ++$j)
   {
      // The URLs come from a remote page, so escape them before placing
      // them in the single-quoted href attribute and the link text.
      $safe = htmlspecialchars($result[1][$j], ENT_QUOTES, 'UTF-8');
      echo "<li><a href='" . $safe . "'>" . $safe . "</a></li>\n";
   }

   echo "</ul>\n";
}
else echo "<br />All URLs successfully accessed.";

/**
 * Plug-in 23: Check Links
 *
 * Accepts a path/filename or a URL to an HTML web page containing links to
 * be tested and tries to fetch each distinct link once.
 *
 * Note that some sites may not allow pages to be grabbed in this manner and
 * will result in a URL failing because of this; this includes pages
 * requiring authentication.
 *
 * @param string $page    The web page to check. This must end in either a
 *                        filename.ext or a /
 * @param int    $timeout Seconds to wait for each linked page to be returned
 * @param int    $runtime Maximum number of seconds the script can run
 *
 * @return array Two elements: [0] is 0 if all links worked, otherwise the
 *               number of bad links; [1] is an array of the bad link URLs.
 *               If the page itself cannot be read (or is empty) the result
 *               is array(1, array($page)).
 */
function PIPHP_CheckLinks($page, $timeout, $runtime)
{
   ini_set('max_execution_time', (string) $runtime);

   // PIPHP_GetLinksFromURL() downloads the page itself and returns NULL
   // when that fails, so no separate download of $page is needed here.
   $urls = PIPHP_GetLinksFromURL($page);
   if (!is_array($urls)) return array(1, array($page));

   $checked = array();
   $failed  = array();
   $context = stream_context_create(array('http' =>
      array('timeout' => $timeout)));

   foreach ($urls as $url)
   {
      // An empty link points back at the page, which was already fetched
      if ($url === '') continue;

      // Links with an explicit scheme other than http/https (mailto:,
      // javascript:, tel:, file:, php: ...) cannot or must not be fetched,
      // so they are neither tested nor reported. At least two characters
      // are required so a drive letter such as "C:" is not seen as a scheme.
      if (preg_match('#^([a-z][a-z0-9+.\-]+):#i', $url, $match) &&
          !in_array(strtolower($match[1]), array('http', 'https'), true))
         continue;

      // The fragment is never sent to the server, so links differing only
      // in their #fragment are the same resource and are fetched once.
      $target = preg_replace('/#.*$/s', '', $url);
      if ($target === '' || isset($checked[$target])) continue;
      $checked[$target] = true;

      // Uncomment the following line to view progress
      // echo " $url<br />\n"; if (ob_get_level() > 0) ob_flush(); flush();

      // Only the first 256 bytes are requested. Compare with false so a
      // reachable page with an empty (or "0") body is not counted as bad.
      if (@file_get_contents($target, false, $context, 0, 256) === false)
         $failed[] = $url;
   }

   return array(count($failed), $failed);
}

// The below functions are repeated here to ensure they
// are available to the main function which relies on them

/**
 * Plug-in 22: Get Links From URL
 *
 * Accepts the URL of a web page and returns an array of all the links
 * (<a> elements carrying an href attribute) found in its body, each
 * converted to an absolute URL where possible.
 *
 * @param string $page The web site's main URL
 *
 * @return array|null The list of link URLs, or NULL if the page could not
 *                    be read or was empty
 */
function PIPHP_GetLinksFromURL($page)
{
   $contents = @file_get_contents($page);
   if (!$contents) return NULL;

   // Real-world HTML is rarely valid; collect libxml's complaints
   // internally instead of emitting warnings, then restore the old setting.
   $previous = libxml_use_internal_errors(true);
   $dom      = new DOMDocument();
   $dom->loadHTML($contents);
   libxml_clear_errors();
   libxml_use_internal_errors($previous);

   $xpath = new DOMXPath($dom);

   // Require an href so named anchors (<a name="...">) are not mistaken
   // for links.
   $hrefs = $xpath->evaluate("/html/body//a[@href]");
   $urls  = array();

   foreach ($hrefs as $anchor)
      $urls[] = PIPHP_RelToAbsURL($page,
         trim($anchor->getAttribute('href')));

   return $urls;
}

/**
 * Plug-in 21: Relative To Absolute URL
 *
 * Accepts the absolute URL of a web page and a link featured within that
 * page. The link is then turned into an absolute URL which can be
 * independently accessed. Only applies when $page is an http:// or
 * https:// URL; otherwise $url is returned unchanged. "./" and "../"
 * segments are not collapsed.
 *
 * @param string $page The web page containing the URL
 * @param string $url  The URL to convert to absolute
 *
 * @return string The absolute URL
 */
function PIPHP_RelToAbsURL($page, $url)
{
   if (!preg_match('#^https?://#i', $page)) return $url;

   // A link that already carries a scheme (http:, https:, mailto:, ...)
   // is absolute and must not be glued onto the page's base.
   if (preg_match('#^[a-z][a-z0-9+.\-]+:#i', $url)) return $url;

   $parse = parse_url($page);
   if ($parse === false || !isset($parse['host'])) return $url;

   $root = $parse['scheme'] . "://" . $parse['host'];
   if (isset($parse['port'])) $root .= ':' . $parse['port'];

   // Protocol-relative link: borrows only the scheme from the page
   if (substr($url, 0, 2) == '//') return $parse['scheme'] . ':' . $url;

   // Root-relative link
   if (substr($url, 0, 1) == '/') return $root . $url;

   $path = isset($parse['path']) ? $parse['path'] : '/';

   // Empty and fragment-only links refer to the page itself
   if ($url === '' || substr($url, 0, 1) == '#')
   {
      $query = isset($parse['query']) ? '?' . $parse['query'] : '';
      return $root . $path . $query . $url;
   }

   // Query-only links keep the page's path and replace its query string
   if (substr($url, 0, 1) == '?') return $root . $path . $url;

   // Everything else is relative to the page's directory. The directory is
   // taken from the parsed path so a '/' inside the query string cannot
   // be mistaken for a path separator.
   $p   = strrpos($path, '/');
   $dir = ($p === false) ? '/' : substr($path, 0, $p + 1);

   return $root . $dir . $url;
}
?>

插件说明：

本插件接受一个web页面的URL地址（自己的或第三方的），检查这个页面里的全部链接并逐一进行测试，看看它们是否都链接到有效的页面。它需要以下参数：

$page web页面的URL地址，包括“http://”前导符和域名。

$timeout 在认为某个页面不可用之前必须等待的时间（单位为s）。

$runtime 这个脚本最多允许运行的时间（单位为s），超过该时间脚本将因超时而终止。