PHP从网页中提取关键字等经典代码片段 <转>

来源：互联网　发布：电视直播软件ipad　编辑：程序博客网　时间：2024/05/18 00:15

查看邮件是否已被阅读

<?php
// Tracking pixel: when a mail client loads this script as an image, one line
// describing the request is appended to log.txt and a 1x1 white JPEG is sent.
error_reporting(0);
header("Content-Type: image/jpeg");

// Get IP. HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are request headers and can
// be forged by the client, so the logged address is informational only.
if (!empty($_SERVER['HTTP_CLIENT_IP'])) {
    $ip = $_SERVER['HTTP_CLIENT_IP'];
} elseif (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
    $ip = $_SERVER['HTTP_X_FORWARDED_FOR'];
} else {
    $ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
}

// Time
$actual_time = time();
$actual_day = date('Y.m.d', $actual_time);
$actual_hour = date('H:i:s', $actual_time);

// Get browser. Not every client sends a User-Agent header.
$browser = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

// Both values originate from the request: strip control characters so a
// single request cannot inject extra lines into the log.
$ip = preg_replace('/[\x00-\x1F\x7F]/', '', $ip);
$browser = preg_replace('/[\x00-\x1F\x7F]/', '', $browser);

// Log. A log file that cannot be opened must not prevent the image from being
// sent, so the write is simply skipped in that case.
$myFile = "log.txt";
$stringData = $actual_day . ' ' . $actual_hour . ' ' . $ip . ' ' . $browser . ' ' . "\r\n";
$fh = fopen($myFile, 'a+');
if ($fh !== false) {
    fwrite($fh, $stringData);
    fclose($fh);
}

// Generate image (e.g. dimension is 1x1)
$newimage = imagecreate(1, 1);
// The first colour allocated on a palette image becomes its background,
// which is what makes the pixel white.
imagecolorallocate($newimage, 255, 255, 255);
imagejpeg($newimage);
imagedestroy($newimage);
?>

从网页中提取关键字

// Read the <meta> tags of the page; get_meta_tags() returns false on failure.
$meta = get_meta_tags('http://www.emoticode.net/');

// The page may be unreachable, or may not declare a keywords tag at all.
$keywords = (is_array($meta) && isset($meta['keywords'])) ? $meta['keywords'] : '';

// Split keywords
$keywords = explode(',', $keywords);

// Trim them
$keywords = array_map('trim', $keywords);

// Remove empty values. Compare against '' explicitly: array_filter() without
// a callback would also discard a keyword that is literally "0".
$keywords = array_filter($keywords, function ($keyword) {
    return $keyword !== '';
});

print_r($keywords);

查找页面上的所有链接

$html = file_get_contents('http://www.example.com');

// file_get_contents() returns false on failure and DOMDocument::loadHTML()
// rejects an empty string, so only parse when something was downloaded.
if ($html !== false && $html !== '') {
    $dom = new DOMDocument();

    // Real-world markup is rarely valid: let libxml collect its parse warnings
    // internally instead of silencing the whole call with @, then restore the
    // previous error-handling mode.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    // grab all the <a> elements on the page
    $xpath = new DOMXPath($dom);
    $hrefs = $xpath->evaluate("/html/body//a");

    foreach ($hrefs as $href) {
        $url = $href->getAttribute('href');
        // The href comes from a remote page: escape it before echoing it into HTML.
        echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '<br />';
    }
}

自动将文本中的URL转换为可点击的超链接

/**
 * preg_replace_callback() handler for a URL that carries its own scheme.
 *
 * @param array $matches [0] whole match, [1] the character preceding the URL,
 *                       [2] the URL itself.
 * @return string The preceding character followed by an anchor for the URL,
 *                or the untouched match when the URL is empty.
 */
function _make_url_clickable_cb($matches) {
    $url = $matches[2];
    if ( empty($url) ) {
        return $matches[0];
    }

    // A single trailing . , ; or : is treated as sentence punctuation: it is
    // moved behind the anchor instead of becoming part of the link.
    $trailing = '';
    $last = substr($url, -1);
    if ( in_array($last, array('.', ',', ';', ':')) ) {
        $trailing = $last;
        $url = substr($url, 0, -1);
    }

    return $matches[1] . '<a href="' . $url . '" rel="nofollow">' . $url . '</a>' . $trailing;
}

/**
 * preg_replace_callback() handler for scheme-less "www." / "ftp." addresses.
 *
 * @param array $matches [1] the character preceding the address,
 *                       [2] the address without a scheme.
 * @return string The preceding character followed by an anchor whose target
 *                and text are the address prefixed with "http://".
 */
function _make_web_ftp_clickable_cb($matches) {
    // "http://" is always prepended, so the destination can never be empty.
    $dest = 'http://' . $matches[2];

    // Same trailing-punctuation handling as for full URLs.
    $trailing = '';
    $last = substr($dest, -1);
    if ( in_array($last, array('.', ',', ';', ':')) ) {
        $trailing = $last;
        $dest = substr($dest, 0, -1);
    }

    return $matches[1] . '<a href="' . $dest . '" rel="nofollow">' . $dest . '</a>' . $trailing;
}

/**
 * preg_replace_callback() handler for e-mail addresses.
 *
 * @param array $matches [1] the character preceding the address,
 *                       [2] the local part, [3] the domain.
 * @return string The preceding character followed by a mailto: anchor.
 */
function _make_email_clickable_cb($matches) {
    $address = $matches[2] . '@' . $matches[3];

    return $matches[1] . '<a href="mailto:' . $address . '">' . $address . '</a>';
}

/**
 * Turns plain-text URLs, www./ftp. addresses and e-mail addresses into anchors.
 *
 * @param string $ret Text to process.
 * @return string The text with links inserted, trimmed of surrounding whitespace.
 */
function make_clickable($ret) {
    // Every pattern requires whitespace or ">" in front of the link, so pad
    // the text to let a link at the very beginning match too.
    $text = ' ' . $ret;

    // Applied in this order: full URLs, then www./ftp. addresses, then e-mail.
    $linkers = array(
        '#([\s>])([\w]+?://[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]*)#is'       => '_make_url_clickable_cb',
        '#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]*)#is'     => '_make_web_ftp_clickable_cb',
        '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i'           => '_make_email_clickable_cb',
    );
    foreach ($linkers as $pattern => $callback) {
        $text = preg_replace_callback($pattern, $callback, $text);
    }

    // Must run last: collapses an anchor that ended up nested directly inside
    // an anchor that was already present in the input.
    $text = preg_replace("#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i", "$1$3</a>", $text);

    return trim($text);
}

创建数据URL

/**
 * Prints a base64-encoded "data:" URI for a file.
 *
 * The URI is written to the output, not returned.
 *
 * @param string $file Path of the file whose contents are embedded.
 * @param string $mime MIME type placed in the URI.
 */
function data_uri($file, $mime) {
    echo 'data:' . $mime . ';base64,' . base64_encode(file_get_contents($file));
}

从服务器上下载&保存一个远程图片

// Download the remote image; file_get_contents() returns false on failure.
$image = file_get_contents('http://www.url.com/image.jpg');

// Only write when the download succeeded: handing false to
// file_put_contents() would create or truncate the destination to an empty file.
if ($image !== false) {
    file_put_contents('/images/image.jpg', $image); // Where to save the image
}

移除 Microsoft Word 生成的 HTML 标签

/**
 * Removes all FONT, SPAN, DEL and INS tags, and all class, lang, style, size
 * and face attributes. Designed to get rid of non-standard Microsoft Word
 * HTML markup.
 *
 * Tag and attribute names are matched case-insensitively. The content between
 * removed tags is kept, as are all other attributes of a tag.
 *
 * @param string $html HTML fragment to clean.
 * @return string The cleaned fragment. If the regex engine reports an error,
 *                the text as cleaned up to that point is returned.
 */
function cleanHTML($html) {
    // start by completely removing all unwanted tags; the lookahead keeps
    // longer names that merely begin with one of these words intact
    $cleaned = preg_replace('/<\/?(?:font|span|del|ins)(?=[\s\/>])[^>]*>/i', '', $html);
    if ($cleaned === null) {
        return $html;
    }
    $html = $cleaned;

    // Matches one unwanted attribute inside a tag. $1 is everything before the
    // attribute, $2 everything after it, so the rest of the tag survives.
    // The attribute name must follow whitespace, which leaves names such as
    // data-style or xml:lang alone.
    $unwantedAttribute = '/<([^>]*?)\s+(?:class|lang|style|size|face)\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)([^>]*)>/i';

    // Each pass removes at most one attribute per tag, so repeat until a pass
    // changes nothing.
    do {
        $cleaned = preg_replace($unwantedAttribute, '<$1$2>', $html, -1, $count);
        if ($cleaned === null) {
            break;
        }
        $html = $cleaned;
    } while ($count > 0);

    return $html;
}

检测浏览器语言

/**
 * Picks a language for the client from the Accept-Language request header.
 *
 * Header entries are checked in the order they appear; only the first two
 * characters of each entry are compared and q-values are not interpreted.
 *
 * @param array  $availableLanguages Two-letter codes the site supports.
 * @param string $default            Code returned when the header is absent
 *                                   or none of its entries is supported.
 * @return string The first supported code found in the header, else $default.
 */
function get_client_language($availableLanguages, $default='en'){
    if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
        $langs = explode(',', $_SERVER['HTTP_ACCEPT_LANGUAGE']);

        foreach ($langs as $value) {
            // Entries may be separated by ", " - trim first, otherwise the
            // two-character prefix would begin with a space and never match.
            $choice = substr(trim($value), 0, 2);
            if (in_array($choice, $availableLanguages, true)) {
                return $choice;
            }
        }
    }

    return $default;
}

显示Facebook 粉丝数量

<?php
    // Identifier of the page whose fan count is displayed.
    $page_id = "YOUR PAGE-ID";

    // NOTE(review): this FQL REST endpoint is believed to have been retired by
    // Facebook - confirm a supported replacement before relying on this snippet.
    // The id is URL-encoded so that a value containing spaces or other reserved
    // characters cannot produce a malformed request.
    $endpoint = "http://api.facebook.com/restserver.php?method=facebook.fql.query&query=SELECT%20fan_count%20FROM%20page%20WHERE%20page_id=" . rawurlencode($page_id);

    // Warnings stay suppressed on purpose: any failure to fetch or parse the
    // response ends the script with the fallback text below.
    $xml = @simplexml_load_file($endpoint);
    if (!$xml) {
        die ("a lot");
    }

    $fans = $xml->page->fan_count;
    echo $fans;
?>