用 Java 抓取网页上的邮件地址

来源:互联网 发布:阿里云栖大会 ppt 编辑:程序博客网 时间:2024/05/16 10:34
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small command-line scraper that downloads a page and prints every e-mail
 * address found in it to standard output, one address per line.
 */
public class h1 {

    /**
     * Regular expression for e-mail addresses: one or more word characters,
     * dots or hyphens, an {@code @}, the same character set again, then a dot
     * followed by one or more word characters. Compiled once and reused for
     * every scanned line instead of being recompiled per line.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /**
     * Reads the resource at {@code domain} line by line and prints every
     * e-mail address found in it to {@code System.out}.
     *
     * <p>The addresses themselves are not returned; they are only printed.
     * The response is decoded with the platform default charset.
     *
     * @param domain URL of the page to scan, in any form accepted by
     *               {@link URL#URL(String)}
     * @return an empty string when the page was read completely, otherwise the
     *         {@code toString()} of the exception that interrupted the scan
     *         (the same text is also written to {@code System.err})
     */
    public static String getWebCon(String domain) {
        System.out.println("开始抓取邮件地址..(" + domain + ")");
        // try-with-resources closes the reader (and the underlying URL stream)
        // even when reading or matching fails part-way through.
        try (BufferedReader in =
                new BufferedReader(new InputStreamReader(new URL(domain).openStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                parse(line);
            }
            return "";
        } catch (Exception e) {
            // Best-effort entry point: any failure is reported to the caller as
            // text rather than propagated, so the broad catch is intentional.
            System.err.println(e);
            return e.toString();
        }
    }

    /**
     * Runs the scraper against a sample page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Page to scrape; substitute any other URL to try it out.
        h1.getWebCon("http://post.baidu.com/f?kz=341341344");
    }

    /**
     * Prints every e-mail address contained in {@code line} to
     * {@code System.out}, in order of appearance, one per line.
     *
     * @param line a single line of page text
     */
    private static void parse(String line) {
        Matcher m = EMAIL_PATTERN.matcher(line);
        while (m.find()) {
            System.out.println(m.group());
        }
    }
}