// Java 在网页上抓取邮件地址
// 来源:互联网 发布:阿里云栖大会 ppt 编辑:程序博客网 时间:2024/05/16 10:34
import java.io.BufferedReader;import java.io.InputStreamReader;import java.net.URL;import java.util.regex.Matcher;import java.util.regex.Pattern;public class h1{ public static String getWebCon(String domain) { System.out.println("开始抓取邮件地址..("+domain+")"); StringBuffer sb=new StringBuffer(); try { java.net.URL url=new java.net.URL(domain); BufferedReader in=new BufferedReader(new InputStreamReader(url.openStream())); String line; while((line=in.readLine())!=null) { parse(line); } in.close(); } catch(Exception e) { sb.append(e.toString()); System.err.println(e); } return sb.toString(); } public static void main(String[] args) { String s; s=h1.getWebCon("http://post.baidu.com/f?kz=341341344"); //这是要抓取的网页,自己可以试下. //System.out.println(s); } private static void parse(String line) { Pattern p=Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");//邮箱的正则表达式 Matcher m=p.matcher(line); while(m.find()) { System.out.println(m.group()); } }}