黑马程序员——网页爬虫(网页蜘蛛)

来源:互联网 发布:滴滴打车软件介绍 编辑:程序博客网 时间:2024/05/29 12:46

------- <a href="http://www.itheima.com" target="_blank">android培训</a>、<a href="http://www.itheima.com" target="_blank">java培训</a>、期待与您交流! ----------


通过学习 IO 流、网络编程、正则表达式等知识,编写一个用于获取电子邮箱地址的程序——网页爬虫。

package day25;

import java.io.*;
import java.net.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal "web crawler" demo: scans text line by line and prints every
 * substring that looks like an e-mail address to standard output.
 *
 * <p>Two sources are supported: a fixed web page ({@link #getNetMails()})
 * and a local file named {@code mail.txt} ({@link #getMails()}).
 */
public class GetMailDemo {

    /**
     * E-mail matcher: one or more word characters, an {@code @}, one or more
     * word characters, then one or more {@code .segment} groups.
     * Compiled once because {@link Pattern} is immutable and reusable.
     */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Entry point; runs the network variant of the demo.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the page cannot be fetched or read
     */
    public static void main(String[] args) throws Exception {
        getNetMails();
    }

    /**
     * Fetches the hard-coded web page and prints every e-mail address found in it,
     * one per line.
     *
     * @throws Exception if the URL is malformed, the connection fails, or reading fails
     */
    public static void getNetMails() throws Exception {
        URL url = new URL("http://10.0.31.236:8080/testWeb/email.html");
        URLConnection conn = url.openConnection();
        // NOTE(review): the response is decoded with the platform default charset,
        // as in the original code; confirm the page encoding before changing it.
        // try-with-resources closes the reader (and the stream it wraps) even
        // when reading throws.
        try (BufferedReader bufIn =
                new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            printMails(bufIn);
        }
    }

    /**
     * Reads {@code mail.txt} from the current working directory and prints every
     * e-mail address found in it, one per line.
     *
     * @throws Exception if the file cannot be opened or read
     */
    public static void getMails() throws Exception {
        // The reader is now closed deterministically, so no warning suppression is needed.
        try (BufferedReader bufr = new BufferedReader(new FileReader("mail.txt"))) {
            printMails(bufr);
        }
    }

    /**
     * Prints every match of {@link #MAIL_PATTERN} found in the reader's lines.
     * The caller owns the reader and is responsible for closing it.
     *
     * @param reader source of text lines
     * @throws IOException if reading a line fails
     */
    private static void printMails(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            Matcher m = MAIL_PATTERN.matcher(line);
            // A single line may contain several addresses.
            while (m.find()) {
                System.out.println(m.group());
            }
        }
    }
}


0 0