如何用 Java 实现网页抓取？

来源:互联网 发布:centos安装不上samba 编辑:程序博客网 时间:2024/05/14 11:33
import java.net.*;
import java.io.*;

/**
 * Minimal example that downloads a web page and appends its text to a local file.
 *
 * <p>The page address and the output location are fixed constants of this class.
 * Failures while downloading or while writing are reported with
 * {@link Throwable#printStackTrace()} and are not propagated to the caller.
 */
public class Catch1 {

    /** Address of the page that {@link #test()} downloads. */
    private static final String PAGE_URL = "http://www.sohu.com";

    /** Directory that receives the downloaded page. */
    private static final String OUTPUT_DIR = "E:\\text";

    /** Name of the file, inside {@link #OUTPUT_DIR}, that the page is appended to. */
    private static final String OUTPUT_FILE = "test.html";

    /**
     * Downloads {@link #PAGE_URL} line by line and appends the collected text to
     * {@code OUTPUT_DIR/OUTPUT_FILE}.
     *
     * <p>Every line read from the page is stored followed by a single {@code '\n'}.
     * If the download fails, whatever was read before the failure is still written.
     * Both the reader and the writer use the platform default charset.
     */
    public void test() {
        StringBuilder document = new StringBuilder();

        try {
            URLConnection conn = new URL(PAGE_URL).openConnection();
            // try-with-resources closes the stream even when readLine() throws.
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    document.append(line).append('\n');
                }
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so a bad URL is reported here too.
            e.printStackTrace();
        }

        File target = new File(OUTPUT_DIR, OUTPUT_FILE);
        // Append mode: FileWriter creates the file when it is missing and otherwise
        // adds to the end, so repeated runs accumulate content in the same file.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(target, true))) {
            writer.write(document.toString());
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Runs the download once.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        new Catch1().test();
    }
}


原创粉丝点击