爬虫学习之Java(一)

来源:互联网 发布:win8网络点不了 编辑:程序博客网 时间:2024/05/06 00:07

爬虫学习(一)


抓取网页源码

代码块:

package com.spider;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

/**
 * Minimal crawler example: downloads the source of a single web page and
 * prints it to standard output.
 */
public class BaiduSourceCodeTest {

    /** Upper bound, in milliseconds, for both connecting and each blocking read. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Fetches the page at the hard-coded URL and prints its source to stdout.
     * Failures are reported on stderr instead of being propagated, so the
     * program never terminates with an uncaught exception.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String url = "http://www.baidu.com";
        try {
            // Every line already ends with '\n', so print() rather than println().
            System.out.print(fetchPage(url));
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL " + url + ": " + e);
        } catch (IOException e) {
            System.err.println("Failed to fetch " + url + ": " + e);
        }
    }

    /**
     * Downloads the body served at {@code url} and returns it as text.
     *
     * @param url absolute URL of the page to download
     * @return the page content, with every line terminated by {@code '\n'}
     * @throws MalformedURLException if {@code url} cannot be parsed
     * @throws IOException if connecting or reading fails or times out
     */
    private static String fetchPage(String url) throws IOException {
        URLConnection con = new URL(url).openConnection();
        // Without timeouts a stalled server would block this thread forever.
        con.setConnectTimeout(TIMEOUT_MILLIS);
        con.setReadTimeout(TIMEOUT_MILLIS);

        StringBuilder page = new StringBuilder();
        // getInputStream() opens the connection implicitly; try-with-resources
        // closes the reader (and the underlying stream) on every exit path.
        // NOTE(review): decodes as UTF-8 rather than the platform default;
        // the Content-Type header is not consulted - confirm for other sites.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                // readLine() strips the terminator; restore it so the printed
                // source keeps its original line structure.
                page.append(line).append('\n');
            }
        }
        return page.toString();
    }
}