文章标题

来源:互联网 发布:淘宝上的按摩器有用吗 编辑:程序博客网 时间:2024/05/18 02:41

要用 Java 抓取网页,首先需要下载相应的依赖包,这里使用的是 Apache HttpClient 4.2。
话不多说,直接上代码:

public class SpiderHttpClient {    public static void main(String[] args) throws Exception {        // TODO Auto-generated method stub        String url_str = "http://www.baidu.com";        String charset = "utf-8";        String filepath = "f:/baidu.html";        HttpClient hc = new DefaultHttpClient();//        HttpGet hg = new HttpGet(url_str);//        HttpResponse response = hc.execute(hg);//        HttpEntity entity = response.getEntity();        HttpEntity entity = new DefaultHttpClient().execute(new HttpGet(url_str)).getEntity();        InputStream htm_in = null;        if(entity != null){            htm_in = entity.getContent();            String htm_str = InputStream2String(htm_in,charset);            saveHtml(filepath,htm_str);        }    }     public static void saveHtml(String filepath, String str){            try {                OutputStreamWriter outs = new OutputStreamWriter(new FileOutputStream(filepath, true), "utf-8");                outs.write(str);                outs.close();            } catch (IOException e) {                e.printStackTrace();            }        }      public static String InputStream2String(InputStream in_st,String charset) throws IOException{            BufferedReader buff = new BufferedReader(new InputStreamReader(in_st, charset));            StringBuffer res = new StringBuffer();            String line = "";            while((line = buff.readLine()) != null){                res.append(line+"\r\n");            }            return res.toString();        }}
0 0