java读取URL的内容

来源：互联网发布：中控iface702软件编辑：程序博客网时间：2024/06/16 22:35

给定一个链接，返回该页面的 HTML 代码。主要用途：
1、可以用于在项目中抓取返回的某些字段
2、类似爬虫，抓取自己想要的信息

import java.io.BufferedReader;import java.io.InputStreamReader;import java.net.URL;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;public class LoadUrl {    public static void main(String[] args) throws Exception {        //这里要注意，因为是http请求，所以要加行http://，不然会报错：        //Http协议异常：java.net.MalformedURLException: no protocol:   www.baidu.com        System.out.println(readUrl(" http://www.baidu.com"));    }    /**     * 读取url的内容     * @param url     * @return     * @throws Exception     */    public static String readUrl(String url) throws Exception{        BufferedReader br = new BufferedReader(new InputStreamReader(new URL(url).openConnection().getInputStream(),"GB2312"));        StringBuffer str = new StringBuffer();        String realLineStr = null;        while((realLineStr = br.readLine()) != null){            str.append(realLineStr).append("\r\n"); //每读取一行就换行        }        return str.toString();    }}

也可以直接使用Jsoup来读取：

 // Alternative: let Jsoup issue the GET request and return the page as a Document.
 Document doc = Jsoup.connect("http://www.baidu.com").get();

阅读全文

0 0