《小程序---利用jsoup解析CSDN博客信息》

来源:互联网 发布:天津联通网络测速平台 编辑:程序博客网 时间:2024/05/02 04:20
package com.fenghuo.html;import java.io.IOException;import org.jsoup.Connection;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;public class AnalyzeHtml {/** * Example program to list links from a URL. */public static void main(String[] args) throws IOException {String csdn = "http://blog.csdn.net";String blog = "http://blog.csdn.net/w695050167";String url = blog + "?viewmode=list";Connection connection = Jsoup.connect(url);connection.timeout(500);//设置连接超时时间//给服务器发消息头,告诉服务器,俺不是java程序。CSDN不允许java程序访问connection.header("User-Agent","Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)");Document doc = connection.get();//获取返回的html的document对象//解析document对象Elements links = doc.select(".link_title");for (Element e : links) {if (e.getAllElements().size() == 2) {Element ae = e.select("a[href]").first();String href = ae.attr("href");System.out.println(csdn + href);String text = e.text();System.out.println(text);}}}}

原创粉丝点击