记录: Jsoup小应用

来源:互联网 发布:cda数据分析师招聘 编辑:程序博客网 时间:2024/04/29 19:37

以 eoemarket 网站的页面作为 HTML 解析的示例。


关键代码:

 // Parse the downloaded HTML (held in `data`) and visit every <div class="appcell">.
 Document page = Jsoup.parse(data);
 Elements cells = page.select("div.appcell");
 if (null != cells) {
     for (Element cell : cells) {
         // The first <img> inside the cell provides the "title" and "src" attributes.
         Element image = cell.select("img").first();
         if (image != null) {
             String title = image.attr("title");
             String img = image.attr("src");
             System.out.println("The title link is : " + title);
             System.out.println("The img link is : " + img);
         }

         // The last <a> inside the cell provides the "href" that is printed as the page link.
         Element anchor = cell.select("a").last();
         if (anchor != null) {
             String link = anchor.attr("href");
             System.out.println("The Page link is : " + link);
         }
     }
 }


完整程序代码:


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small Jsoup demo: downloads one listing page and prints, for every
 * {@code div.appcell} element, the title and image URL taken from its first
 * {@code <img>} and the link taken from its last {@code <a>}.
 */
public class networkbrowser {

    /** Base URL of the listing; the page number is appended to it. */
    private static final String IDOServer = "http://eoemarket.com/categories/2/order/down/page/";

    /** Number of the page that {@link #main} downloads. */
    private static int PageCount = 1;

    /** Timeout, in milliseconds, applied to both connecting and reading. */
    private static final int TIMEOUT_MILLIS = 6 * 1000;

    /**
     * Downloads the configured page and prints the extracted entries to standard output.
     * Nothing beyond the page URL is printed when the download yields no content.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String Page = IDOServer + PageCount;
        System.out.println("The Page is : " + Page);

        String data = getResponseData(Page);
        if (data.isEmpty()) {
            return;
        }

        Document doc = Jsoup.parse(data);
        // select() returns an empty Elements when nothing matches, so no null check is needed.
        Elements divs = doc.select("div.appcell");
        for (Element div : divs) {
            // first()/last() return null when the selection is empty, hence the guards below.
            Element titleA = div.select("img").first();
            if (titleA != null) {
                System.out.println("The title link is : " + titleA.attr("title"));
                System.out.println("The img link is : " + titleA.attr("src"));
            }

            Element linkA = div.select("a").last();
            if (linkA != null) {
                System.out.println("The Page link is : " + linkA.attr("href"));
            }
        }
    }

    /**
     * Fetches the body of {@code Page} over HTTP and returns it as text.
     *
     * <p>Failures are reported with a stack trace and are not rethrown; whatever was
     * read before the failure (possibly the empty string) is returned.
     *
     * @param Page absolute HTTP URL to download
     * @return the response body with lines separated by {@code '\n'}, or {@code ""}
     *         when nothing could be read
     */
    private static String getResponseData(String Page) {
        StringBuilder sb = new StringBuilder();
        HttpURLConnection urlConn = null;
        try {
            urlConn = (HttpURLConnection) new URL(Page).openConnection();
            urlConn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block this call indefinitely.
            urlConn.setReadTimeout(TIMEOUT_MILLIS);

            // Decode with a fixed charset instead of the platform default.
            // NOTE(review): assumes the site serves UTF-8 - confirm against the response headers.
            try (BufferedReader buffer = new BufferedReader(
                    new InputStreamReader(urlConn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = buffer.readLine()) != null) {
                    // readLine() strips the terminator; re-add one so that markup split
                    // across lines is not fused into a single token.
                    sb.append(line).append('\n');
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (urlConn != null) {
                urlConn.disconnect();
            }
        }
        return sb.toString();
    }
}