jsoup 爬虫

来源:互联网 发布:电脑怎么制作软件 编辑:程序博客网 时间:2024/05/12 18:13
Maven 依赖(pom.xml):
  <dependency>
    <!-- jsoup HTML parser library @ http://jsoup.org/ -->
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.9.2</version>
  </dependency>
  <dependency>
    <groupId>org.json</groupId>
    <artifactId>json</artifactId>
    <version>20160810</version>
  </dependency>


程序:

{
    // Fetches the listing page at urlLink, collects each entry's detail link,
    // salary prefix, contact and area name into the links / salarys /
    // contacts / areaName lists, then calls getFullDetails once per entry.
    System.out.println("urlLink:" + urlLink);
    Document document = Jsoup.connect(urlLink).timeout(10000).get();
    if (document != null) {
        System.out.println("解析开始");

        // Detail-page links come from the href of every ".ad-title" element.
        for (Element titleElement : document.select(".ad-title")) {
            links.add(titleElement.attr("href").trim());
        }

        // Keep only the first four characters of each salary text; texts
        // shorter than five characters fall back to the default range.
        for (Element salaryElement : document.select(".salary")) {
            String salaryText = salaryElement.text().trim();
            if (salaryText.length() < 5) {
                salaryText = "3000-4000";
            }
            salarys.add(salaryText.substring(0, 4));
        }

        // Each ".table-view-cap" text is read as "<contact>/<area>".
        // String.split drops trailing empty strings, so the result may hold
        // fewer than two parts (or none, for a bare "/"); a missing part is
        // stored as "" instead of throwing ArrayIndexOutOfBoundsException.
        for (Element captionElement : document.select(".table-view-cap")) {
            String[] parts = captionElement.text().trim().split("/");
            String contact = parts.length > 0 ? parts[0].trim() : "";
            String area = parts.length > 1 ? parts[1].trim() : "";
            areaName.add(area);
            contacts.add(contact);
        }

        // The three selectors are matched independently, so the lists are not
        // guaranteed to be the same length. Only walk the indices present in
        // all four lists to avoid IndexOutOfBoundsException.
        int entryCount = Math.min(
                Math.min(links.size(), salarys.size()),
                Math.min(areaName.size(), contacts.size()));
        if (entryCount < links.size()) {
            System.out.println("skipping " + (links.size() - entryCount)
                    + " link(s) without matching salary/area/contact data");
        }
        for (int i = 0; i < entryCount; i++) {
            getFullDetails(links.get(i), salarys.get(i), areaName.get(i), contacts.get(i));
        }
    } else {
        System.out.println("document is null");
    }
}


0 0
原创粉丝点击