Java爬取有道图片

来源:互联网 发布:黑帮之地mac 编辑:程序博客网 时间:2024/05/16 19:27

这里利用了Jsoup。下载地址如下:
jsoup:下载地址 https://jsoup.org/download
导入jar包的步骤我就不多说了。

废话不多说,直接上代码(代码中用到的 Urlbase.youdao 和 ImageUrl.img 定义在项目的其他类中,本文未给出)。

/** * Created by Aiden on 2015/12/9. */import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import java.io.File;import java.io.FileWriter;import java.io.IOException;// 爬取有道图片public class doYoudao {    public synchronized static void Geturl(String key, int max) { // key为关键词        FileWriter fileWriter = null;        try {            fileWriter = new FileWriter(new File("img.txt")); // 写入文件        } catch (IOException e) {            e.printStackTrace();        }        String keyfrom = "&keyfrom=image.page";        String start = "&start=";        int page = 1;        while (page < max + 1) { // 爬取            String keyword = Urlbase.youdao + key + keyfrom + page + start + (page - 1) * 24; // 链接            page++;            try {                Document doc = Jsoup.connect(keyword).get(); // 得到源码                Elements divs = doc.getElementsByTag("div"); // 得到div                Elements tables = null;                for (Element ele : divs) {                    try {                        tables = ele.select("table"); // 得到table标签                        for (Element table : tables) {                            try {                                Element tbody = table.select("tbody").first();                                Element tr = tbody.select("tr").first();                                Element td = tr.select("td").first();                                Element ul = td.select("ul").first();                                Element li = ul.select("li").first();                                Element span = li.select("span").first();                                Element center = span.select("center").first();                                Element img = center.select("img").first();                                String src = img.attr("src").toString(); // 得到图片路径                                if (ImageUrl.img.contains(src) == false) {                                    fileWriter.write(src 
+ "\n");                                }                            } catch (Exception e) {                                continue;                            }                        }                    } catch (Exception e) {                        continue;                    }                }            } catch (IOException e) {                e.printStackTrace();                continue;            }        }        System.out.println("有道爬取完毕");        try {            fileWriter.close();        } catch (IOException e) {            e.printStackTrace();        }    }}
1 1