一个简单的图片抓取器

来源：互联网 | 发布：abb机器人编程手册 | 编辑：程序博客网 | 时间：2024/05/18 22:12

今天练习写了一个简单的网络图片抓取器。麻麻再也不用担心我一张一张右键了。

需要导入 jsoup 包和 Apache commons-io 包（代码中用到了 org.apache.commons.io.FilenameUtils）。

下面来看一下具体的代码实现吧。

Util.java

package com.csu.test;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;

/**
 * Helper that copies the content behind a URL into a local file.
 *
 * @author CSU-Max
 */
public class Util {

    /** Size in bytes of the buffer used while copying. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Downloads the resource at {@code strUrl} and writes it to {@code path}.
     *
     * <p>Both streams are closed automatically, including when opening one of
     * them fails. I/O failures (malformed URL, unreachable resource, unwritable
     * target, read/write/close errors) are reported with a stack trace on
     * standard error and are not rethrown, so a caller processing many images
     * is not interrupted by one bad entry. If the transfer fails midway, a
     * partially written file may remain at {@code path}.
     *
     * @param strUrl URL of the image to fetch
     * @param path   file system path the image is saved to
     * @throws IOException declared for compatibility with existing callers;
     *                     the current implementation handles I/O failures
     *                     internally
     */
    public void download(String strUrl, String path) throws IOException {
        // try-with-resources closes whichever streams were actually opened,
        // so a failure in openStream() can no longer trigger a
        // NullPointerException during cleanup, and the output stream is
        // closed even if closing the input stream fails.
        try (InputStream is = new URL(strUrl).openStream();
                OutputStream os = new FileOutputStream(path)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = is.read(buffer, 0, buffer.length)) != -1) {
                os.write(buffer, 0, bytesRead);
            }
        } catch (IOException e) {
            // Best effort: report the failure and let the caller continue.
            e.printStackTrace();
        }
    }
}

Main.java

package com.csu.test;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.apache.commons.io.FilenameUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Fetches a web page, collects the distinct image URLs of its {@code <img>}
 * elements and saves each image into a target folder.
 *
 * @author CSU-Max
 */
public class Main {

    /**
     * Entry point. The target folder and the page URL are fixed in the code;
     * command-line arguments are not read.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Folder that receives the downloaded images.
        String filePath = "D:/CSU-Max-temp";
        // Page whose images are downloaded.
        String webUrl = "http://xiangce.baidu.com/picture/album/list/85062425ab359f0760819f33a14ec718884ce667";

        File file = new File(filePath);
        if (file.exists()) {
            System.out.println("文件夹已经存在");
        } else if (file.mkdirs()) {
            // mkdirs() also creates missing parent folders, unlike mkdir().
            System.out.println("自动创建文件夹");
        } else {
            // Without a target folder every download would fail; stop early.
            System.err.println("无法创建文件夹：" + filePath);
            return;
        }

        try {
            Document doc = Jsoup.connect(webUrl)
                    .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31")
                    .get();
            Elements imgElements = doc.getElementsByTag("img");

            // A set removes duplicate URLs so each image is fetched once.
            Set<String> imgSrcSet = new HashSet<String>();
            for (Element img : imgElements) {
                String imgSrc = img.attr("abs:src");
                // Skip <img> elements that yield no absolute URL, so they are
                // neither counted nor processed.
                if (!imgSrc.isEmpty()) {
                    imgSrcSet.add(imgSrc);
                }
            }
            System.out.println("图片总数：" + imgSrcSet.size());

            Util util = new Util();
            for (String imgSrc : imgSrcSet) {
                String imgName = imageFileName(imgSrc);
                if (imgName.isEmpty()) {
                    continue;
                }
                String saveImagePath = filePath + "/" + imgName;
                System.out.println("图片抓取开始：");
                util.download(imgSrc, saveImagePath);
                System.out.println("图片抓取结束：" + imgSrc + " 保存路径：" + saveImagePath);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Derives the local file name for an image URL.
     *
     * <p>The query string is removed before the last path segment is taken,
     * so a {@code /} inside the query cannot be mistaken for a path separator.
     *
     * @param imgSrc absolute URL of the image
     * @return the last path segment of the URL without its query string, or
     *         an empty string when that segment contains no {@code .}
     */
    private static String imageFileName(String imgSrc) {
        int queryStart = imgSrc.indexOf('?');
        String withoutQuery = queryStart >= 0 ? imgSrc.substring(0, queryStart) : imgSrc;
        String name = FilenameUtils.getName(withoutQuery);
        return name.indexOf('.') != -1 ? name : "";
    }
}

哈哈，我下载一下自己百度相册的图片，测试了一下，很好用。

$下载地址：简单的网络图片抓取器 #CSDN@CSU-Max