java批量下载百度贴吧楼主上传的图片

来源:互联网 发布:网络与新媒体 就业 编辑:程序博客网 时间:2024/05/01 08:17

目前还不够完善:只能下载楼主上传到贴吧的图片,如果楼主使用的是外链(盗链)图片则无法识别;另外,流的打开和关闭也过于频繁。

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Batch-downloads the images of a Baidu Tieba thread.
 *
 * <p>Only image URLs of the form
 * {@code http://imgsrc.baidu.com/forum/pic/item/<40 chars>.jpg} are recognised.
 * Images referenced through any other address are not downloaded; adjust
 * {@link #IMAGE_URL_PATTERN} to match other hosts.
 *
 * @author GeenkEmp01
 */
public class GetBaiDuTieBaPicture {

    /**
     * Matches the image URLs to download. The dots are escaped so that they match a
     * literal '.' only; the character class (word characters plus ',') is kept as
     * the original author wrote it.
     */
    private static final Pattern IMAGE_URL_PATTERN = Pattern.compile(
            "http://imgsrc\\.baidu\\.com/forum/pic/item/[\\w,]{40}\\.jpg");

    /**
     * Matches the "last page" link; group 1 is the page number. At least one digit
     * is required so that an empty number is never handed to
     * {@link Integer#parseInt(String)}.
     */
    private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("pn=(\\d+)\">尾页<");

    /** Running count of image URLs found so far; also used as the saved file's name. */
    private int index = 0;

    /**
     * Downloads the images of a hard-coded thread into a hard-coded directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        GetBaiDuTieBaPicture getP = new GetBaiDuTieBaPicture();
        String tieziUrl = "http://tieba.baidu.com/p/1212071711";
        String imageDirectory = "E:/baiduimage/";
        getP.getImageUrl(tieziUrl, imageDirectory);
    }

    /**
     * Walks every page of the thread, finds the image URLs on each page and
     * downloads them as {@code <imageDirectory><n>.jpg}, where {@code n} is a
     * counter starting at 1.
     *
     * @param tieziUrl
     *            URL of the thread
     * @param imageDirectory
     *            path prefix under which images are stored; it is concatenated
     *            directly with the file name, so it should end with a separator
     */
    public void getImageUrl(String tieziUrl, String imageDirectory) {
        int pn = getTotalPageNum(tieziUrl);
        for (int i = 1; i <= pn; i++) {
            InputStream is = null;
            BufferedReader br = null;
            try {
                is = new URL(tieziUrl + "?pn=" + i).openStream();
                // NOTE(review): the reader uses the platform default charset, as the
                // original did. The image URLs are plain ASCII, so they are presumably
                // unaffected - confirm against the page encoding.
                br = new BufferedReader(new InputStreamReader(is));
                String line;
                while ((line = br.readLine()) != null) {
                    Matcher m = IMAGE_URL_PATTERN.matcher(line);
                    while (m.find()) {
                        index++;
                        String imageUrl = m.group();
                        System.out.println(imageUrl);
                        System.out.println("正在下载第" + index + "张图片...");
                        downloadImage(imageUrl, imageDirectory + index + ".jpg");
                    }
                }
            } catch (Exception e) {
                // Best effort: a failing page is reported and the next page is tried.
                e.printStackTrace();
            } finally {
                closeResource(br);
                closeResource(is);
            }
        }
        System.out.println("共下载了" + index + "张图片");
    }

    /**
     * Determines how many pages the thread has by looking for the "last page" link
     * on the first page.
     *
     * @param tieziUrl
     *            URL of the thread
     * @return the page number found in the last such link, or 1 when no link is
     *         found or the page cannot be read
     */
    public int getTotalPageNum(String tieziUrl) {
        int pageNum = 1;
        InputStream is = null;
        BufferedReader br = null;
        try {
            is = new URL(tieziUrl).openStream();
            // NOTE(review): platform default charset, as in the original; the
            // non-ASCII link text only matches if it agrees with the page encoding.
            br = new BufferedReader(new InputStreamReader(is));
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = LAST_PAGE_PATTERN.matcher(line);
                while (m.find()) {
                    pageNum = Integer.parseInt(m.group(1));
                }
            }
        } catch (Exception e) {
            // Covers I/O failures and a page number too large for an int; the value
            // found so far (default 1) is returned in that case.
            e.printStackTrace();
        } finally {
            closeResource(br);
            closeResource(is);
        }
        System.out.println("帖子共有" + pageNum + "页");
        return pageNum;
    }

    /**
     * Downloads a single image and writes it to {@code saveFile}, creating the
     * parent directory first if it does not exist. Failures are reported on
     * standard error and not propagated.
     *
     * @param imageUrl
     *            URL of the image
     * @param saveFile
     *            path of the file to write
     */
    public void downloadImage(String imageUrl, String saveFile) {
        InputStream is = null;
        OutputStream os = null;
        try {
            is = new URL(imageUrl).openStream();

            // FileOutputStream does not create missing directories.
            File parent = new File(saveFile).getAbsoluteFile().getParentFile();
            if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
                throw new IOException("Cannot create directory: " + parent);
            }

            os = new FileOutputStream(saveFile);
            byte[] buff = new byte[1024];
            int readed;
            while ((readed = is.read(buff)) != -1) {
                os.write(buff, 0, readed);
            }
        } catch (Exception e) {
            // The original called e.getStackTrace(), which silently discarded the error.
            e.printStackTrace();
        } finally {
            closeResource(os);
            closeResource(is);
        }
    }

    /**
     * Closes one resource, reporting (not propagating) a failure so that the
     * caller can go on to close its remaining resources.
     *
     * @param resource
     *            the resource to close; may be {@code null}
     */
    private static void closeResource(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


 

原创粉丝点击