实现Java读取网页内容并下载网页中出现的图片
来源:互联网 发布:java方法引用 编辑:程序博客网 时间:2024/06/01 01:33
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Fetches a web page, finds the images referenced by {@code src=} / {@code background=}
 * attributes, and downloads each of them into the {@code ./pic/} directory.
 *
 * <p>Only {@code jpg}, {@code png} and {@code gif} references whose path segments consist of
 * word characters and hyphens are recognised (no query strings, no dots in directory names).
 */
public class GetContentPicture {

    /** Directory (relative to the working directory) that downloaded images are written to. */
    private static final String PICTURE_DIR = "./pic/";

    /**
     * Matches a relative or root-relative image reference; group 3 is the path, including the
     * leading {@code /} when the reference is root-relative.
     */
    private static final Pattern RELATIVE_IMAGE = Pattern.compile(
            "(src|background)=('|\")(/?([\\w-]+/)*[\\w-]+\\.(jpg|png|gif))('|\")",
            Pattern.CASE_INSENSITIVE);

    /** Matches an absolute {@code http://} or {@code https://} image reference; group 3 is the URL. */
    private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
            "(src|background)=('|\")(https?://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*"
                    + "(/[\\w-]+\\.(jpg|png|gif)))('|\")",
            Pattern.CASE_INSENSITIVE);

    /**
     * Downloads the resource at {@code httpUrl} into {@code ./pic/}, naming the file after the
     * last path segment of the URL. An existing file with the same name is overwritten.
     *
     * <p>This is best-effort: I/O failures are reported on {@code System.err} and the method
     * returns normally, so one broken image does not abort a whole crawl.
     *
     * @param httpUrl absolute URL of the image to download
     */
    public void getHtmlPicture(String httpUrl) {
        System.out.println("取网络图片");
        String fileName = fileNameOf(httpUrl);
        if (fileName.isEmpty()) {
            System.err.println("Cannot derive a file name from " + httpUrl);
            return;
        }
        File directory = new File(PICTURE_DIR);
        // The target directory may not exist yet on a first run.
        if (!directory.isDirectory() && !directory.mkdirs()) {
            System.err.println("Cannot create directory " + directory);
            return;
        }
        // The network stream is opened first so that no empty file is left behind
        // when the URL is malformed or unreachable.
        try (InputStream in = new BufferedInputStream(new URL(httpUrl).openStream());
                OutputStream out = new FileOutputStream(new File(directory, fileName))) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (IOException e) {
            System.err.println("Failed to download " + httpUrl + ": " + e);
            return;
        }
        System.out.println("图片获取成功");
    }

    /**
     * Reads the document at {@code httpUrl} and returns its text with the lines concatenated
     * (line terminators are dropped). The bytes are decoded as UTF-8.
     *
     * @param httpUrl URL of the page to read
     * @return the page content without line terminators
     * @throws IOException if the URL is malformed or the content cannot be read
     */
    public String getHtmlCode(String httpUrl) throws IOException {
        URL pageUrl = new URL(httpUrl);
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(pageUrl.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Fetches the page at {@code url}, prints its content, and downloads every image it
     * references. Each image URL is printed before it is downloaded.
     *
     * @param url URL of the page to scan
     * @throws IOException if the page itself cannot be read
     */
    public void get(String url) throws IOException {
        String content = this.getHtmlCode(url);
        System.out.println(content);
        for (String imageUrl : extractImageUrls(url, content)) {
            System.out.println(imageUrl);
            this.getHtmlPicture(imageUrl);
        }
    }

    /**
     * Finds the image references in {@code html} and returns them as absolute URLs: first the
     * relative references (resolved against {@code pageUrl}), then the absolute ones, each group
     * in document order.
     *
     * @param pageUrl URL of the page the HTML came from; the base for relative references
     * @param html the page markup to scan
     * @return absolute image URLs, possibly empty, never {@code null}
     * @throws MalformedURLException if {@code pageUrl} is not a valid URL
     */
    static List<String> extractImageUrls(String pageUrl, String html) throws MalformedURLException {
        List<String> imageUrls = new ArrayList<String>();
        URL base = new URL(pageUrl);

        Matcher relative = RELATIVE_IMAGE.matcher(html);
        while (relative.find()) {
            // Resolve with URL semantics rather than string concatenation so that
            // "/img/a.png" and "img/a.png" both work for a page such as ".../dir/page.html".
            imageUrls.add(new URL(base, relative.group(3)).toString());
        }

        Matcher absolute = ABSOLUTE_IMAGE.matcher(html);
        while (absolute.find()) {
            imageUrls.add(absolute.group(3));
        }
        return imageUrls;
    }

    /**
     * Returns the text after the last {@code /} of {@code httpUrl}, or the whole string when it
     * contains no slash. The result is empty when the URL ends with a slash.
     */
    static String fileNameOf(String httpUrl) {
        return httpUrl.substring(httpUrl.lastIndexOf('/') + 1);
    }

    /**
     * Downloads the images of the page given as the first argument, or of
     * {@code http://www.baidu.com/} when no argument is supplied.
     */
    public static void main(String[] args) throws IOException {
        String url = args.length > 0 ? args[0] : "http://www.baidu.com/";
        GetContentPicture gcp = new GetContentPicture();
        gcp.get(url);
    }
}
0 0
- 实现Java读取网页内容并下载网页中出现的图片
- java下载网页并读取内容
- java读取网页内容,并保存
- java下载读取网页内容方式
- Java读取网页内容并生成静态页面的简单实现
- java下载网页内容和网络图片
- java下载网页内容和网络图片
- java读取网页内容
- java下载并保存网页上的图片
- java读取URL指定的网页内容
- java 下载网页,图片
- android Webview读取网页里的所有图片,并实现图片集观看,图片缩放
- 网页中链接中图片的下载
- 【Java】读取网页中的内容
- java 读取网页内容代码
- php读取word\pdf等文档的内容,并将其保存到网页中
- JAVA读取文件夹中CSV的URL并下载图片
- java读取本地图片并在网页显示
- Git中经常使用的命令小结
- Gym 100796C Minimax Tree
- 413 Request Entity Too Large 的解决方法
- ubuntu 12.04安装telnet和ssh服务
- 51nod 1109 01组成的N的倍数 【dfs+剪枝+vector】
- 实现Java读取网页内容并下载网页中出现的图片
- IDEA 安装过程插件选择
- 关于装msdn网站纯净版win7正版授权问题(已解决)
- ACM竞赛需要注意的一些编程习惯
- 2017.02.18 这一周的面试
- centos7 用root用户重新设置普通用户密码
- FindBugs规则整理
- POJ 3427 Ecology tax G++
- BusyBox制作的文件系统-telnet服务器的配置和开启