Java 正则获取 HTML 中的图片链接

来源:互联网 发布:数据分析就业 编辑:程序博客网 时间:2024/06/08 01:17
/**
 * Extracts the {@code src} values of {@code <img>} tags from an HTML fragment.
 *
 * <p>This is a lightweight tokenizer built on regular expressions, not a full HTML
 * parser: comments, CDATA sections and script bodies are not treated specially.
 */
public class TestReplaceAll {

    /**
     * Matches the opening {@code <img} of an image tag, case-insensitively. The lookahead
     * requires whitespace, {@code /} or {@code >} next so that longer tag names
     * (for example {@code <imgfoo>}) are not mistaken for image tags.
     */
    private static final Pattern IMG_OPEN =
            Pattern.compile("<img(?=[\\s/>])", Pattern.CASE_INSENSITIVE);

    /**
     * Matches one attribute: a name (group 1) optionally followed by {@code =} and a
     * double-quoted (group 2), single-quoted (group 3) or unquoted (group 4) value.
     */
    private static final Pattern ATTRIBUTE = Pattern.compile(
            "([^\\s\"'<>/=]+)(?:\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+)))?");

    /**
     * Demo entry point: prints the image links found in a sample {@code <img>} tag.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "<img alt=\"\" src=\"http://img.pppcar.com/image/getImage/57440fef4d5d3925cf12f228/820/0/80\"/>";
        Set<String> pics = getImgStr(str);
        System.out.println(pics);
    }

    /**
     * Collects the {@code src} attribute value of every closed {@code <img ...>} tag.
     *
     * <p>Tag and attribute names are matched case-insensitively. Double-quoted,
     * single-quoted and unquoted values are supported, and the surrounding quotes are
     * never part of the result. Attributes whose name merely contains {@code src}
     * (such as {@code data-src}) and {@code src} attributes of other tags are ignored.
     * Empty {@code src} values are skipped.
     *
     * @param htmlStr the HTML text to scan; may be {@code null}
     * @return a mutable set of the distinct image links found, in no particular order;
     *         empty (never {@code null}) when the input is {@code null} or has no images
     */
    public static Set<String> getImgStr(String htmlStr) {
        Set<String> pics = new HashSet<>();
        if (htmlStr == null || htmlStr.isEmpty()) {
            return pics;
        }

        Matcher imgOpen = IMG_OPEN.matcher(htmlStr);
        int from = 0;
        // Resume the search where the previous tag ended, so that text inside a quoted
        // attribute value (e.g. alt="<img src=...>") is never scanned as a new tag.
        while (from < htmlStr.length() && imgOpen.find(from)) {
            from = scanImgTag(htmlStr, imgOpen.end(), pics);
        }
        return pics;
    }

    /**
     * Walks the attributes of a single image tag and records its first valued
     * {@code src} attribute once the closing {@code >} is reached.
     *
     * @param html the full HTML text
     * @param pos  index just after the tag's opening {@code <img}
     * @param pics the set that receives the link
     * @return the index just after the tag's closing {@code >}, or {@code html.length()}
     *         if the tag is never closed (in which case nothing is recorded)
     */
    private static int scanImgTag(String html, int pos, Set<String> pics) {
        Matcher attribute = ATTRIBUTE.matcher(html);
        int length = html.length();
        String src = null;

        while (pos < length) {
            if (html.charAt(pos) == '>') {
                if (src != null && !src.isEmpty()) {
                    pics.add(src);
                }
                return pos + 1;
            }

            attribute.region(pos, length);
            if (attribute.lookingAt()) {
                // Only the first src attribute that carries a value counts.
                if (src == null && "src".equalsIgnoreCase(attribute.group(1))) {
                    src = attributeValue(attribute);
                }
                pos = attribute.end();
            } else {
                // Whitespace, '/', or a stray quote or '=': skip one character.
                pos++;
            }
        }
        return length;
    }

    /**
     * Returns the value captured by an {@link #ATTRIBUTE} match, whichever quoting
     * style was used, or {@code null} when the attribute has no value.
     */
    private static String attributeValue(Matcher attribute) {
        for (int group = 2; group <= 4; group++) {
            String value = attribute.group(group);
            if (value != null) {
                return value;
            }
        }
        return null;
    }
}