文章标题

来源:互联网 发布:淘宝免费转微信链接 编辑:程序博客网 时间:2024/06/03 22:40

Java 正则实现

  1. 去除 HTML 内容中的冗余部分（仅保留 <body></body> 之间的内容）
  2. 获取尖括号内的内容
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helpers for trimming an HTML document down to its body content
 * and for extracting the text enclosed in angle brackets.
 */
public class HTMLSpirit {

    /**
     * Matches everything from the opening {@code <html ...>} tag through the
     * opening {@code <body ...>} tag. {@code [^>]*} (rather than {@code [^>]+})
     * lets a bare {@code <html>} tag without attributes match as well.
     */
    private static final Pattern BEFORE_BODY =
            Pattern.compile("<html[^>]*>[\\s\\S]*?<body[^>]*?>", Pattern.CASE_INSENSITIVE);

    /** Matches everything from {@code </body>} through {@code </html>}. */
    private static final Pattern AFTER_BODY =
            Pattern.compile("<\\/body>[\\s\\S]*?<\\/html>", Pattern.CASE_INSENSITIVE);

    /** Matches one {@code <...>} group, capturing the text between the brackets. */
    private static final Pattern ANGLE_BRACKETS = Pattern.compile("<(.*?)>");

    /**
     * Keeps only the content between {@code <body>} and {@code </body>}.
     *
     * <p>The span from the opening html tag through the opening body tag and the
     * span from the closing body tag through the closing html tag are removed
     * (tag names are matched case-insensitively), then the result is trimmed.
     * Input that contains neither span is returned trimmed but otherwise unchanged.
     *
     * @param htmlStr the HTML text to strip; must not be {@code null}
     * @return the trimmed text that remains after both spans are removed
     * @throws NullPointerException if {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        String withoutHead = BEFORE_BODY.matcher(htmlStr).replaceAll("");
        String bodyOnly = AFTER_BODY.matcher(withoutHead).replaceAll("");
        return bodyOnly.trim();
    }

    /**
     * Extracts the text enclosed in angle brackets, e.g. the address part of
     * {@code "Name <user@example.com>"}.
     *
     * <p>Every {@code <...>} group found is appended to the result followed by
     * {@code ';'}, in order of appearance. If the input contains no such group
     * it is returned unchanged.
     *
     * @param mailAddrs text containing zero or more {@code <...>} groups; must not be {@code null}
     * @return the bracketed contents, each terminated by {@code ';'}, or
     *         {@code mailAddrs} itself when nothing is bracketed
     * @throws NullPointerException if {@code mailAddrs} is {@code null}
     */
    public static String getCleanAddress(String mailAddrs) {
        Matcher matcher = ANGLE_BRACKETS.matcher(mailAddrs);
        StringBuilder addr = new StringBuilder();
        boolean found = false;
        // Loop (not a single find) so that every bracketed entry is collected.
        while (matcher.find()) {
            found = true;
            addr.append(matcher.group(1)).append(';');
        }
        return found ? addr.toString() : mailAddrs;
    }
}
0 0
原创粉丝点击