JAVA过滤html标签的方法

来源：互联网　发布：安卓手机windows模拟器　编辑：程序博客网　时间：2024/05/16 08:59

　　例子

　　正则

　　re="<(\\s)*script[^>]*>([\\s\\S](?!";

　　复制代码

　　代码

　　public class FilterHTMLTags {

　　public static String HtmlText(String inputString) {

　　String htmlStr = inputString; //含html标签的字符串

　　String textStr ="";

　　java.util.regex.Pattern p_script;

　　java.util.regex.Matcher m_script;

　　java.util.regex.Pattern p_style;

　　java.util.regex.Matcher m_style;

　　java.util.regex.Pattern p_html;

　　java.util.regex.Matcher m_html;

　　try {

　　String regEx_script = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"; //定义script的正则表达式{或]*?>[\\s\\S]*?<\\/script> }

　　String regEx_style = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>"; //定义style的正则表达式{或]*?>[\\s\\S]*?<\\/style> }

　　String regEx_html = "<[^>]+>"; //定义HTML标签的正则表达式

　　p_script = Pattern.compile(regEx_script,Pattern.CASE_INSENSITIVE);

　　m_script = p_script.matcher(htmlStr);

　　htmlStr = m_script.replaceAll(""); //过滤script标签

　　p_style = Pattern.compile(regEx_style,Pattern.CASE_INSENSITIVE);

　　m_style = p_style.matcher(htmlStr);

　　htmlStr = m_style.replaceAll(""); //过滤style标签

　　p_html = Pattern.compile(regEx_html,Pattern.CASE_INSENSITIVE);

　　m_html = p_html.matcher(htmlStr);

　　htmlStr = m_html.replaceAll(""); //过滤html标签

　　/* 空格 —— */

　　// p_html = Pattern.compile("\\ ", Pattern.CASE_INSENSITIVE);

　　m_html = p_html.matcher(htmlStr);

　　htmlStr = htmlStr.replaceAll(" "," ");

　　textStr = htmlStr;

　　}catch(Exception e) {

　　}

　　return textStr;

　　}

　　复制代码

　　过滤URL网址，邮箱地址，html标签，JS代码，各种转义字符

　　public static final String Upset = "　";

　　public static String killTags(String news) {

　　String s = news.replaceAll("amp;", "").replaceAll("<","<").replaceAll(">", ">");

　　Pattern pattern = Pattern.compile("<(span)?\\sstyle.*?style>|(span)?\\sstyle=.*?>", Pattern.DOTALL);

　　Matcher matcher = pattern.matcher(s);

　　String str = matcher.replaceAll("");

　　Pattern pattern2 = Pattern.compile("(<[^>]+>)",Pattern.DOTALL);

　　Matcher matcher2 = pattern2.matcher(str);

　　String strhttp = matcher2.replaceAll(" ");

　　String regEx = "(((http|https|ftp)(\\s)*((\\:)|：))(\\s)*(//|//)(\\s)*)?"

　　+ "([\\sa-zA-Z0-9(\\.|.)(\\s)*\\-]+((\\:)|(:)[\\sa-zA-Z0-9(\\.|.)&%\\$\\-]+)*@(\\s)*)?"

　　+ "("

　　+ "(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])"

　　+ "(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"

　　+ "(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])"

　　+ "|([\\sa-zA-Z0-9\\-]+(\\.|.)(\\s)*)*[\\sa-zA-Z0-9\\-]+(\\.|.)(\\s)*[\\sa-zA-Z]*"

　　+ ")"

　　+ "((\\s)*(\\:)|(：)(\\s)*[0-9]+)?"

　　+ "(/(\\s)*[^/][\\sa-zA-Z0-9\\.\\,\\?\\'\\\\/\\+&%\\$\\=~_\\-@]*)*";

　　Pattern p1 = Pattern.compile(regEx,Pattern.DOTALL);

　　Matcher matchhttp = p1.matcher(strhttp);

　　String strnew = matchhttp.replaceAll("").replaceAll("(if[\\s]*\\(|else|elseif[\\s]*\\().*?;", " ");

　　Pattern patterncomma = Pattern.compile("(&[^;]+;)",Pattern.DOTALL);

　　Matcher matchercomma = patterncomma.matcher(strnew);

　　String strout = matchercomma.replaceAll(" ");

　　String answer = strout.replaceAll("[\\pP‘’“”]", " ")

　　.replaceAll("\r", " ").replaceAll("\n", " ")

　　.replaceAll("\\s", " ").replaceAll(Upset, "");

　　return answer;

　　}

　　复制代码

0 0