JAVA过滤html标签的方法

来源:互联网 发布:安卓手机windows模拟器 编辑:程序博客网 时间:2024/05/16 08:59

  例子

  正则

  re="<(\\s)*script[^>]*>([\\s\\S](?!<script))*?</script>";

  复制代码

  代码

  /**
   * Strips markup from an HTML fragment, leaving only its text content.
   *
   * <p>The regular expressions are compiled once and shared; compiled patterns are immutable,
   * so the class holds no mutable state. Regex types are written fully qualified so the class
   * does not depend on an import line elsewhere in the file.
   */
  public class FilterHTMLTags {

    /** A whole script element, opening tag through closing tag, whitespace-tolerant. */
    private static final java.util.regex.Pattern SCRIPT_ELEMENT =
        java.util.regex.Pattern.compile(
            "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
            java.util.regex.Pattern.CASE_INSENSITIVE);

    /** A whole style element, opening tag through closing tag, whitespace-tolerant. */
    private static final java.util.regex.Pattern STYLE_ELEMENT =
        java.util.regex.Pattern.compile(
            "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
            java.util.regex.Pattern.CASE_INSENSITIVE);

    /** Any single tag: a '<', one or more non-'>' characters, then '>'. */
    private static final java.util.regex.Pattern ANY_TAG =
        java.util.regex.Pattern.compile("<[^>]+>", java.util.regex.Pattern.CASE_INSENSITIVE);

    /**
     * Removes script elements, style elements and all remaining tags from the input, then turns
     * non-breaking spaces into ordinary spaces.
     *
     * <p>Script and style elements are removed together with their content, and they are removed
     * before the generic tag pass so that their bodies do not survive as text.
     *
     * @param inputString text that may contain HTML markup; may be {@code null}
     * @return the input without markup, or an empty string when {@code inputString} is
     *     {@code null}
     */
    public static String HtmlText(String inputString) {
      // The original reached "" for null only through a swallowed exception; make it explicit.
      if (inputString == null) {
        return "";
      }

      String text = SCRIPT_ELEMENT.matcher(inputString).replaceAll("");
      text = STYLE_ELEMENT.matcher(text).replaceAll("");
      text = ANY_TAG.matcher(text).replaceAll("");

      // The published listing replaced " " with " " here, which does nothing; the neighbouring
      // "space" comment indicates the non-breaking-space entity was meant. Both the entity and
      // the character it decodes to (U+00A0) become a plain space.
      return text.replace("&nbsp;", " ").replace('\u00A0', ' ');
    }

  }

  复制代码

  过滤URL网址,邮箱地址,html标签,JS代码,各种转义字符

  /**
   * Text that {@code killTags} deletes from its result as its final step.
   *
   * <p>NOTE(review): the value appears here as a single ordinary space, which would make that
   * step delete every space left in the text. It may originally have been a different
   * whitespace character (for example the full-width space U+3000) that was altered when the
   * listing was published - confirm against the original source.
   */
  public static final String Upset = " ";

  // Inline style markup: "<", optional "span", whitespace, "style" ... "style>", or an
  // optional "span", whitespace, "style=" ... up to the next ">".
  private static final Pattern KILL_TAGS_STYLE =
      Pattern.compile("<(span)?\\sstyle.*?style>|(span)?\\sstyle=.*?>", Pattern.DOTALL);

  // Any single tag: "<", one or more non-">" characters, ">".
  private static final Pattern KILL_TAGS_TAG = Pattern.compile("(<[^>]+>)", Pattern.DOTALL);

  // URL / e-mail / host matcher, kept character-for-character from the original listing.
  // Its parts are, in order: optional scheme (http, https, ftp), optional "user[:password]@",
  // a dotted IPv4 address or a dotted host name, optional port, optional path segments.
  // NOTE(review): several alternations name the same ASCII character twice - (\\.|.),
  // ((\\:)|:) and (//|//). The second alternative of each was presumably a full-width
  // variant that was folded to ASCII when the listing was published - confirm. As written,
  // the bare "." in (\\.|.) matches ANY character (line breaks included, because of DOTALL),
  // so this pattern removes far more than URLs. It is deliberately left unchanged here
  // because the intended characters cannot be established from this file alone.
  private static final Pattern KILL_TAGS_URL =
      Pattern.compile(
          "(((http|https|ftp)(\\s)*((\\:)|:))(\\s)*(//|//)(\\s)*)?"
              + "([\\sa-zA-Z0-9(\\.|.)(\\s)*\\-]+((\\:)|(:)[\\sa-zA-Z0-9(\\.|.)&%\\$\\-]+)*@(\\s)*)?"
              + "("
              + "(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])"
              + "(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
              + "(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
              + "(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])"
              + "|([\\sa-zA-Z0-9\\-]+(\\.|.)(\\s)*)*[\\sa-zA-Z0-9\\-]+(\\.|.)(\\s)*[\\sa-zA-Z]*"
              + ")"
              + "((\\s)*(\\:)|(:)(\\s)*[0-9]+)?"
              + "(/(\\s)*[^/][\\sa-zA-Z0-9\\.\\,\\?\\'\\\\/\\+&%\\$\\=~_\\-@]*)*",
          Pattern.DOTALL);

  // Script-like residue: from "if (", "else" or "elseif (" up to the next ";" on the same line.
  private static final Pattern KILL_TAGS_CONDITIONAL =
      Pattern.compile("(if[\\s]*\\(|else|elseif[\\s]*\\().*?;");

  // Character entities: "&", one or more non-";" characters, ";".
  private static final Pattern KILL_TAGS_ENTITY = Pattern.compile("(&[^;]+;)", Pattern.DOTALL);

  // Unicode punctuation plus the four curly quotation marks.
  private static final Pattern KILL_TAGS_PUNCTUATION = Pattern.compile("[\\pP‘’“”]");

  // Any whitespace character, which covers carriage returns and line feeds.
  private static final Pattern KILL_TAGS_WHITESPACE = Pattern.compile("\\s");

  /**
   * Reduces a piece of markup-laden text to bare words by removing, in order: escaped and
   * literal tags, inline style markup, URL-like text, script-like conditionals, character
   * entities, punctuation and whitespace variants.
   *
   * <p>Steps that replace with a space (rather than nothing) keep neighbouring words apart.
   * The last step deletes every occurrence of {@code Upset} from the result.
   *
   * @param news the raw text; may be {@code null}
   * @return the cleaned text, or an empty string when {@code news} is {@code null}
   */
  public static String killTags(String news) {
    // Previously a null argument failed with a NullPointerException on the first call.
    if (news == null) {
      return "";
    }

    // Dropping "amp;" turns a double-escaped "&amp;lt;" into "&lt;", which is then decoded.
    // The published listing replaced "<" with "<" and ">" with ">", which does nothing;
    // decoding the two entities is the only reading under which these calls have an effect.
    String text = news.replace("amp;", "").replace("&lt;", "<").replace("&gt;", ">");

    text = KILL_TAGS_STYLE.matcher(text).replaceAll("");
    text = KILL_TAGS_TAG.matcher(text).replaceAll(" ");
    text = KILL_TAGS_URL.matcher(text).replaceAll("");
    text = KILL_TAGS_CONDITIONAL.matcher(text).replaceAll(" ");
    text = KILL_TAGS_ENTITY.matcher(text).replaceAll(" ");
    text = KILL_TAGS_PUNCTUATION.matcher(text).replaceAll(" ");
    // Separate "\r" and "\n" passes were redundant: the whitespace class already covers both.
    text = KILL_TAGS_WHITESPACE.matcher(text).replaceAll(" ");

    // Upset is plain text, not a pattern, so remove it literally.
    return text.replace(Upset, "");
  }

  复制代码

0 0
原创粉丝点击