java 后台去除HTML标签
来源:互联网 发布:如何利用微信推广淘宝 编辑:程序博客网 时间:2024/05/22 00:06
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class StringUtil {
private final static String REGXP_FOR_HTML= "</?[^>]+>"; // 去除P标签
private final static String REGXP_FOR_IMG_TAG="<img.*src\\s*=\\s*(.*?)[^>]*?>"; // 找出IMG标签
private final static String REGXP_FOR_IMG_TAG_SRC_ATTRIB="src\\s*=\\s*\"?(.*?)(\"|>|\\s+)"; // 找出IMG标签的SRC属性
private final static String REGXP_FOR_ldquo="\\&[a-zA-Z]{1,10};"; //去除“标签
//---------------------------------------------------------------------
public static List<String> getImgStr(String htmlStr){
String img="";
Pattern p_image;
Matcher m_image;
List<String> pics = new ArrayList<String>();
p_image = Pattern.compile
(REGXP_FOR_IMG_TAG,Pattern.CASE_INSENSITIVE);
m_image = p_image.matcher(htmlStr);
while(m_image.find()){
img = img + "," + m_image.group();
Matcher m = Pattern.compile(REGXP_FOR_IMG_TAG_SRC_ATTRIB).matcher(img);
while(m.find()){
String a=m.group(1);
if(a.indexOf("/")!=-1){
a=a.substring(a.lastIndexOf("/")+1,a.length());
}
pics.add(a);
}
}
return pics;
}
/**
* 过滤<img>标签
* @param str
* @return
*/
public static String fiterImgTag(String str) {
Pattern pattern = Pattern.compile(REGXP_FOR_IMG_TAG);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return sb.toString();
}
/**
* 基本功能:过滤指定标签
*/
public static String fiterHtmlTag(String str, String tag) {
String regxp = "<\\s*" + tag + "\\s+([^>]*)\\s*>";
Pattern pattern = Pattern.compile(regxp);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return sb.toString();
}
/**
* 过滤所以html标签
* @param str
* @return
* @return
*/
public static String filterHtml(String str) {
Pattern pattern = Pattern.compile(REGXP_FOR_HTML);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return ldquo(sb.toString());
}
/**
* 去除“标签
* @param str
* @return
*/
private static String ldquo(String str){
Pattern pattern = Pattern.compile(REGXP_FOR_ldquo);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return sb.toString();
}
/**
*
* 基本功能:替换指定的标签
* <p>
*
* @param str
* @param beforeTag
* 要替换的标签
* @param tagAttrib
* 要替换的标签属性值
* @param startTag
* 新标签开始标记
* @param endTag
* 新标签结束标记
* @return String
* @如:替换img标签的src属性值为[img]属性值[/img]
*/
public static String replaceHtmlTag(String str, String beforeTag,
String tagAttrib, String startTag, String endTag) {
String regxpForTag = "<\\s*" + beforeTag + "\\s+([^>]*)\\s*>";
String regxpForTagAttrib = tagAttrib + "=\"([^\"]+)\"";
Pattern patternForTag = Pattern.compile(regxpForTag);
Pattern patternForAttrib = Pattern.compile(regxpForTagAttrib);
Matcher matcherForTag = patternForTag.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result = matcherForTag.find();
while (result) {
StringBuffer sbreplace = new StringBuffer();
Matcher matcherForAttrib = patternForAttrib.matcher(matcherForTag
.group(1));
if (matcherForAttrib.find()) {
matcherForAttrib.appendReplacement(sbreplace, startTag
+ matcherForAttrib.group(1) + endTag);
}
matcherForTag.appendReplacement(sb, sbreplace.toString());
result = matcherForTag.find();
}
matcherForTag.appendTail(sb);
return sb.toString();
}
/**
* 切割Sting
* @param str 原字符串
* @param star 开始位置
* @param length 需要切割的长度
* @return
*/
public static String substring(String str,int star,int length){
if((star+length)>str.length()){
str=str.substring(star,str.length());
}else{
str=str.substring(star, star+length);
}
return str;
}
}
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class StringUtil {
private final static String REGXP_FOR_HTML= "</?[^>]+>"; // 去除P标签
private final static String REGXP_FOR_IMG_TAG="<img.*src\\s*=\\s*(.*?)[^>]*?>"; // 找出IMG标签
private final static String REGXP_FOR_IMG_TAG_SRC_ATTRIB="src\\s*=\\s*\"?(.*?)(\"|>|\\s+)"; // 找出IMG标签的SRC属性
private final static String REGXP_FOR_ldquo="\\&[a-zA-Z]{1,10};"; //去除“标签
//---------------------------------------------------------------------
public static List<String> getImgStr(String htmlStr){
String img="";
Pattern p_image;
Matcher m_image;
List<String> pics = new ArrayList<String>();
p_image = Pattern.compile
(REGXP_FOR_IMG_TAG,Pattern.CASE_INSENSITIVE);
m_image = p_image.matcher(htmlStr);
while(m_image.find()){
img = img + "," + m_image.group();
Matcher m = Pattern.compile(REGXP_FOR_IMG_TAG_SRC_ATTRIB).matcher(img);
while(m.find()){
String a=m.group(1);
if(a.indexOf("/")!=-1){
a=a.substring(a.lastIndexOf("/")+1,a.length());
}
pics.add(a);
}
}
return pics;
}
/**
* 过滤<img>标签
* @param str
* @return
*/
public static String fiterImgTag(String str) {
Pattern pattern = Pattern.compile(REGXP_FOR_IMG_TAG);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return sb.toString();
}
/**
* 基本功能:过滤指定标签
*/
public static String fiterHtmlTag(String str, String tag) {
String regxp = "<\\s*" + tag + "\\s+([^>]*)\\s*>";
Pattern pattern = Pattern.compile(regxp);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return sb.toString();
}
/**
* 过滤所以html标签
* @param str
* @return
* @return
*/
public static String filterHtml(String str) {
Pattern pattern = Pattern.compile(REGXP_FOR_HTML);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return ldquo(sb.toString());
}
/**
* 去除“标签
* @param str
* @return
*/
private static String ldquo(String str){
Pattern pattern = Pattern.compile(REGXP_FOR_ldquo);
Matcher matcher = pattern.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result1 = matcher.find();
while (result1) {
matcher.appendReplacement(sb, "");
result1 = matcher.find();
}
matcher.appendTail(sb);
return sb.toString();
}
/**
*
* 基本功能:替换指定的标签
* <p>
*
* @param str
* @param beforeTag
* 要替换的标签
* @param tagAttrib
* 要替换的标签属性值
* @param startTag
* 新标签开始标记
* @param endTag
* 新标签结束标记
* @return String
* @如:替换img标签的src属性值为[img]属性值[/img]
*/
public static String replaceHtmlTag(String str, String beforeTag,
String tagAttrib, String startTag, String endTag) {
String regxpForTag = "<\\s*" + beforeTag + "\\s+([^>]*)\\s*>";
String regxpForTagAttrib = tagAttrib + "=\"([^\"]+)\"";
Pattern patternForTag = Pattern.compile(regxpForTag);
Pattern patternForAttrib = Pattern.compile(regxpForTagAttrib);
Matcher matcherForTag = patternForTag.matcher(str);
StringBuffer sb = new StringBuffer();
boolean result = matcherForTag.find();
while (result) {
StringBuffer sbreplace = new StringBuffer();
Matcher matcherForAttrib = patternForAttrib.matcher(matcherForTag
.group(1));
if (matcherForAttrib.find()) {
matcherForAttrib.appendReplacement(sbreplace, startTag
+ matcherForAttrib.group(1) + endTag);
}
matcherForTag.appendReplacement(sb, sbreplace.toString());
result = matcherForTag.find();
}
matcherForTag.appendTail(sb);
return sb.toString();
}
/**
* 切割Sting
* @param str 原字符串
* @param star 开始位置
* @param length 需要切割的长度
* @return
*/
public static String substring(String str,int star,int length){
if((star+length)>str.length()){
str=str.substring(star,str.length());
}else{
str=str.substring(star, star+length);
}
return str;
}
}
0 0
- java 后台去除HTML标签
- java去除html标签
- java去除HTML标签
- Java String去除HTML标签
- java字符串去除html标签
- Java后台去除前台传递数据中的页面标签(HTML,Javascript,Style),获取文本内容!
- java去除html标签的正则表达
- java正则表达式去除html标签
- java用正则去除html标签
- java用正则去除html标签
- java用正则去除html标签
- java中利用正则去除html标签
- Java中正则表达式去除html标签
- 【Java】中正则表达式去除html标签
- java使用正则去除html标签
- js 去除 HTML 标签
- 去除html标签
- 去除html标签
- 关于问题ld:library not found for -lXXX 和ld:warning:directory not found option 以及duplicate symbol的解决办法
- swift学习
- 关于HashCode
- 什么是面向对象思维方法
- zookeeper
- java 后台去除HTML标签
- map与set容器的使用
- jstl fn标签
- 直接用URLWithString来拼接NSURL有时候得到的NSURL是为nil的
- Java类加载简介
- JavaScript:事件绑定与监听
- zip创建读取压缩文件
- 二步解决win7因没有权限无法删除文件夹问题
- 通过SoapClient调用天气预报