java 去html 的工具类
来源:互联网 发布:激光内雕用什么软件 编辑:程序博客网 时间:2024/05/16 07:01
/**
* @author <a href="mailto:rory.cn@gmail.com">somebody</a>
* @since Jan 23, 2007 1:44:31 PM
* @version $Id MyblogUtil.java$
*/
public class HtmlUtil {

    /**
     * Truncates the visible text of {@code str} near a word boundary while
     * keeping its markup intact.
     *
     * <p>Based on code from the String taglib at Apache Jakarta:
     * http://cvs.apache.org/viewcvs/jakarta-taglibs/string/src/org/apache/taglibs/string/util/StringW.java?rev=1.16&content-type=text/vnd.viewcvs-markup
     * Copyright (c) 1999 The Apache Software Foundation. Author:
     * timster@mac.com
     *
     * <p>The visible text (as produced by {@link #removeHTML(String, boolean)}
     * without spacing) is cut at the last space found at or before
     * {@code upper}, provided that space is at or after {@code lower};
     * otherwise it is cut at exactly {@code upper} characters. When the input
     * contained markup, the result is the original string up to the cut point
     * (tags included), followed by {@code appendToEnd}, followed by every tag
     * that appeared after the cut point so that opened elements are closed.
     *
     * @param str         the text, possibly containing HTML; may be {@code null}
     * @param lower       minimum number of visible characters to keep;
     *                    negative values are treated as 0
     * @param upper       maximum number of visible characters to keep; raised
     *                    to {@code lower} when smaller
     * @param appendToEnd suffix added when truncation happens (e.g. "...")
     * @return {@code str} unchanged when its visible text already fits (or when
     *         it is {@code null}); otherwise the truncated string
     */
    public static String truncateNicely(String str, int lower, int upper,
            String appendToEnd) {
        if (str == null) {
            // Mirrors truncateText, which hands a null input straight back.
            return null;
        }
        String plain = removeHTML(str, false);
        boolean shorterThanOriginal = plain.length() < str.length();
        int min = Math.max(lower, 0);
        int max = Math.max(upper, min);
        if (plain.length() <= max) {
            return str;
        }
        String kept = cutAtWordBoundary(plain, min, max);
        if (!shorterThanOriginal) {
            // Nothing was stripped, so the plain text is the original text.
            return kept + appendToEnd;
        }
        // Translate "kept.length() visible characters" into an index in the
        // original string. Searching for the last word instead would fail when
        // a tag splits that word or when the word also occurs inside a tag.
        int cut = originalIndexAfterVisible(str, kept.length());
        String trailingTags = extractHTML(str.substring(cut));
        return str.substring(0, cut) + appendToEnd + trailingTags;
    }

    /**
     * Strips markup from {@code str} and truncates the remaining text near a
     * word boundary.
     *
     * <p>Note that when the stripped text already fits within {@code upper}
     * the ORIGINAL string (markup included) is returned unchanged; markup is
     * only dropped when truncation actually takes place.
     *
     * @param str         the text, possibly containing HTML; may be {@code null}
     * @param lower       minimum number of characters to keep; negative values
     *                    are treated as 0
     * @param upper       maximum number of characters to keep; raised to
     *                    {@code lower} when smaller
     * @param appendToEnd suffix added when truncation happens
     * @return {@code str} unchanged when it fits, otherwise the truncated plain
     *         text followed by {@code appendToEnd}
     */
    public static String truncateText(String str, int lower, int upper,
            String appendToEnd) {
        String plain = removeHTML(str, false);
        int min = Math.max(lower, 0);
        int max = Math.max(upper, min);
        if (plain.length() <= max) {
            return str;
        }
        return cutAtWordBoundary(plain, min, max) + appendToEnd;
    }

    /**
     * Removes HTML, defined as any text between the characters "&lt;" and
     * "&gt;". Tags are dropped without inserting a replacement space; this is
     * equivalent to {@code removeHTML(str, false)}.
     *
     * @param str the text to strip; may be {@code null}
     * @return the stripped text, or an empty string for {@code null}
     */
    public static String removeHTML(String str) {
        return removeHTML(str, false);
    }

    /**
     * Removes HTML, defined as any text between the characters "&lt;" and
     * "&gt;". Optionally puts a space in front of each removed tag.
     *
     * <p>A "&lt;" with no matching "&gt;" is not a tag: it and everything after
     * it is kept as text. The result is trimmed whenever the input contained
     * a "&lt;"; an input without any "&lt;" is returned as is.
     *
     * @param str      the text to strip; may be {@code null}
     * @param addSpace whether to append a space before each tag that does not
     *                 start at index 0
     * @return the stripped text, or an empty string for {@code null}
     */
    public static String removeHTML(String str, boolean addSpace) {
        if (str == null) {
            return "";
        }
        int open = str.indexOf('<');
        if (open == -1) {
            return str;
        }
        StringBuilder out = new StringBuilder(str.length());
        int cursor = 0;
        while (open > -1) {
            if (open > 0) {
                out.append(str, cursor, open);
                if (addSpace) {
                    out.append(' ');
                }
            }
            int close = str.indexOf('>', open);
            if (close == -1) {
                // Unclosed '<': keep the remainder verbatim.
                out.append(str, open, str.length());
                return out.toString().trim();
            }
            cursor = close + 1;
            open = str.indexOf('<', cursor);
        }
        // Text after the last closed tag.
        out.append(str, cursor, str.length());
        return out.toString().trim();
    }

    /**
     * Extracts (keeps) JUST the HTML tags from the string, concatenated in
     * order of appearance.
     *
     * @param str the text to scan; may be {@code null}
     * @return the tags found, or an empty string when there are none or when
     *         {@code str} is {@code null}
     */
    public static String extractHTML(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder tags = new StringBuilder();
        int open = str.indexOf('<');
        while (open > -1) {
            int close = str.indexOf('>', open);
            if (close == -1) {
                // An unclosed '<' is not a tag; nothing more to collect.
                break;
            }
            tags.append(str, open, close + 1);
            open = str.indexOf('<', close + 1);
        }
        return tags.toString();
    }

    /**
     * Cuts {@code text} at the last space at or before {@code upper} when that
     * space is at or after {@code lower}, otherwise at {@code upper}.
     * Requires {@code 0 <= lower <= upper < text.length()}.
     */
    private static String cutAtWordBoundary(String text, int lower, int upper) {
        int lastSpace = text.lastIndexOf(' ', upper);
        return text.substring(0, lastSpace >= lower ? lastSpace : upper);
    }

    /**
     * Returns the index in {@code str} just past the {@code visibleCount}-th
     * character of {@code removeHTML(str, false)}, using the same tag rules
     * and skipping the leading whitespace that {@code trim()} discards.
     */
    private static int originalIndexAfterVisible(String str, int visibleCount) {
        if (visibleCount <= 0) {
            return 0;
        }
        int length = str.length();
        int counted = 0;
        boolean inLeadingWhitespace = true;
        boolean tagsPossible = true;
        int i = 0;
        while (i < length) {
            char c = str.charAt(i);
            if (c == '<' && tagsPossible) {
                int close = str.indexOf('>', i);
                if (close > -1) {
                    i = close + 1;
                    continue;
                }
                // No '>' remains, so the rest of the string is plain text.
                tagsPossible = false;
            }
            i++;
            if (inLeadingWhitespace && c <= ' ') {
                continue;
            }
            inLeadingWhitespace = false;
            counted++;
            if (counted == visibleCount) {
                return i;
            }
        }
        return length;
    }

    /**
     * Empty entry point, retained so the class keeps its original public
     * surface.
     */
    public static void main(String[] arg) {
    }
}
/**
* @author <a href="mailto:rory.cn@gmail.com">somebody</a>
* @since Jan 23, 2007 1:44:31 PM
* @version $Id MyblogUtil.java$
*/
public class HtmlUtil {

    /**
     * Truncates the visible text of {@code str} near a word boundary while
     * keeping its markup intact.
     *
     * <p>Based on code from the String taglib at Apache Jakarta:
     * http://cvs.apache.org/viewcvs/jakarta-taglibs/string/src/org/apache/taglibs/string/util/StringW.java?rev=1.16&content-type=text/vnd.viewcvs-markup
     * Copyright (c) 1999 The Apache Software Foundation. Author:
     * timster@mac.com
     *
     * <p>The visible text (as produced by {@link #removeHTML(String, boolean)}
     * without spacing) is cut at the last space found at or before
     * {@code upper}, provided that space is at or after {@code lower};
     * otherwise it is cut at exactly {@code upper} characters. When the input
     * contained markup, the result is the original string up to the cut point
     * (tags included), followed by {@code appendToEnd}, followed by every tag
     * that appeared after the cut point so that opened elements are closed.
     *
     * @param str         the text, possibly containing HTML; may be {@code null}
     * @param lower       minimum number of visible characters to keep;
     *                    negative values are treated as 0
     * @param upper       maximum number of visible characters to keep; raised
     *                    to {@code lower} when smaller
     * @param appendToEnd suffix added when truncation happens (e.g. "...")
     * @return {@code str} unchanged when its visible text already fits (or when
     *         it is {@code null}); otherwise the truncated string
     */
    public static String truncateNicely(String str, int lower, int upper,
            String appendToEnd) {
        if (str == null) {
            // Mirrors truncateText, which hands a null input straight back.
            return null;
        }
        String plain = removeHTML(str, false);
        boolean shorterThanOriginal = plain.length() < str.length();
        int min = Math.max(lower, 0);
        int max = Math.max(upper, min);
        if (plain.length() <= max) {
            return str;
        }
        String kept = cutAtWordBoundary(plain, min, max);
        if (!shorterThanOriginal) {
            // Nothing was stripped, so the plain text is the original text.
            return kept + appendToEnd;
        }
        // Translate "kept.length() visible characters" into an index in the
        // original string. Searching for the last word instead would fail when
        // a tag splits that word or when the word also occurs inside a tag.
        int cut = originalIndexAfterVisible(str, kept.length());
        String trailingTags = extractHTML(str.substring(cut));
        return str.substring(0, cut) + appendToEnd + trailingTags;
    }

    /**
     * Strips markup from {@code str} and truncates the remaining text near a
     * word boundary.
     *
     * <p>Note that when the stripped text already fits within {@code upper}
     * the ORIGINAL string (markup included) is returned unchanged; markup is
     * only dropped when truncation actually takes place.
     *
     * @param str         the text, possibly containing HTML; may be {@code null}
     * @param lower       minimum number of characters to keep; negative values
     *                    are treated as 0
     * @param upper       maximum number of characters to keep; raised to
     *                    {@code lower} when smaller
     * @param appendToEnd suffix added when truncation happens
     * @return {@code str} unchanged when it fits, otherwise the truncated plain
     *         text followed by {@code appendToEnd}
     */
    public static String truncateText(String str, int lower, int upper,
            String appendToEnd) {
        String plain = removeHTML(str, false);
        int min = Math.max(lower, 0);
        int max = Math.max(upper, min);
        if (plain.length() <= max) {
            return str;
        }
        return cutAtWordBoundary(plain, min, max) + appendToEnd;
    }

    /**
     * Removes HTML, defined as any text between the characters "&lt;" and
     * "&gt;". Tags are dropped without inserting a replacement space; this is
     * equivalent to {@code removeHTML(str, false)}.
     *
     * @param str the text to strip; may be {@code null}
     * @return the stripped text, or an empty string for {@code null}
     */
    public static String removeHTML(String str) {
        return removeHTML(str, false);
    }

    /**
     * Removes HTML, defined as any text between the characters "&lt;" and
     * "&gt;". Optionally puts a space in front of each removed tag.
     *
     * <p>A "&lt;" with no matching "&gt;" is not a tag: it and everything after
     * it is kept as text. The result is trimmed whenever the input contained
     * a "&lt;"; an input without any "&lt;" is returned as is.
     *
     * @param str      the text to strip; may be {@code null}
     * @param addSpace whether to append a space before each tag that does not
     *                 start at index 0
     * @return the stripped text, or an empty string for {@code null}
     */
    public static String removeHTML(String str, boolean addSpace) {
        if (str == null) {
            return "";
        }
        int open = str.indexOf('<');
        if (open == -1) {
            return str;
        }
        StringBuilder out = new StringBuilder(str.length());
        int cursor = 0;
        while (open > -1) {
            if (open > 0) {
                out.append(str, cursor, open);
                if (addSpace) {
                    out.append(' ');
                }
            }
            int close = str.indexOf('>', open);
            if (close == -1) {
                // Unclosed '<': keep the remainder verbatim.
                out.append(str, open, str.length());
                return out.toString().trim();
            }
            cursor = close + 1;
            open = str.indexOf('<', cursor);
        }
        // Text after the last closed tag.
        out.append(str, cursor, str.length());
        return out.toString().trim();
    }

    /**
     * Extracts (keeps) JUST the HTML tags from the string, concatenated in
     * order of appearance.
     *
     * @param str the text to scan; may be {@code null}
     * @return the tags found, or an empty string when there are none or when
     *         {@code str} is {@code null}
     */
    public static String extractHTML(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder tags = new StringBuilder();
        int open = str.indexOf('<');
        while (open > -1) {
            int close = str.indexOf('>', open);
            if (close == -1) {
                // An unclosed '<' is not a tag; nothing more to collect.
                break;
            }
            tags.append(str, open, close + 1);
            open = str.indexOf('<', close + 1);
        }
        return tags.toString();
    }

    /**
     * Cuts {@code text} at the last space at or before {@code upper} when that
     * space is at or after {@code lower}, otherwise at {@code upper}.
     * Requires {@code 0 <= lower <= upper < text.length()}.
     */
    private static String cutAtWordBoundary(String text, int lower, int upper) {
        int lastSpace = text.lastIndexOf(' ', upper);
        return text.substring(0, lastSpace >= lower ? lastSpace : upper);
    }

    /**
     * Returns the index in {@code str} just past the {@code visibleCount}-th
     * character of {@code removeHTML(str, false)}, using the same tag rules
     * and skipping the leading whitespace that {@code trim()} discards.
     */
    private static int originalIndexAfterVisible(String str, int visibleCount) {
        if (visibleCount <= 0) {
            return 0;
        }
        int length = str.length();
        int counted = 0;
        boolean inLeadingWhitespace = true;
        boolean tagsPossible = true;
        int i = 0;
        while (i < length) {
            char c = str.charAt(i);
            if (c == '<' && tagsPossible) {
                int close = str.indexOf('>', i);
                if (close > -1) {
                    i = close + 1;
                    continue;
                }
                // No '>' remains, so the rest of the string is plain text.
                tagsPossible = false;
            }
            i++;
            if (inLeadingWhitespace && c <= ' ') {
                continue;
            }
            inLeadingWhitespace = false;
            counted++;
            if (counted == visibleCount) {
                return i;
            }
        }
        return length;
    }

    /**
     * Empty entry point, retained so the class keeps its original public
     * surface.
     */
    public static void main(String[] arg) {
    }
}
- java 去html 的工具类
- java简单的去HTML标签
- jsoup工具去解析html
- java去html标签
- 一个实用的java字符串工具类(截取,去尾,转码)
- Java里Html转义的工具
- java世界中几种html解析的工具
- Java 读取HTML页面源代码工具类
- java工具类mht转html格式文件
- java解析html工具
- Java实现Html转PDF 和 Java工具类之Apache的Commons Lang和BeanUtils
- Java去除掉HTML里面所有标签,
- html工具类收藏,自己封装的
- HTML相关的正则表达式工具类
- 简单的html解析工具类
- 贴html的工具
- html常用解析工具(java)
- 类的静态方法去作为自己的工具类(系统工具交互的功能需求)
- flexlib项目导入和MDIWindow拖动停靠源码修改
- 苹果配件ios软件的两种方案
- section ("section-name")
- GridView 自动增加序号
- photoshop菜鸟实用入门----选区
- java 去html 的工具类
- Android 源码编译环境搭建
- 深入文本编辑器Vim
- photoshop菜鸟实用入门(2)----常用的一些快捷操作
- 网页自动刷新的三种方法 .
- VB.NET 机房收费系统V1.0总结
- Android应用开发揭秘的第5个程序3_4_cycle修改版和高仿版的源码注释
- SQL Server 查询表的记录数(3种方法,推荐第一种)
- Photoshop菜鸟实用入门 --- 工具箱简介