ASP.NET去除HTML标签并截取指定长度字符串

来源：互联网发布：科密考勤机数据库下载编辑：程序博客网时间：2024/05/23 19:39

当我们使用编辑器存储内容时，保存到数据库的内容可能含有HTML标签。将其查询出来展示在页面上时，有时需要对字符串进行截取，这时应该先对其进行HTML解码，再去除HTML标签，最后再截取指定长度的字符串。

/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Processing order: script blocks and tags are removed, line breaks are dropped, a handful of
/// character entities are decoded or removed, all spaces and a fixed set of punctuation
/// characters (&lt; &gt; * - ? , / ;) are deleted, and the result is HTML-encoded.
/// The aggressive removal of spaces and punctuation is intentional legacy behavior and is kept.
/// </remarks>
/// <param name="htmlStr">The HTML fragment to clean; may be null.</param>
/// <returns>The cleaned, HTML-encoded text, or an empty string when <paramref name="htmlStr"/> is null.</returns>
public static string NoHTML(string htmlStr)
{
    if (htmlStr == null)
    {
        return "";
    }

    // Remove script blocks. Singleline lets '.' cross line breaks so multi-line scripts are removed too.
    htmlStr = Regex.Replace(htmlStr, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove remaining tags.
    htmlStr = Regex.Replace(htmlStr, @"<(.[^>]*)>", "");

    // Remove a line break together with the whitespace that follows it.
    htmlStr = Regex.Replace(htmlStr, @"([\r\n])[\s]+", "");

    // Remove leftover comment delimiters (an unterminated comment is dropped up to the end of its line).
    htmlStr = htmlStr.Replace("-->", "");
    htmlStr = Regex.Replace(htmlStr, @"<!--.*", "");

    // Decode or drop common character entities.
    htmlStr = Regex.Replace(htmlStr, @"&(quot|#34);", "", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(nbsp|#160);", "", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(iexcl|#161);", "\u00a1", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(cent|#162);", "\u00a2", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(pound|#163);", "\u00a3", RegexOptions.IgnoreCase);
    htmlStr = Regex.Replace(htmlStr, @"&(copy|#169);", "\u00a9", RegexOptions.IgnoreCase);

    // Drop any other numeric character reference.
    htmlStr = Regex.Replace(htmlStr, @"&#(\d+);", "");

    htmlStr = Regex.Replace(htmlStr, "xp_cmdshell", "", RegexOptions.IgnoreCase);

    // Remove spaces and any remaining carriage returns / line feeds.
    htmlStr = htmlStr.Replace(" ", "");
    htmlStr = htmlStr.Replace("\r", "");
    htmlStr = htmlStr.Replace("\n", "");

    // Remove special characters.
    htmlStr = htmlStr.Replace("<", "");
    htmlStr = htmlStr.Replace(">", "");
    htmlStr = htmlStr.Replace("*", "");
    htmlStr = htmlStr.Replace("-", "");
    htmlStr = htmlStr.Replace("?", "");
    htmlStr = htmlStr.Replace(",", "");
    htmlStr = htmlStr.Replace("/", "");
    htmlStr = htmlStr.Replace(";", "");

    // WebUtility does not need an active HttpContext, so this also works outside a web request.
    return System.Net.WebUtility.HtmlEncode(htmlStr).Trim();
}

/// <summary>
/// Returns the leading part of a mixed Chinese/English string that fits within a display width.
/// </summary>
/// <remarks>
/// Characters in the range U+4E00..U+9FA5 count as width 2, every other character as width 1,
/// so to keep 20 Chinese characters pass a <paramref name="length"/> of 40.
/// The returned text (excluding the ".." suffix) never exceeds <paramref name="length"/>.
/// </remarks>
/// <param name="stringToSub">The string to truncate; may be null.</param>
/// <param name="length">The maximum width of the kept text.</param>
/// <returns>
/// The original string when it fits; otherwise the longest fitting prefix followed by "..".
/// An empty string when <paramref name="stringToSub"/> is null or empty.
/// </returns>
public static string GetFirstString(string stringToSub, int length)
{
    if (string.IsNullOrEmpty(stringToSub))
    {
        return "";
    }

    StringBuilder kept = new StringBuilder();
    int width = 0;

    foreach (char ch in stringToSub)
    {
        int charWidth = (ch >= '\u4e00' && ch <= '\u9fa5') ? 2 : 1;

        // Check before appending so the kept text never overshoots the requested width.
        if (width + charWidth > length)
        {
            return kept.ToString() + "..";
        }

        kept.Append(ch);
        width += charWidth;
    }

    return kept.ToString();
}

要显示变量SNote的前20个字，调用方法如下：

GetFirstString(NoHTML(Server.HtmlDecode(SNote)), 40)

阅读全文

0 0