ASP.NET 如何去掉字符串中的 HTML 标记

来源:互联网 发布:99网络用语是什么意思 编辑:程序博客网 时间:2024/05/16 12:02
 //   <summary>   
  
///    去除HTML标记   
  
///   </summary>   
  
///   <param    name="NoHTML">包括HTML的源码   </param>   
  
///   <returns>已经去除后的文字</returns>   
  public   static   string    NoHTML(string    Htmlstring)   
  
{   
  
//删除脚本   
   Htmlstring   =    Regex.Replace(Htmlstring,@"<script[^>]*?>.*?</script>","",RegexOptions.IgnoreCase);   
  
//删除HTML   
   Htmlstring   =    Regex.Replace(Htmlstring,@"<(.[^>]*)>","",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"([/r/n])[/s]+","",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"-->","",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"<!--.*","",RegexOptions.IgnoreCase);   
    
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(quot|#34);","/"",RegexOptions.IgnoreCase);   
   Htmlstring   =    Regex.Replace(Htmlstring,@"&(amp|#38);","&",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(lt|#60);","<",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(gt|#62);",">",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(nbsp|#160);","   ",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(iexcl|#161);","/xa1",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(cent|#162);","/xa2",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(pound|#163);","/xa3",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,@"&(copy|#169);","/xa9",RegexOptions.IgnoreCase);   
   Htmlstring   
=    Regex.Replace(Htmlstring,   @"&#(/d+);","",RegexOptions.IgnoreCase);   
    
   Htmlstring.Replace(
"<","");   
   Htmlstring.Replace(
">","");   
   Htmlstring.Replace(
"/r/n","");   
   Htmlstring
=HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();   
    
  
return    Htmlstring;   
   }
原创粉丝点击