去除网站中HTML标签的正则表达式

来源:互联网 发布:小黑屋写作软件怎么样 编辑:程序博客网 时间:2024/06/05 07:22

// Strips a fixed list of HTML tags, character references and script-related
// fragments from the text form of `a`. The cleaned text ends up in `str`.
string str = a.ToString();

// Patterns are applied one after another, in this exact order, each over the
// result of the previous pass. Order matters: removing one fragment can bring
// the pieces of another tag together, which a later pass then removes.
//
// Tag names are matched as prefixes (there is no word boundary after the name),
// so for example the `b` pattern also removes <br> and <body>, and the `i`
// pattern also removes <input> and <iframe>. This is kept as in the original
// sequence because tightening it would let more markup through.
string[] htmlCleanupPatterns =
{
    @"</?span[^>]*>",

    // Numeric character references only (decimal or hex). The previous
    // pattern `&#[^>]*;` was greedy and swallowed ordinary text lying
    // between an "&#" and any later ";".
    @"&#x?[0-9a-f]+;",
    @"</?marquee[^>]*>",
    @"</?object[^>]*>",
    @"</?param[^>]*>",
    @"</?embed[^>]*>",
    @"</?table[^>]*>",

    // NOTE(review): the original pattern here was a single whitespace
    // character, which deletes every such character from the text. It looks
    // like an `&nbsp;` entity that was rendered when the snippet was
    // published - confirm. This pass removes the entity and the U+00A0
    // character instead of ordinary spaces.
    @"&nbsp;|\u00A0",
    @"</?tr[^>]*>",
    @"</?th[^>]*>",
    @"</?p[^>]*>",
    @"</?a[^>]*>",
    @"</?img[^>]*>",

    @"</?tbody[^>]*>",
    @"</?li[^>]*>",
    // span and th are run a second time, exactly as in the original order;
    // the later pass also sees tags that only took shape after the removals
    // in between.
    @"</?span[^>]*>",
    @"</?div[^>]*>",
    @"</?th[^>]*>",
    @"</?td[^>]*>",

    // Remove complete script elements together with their content first, so
    // the script source does not survive as visible text; the next pattern
    // then removes any unpaired script tags that are left.
    @"<script[^>]*>[\s\S]*?</script[^>]*>",
    @"</?script[^>]*>",
    @"(javascript|jscript|vbscript|vbs):",

    // Event-handler attribute prefixes. The word boundary keeps ordinary
    // words such as "monkey" or "donkey" from being cut apart.
    @"\bon(mouse|exit|error|click|key)",

    // The previous patterns started with `<//?`, which demands a literal
    // "</" and therefore never matched opening tags. These forms match
    // <xml ...>, </xml ...>, <?xml ...> and namespaced tags such as <o:p>
    // and </o:p>, and still match everything the old forms matched.
    @"<[/?]{0,2}xml[^>]*>",
    @"</{0,2}[a-z]+:[^>]*>",
    @"</?font[^>]*>",
    @"</?b[^>]*>",
    @"</?u[^>]*>",
    @"</?i[^>]*>",
    @"</?strong[^>]*>",
};

foreach (string htmlCleanupPattern in htmlCleanupPatterns)
{
    // Every match is deleted outright; matching ignores letter case.
    str = Regex.Replace(str, htmlCleanupPattern, "", RegexOptions.IgnoreCase);
}