C#中删除HTML标签

来源:互联网 发布:gta5淘宝怎么搜索 编辑:程序博客网 时间:2024/06/13 22:05

  #region 删除HTML标签
        public static string ClearHTMLTags(string HTML)
        {
            string[] Regexs ={
                        @"<script[^>]*?>.*?</script>",
                        @"<(///s*)?!?((/w+:)?/w+)(/w+(/s*=?/s*(([""'])(//[""'tbnr]|[^/7])*?/7|/w+)|.{0})|/s)*?(///s*)?>",
                        @"([/r/n])[/s]+",
                        @"&(quot|#34);",
                        @"&(amp|#38);",
                        @"&(lt|#60);",
                        @"&(gt|#62);",
                        @"&(nbsp|#160);",
                        @"&(iexcl|#161);",
                        @"&(cent|#162);",
                        @"&(pound|#163);",
                        @"&(copy|#169);",
                        @"&#(/d+);",
                        @"-->",
                        @"<!--.*/n"
        };

            string[] Replaces ={
                            "",
                            "",
                            "",
                            "/"",
                            "&",
                            "<",
                            ">",
                            " ",
                            "/xa1", //chr(161),
                            "/xa2", //chr(162),
                            "/xa3", //chr(163),
                            "/xa9", //chr(169),
                            "",
                            "/r/n",
                            ""
        };

            string s = HTML;
            for (int i = 0; i < Regexs.Length; i++)
            {
                s = new Regex(Regexs[i], RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(s, Replaces[i]);
            }
            s.Replace("<", "");
            s.Replace(">", "");
            s.Replace("/r/n", "");
            return s;
        }


        #endregion

原创粉丝点击