去除HTML标记C#,JS

来源:互联网 发布:科学数据geodata 编辑:程序博客网 时间:2024/05/14 17:51

C#

        /// <summary>
        /// Patterns applied, in order, by <see cref="StripHTML"/>. They are built once and reused
        /// instead of being re-created on every call.
        /// </summary>
        private static readonly Regex[] StripHtmlPatterns =
        {
            // Singleline lets ".*?" cross line breaks, so script bodies spanning several lines are removed too.
            new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline),
            // Any opening, closing or self-closing tag, including quoted attribute values.
            new Regex(@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", RegexOptions.IgnoreCase),
            // A space followed by further whitespace.
            new Regex(@"([ ])[\s]+", RegexOptions.IgnoreCase),
            // Named / numeric character entities that are decoded to their literal character.
            new Regex(@"&(quot|#34);", RegexOptions.IgnoreCase),
            new Regex(@"&(amp|#38);", RegexOptions.IgnoreCase),
            new Regex(@"&(lt|#60);", RegexOptions.IgnoreCase),
            new Regex(@"&(gt|#62);", RegexOptions.IgnoreCase),
            new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
            new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
            new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
            new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
            new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),
            // Every other numeric entity is dropped.
            new Regex(@"&#(\d+);", RegexOptions.IgnoreCase),
            // Comment remnants.
            new Regex(@"-->", RegexOptions.IgnoreCase),
            new Regex(@"<!--.* ", RegexOptions.IgnoreCase)
        };

        /// <summary>
        /// Replacement text for the pattern at the same index in <see cref="StripHtmlPatterns"/>.
        /// </summary>
        private static readonly string[] StripHtmlReplacements =
        {
            "", "", "", "\"", "&", "<", ">", " ", "\u00A1", "\u00A2", "\u00A3", "\u00A9", "", " ", ""
        };

        /// <summary>
        /// Removes HTML markup from a string: script elements (including their content), tags and
        /// comment remnants are deleted, and a small set of character entities is decoded.
        /// </summary>
        /// <remarks>
        /// Characters produced by entity decoding (for example '&lt;' and '&gt;') are left in the
        /// result, so the result must be HTML-encoded again before it is embedded in markup.
        /// The previous implementation ended with three <c>string.Replace</c> calls whose return
        /// values were discarded; they never affected the output and have been removed.
        /// </remarks>
        /// <param name="strHtml">The HTML text to clean. May be null or empty.</param>
        /// <returns>
        /// The text with markup removed, or an empty string when <paramref name="strHtml"/> is
        /// null or empty.
        /// </returns>
        public static string StripHTML(string strHtml)
        {
            // Regex.Replace throws on null input; treat "nothing to strip" as an empty result.
            if (string.IsNullOrEmpty(strHtml))
            {
                return string.Empty;
            }

            string strOutput = strHtml;
            for (int i = 0; i < StripHtmlPatterns.Length; i++)
            {
                strOutput = StripHtmlPatterns[i].Replace(strOutput, StripHtmlReplacements[i]);
            }

            return strOutput;
        }