去掉表格HTML标记后的文本

来源:互联网 发布:matlab优化工具箱求解 编辑:程序博客网 时间:2024/04/28 03:32

 

    /// <summary>
    /// Removes table-related HTML markup from a string while keeping the text
    /// that was between the tags. Script and style elements are removed
    /// together with their content.
    /// </summary>
    /// <param name="Htmlstring">HTML fragment to clean. May be null or empty.</param>
    /// <returns>
    /// The input with script/style elements and table, thead, tbody, tfoot, tr,
    /// td, th and blockquote tags removed. A null or empty input is returned unchanged.
    /// </returns>
    public string WipeOffTableHTML(string Htmlstring)
    {
        // Nothing to strip; also avoids the ArgumentNullException Regex.Replace throws on null.
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return Htmlstring;
        }

        // Singleline lets '.' cross line breaks, since script and style bodies usually span lines.
        const RegexOptions elementOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

        // Remove whole <script>...</script> elements, including their content.
        Htmlstring = Regex.Replace(Htmlstring, @"<script\b[^>]*>.*?</script\s*>", "", elementOptions);

        // Remove whole <style>...</style> elements, including their content.
        Htmlstring = Regex.Replace(Htmlstring, @"<style\b[^>]*>.*?</style\s*>", "", elementOptions);

        // Remove opening and closing table-layout tags but keep the text they wrap.
        // The \b stops a short name such as "tr" from matching unrelated tags like <track>.
        Htmlstring = Regex.Replace(
            Htmlstring,
            @"</?(?:table|thead|tbody|tfoot|tr|td|th|blockquote)\b[^>]*>",
            "",
            RegexOptions.IgnoreCase);

        return Htmlstring;
    }
 
如果需要删除更多的标记，在方法中添加相应的正则替换即可。