使用SgmlReader将HTML转换为合法的XML

来源:互联网 发布:光纤矩阵切换器 编辑:程序博客网 时间:2024/05/22 01:28

 public static string Convert(string html)
{

    
if (string.IsNullOrEmpty(html.Trim()))
    {

        
return string.Empty;
    }

    
using (SgmlReader reader = new SgmlReader())
    {

        reader.DocType 
= "HTML";
        reader.InputStream 
= new StringReader(html);
        
using (StringWriter stringWriter = new StringWriter())
        {

            
using (XmlTextWriter writer = new XmlTextWriter(stringWriter))
            {

                reader.WhitespaceHandling 
= WhitespaceHandling.None;
                writer.Formatting 
= Formatting.Indented;
                XmlDocument doc 
= new XmlDocument();
                doc.Load(reader);
                
if (doc.DocumentElement == null)
                {

                    
return string.Empty;
                }

                
else
              
{
                    doc.DocumentElement.WriteContentTo(writer);
                }

                writer.Close();
                
string xhtml = stringWriter.ToString();
                
return xhtml;
            }

        }

    }

}

其他相关资料:

- http://www.kaiyuan8.org/Article/KLNvXMQhmKeyQyVGCfBZ.aspx
- http://www.eggheadcafe.com/articles/20030317.asp

原创粉丝点击