基于 C# 的 HTML 解析器

来源:互联网 发布:宝鸡时代网络 编辑:程序博客网 时间:2024/06/06 12:09

namespace ParseHTML

{

/// <summary>
/// Simple mutable holder for one tag: a name, a text string stored alongside
/// it, and a list of attributes.
/// </summary>
public class Tag
    {
        // Backing stores. The strings start out empty (never null) and the
        // attribute list starts out as an empty ArrayList.
        private string m_name = "";
        private string m_text = "";
        private ArrayList m_attributes = new ArrayList();

        /// <summary>
        /// Gets or sets the tag name. Defaults to an empty string.
        /// </summary>
        public string TagName
        {
            get { return m_name; }
            set { m_name = value; }
        }

        /// <summary>
        /// Gets or sets the text stored with this tag. Defaults to an empty
        /// string. NOTE(review): presumably the text that follows the tag in
        /// the document - confirm against the code that fills it in.
        /// </summary>
        public string FollowedText
        {
            get { return m_text; }
            set { m_text = value; }
        }

        /// <summary>
        /// Gets or sets the attribute list. The setter replaces the list
        /// reference as-is; no copy is made and null is not rejected.
        /// </summary>
        public ArrayList TagAttributes
        {
            get { return m_attributes; }
            set { m_attributes = value; }
        }
    }

    public class ParseHTML : Parse
    {
        public AttributeList GetTag()
        {
            AttributeList tag = new AttributeList();
            tag.Name = m_tag;

            foreach (Attribute x in List)
            {
                tag.Add((Attribute)x.Clone());
            }

            return tag;
        }

        public String BuildTag()
        {
            String buffer = "<";
            buffer += m_tag;
            int i = 0;
            while (this[i] != null)
            {// has attributes
                buffer += " ";
                if (this[i].Value == null)
                {
                    if (this[i].Delim != 0)
                        buffer += this[i].Delim;
                    buffer += this[i].Name;
                    if (this[i].Delim != 0)
                        buffer += this[i].Delim;
                }
                else
                {
                    buffer += this[i].Name;
                    if (this[i].Value != null)
                    {
                        buffer += "=";
                        if (this[i].Delim != 0)
                            buffer += this[i].Delim;
                        buffer += this[i].Value;
                        if (this[i].Delim != 0)
                            buffer += this[i].Delim;
                    }
                }
                i++;
            }
            buffer += ">";
            return buffer;
        }

        protected void ParseTag()
        {
            m_tag = "";
            Clear();

            // Is it a comment?
            if ((GetCurrentChar() == '!') &&
             (GetCurrentChar(1) == '-') &&
             (GetCurrentChar(2) == '-'))
            {
                while (!Eof())
                {
                    if ((GetCurrentChar() == '-') &&
                     (GetCurrentChar(1) == '-') &&
                     (GetCurrentChar(2) == '>'))
                        break;
                    if (GetCurrentChar() != '\r')
                        m_tag += GetCurrentChar();
                    Advance();
                }
                m_tag += "--";
                Advance();
                Advance();
                Advance();
                ParseDelim = (char)0;
                return;
            }
            while (!Eof())
            {
                if (IsWhiteSpace(GetCurrentChar()) || (GetCurrentChar() == '>'))
                    break;
                m_tag += GetCurrentChar();
                Advance();
            }

            EatWhiteSpace();

          while (GetCurrentChar() != '>')
            {
                ParseName = "";
                ParseValue = "";
                ParseDelim = (char)0;

                ParseAttributeName();

                if (GetCurrentChar() == '>')
                {
                    AddAttribute();
                    break;
                }

                // Get the value(if any)
                ParseAttributeValue();
                AddAttribute();
            }
            Advance();
        }
        public char Parse()
        {
            if (GetCurrentChar() == '<')
            {
                Advance();

                char ch = char.ToUpper(GetCurrentChar());
                if ((ch >= 'A') && (ch <= 'Z') || (ch == '!') || (ch == '/'))
                {
                    ParseTag();
                    return (char)0;
                }
                else return (AdvanceCurrentChar());
            }
            else return (AdvanceCurrentChar());
        }
    }

    public class Attribute : ICloneable
    {
        private string m_name;

         private string m_value;
        private char m_delim;

        public Attribute(string name, string value, char delim)
        {
            m_name = name;
            m_value = value;
            m_delim = delim;
        }

         public Attribute()
            : this("", "", (char)0)
        {
        }
         public Attribute(String name, String value)
            : this(name, value, (char)0)
        {
        }

         public char Delim
        {
            get
            {
                return m_delim;
            }

            set
            {
                m_delim = value;
            }
        }

        public string Name
        {
            get
            {
                return m_name;
            }

            set
            {
                m_name = value;
            }
        }

         public string Value
        {
            get
            {
                return m_value;
            }

            set
            {
                m_value = value;
            }
        }


        #region ICloneable Members
        public virtual object Clone()
        {
            return new Attribute(m_name, m_value, m_delim);
        }
        #endregion
    }

    public class AttributeList : Attribute
    {

        protected ArrayList m_list;

         public override Object Clone()
        {
            AttributeList rtn = new AttributeList();

            for (int i = 0; i < m_list.Count; i++)
                rtn.Add((Attribute)this[i].Clone());

            return rtn;
        }

          public AttributeList()
            : base("", "")
        {
            m_list = new ArrayList();
        }


          public void Add(Attribute a)
        {
            m_list.Add(a);
        }

        public void Clear()
        {
            m_list.Clear();
        }

         public bool IsEmpty()
        {
            return (m_list.Count <= 0);
        }

         public void Set(string name, string value)
        {
            if (name == null)
                return;
            if (value == null)
                value = "";

            Attribute a = this[name];

            if (a == null)
            {
                a = new Attribute(name, value);
                Add(a);
            }
            else
                a.Value = value;
        }

        public int Count
        {
            get
            {
                return m_list.Count;
            }
        }

原创粉丝点击