C# Trie树工具类

来源:互联网 发布:verilog调用模块 数组 编辑:程序博客网 时间:2024/06/04 18:43

写了个简单的类,用来检索关键词和查询前缀。


举例:和谐关键词用。

//string[] keys = { "和谐", "社会", "你我共建" };
//BuildTree bt = new BuildTree(keys);
//string testStr = "和谐,和(干扰)谐,你#我#共#建。";
//this.textBox1.Text += "原文:" + testStr;
//this.textBox1.Text += "\r\n结果:" + bt.Replace(testStr);
//this.textBox1.Text += "\r\n模糊匹配结果:" + bt.Replace(testStr, 4);

还有很多功能自己调试吧。。


附:

/// <summary>
/// A single node of the keyword trie. Children are keyed by the UTF-16 code
/// unit (as an <c>int</c>) of the next character.
/// </summary>
public class TireTree
{
    /// <summary>Child nodes, keyed by the code unit of the next character.</summary>
    public Dictionary<int, TireTree> subNode = new Dictionary<int, TireTree>();

    /// <summary>True when the path from the root to this node spells a complete keyword.</summary>
    public bool isTerminate = false;

    /// <summary>
    /// Looks up the child stored under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Code unit of the character to follow.</param>
    /// <returns>The child node, or <c>null</c> when no such child exists.</returns>
    public TireTree Search(int key)
    {
        // Single hash lookup instead of ContainsKey followed by the indexer.
        TireTree child;
        return subNode.TryGetValue(key, out child) ? child : null;
    }

    /// <summary>
    /// Creates a new child under <paramref name="key"/> and returns it.
    /// </summary>
    /// <param name="key">Code unit of the character the child represents.</param>
    /// <param name="isTerminate">Whether the new child ends a keyword.</param>
    /// <returns>The newly created child node.</returns>
    /// <remarks>
    /// Uses <c>Dictionary.Add</c>, so adding a key that already exists throws;
    /// callers are expected to check with <see cref="Search"/> first.
    /// </remarks>
    public TireTree Add(int key, bool isTerminate)
    {
        var child = new TireTree();
        child.isTerminate = isTerminate;
        subNode.Add(key, child);
        return child;
    }
}

/// <summary>
/// Builds a trie from a keyword list and offers exact and fuzzy keyword
/// search, masking (replace with '*'), containment tests and prefix completion.
/// </summary>
public class BuildTree
{
    TireTree innerRoot;

    /// <summary>
    /// Creates the helper and builds its trie from <paramref name="keys"/>.
    /// </summary>
    /// <param name="keys">Keywords to index.</param>
    public BuildTree(string[] keys)
    {
        innerRoot = Build(keys);
    }

    /// <summary>
    /// Builds a fresh trie from <paramref name="keys"/>, makes it the tree used
    /// by this instance, and returns its root.
    /// </summary>
    /// <param name="keys">Keywords to index. Null or empty entries are ignored.</param>
    /// <returns>The root node of the new trie.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="keys"/> is null.</exception>
    public TireTree Build(string[] keys)
    {
        if (keys == null)
        {
            throw new System.ArgumentNullException("keys");
        }

        TireTree root = new TireTree();
        foreach (string key in keys)
        {
            if (string.IsNullOrEmpty(key))
            {
                continue;
            }

            // Walk down from the root, creating missing nodes along the way.
            TireTree node = root;
            for (int i = 0; i < key.Length; i++)
            {
                int code = key[i];
                TireTree next = node.Search(code);
                if (next == null)
                {
                    next = node.Add(code, false);
                }
                node = next;
            }

            // The node reached by the last character ends the keyword.
            node.isTerminate = true;
        }

        innerRoot = root;
        return root;
    }

    #region Search

    /// <summary>
    /// Finds every exact occurrence of every keyword in <paramref name="strText"/>,
    /// including overlapping occurrences.
    /// </summary>
    /// <param name="strText">Text to scan.</param>
    /// <returns>Matches ordered by start index, then by length.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="strText"/> is null.</exception>
    public WordInText[] Search(string strText)
    {
        if (strText == null)
        {
            throw new System.ArgumentNullException("strText");
        }

        return FindExact(strText, false).ToArray();
    }

    /// <summary>
    /// Finds keywords in <paramref name="strText"/> while tolerating interfering
    /// characters between the keyword's characters.
    /// </summary>
    /// <param name="strText">Text to scan.</param>
    /// <param name="wordLength">
    /// Maximum number of non-matching characters that may be skipped inside one
    /// keyword occurrence. Zero or a negative value allows no skipping.
    /// </param>
    /// <returns>
    /// Matches in scan order; each span covers the keyword characters and the
    /// skipped characters between them.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="strText"/> is null.</exception>
    public WordInText[] Search(string strText, int wordLength)
    {
        if (strText == null)
        {
            throw new System.ArgumentNullException("strText");
        }

        return FindFuzzy(strText, wordLength, false).ToArray();
    }

    /// <summary>
    /// Returns the text of every exact keyword occurrence in <paramref name="strText"/>.
    /// </summary>
    /// <param name="strText">Text to scan.</param>
    /// <returns>The matched keywords, in the same order as <see cref="Search(string)"/>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="strText"/> is null.</exception>
    public string[] SearchWord(string strText)
    {
        if (strText == null)
        {
            throw new System.ArgumentNullException("strText");
        }

        List<WordInText> matches = FindExact(strText, false);
        string[] words = new string[matches.Count];
        for (int i = 0; i < matches.Count; i++)
        {
            words[i] = strText.Substring(matches[i].Start, matches[i].Length);
        }
        return words;
    }

    // Exact matcher: tries every start position so that a failed partial match
    // can never hide a keyword that begins inside it.
    private List<WordInText> FindExact(string strText, bool stopAtFirst)
    {
        var matches = new List<WordInText>();
        for (int start = 0; start < strText.Length; start++)
        {
            TireTree node = innerRoot;
            for (int i = start; i < strText.Length; i++)
            {
                node = node.Search(strText[i]);
                if (node == null)
                {
                    break;
                }

                if (node.isTerminate)
                {
                    matches.Add(new WordInText() { Start = start, Length = i - start + 1 });
                    if (stopAtFirst)
                    {
                        return matches;
                    }
                }
            }
        }
        return matches;
    }

    // Fuzzy matcher: a single left-to-right pass. Once a keyword's first
    // character is seen, up to wordLength non-matching characters may be
    // skipped before the attempt is abandoned.
    private List<WordInText> FindFuzzy(string strText, int wordLength, bool stopAtFirst)
    {
        var matches = new List<WordInText>();
        TireTree node = innerRoot;
        bool matching = false;      // currently inside a candidate occurrence
        bool pastTerminal = false;  // a keyword end was reached: no more skipping
        int skipped = 0;            // interfering characters skipped so far
        int start = 0;              // index where the candidate began
        int length = 0;             // characters consumed after the first one

        for (int i = 0; i < strText.Length; i++)
        {
            TireTree next = node.Search(strText[i]);

            if (!matching)
            {
                if (next == null)
                {
                    continue;
                }

                // The first matching character opens a candidate occurrence.
                matching = true;
                node = next;
                start = i;
                if (next.isTerminate)
                {
                    // Single-character keyword.
                    matches.Add(new WordInText() { Start = start, Length = 1 });
                    if (stopAtFirst)
                    {
                        return matches;
                    }
                    pastTerminal = true;
                }
                continue;
            }

            length += 1;
            if (next != null)
            {
                if (next.isTerminate)
                {
                    matches.Add(new WordInText() { Start = start, Length = length + 1 });
                    if (stopAtFirst)
                    {
                        return matches;
                    }
                    pastTerminal = true;
                }
                node = next;
            }
            else if (!pastTerminal && skipped < wordLength)
            {
                // Tolerable interference: skip this character and keep going.
                skipped += 1;
            }
            else
            {
                // Too much interference, or the keyword already ended: drop the
                // candidate and re-examine this character from the root.
                matching = false;
                pastTerminal = false;
                node = innerRoot;
                skipped = 0;
                length = 0;
                i -= 1;
            }
        }
        return matches;
    }

    #endregion

    #region Replace

    /// <summary>
    /// Masks every exact keyword occurrence in <paramref name="strText"/> with '*'.
    /// </summary>
    /// <param name="strText">Text to mask.</param>
    /// <returns>A copy of the text with matched characters replaced by '*'.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="strText"/> is null.</exception>
    public string Replace(string strText)
    {
        var result = Search(strText);
        return Replace(result, strText);
    }

    /// <summary>
    /// Masks every fuzzy keyword occurrence in <paramref name="strText"/> with '*',
    /// including the interfering characters inside each occurrence.
    /// </summary>
    /// <param name="strText">Text to mask.</param>
    /// <param name="wordLength">Maximum number of skipped characters per occurrence.</param>
    /// <returns>A copy of the text with matched spans replaced by '*'.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="strText"/> is null.</exception>
    public string Replace(string strText, int wordLength)
    {
        var result = Search(strText, wordLength);
        return Replace(result, strText);
    }

    // Overwrites each matched span with '*' characters.
    private string Replace(WordInText[] result, string strText)
    {
        char[] buffer = strText.ToCharArray();
        foreach (var item in result)
        {
            int end = item.Start + item.Length;
            for (int i = item.Start; i < end; i++)
            {
                buffer[i] = '*';
            }
        }
        return new string(buffer);
    }

    #endregion

    #region Contains

    /// <summary>
    /// Tests whether <paramref name="strText"/> contains any keyword, tolerating
    /// interfering characters.
    /// </summary>
    /// <param name="strText">Text to scan.</param>
    /// <param name="wordLength">Maximum number of skipped characters per occurrence.</param>
    /// <returns>True when at least one fuzzy match exists.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="strText"/> is null.</exception>
    public bool Contains(string strText, int wordLength)
    {
        if (strText == null)
        {
            throw new System.ArgumentNullException("strText");
        }

        return FindFuzzy(strText, wordLength, true).Count > 0;
    }

    /// <summary>
    /// Tests whether <paramref name="strText"/> contains any keyword exactly.
    /// </summary>
    /// <param name="strText">Text to scan.</param>
    /// <returns>True when at least one keyword occurs in the text.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="strText"/> is null.</exception>
    public bool Contains(string strText)
    {
        if (strText == null)
        {
            throw new System.ArgumentNullException("strText");
        }

        return FindExact(strText, true).Count > 0;
    }

    #endregion

    #region Children

    /// <summary>
    /// Returns up to <paramref name="count"/> keywords that start with
    /// <paramref name="prefix"/> and are strictly longer than it.
    /// </summary>
    /// <param name="prefix">Prefix to complete; an empty prefix lists all keywords.</param>
    /// <param name="count">Maximum number of completions to return.</param>
    /// <returns>
    /// The completions in depth-first order; an empty array when the prefix is
    /// not present in the trie. Fewer than <paramref name="count"/> items are
    /// returned when fewer exist.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="prefix"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    public string[] GetChild(string prefix, int count)
    {
        if (prefix == null)
        {
            throw new System.ArgumentNullException("prefix");
        }
        if (count < 0)
        {
            throw new System.ArgumentOutOfRangeException("count");
        }

        List<string> result = new List<string>();

        // Locate the node that represents the prefix.
        TireTree node = innerRoot;
        for (int i = 0; i < prefix.Length; i++)
        {
            node = node.Search(prefix[i]);
            if (node == null)
            {
                // Unknown prefix: nothing to complete.
                return result.ToArray();
            }
        }

        FindSub(node, result, prefix, count);
        return result.ToArray();
    }

    // Depth-first collection of keywords below rootNode, stopping at limit.
    private void FindSub(TireTree rootNode, List<string> result, string tempStr, int limit)
    {
        foreach (var item in rootNode.subNode)
        {
            if (result.Count >= limit)
            {
                return;
            }

            string word = tempStr + (char)(item.Key);
            if (item.Value.isTerminate)
            {
                result.Add(word);
            }
            FindSub(item.Value, result, word, limit);
        }
    }

    #endregion
}

/// <summary>
/// Location of a keyword occurrence inside a scanned text.
/// </summary>
public class WordInText
{
    /// <summary>Zero-based index of the first matched character.</summary>
    public int Start;

    /// <summary>Number of characters covered by the match.</summary>
    public int Length;
}


0 0