自动补全的字典树搜索

来源:互联网 发布:c语言windows.h库函数 编辑:程序博客网 时间:2024/05/05 18:17
自动补全(auto-complete)的搜索可以用Trie(字典树)这种数据结构来实现。它的查询效率很高,因此在字符串查找、前缀匹配等场景中应用很广泛,不过这种高效率是以空间为代价的。字典树的原理是利用一组字符串构建一棵树,这棵树保存了各个串的公共前缀信息,因此可以降低查询操作的复杂度。下面以英文单词构建的字典树为例:这棵Trie树中每个结点包括26个孩子结点,因为总共有26个英文字母(假设单词都由小写字母组成),即每个结点存放N个(此处N=26)指向其孩子结点的指针。如果给出字符串"abc"、"ab"、"bd"、"dda",根据该字符串序列构建一棵Trie树,则构建的树如下:

    

Trie树的根结点不包含任何信息,第一个字符串为"abc",第一个字母为'a',因此根结点中数组next下标为'a'-97的值不为NULL,其他同理,构建的Trie树如图所示,红色结点表示在该处可以构成一个单词。很显然,如果要查找单词"abc"是否存在,查找长度则为O(len),len为要查找的字符串的长度。而若采用一般的逐个匹配查找,则查找长度为O(len*n),n为字符串的个数。显然基于Trie树的查找效率要高很多。但是却是以空间为代价的,比如图中每个结点所占的空间都为(26*4+1)Byte=105Byte,那么这棵Trie树所占的空间则为105*8Byte=840Byte,而普通的逐个查找所占空间只需(3+2+2+3)Byte=10Byte。

下面的源码用三叉搜索树(Ternary Search Tree,字典树的一种更省空间的变体,每个结点只有左、中、右三个孩子指针)实现自动补全:

using System;
using System.Collections.Generic;

namespace ConsoleApplication1
{
    /// <summary>
    /// A single node of a ternary search tree. Each node holds one character and
    /// three links: <c>m_left</c> for smaller characters at the same position,
    /// <c>m_right</c> for larger ones, and <c>m_center</c> for the next position.
    /// </summary>
    public class Node
    {
        internal char m_char;
        internal Node m_left, m_center, m_right;

        // True when a stored word ends exactly at this node.
        internal bool m_wordEnd;

        // Original (case-preserved) spelling of the word ending here; null otherwise.
        internal string m_cased = null;

        // Caller-supplied identifier of the word ending here.
        internal int m_termId;

        /// <summary>Creates a node for <paramref name="ch"/>.</summary>
        /// <param name="ch">Character stored in the node.</param>
        /// <param name="wordEnd">Whether a word terminates at this node.</param>
        public Node(char ch, bool wordEnd)
        {
            m_char = ch;
            m_wordEnd = wordEnd;
        }
    }

    /// <summary>
    /// Ternary search tree that stores words case-insensitively (keys are
    /// lower-cased with the invariant culture) and returns the original spelling
    /// of every stored word that starts with a given prefix.
    /// </summary>
    public class TernaryTree
    {
        public Node m_root = null;

        /// <summary>
        /// Recursive insertion of <paramref name="s"/> starting at character
        /// <paramref name="pos"/> into the subtree referenced by <paramref name="node"/>.
        /// </summary>
        private void Add(string s, int pos, ref Node node, string preservedCase, int termID)
        {
            if (node == null)
            {
                node = new Node(s[pos], false);
            }

            char ch = s[pos];
            if (ch < node.m_char)
            {
                // Smaller characters must go left; treating them as a match would
                // overwrite or hide words that were inserted earlier.
                Add(s, pos, ref node.m_left, preservedCase, termID);
            }
            else if (ch > node.m_char)
            {
                Add(s, pos, ref node.m_right, preservedCase, termID);
            }
            else if (pos + 1 == s.Length)
            {
                // Last character matched: mark the word end and remember its payload.
                node.m_wordEnd = true;
                node.m_cased = preservedCase;
                node.m_termId = termID;
            }
            else
            {
                Add(s, pos + 1, ref node.m_center, preservedCase, termID);
            }
        }

        /// <summary>
        /// Adds a word to the tree. Adding the same word again (ignoring case)
        /// replaces the stored spelling and term id.
        /// </summary>
        /// <param name="toAdd">Word to store; must be non-empty.</param>
        /// <param name="termID">Identifier associated with the word.</param>
        /// <exception cref="ArgumentException"><paramref name="toAdd"/> is null or empty.</exception>
        public void Add(string toAdd, int termID)
        {
            if (string.IsNullOrEmpty(toAdd))
            {
                throw new ArgumentException("Value must be a non-empty string.", "toAdd");
            }

            // Invariant lower-casing keeps keys stable regardless of the current culture.
            Add(toAdd.ToLowerInvariant(), 0, ref m_root, toAdd, termID);
        }

        /// <summary>
        /// Returns true when <paramref name="s"/> (ignoring case) was added as a whole word.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="s"/> is null or empty.</exception>
        private bool Contains(string s)
        {
            if (string.IsNullOrEmpty(s))
            {
                throw new ArgumentException("Value must be a non-empty string.", "s");
            }

            Node last = FindPrefixNode(s.ToLowerInvariant());
            return last != null && last.m_wordEnd;
        }

        /// <summary>
        /// Returns the original spelling of every stored word that starts with
        /// <paramref name="toMatch"/> (ignoring case). The word equal to the prefix,
        /// if any, comes first, followed by longer words in ordinal order of their
        /// lower-cased keys. Returns an empty list when nothing matches.
        /// </summary>
        /// <param name="toMatch">Prefix to complete; must be non-empty.</param>
        /// <exception cref="ArgumentException"><paramref name="toMatch"/> is null or empty.</exception>
        public List<String> AutoComplete(String toMatch)
        {
            if (string.IsNullOrEmpty(toMatch))
            {
                throw new ArgumentException("Value must be a non-empty string.", "toMatch");
            }

            var suggestions = new List<String>();

            Node prefixEnd = FindPrefixNode(toMatch.ToLowerInvariant());
            if (prefixEnd == null)
            {
                return suggestions;
            }

            if (prefixEnd.m_wordEnd)
            {
                suggestions.Add(prefixEnd.m_cased);
            }

            // Every longer word sharing the prefix lives under the center link.
            FindSuggestions(suggestions, prefixEnd.m_center);
            return suggestions;
        }

        /// <summary>
        /// Walks the tree along <paramref name="key"/> (already lower-cased, non-empty)
        /// and returns the node matching its last character, or null when the key
        /// is not a prefix of any stored word.
        /// </summary>
        private Node FindPrefixNode(string key)
        {
            int pos = 0;
            Node node = m_root;
            while (node != null)
            {
                char ch = key[pos];
                if (ch < node.m_char)
                {
                    node = node.m_left;
                }
                else if (ch > node.m_char)
                {
                    node = node.m_right;
                }
                else
                {
                    if (++pos == key.Length)
                    {
                        return node;
                    }
                    node = node.m_center;
                }
            }
            return null;
        }

        /// <summary>
        /// Appends every word stored in the subtree rooted at <paramref name="node"/>
        /// to <paramref name="suggestions"/>, visiting left, the node itself, center,
        /// then right so that results come out in ordinal key order.
        /// </summary>
        private void FindSuggestions(List<String> suggestions, Node node)
        {
            if (node == null)
            {
                return;
            }

            FindSuggestions(suggestions, node.m_left);
            if (node.m_wordEnd)
            {
                suggestions.Add(node.m_cased);
            }
            FindSuggestions(suggestions, node.m_center);
            FindSuggestions(suggestions, node.m_right);
        }
    }
}

测试程序:

using System;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console driver that fills a <see cref="TernaryTree"/> with four sample words
    /// and runs three prefix lookups against it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the sample tree, performs the lookups, then waits for a line on
        /// standard input. The lookup results are only kept in locals and are not
        /// printed.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            TernaryTree index = new TernaryTree();

            // Term ids are assigned 1..4 in insertion order.
            string[] sampleWords = { "ab", "abba", "abcd", "bcd" };
            for (int i = 0; i < sampleWords.Length; i++)
            {
                index.Add(sampleWords[i], i + 1);
            }

            var matchesForB = index.AutoComplete("b");
            var matchesForC = index.AutoComplete("c");
            var matchesForA = index.AutoComplete("a");

            Console.ReadLine();
        }
    }
}





0 0
原创粉丝点击