Trie树的c++实现

来源:互联网 发布:手机网站建站之星源码 编辑:程序博客网 时间:2024/06/05 16:03

关于Trie树的理论,各位大神已经讲得很多了,可以参见Trie树的简单介绍,以及它和其他一些树结构的使用场景。
Trie树又称“字典树”或“前缀树”,常用于小规模的词频统计,是典型的以空间换时间的数据结构。下面是我的一种C++实现,记录一下。
trie.h头文件

#ifndef _TRIE_STRUCTURE_H
#define _TRIE_STRUCTURE_H

#include <string>
#include <cstdlib>
#include <ctype.h>
#include <fstream>
#include <iostream>
#include <sstream>

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept only because existing users of this header rely on the
// unqualified names (string, cout, ...).
using namespace std;

// Number of children per node: one slot per letter 'a'..'z'.
#define MAX_BRANCH  26

// Role of a node inside the trie.
typedef enum NODE_TYPE
{
    ROOT_NODE = -1,   // the (letter-less) root
    NORMAL_NODE = 0,  // interior node, no stored word ends here
    LEAF_NODE = 1,    // at least one stored word ends here
}NODE_TYPE_e;

// One trie node.
//
// Invariant maintained by DictTree for every non-root node:
//   freq == (occurrences of the word ending at this node)
//         + (sum of freq over all children)
// i.e. freq is the number of stored word occurrences whose path passes
// through (or ends at) this node.
typedef struct TrieNode
{
    int freq;                    // pass-through count, see invariant above
    int type;                    // one of NODE_TYPE
    TrieNode* next[MAX_BRANCH];  // child for each letter, NULL if absent

    TrieNode(int n, int tp): freq(n), type(tp)
    {
        for(int i = 0; i < MAX_BRANCH; i++)
            next[i] = NULL;
    }
}TrieNode, *pTrieNode;

// A case-insensitive trie over the letters a-z that counts word occurrences.
//
// Every word handed to the public methods is normalised first: it is cut at
// the first character that is not an ASCII letter and the remainder is
// lower-cased ("You," -> "you", "don't" -> "don").
//
// The tree owns its nodes and releases them with `delete`, so the class is
// deliberately non-copyable.
class DictTree
{
public:
    // Creates an empty tree.
    DictTree();

    // Adopts an existing tree. Ownership of every node reachable from `rt`
    // passes to this object; the nodes must have been allocated with `new`.
    // A NULL `rt` yields an empty tree.
    DictTree(pTrieNode &rt);

    ~DictTree();

    // Inserts one occurrence of `word`.
    // Returns false (and stores nothing) if `word` has no leading letters.
    bool InsertWord(const string &word);

    // Removes one occurrence of `word` and prunes nodes no word uses any more.
    // Returns true if an occurrence was removed, false if the word was not
    // stored (in which case the tree is left untouched).
    bool DeleteWord(const string &word);

    // Returns how many times `word` itself was inserted (0 if absent).
    // With a non-zero `isprefix`, returns instead how many stored word
    // occurrences start with `word`.
    int SearchWord(const string &word, int isprefix=false) const;

    // Returns how many stored word occurrences start with `prefix`.
    int PrefixCount(const string &prefix) const;

    // Opens the file named by `article`, splits its content on whitespace
    // and inserts every token with InsertWord. A file that cannot be opened
    // is silently ignored.
    void SplitWord(const string &article);

private:
    // Non-copyable: a member-wise copy would delete the same nodes twice.
    // (Declared but not defined.)
    DictTree(const DictTree&);
    DictTree& operator=(const DictTree&);

    // Root node; never NULL after construction.
    pTrieNode root;

    // Deletes the whole subtree rooted at `tree` and resets `tree` to NULL.
    void DestroyTree(pTrieNode &tree);

    // Returns the lower-cased leading run of ASCII letters of `word`.
    static string NormalizeKey(const string &word);

    // Number of stored occurrences that end exactly at `node`.
    static int WordCount(const TrieNode* node);

    // Follows an already normalised `key` from the root.
    // Returns the node reached, or NULL if the path does not exist.
    const TrieNode* FindNode(const string &key) const;
};

// All out-of-class definitions are `inline` so that the header can be
// included from more than one translation unit.

inline DictTree::DictTree(): root(new TrieNode(0, ROOT_NODE))
{
    // `new` throws std::bad_alloc on failure, so no NULL check is needed.
}

inline DictTree::DictTree(pTrieNode &rt): root(rt)
{
    if(!root)
        root = new TrieNode(0, ROOT_NODE);
    // Despite the text, this is not a copy constructor: it adopts `rt`.
    cout << "Copy Constructor DictTree()" << endl;
}

inline DictTree::~DictTree()
{
    DestroyTree(root);
}

inline string DictTree::NormalizeKey(const string &word)
{
    string key;
    key.reserve(word.length());
    for(string::size_type i = 0; i < word.length(); i++)
    {
        char c = word[i];
        // Plain range checks instead of tolower()/isalpha(): those are
        // undefined for negative char values and, in some locales, accept
        // letters that would index outside next[MAX_BRANCH].
        if(c >= 'A' && c <= 'Z')
            c = static_cast<char>(c - 'A' + 'a');
        else if(c < 'a' || c > 'z')
            break;
        key += c;
    }
    return key;
}

inline int DictTree::WordCount(const TrieNode* node)
{
    // By the TrieNode invariant, whatever is not accounted for by the
    // children ends at this node.
    int ending = node->freq;
    for(int i = 0; i < MAX_BRANCH; i++)
    {
        if(node->next[i])
            ending -= node->next[i]->freq;
    }
    return ending;
}

inline const TrieNode* DictTree::FindNode(const string &key) const
{
    const TrieNode* p = root;
    for(string::size_type i = 0; i < key.length(); i++)
    {
        p = p->next[key[i] - 'a'];
        if(!p)
            return NULL;
    }
    return p;
}

inline bool DictTree::InsertWord(const string &ex_word)
{
    const string key = NormalizeKey(ex_word);
    if(key.empty())
        return false;

    pTrieNode p = root;
    for(string::size_type i = 0; i < key.length(); i++)
    {
        const int idx = key[i] - 'a';
        if(p->next[idx] == NULL)
            p->next[idx] = new TrieNode(0, NORMAL_NODE);
        p = p->next[idx];
        p->freq++;
    }
    p->type = LEAF_NODE;
    return true;
}

inline bool DictTree::DeleteWord(const string &ex_word)
{
    const string key = NormalizeKey(ex_word);
    if(key.empty())
        return false;

    // Verify first, so that a failed delete never touches any counter.
    const TrieNode* target = FindNode(key);
    if(target == NULL || target->type != LEAF_NODE || WordCount(target) <= 0)
        return false;

    pTrieNode p = root;
    for(string::size_type i = 0; i < key.length(); i++)
    {
        pTrieNode &child = p->next[key[i] - 'a'];
        if(--child->freq <= 0)
        {
            // No stored word uses this node any more, hence none uses its
            // descendants either: drop the subtree and clear the parent link.
            DestroyTree(child);
            return true;
        }
        p = child;
    }

    // The node survives because longer words still pass through it; it only
    // stops being a word end if that was the last occurrence.
    if(WordCount(p) <= 0)
        p->type = NORMAL_NODE;
    return true;
}

inline int DictTree::SearchWord(const string &ex_word, int isprefix) const
{
    const TrieNode* node = FindNode(NormalizeKey(ex_word));
    if(node == NULL)
        return 0;
    if(isprefix)
        return node->freq;
    if(node->type != LEAF_NODE)
        return 0;
    return WordCount(node);
}

inline int DictTree::PrefixCount(const string &prefix) const
{
    return SearchWord(prefix, true);
}

inline void DictTree::SplitWord(const string &article)
{
    ifstream essay(article.c_str());
    if(!essay)
        return;

    string word;
    while(essay >> word)
        InsertWord(word);
    // `essay` is closed by its destructor.
}

inline void DictTree::DestroyTree(pTrieNode &tree)
{
    if(!tree)
        return;
    for(int i = 0; i < MAX_BRANCH; i++)
    {
        DestroyTree(tree->next[i]);
    }
    // Nodes are created with `new`, so they must be released with `delete`.
    delete tree;
    tree = NULL;
}

#endif//_TRIE_STRUCTURE_H

trieTest.cpp文件,写了一个小demo,没有经过充分测试。

#include "trie.h"

// Text file whose whitespace-separated words are loaded into the trie.
#define WORD_FILE "article.txt"

// Number of entries in `chart`.
// Deliberately not called `size`: trie.h has a file-scope
// `using namespace std;`, and since C++17 an unqualified `size` would be
// ambiguous with std::size, which breaks the build.
const int CHART_SIZE = 6;

// Words whose frequencies the demo prints.
string chart[CHART_SIZE] = {
    "programming",
    "you",
    "potential",
    "Google",
    "major",
    "professional",
};

// Demo: loads WORD_FILE into a DictTree, prints the frequency of each word
// in `chart`, deletes chart[1], prints the frequencies again, and finally
// prints the prefix count for "pro".
int main(int argc, char const *argv[])
{
    // Command-line arguments are not used.
    (void)argc;
    (void)argv;

    DictTree container;
    string file = WORD_FILE;
    container.SplitWord(file);

    cout << "start Search: " << endl;
    for(int i = 0; i < CHART_SIZE; i++)
    {
        cout << chart[i] << " --> " << container.SearchWord(chart[i]) << endl;
    }

    container.DeleteWord(chart[1]);

    cout << "after DeleteWord: " << endl;
    for(int i = 0; i < CHART_SIZE; i++)
    {
        cout << chart[i] << " --> " << container.SearchWord(chart[i]) << endl;
    }

    string prefix = "pro";
    cout << "prefix of 'pro' --> " << container.PrefixCount(prefix) << endl;
    return 0;
}

单词文件中的内容如下:
programming you programming potential Google goolge google major potential programming You Major professional

下面是打印:
Trie树的打印

0 0
原创粉丝点击