trie后缀树字符串索引

来源:互联网 发布:update sql 编辑:程序博客网 时间:2024/05/21 08:56

Trie树简介:

Trie树又叫字典树,是一种多叉单词查找树,其每个节点都是一个字母,建立好树后,对一单词的查找可迅速完成,查找深度为该单词的长度。

Trie树的建立:

Trie树首先从已有的词库中读取单词来建立树,树的深度为最长单词的长度加一,多出的那一层为根节点,根节点不包含字符数据,只有指向各子树的指针。

优点:

可利用字符串的公共前缀减少无谓的字符比较,从而缩短查询时间。例如,可以用作快速查找一篇文章中是否有某个单词出现。

 

 

// Builds a suffix trie over one lowercase word read from a file, then answers
// interactive queries: is a given string a substring of that word and, if so,
// at which index does it first occur.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;

enum
{
    kChildSlots = 32,   // size of the child table (kept from the original layout)
    kAlphabetSize = 26  // only 'a'..'z' are indexed; slots 26..31 stay unused
};

typedef struct stTrieNode
{
    bool bLeaf;       // set when the node was created for the last character of a suffix
    bool bMidLeaf;    // intended for implicit leaves inside the tree; never set in this file
    int firstAppear;  // start index (in the text) of the suffix that created this node
    struct stTrieNode* toNext[kChildSlots];  // child per letter, indexed by (ch - 'a')
} stTrieNode, *LPstTrieNode;

// Creates a zeroed child of `root` in slot `loc` and records `firstAppear` on it.
// `len` is the number of characters still to insert including this one; when it
// is 1 the new node is flagged as a leaf. `szNodeInfo` is accepted for interface
// compatibility but is not stored.
// Returns the new child, or NULL if `root` is NULL, `loc` is outside the child
// table, or the allocation fails (in which case the slot is left untouched).
LPstTrieNode builtNode(LPstTrieNode root, int loc, char szNodeInfo, int len, int firstAppear)
{
    (void)szNodeInfo;

    if (root == NULL || loc < 0 || loc >= kChildSlots)
    {
        return NULL;
    }

    // calloc gives the same all-zero node the original malloc + memset produced.
    LPstTrieNode child = static_cast<LPstTrieNode>(calloc(1, sizeof(stTrieNode)));
    if (child == NULL)
    {
        return NULL;
    }

    child->firstAppear = firstAppear;
    child->bLeaf = (len == 1);
    root->toNext[loc] = child;
    return child;
}

// Inserts the first `len` characters of `strInfo` below `root`, creating nodes
// as needed. Nodes that already exist keep their earlier `firstAppear`.
// Insertion stops at the first character outside 'a'..'z' (which would index
// outside the child table) or when a node cannot be allocated; the prefix
// inserted so far remains in the tree.
void AddToTree(LPstTrieNode root, const char *strInfo, int len, int firstAppear)
{
    if (root == NULL || strInfo == NULL)
    {
        return;
    }

    LPstTrieNode node = root;
    for (int i = 0; i < len; i++)
    {
        const int loc = strInfo[i] - 'a';
        if (loc < 0 || loc >= kAlphabetSize)
        {
            return;
        }

        LPstTrieNode next = node->toNext[loc];
        if (next == NULL)
        {
            next = builtNode(node, loc, strInfo[i], len - i, firstAppear);
            if (next == NULL)
            {
                return;
            }
        }
        node = next;
    }
}

// Looks up `checkWord` as a path starting at `root`.
// Returns true when every character is matched, and then stores the matched
// node's `firstAppear` in `*loc` (if `loc` is not NULL). Returns false for an
// empty or NULL word, for any character outside 'a'..'z', or when the path
// does not exist; `*loc` is left unchanged in that case.
bool checkWord(LPstTrieNode root, const char *checkWord, int *loc)
{
    if (root == NULL || checkWord == NULL || checkWord[0] == '\0')
    {
        return false;
    }

    LPstTrieNode node = root;
    for (const char *p = checkWord; *p != '\0'; ++p)
    {
        const int idx = *p - 'a';
        if (idx < 0 || idx >= kAlphabetSize)
        {
            return false;
        }

        node = node->toNext[idx];
        if (node == NULL)
        {
            return false;
        }
    }

    if (loc != NULL)
    {
        *loc = node->firstAppear;
    }
    return true;
}

// Releases `node` and every node reachable from it.
static void freeTree(LPstTrieNode node)
{
    if (node == NULL)
    {
        return;
    }
    for (int i = 0; i < kChildSlots; i++)
    {
        freeTree(node->toNext[i]);
    }
    free(node);
}

// Reads one whitespace-delimited word from Stringmodle.txt, indexes all of its
// suffixes, then repeatedly prompts for a word to look up until the input
// starts with '0' or the input stream ends. Returns 0 on success, 1 on failure.
int main()
{
    char WordContent[128] = "";   // text read from the file
    char wordForCheck[128] = "";  // word typed by the user

    // The file is only read, so plain "r" is enough and works on read-only files.
    FILE *fReadIn = fopen("Stringmodle.txt", "r");
    if (fReadIn == NULL)
    {
        cout<<"无法打开文件Stringmodle.txt"<<endl;
        system("pause");
        return 1;
    }

    LPstTrieNode root = static_cast<LPstTrieNode>(calloc(1, sizeof(stTrieNode)));
    if (root == NULL)
    {
        cerr<<"out of memory"<<endl;
        fclose(fReadIn);
        return 1;
    }

    // The field width keeps fscanf inside the 128-byte buffer; on an empty file
    // the text is treated as the empty string.
    if (fscanf(fReadIn, "%127s", WordContent) != 1)
    {
        WordContent[0] = '\0';
    }
    fclose(fReadIn);

    // Insert every suffix; suffix i starts at index i of the text.
    const int len = static_cast<int>(strlen(WordContent));
    for (int i = 0; i < len; i++)
    {
        AddToTree(root, WordContent + i, len - i, i);
    }

    while (true)
    {
        cout<<"输入要检验的单词:  ";

        // width() bounds the extraction to the buffer and is reset after each read.
        cin.width(static_cast<streamsize>(sizeof(wordForCheck)));
        if (!(cin>>wordForCheck))
        {
            break;  // EOF or stream error: stop instead of looping forever
        }
        if (wordForCheck[0] == '0')  // input starting with '0' ends the session
        {
            break;
        }

        int appearLoc = 0;
        if (checkWord(root, wordForCheck, &appearLoc))
        {
            cout<<wordForCheck<<" 存在, 首次出现位置为:"<<appearLoc<<endl<<endl;
        }
        else
        {
            cout<<wordForCheck<<" 不存在"<<endl<<endl;
        }
    }

    freeTree(root);
    return 0;
}


 

0 0