C++ trie实现拼写检查

来源:互联网 发布:网络大电影正崛起 编辑:程序博客网 时间:2024/05/22 17:26

trie:用键值的一部分来确定查找路径的树称为trie


非叶节点是一个对象,包含以下成员:一个叶/非叶节点标志,一个单词结束标志,一个指向字符串的指针(该字符串保存此节点上出现的各个字母),以及一个指向指针数组的指针,数组中的每个指针都指向同样类型的节点(或叶节点)

// Interior (non-leaf) node of the trie.
class TrieNonLeafNode{
public:
    // Default construction yields an empty interior node: both flags are
    // false and both pointers are null, so no member is left indeterminate.
    TrieNonLeafNode() : leaf(false), endOfWord(false), letters(0), ptrs(0) { }
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;   // leaf/non-leaf flag, end-of-word flag
    char *letters;          // pointer to a character string
    TrieNonLeafNode **ptrs; // pointer to an array of pointers
    friend class Trie;
};

程序先读取字典文件中的单词并建立trie,然后逐个检查输入文本中的单词是否拼写错误(不区分大小写)。测试所用的字典如下:

/*****************dictionary*******************/
a
ara
are
area
eerie
Eire
era
ere
Erie
ire
IPA
pear
peer
per
pier
rear
rep

完整代码如下:

/*****************trie.h*******************/
#pragma once

// Interior (non-leaf) node of the trie.
// NOTE: `leaf` must stay the first data member. trie.cpp stores leaf nodes
// behind TrieNonLeafNode pointers and reads `leaf` through them to tell the
// two node kinds apart.
class TrieNonLeafNode{
public:
    // Default construction yields an empty interior node: both flags are
    // false and both pointers are null, so no member is left indeterminate.
    TrieNonLeafNode() : leaf(false), endOfWord(false), letters(0), ptrs(0) { }
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;   // leaf/non-leaf flag, end-of-word flag
    char *letters;          // pointer to a character string
    TrieNonLeafNode **ptrs; // pointer to an array of pointers
    friend class Trie;
};

// Leaf node of the trie; it stores a character string (`word`).
// NOTE: `leaf` must stay the first data member (see TrieNonLeafNode).
class TrieLeafNode{
public:
    // Default construction yields a leaf that holds no string yet.
    TrieLeafNode() : leaf(true), word(0) { }
    TrieLeafNode(char *);
    //~TrieLeafNode();
private:
    bool leaf;
    char *word;
    friend class Trie;
};

// Trie of words supporting insertion, lookup and printing.
class Trie{
public:
    // Creates a trie with no root node. `root` is null rather than
    // indeterminate, so the state of an empty trie is well defined.
    Trie() : root(0), notFound(-1) { prefix[0] = '\0'; }
    Trie(char *);
    // Prints the whole trie; does nothing when there is no root node.
    void printTrie(){
        if(root == 0)
            return;
        *prefix = '\0';
        printTrie(0, root, prefix);
    }
    void insert(char *);
    bool wordFound(char *);
private:
    TrieNonLeafNode *root;
    const int notFound;     // sentinel (-1) for "letter not present in a node"
    char prefix[80];        // scratch buffer for the prefix printed by printTrie
    int position(TrieNonLeafNode *, char);
    void addCell(char, TrieNonLeafNode *, int);
    void createLeaf(char, char *, TrieNonLeafNode *);
    void printTrie(int, TrieNonLeafNode *, char *);
};
/*****************trie.cpp*******************/#include <iostream>#include <cstring>#include <cstdlib>#include "trie.h"using namespace std;TrieLeafNode::TrieLeafNode(char *suffix)    //创建叶节点{    leaf = true;    word = new char[strlen(suffix)+1];    if(word == 0){        cerr << "out of memory2\n";        exit(-1);    }    strcpy_s(word, strlen(suffix) + 1, suffix);}TrieNonLeafNode::TrieNonLeafNode(char ch)   //创建非页节点{    ptrs = new TrieNonLeafNode *;    letters = new char[2];    if(ptrs == 0 || letters == 0){        cerr << "out of memory3\n";        exit(1);    }    leaf = false;    endOfWord = false;    *ptrs = 0;    *letters = ch;    *(letters + 1) = '\0';}Trie::Trie(char *word):notFound(-1){    root = new TrieNonLeafNode(*word);    createLeaf(*word, word+1, root);}void Trie::printTrie(int depth, TrieNonLeafNode *p, char *prefix){    int i;    if(p->leaf){    //叶节点        TrieLeafNode *lf = (TrieLeafNode *)p;        for(int i = 1; i <= depth; i++)            cout << " ";        cout << " >>" << prefix << "|" << lf->word << endl;    }    else{   //非叶节点        for(i = strlen(p->letters)-1; i >= 0; i--){            if(p->ptrs[i]!= '\0'){                prefix[depth] = p->letters[i];                prefix[depth+1] = '\0';                printTrie(depth + 1, p->ptrs[i], prefix);            }        }        if(p->endOfWord){            prefix[depth] = '\0';            for(i = 1; i <= depth; i++)                cout << " ";            cout << ">>>" << prefix << endl;        }    }}int Trie::position(TrieNonLeafNode *p, char ch) //寻找字符在指针数组中的位置{    int i;    for(i = 0; i < strlen(p->letters) && p->letters[i] != ch; i++);    if(i < strlen(p->letters))        return i;    else        return notFound;}bool Trie::wordFound(char *word){    TrieNonLeafNode *p = root;    TrieLeafNode *lf;    int pos;    while(true){        if(p->leaf){            lf = (TrieLeafNode *)p;            if(strcmp(word, lf->word) == 0)                return true;            else                return 
false;        }        else if(*word == '\0'){            if(p->endOfWord)                return true;            else                return false;        }        else if((pos = position(p, *word)) != notFound && p->ptrs[pos] != 0){            p = p->ptrs[pos];            word++;        }        else            return false;    }}void Trie::addCell(char ch, TrieNonLeafNode *p, int stop)   //申请非叶节点空间{    int i, len = strlen(p->letters);    char *s = p->letters;    TrieNonLeafNode **tmp = p->ptrs;    p->letters = new char[len+2];    p->ptrs = new TrieNonLeafNode*[len+1];    if(p->letters == 0 || p->ptrs == 0){        cerr << "out of memory1\n";        exit(1);    }    for(i = 0; i < len + 1; i++)        p->ptrs[i] = 0;    if(stop < len){        for(i = len; i >= stop + 1; i--){            p->ptrs[i] = tmp[i-1];            p->letters[i] = s[i-1];        }    }    p->letters[stop] = ch;    for(i = stop - 1; i >= 0; i--){        p->ptrs[i] = tmp[i];        p->letters[i] = s[i];    }    p->letters[len+1] = '\0';    delete []s;}void Trie::createLeaf(char ch, char *suffix, TrieNonLeafNode *p)    //创建叶节点{    int pos = position(p, ch);    if(pos == notFound){        for(pos = 0; pos < strlen(p->letters) && p->letters[pos] < ch; pos++);        addCell(ch, p, pos);    }    p->ptrs[pos] = (TrieNonLeafNode *) new TrieLeafNode(suffix);}void Trie::insert(char *word){    TrieNonLeafNode *p = root;    TrieLeafNode *lf;    int offset, pos;    char *hold = word;    while(true){        if(*word == '\0'){            if(p->endOfWord)                    cout << "Duplicate entry1 " << hold << endl;            else                p->endOfWord = true;            return ;        }        pos = position(p, *word);        if(pos == notFound){            createLeaf(*word, word+1, p);            return ;        }        else if(pos != notFound && p->ptrs[pos]->leaf){            lf = (TrieLeafNode *)p->ptrs[pos];            if(strcmp(lf->word, word + 1) == 0){                cout << "Duplicate 
entry2 " << hold << endl;                return ;            }            offset = 0;            do{                pos = position(p, word[offset]);                if(strlen(word) == offset + 1){                    p->ptrs[pos] = new TrieNonLeafNode(word[offset]);                    p->ptrs[pos]->endOfWord = true;                    createLeaf(lf->word[offset], lf->word + offset + 1, p->ptrs[pos]);                    return ;                }                else if(strlen(lf->word) == offset){                    p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);                    p->ptrs[pos]->endOfWord = true;                    createLeaf(word[offset+1], word + offset + 2, p->ptrs[pos]);                    return ;                }                p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);                p = p->ptrs[pos];                offset++;            }while(word[offset] == lf->word[offset-1]);            offset--;            char *s = "";            if(strlen(word) > offset + 2)                s = word + offset + 2;            createLeaf(word[offset+1], s, p);            if(strlen(lf->word) > offset + 1)                s = lf->word + offset + 1;            else                s = "";            createLeaf(lf->word[offset], s, p);            delete [] lf->word;            delete lf;            return ;        }        else{            p = p->ptrs[pos];            word++;        }    }}
/*****************spellcheck.cpp********************/#include <iostream>#include <fstream>#include <cstdlib>#include <cstring>#include <cctype>#include "trie.h"using namespace std;char *Strupr(char *s){    char *ss;    for(ss = s; *s = toupper(*s); s++);     //for循环判断条件为空就退出循环,而toupper()函数对于非字母输入则返回原值;    //所以在遍历完s后读入NULL值的toupper()函数也会返回NULL,导致退出循环    return ss;}int main(int argc, char *argv){    char fileName[25], s[80], ch;    int i, lineNum = 1;    ifstream dictionary("dictionary");    if(dictionary.fail()){        cerr << "cannot open 'dictionary'\n";        exit(-1);    }    dictionary >> s;    Trie trie(Strupr(s));    while(dictionary >> s)        trie.insert(Strupr(s));    trie.printTrie();    if(argc != 2){        cout << "Enter a file name: ";        cin >> fileName;    }    else        strcpy(fileName, &argv[1]);    ifstream textFile(fileName);    if(textFile.fail()){        cout << "cannot open " << fileName << endl;        exit(-1);    }    cout << "misspelled words:\n";    textFile.get(ch);    while(!textFile.eof()){        while(true){            if(!textFile.eof() && !isalpha(ch)){                if(ch == '\n')                    lineNum++;                textFile.get(ch);            }            else break;        }        if(textFile.eof())            break;        for(i = 0; !textFile.eof() && isalpha(ch); i++){            s[i] = toupper(ch);            textFile.get(ch);        }        s[i] = '\0';        if(!trie.wordFound(s))            cout << s << " on line " << lineNum << endl;    }    dictionary.close();    textFile.close();    return 0;}