C++ 二叉树实现词频分析

来源:互联网 发布:优易ip代理软件 编辑:程序博客网 时间:2024/06/05 07:41

通过二叉树存单词,并且对总共的单词数量进行计数,二叉树自适应的将出现频率高的单词往上移动以减少二叉树的搜索时间。
代码如下

/***********************genSplay.h***********************/#ifndef _GENSPLAY_H_#define _GENSPLAY_H_#include <iostream>using namespace std;//树节点template<class T>class SplayingNode{public:    T info;    SplayingNode *left, *right, *parent;    //节点指针    SplayingNode(){        left = right = parent = 0;    }    SplayingNode(const T &el, SplayingNode *l = 0,                 SplayingNode *r = 0, SplayingNode *p = 0)                 :info(el), left(l), right(r), parent(p){ }};//二叉树template<class T>class SplayTree{protected:    SplayingNode<T> *root;    void rotateR(SplayingNode<T> *);    //向右旋转    void rotateL(SplayingNode<T> *);    //向左旋转    void continueRotation(SplayingNode<T> *gr, SplayingNode<T> *par,                          SplayingNode<T> *ch, SplayingNode<T> *desc);  //重新定义父节点指针    void semisplay(SplayingNode<T> *);  //更改树结构,将权值大的向上移动    void inorder(SplayingNode<T> *);    //中序遍历    void virtual visit(SplayingNode<T> *){ }  //虚函数public:    SplayTree(){        root = 0;    }    void inorder(){        inorder(root);    }    T *search(const T &);    void insert(const T &);};template<class T>void SplayTree<T>::continueRotation(SplayingNode<T> *gr, SplayingNode<T> *par,        SplayingNode<T> *ch, SplayingNode<T> *desc){    if(gr != 0){        if(gr->right == ch->parent)            gr->right = ch;        else            gr->left = ch;    }    else        root = ch;    if(desc != 0)        desc->parent = par;    par->parent = ch;    ch->parent = gr;}template<class T>void SplayTree<T>::rotateR(SplayingNode<T> *p){    p->parent->left = p->right;    p->right = p->parent;    continueRotation(p->parent->parent, p->right, p, p->right->left);}template<class T>void SplayTree<T>::rotateL(SplayingNode<T> *p){    p->parent->right = p->left;    p->left = p->parent;    continueRotation(p->parent->parent, p->left, p, p->left->right);}template<class T>void SplayTree<T>::semisplay(SplayingNode<T> *p){    while(p != root){        if(p->parent->parent == NULL){            if(p->parent->left == p)                rotateR(p);            else                rotateL(p);        }        else if(p->parent->left == p){            if(p->parent->parent->left == p->parent){                rotateR(p->parent);                p = p->parent;            }            else{                rotateR(p);                rotateL(p);            }        }        else{            if(p->parent->parent->right == p->parent){                rotateL(p->parent);                p = p->parent;            }            else{                rotateL(p);                rotateR(p);            }        }        if(root == NULL)            root = p;    }}template<class T>T *SplayTree<T>::search(const T &el){    SplayingNode<T> *p = root;    while(p != NULL){        if(p->info == el){            semisplay(p);            return &p->info;        }        else if(el < p->info)            p = p->left;        else            p = p->right;    }    return 0;}template<class T>void SplayTree<T>::insert(const T &el){    SplayingNode<T> *p = root, *prev = NULL, *newNode;    while(p != 0){        prev = p;        if(el < p->info)            p = p->left;        else            p = p->right;    }    if((newNode = new SplayingNode<T>(el, 0, 0, prev)) == 0){        cerr << "no room for new node.\n";        exit(1);    }    if(root == 0)        root = newNode;    else if(el < prev->info)        prev->left = newNode;    else        prev->right = newNode;}template<class T>void SplayTree<T>::inorder(SplayingNode<T> *p){    if(p != 0){        inorder(p->left);        visit(p);        inorder(p->right);    }}#endif // _GENSPLAY_H_
/***********************Splay.cpp***********************/#include <iostream>#include <fstream>#include <cctype>#include <cstring>#include <cstdlib>  //exit(0)#include "genSplay.h"using namespace std;//用作计数对象的类class Word{private:    char *word;    int freq;    friend class WordSplay;    //friend ostream & operator<<(ostream &out, const Word &wd);public:    Word(){        freq = 1;    }    int operator==(const Word &ir) const{        return strcmp(word, ir.word) == 0;    }    int operator<(const Word &ir) const{        return strcmp(word, ir.word) < 0;    }};class WordSplay : public SplayTree<Word>{private:    int differentWords, wordCnt;    void visit(SplayingNode<Word> *);public:    WordSplay(){        differentWords = wordCnt = 0;    }    void run(ifstream &, char *);};void WordSplay::visit(SplayingNode<Word> *p){    differentWords++;    wordCnt += p->info.freq;}void WordSplay::run(ifstream &fin, char *filename){    char ch = ' ', i;    char s[100];    Word rec;    while(!fin.eof()){        while(1){            if(!fin.eof() && !isalpha(ch))                fin.get(ch);            else                break;        }        if(fin.eof())            break;        for(i = 0; !fin.eof() && isalpha(ch); i++){            s[i] = toupper(ch);            fin.get(ch);        }        s[i] = '\0';        if(!(rec.word = new char[strlen(s) + 1])){            cerr << "no room for new words.\n";            exit(1);        }        strcpy(rec.word, s);        Word *p = search(rec);        if(p == 0)            insert(rec);        else            p->freq++;    }    inorder();    cout << "\n\nFile " << filename         << " contains " << wordCnt << " words among which "         << differentWords << " are different.\n";}int main(){    char Filename[80];    WordSplay SplayTree;    cout << "enter a filename: ";    cin >> Filename;    ifstream fin(Filename);    if(fin.fail()){        cerr << "cannot open " << Filename << endl;        return 1;    }    SplayTree.run(fin, Filename);    fin.close();    return 0;}

有空回来补充相应的其他功能:

  1. 将对应的单词和文件写到文件里面去,先序遍历
  2. 优化性能
原创粉丝点击