词频统计(一):C++使用Vector做词频统计

来源:互联网 发布:红蜘蛛软件破解网络 编辑:程序博客网 时间:2024/06/01 14:46

统计《圣经》文本(The_Holy_Bible.txt)中每个单词出现的次数,并将结果写入 statics.txt。

#include <iostream>
#include <vector>
#include <fstream>
#include <string>
#include <string.h>
#include <sstream>

// One distinct word together with the number of times it has been seen.
struct Record
{
    std::string word;
    int freq;
};

// Word-frequency counter backed by a std::vector<Record>.
//
// Words are whitespace-delimited tokens exactly as extracted by operator>>;
// no case folding or punctuation stripping is performed. Records are kept in
// order of first appearance, and counts accumulate across read_file() calls.
class WordStatic
{
public:
    // The file names are taken by const reference: no copy is made and the
    // callee cannot modify them.

    // Reads `filename` line by line and tallies every word it contains.
    // Prints "open readfile failed" to stdout and returns if the file cannot
    // be opened.
    void read_file(const std::string &filename);

    // Writes one "<word> <count>" line per record to `filename`.
    // Prints "open write_file failed" to stdout and returns if the file cannot
    // be opened.
    void write_file(const std::string &filename);

private:
    std::vector<Record> vec;
};

void WordStatic::read_file(const std::string &filename)
{
    std::ifstream ifs(filename.c_str());
    if (!ifs)
    {
        std::cout << "open readfile failed" << std::endl;
        return;
    }

    // Read the file one line at a time, then split each line into words.
    std::string line;
    while (std::getline(ifs, line))
    {
        std::stringstream ss(line);
        std::string word;
        while (ss >> word)
        {
            // Linear search through the records seen so far; std::string
            // provides operator== for the comparison.
            bool found = false;
            for (Record &rec : vec)
            {
                if (rec.word == word)
                {
                    ++rec.freq;
                    found = true;
                    break;
                }
            }

            // First occurrence of this word: start a new record at count 1.
            if (!found)
            {
                vec.push_back(Record{word, 1});
            }
        }
    }
    // ifs is closed by its destructor.
}

void WordStatic::write_file(const std::string &filename)
{
    std::ofstream ofs(filename.c_str());
    if (!ofs)
    {
        std::cout << "open write_file failed" << std::endl;
        // Nothing useful can be written to a stream that failed to open.
        return;
    }

    for (const Record &rec : vec)
    {
        // '\n' rather than std::endl: avoids flushing the file on every line.
        ofs << rec.word << ' ' << rec.freq << '\n';
    }
    // ofs is flushed and closed by its destructor.
}

int main(void)
{
    WordStatic wd;
    wd.read_file("The_Holy_Bible.txt");
    wd.write_file("statics.txt");
    return 0;
}

The_Holy_Bible.txt 文件可以从下面的地址下载:
https://github.com/Senvenno27/Word-Frequency-staticstics

原创粉丝点击