C++编程,从一个文件中统计所有出现过的单词,并按次数从大到小输出

来源:互联网 发布:访客网络要开启吗 编辑:程序博客网 时间:2024/05/15 03:54

转自 http://zhidao.baidu.com/link?url=cN4SGBB-hTJ4MZA4wZVmcfXOmQkBJadTsGhxOaaT6pSRnggGUWt10qXC8fsH-Be17vX2Fpq1LxYlKMfvtIZlv_


// Word-frequency counter.
//
// (1) Input: a text file in which every line is the result of word
//     segmentation; the terms on a line are separated by tab characters.
// (2) A hash map counts the terms (key = term, value = number of
//     occurrences). After counting, the map is copied into a vector and
//     sorted with the STL sort algorithm, most frequent term first.
//
// Requires C++11: the deprecated, GNU-only <ext/hash_map> was replaced by the
// standard <unordered_map>.
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

/// Hash functor for std::string keys: folds every character into the running
/// value with h = 5 * h + c.
struct str_hash {
    std::size_t operator()(const std::string& str) const {
        unsigned long h = 0;
        for (std::size_t i = 0; i < str.size(); ++i) {
            h = 5 * h + str[i];
        }
        return static_cast<std::size_t>(h);
    }
};

/// Term -> occurrence count.
using TermCounts = std::unordered_map<std::string, int, str_hash>;

/// Ordering predicate for (count, term) pairs used by mirror_map().
///
/// Despite its name it places the pair with the HIGHER count first. Pairs
/// with equal counts are ordered by term (ascending) so that the sorted
/// result does not depend on the hash table's iteration order.
///
/// @return true when lhs must be placed before rhs.
bool less_first(std::pair<int, std::string> const& lhs,
                std::pair<int, std::string> const& rhs) {
    if (lhs.first != rhs.first) {
        return lhs.first > rhs.first;
    }
    return lhs.second < rhs.second;
}

/// Converts a term -> count map into a vector of (count, term) pairs sorted
/// with less_first().
///
/// Templated on the map type so that any associative container whose
/// elements expose `first` (term) and `second` (count) is accepted.
///
/// @param m  Map from term to its number of occurrences.
/// @return   The (count, term) pairs, highest count first.
template <typename Map>
std::vector<std::pair<int, std::string> > mirror_map(Map const& m) {
    std::vector<std::pair<int, std::string> > mirror;
    mirror.reserve(m.size());
    for (const auto& entry : m) {
        mirror.emplace_back(entry.second, entry.first);
    }
    std::sort(mirror.begin(), mirror.end(), less_first);
    return mirror;
}

/// Splits one input line on tab characters and increments the counter of
/// every non-empty term found.
///
/// @param line    One line as returned by std::getline (no trailing '\n').
/// @param counts  Counter table updated in place.
static void count_terms(const std::string& line, TermCounts& counts) {
    std::string::size_type length = line.size();
    // A file with CRLF line endings leaves '\r' at the end of the line; do
    // not let it become part of the last term.
    if (length > 0 && line[length - 1] == '\r') {
        --length;
    }

    std::string::size_type start = 0;
    while (start < length) {
        std::string::size_type end = line.find('\t', start);
        if (end == std::string::npos || end > length) {
            end = length;
        }
        // Leading or consecutive tabs yield empty tokens; they are not terms.
        if (end > start) {
            ++counts[line.substr(start, end - start)];
        }
        start = end + 1;
    }
}

/// Keeps the console window open on Windows, where the program is typically
/// started by double-click; a no-op on every other platform.
static void pause_console() {
#ifdef _WIN32
    std::system("pause");
#endif
}

/// Reads "word.txt", counts its tab-separated terms and writes one
/// "term<TAB>count" line per distinct term to "wordfrequency.txt", most
/// frequent term first.
///
/// @return 0 on success, EXIT_FAILURE when a file cannot be opened or written.
int main() {
    std::ifstream input("word.txt");
    if (!input) {
        std::cerr << "Cannot open word.txt for reading" << std::endl;
        pause_console();
        return EXIT_FAILURE;
    }

    // Opened only after the input is known to be readable, so a missing
    // input file does not truncate a previous result.
    std::ofstream output("wordfrequency.txt");
    if (!output) {
        std::cerr << "Cannot open wordfrequency.txt for writing" << std::endl;
        pause_console();
        return EXIT_FAILURE;
    }

    TermCounts termtime;
    std::string eachline;
    while (std::getline(input, eachline)) {
        count_terms(eachline, termtime);
    }

    const std::vector<std::pair<int, std::string> > mirror = mirror_map(termtime);
    for (const auto& entry : mirror) {
        // '\n' instead of std::endl: no flush per line.
        output << entry.second << "\t" << entry.first << '\n';
    }

    // Close explicitly so that a failed final flush is detected and reported.
    output.close();
    if (!output) {
        std::cerr << "Failed to write wordfrequency.txt" << std::endl;
        pause_console();
        return EXIT_FAILURE;
    }

    std::cout << "Done" << std::endl;
    pause_console();
    return 0;
}

0 0
原创粉丝点击