C++根据文本建立索引
来源:互联网 发布:美工培训 编辑:程序博客网 时间:2024/06/05 06:53
#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <set>
#include <map>
#include <string>
#include <cctype>//isalpha, isupper,tolower
#include <cstdlib>//exit
using namespace std;
static int s_lineNum = 1;// 1-based number of the input line currently being scanned; main() increments it on every '\n' it reads
int main(int argc, char *argv[])
{
if (argc < 3)
{
cout << "usage: " << argv[0] <<"infile" << "outfile" << endl;
cout << "generate a word list from an Englishfile,"
<< "each word is followed by the number of the lines where it occured and then output the resultto a file." << endl;
}
else
{
//common words
string commWord[13] = {"a", "an", "and","are", "in", "is", "of",
"or", "that","the", "this", "to", "have"};
set<string> ignore(commWord, commWord + 13);//ignore the commonwords
set<int> lineNum;
string word;//contain the word extracted from infile
map< string, set<int> > wordlist;//associate word withlineNum
pair< map<string, set<int> >::iterator, bool> pr;
map< string, set<int> >::iterator itWord;
set<int>::iterator itLine;
ifstream infile(argv[1]);//open infile
if (infile.bad())
{
cout << "open "<< argv[1] << " error" << endl;
exit(EXIT_FAILURE);
}
ofstream outfile(argv[2]);
if (outfile.bad())
{
cout << "open " << argv[2] << "error" << endl;
exit(EXIT_FAILURE);
}
char temp;
while(infile.good())
{
temp = infile.get();//get a char
while (isalpha(temp))
{
if (isupper(temp))
{
temp = tolower(temp);
}
word.append(1, temp);
temp = infile.get();//get a newchar
}
//not common word and not empty
if (ignore.count(word) == 0 && word.size() != 0)
{
lineNum.insert(s_lineNum);//insert the line number into lineNum if it isa new word
//return a pair, the firstmember is the iterator, the second is a bool type which indicate whether theinsertion is successful or not
pr = wordlist.insert(pair<string, set<int> >(word, lineNum));
//the map already contained anelement whose key had an equivalent value in the ordering
if (pr.second == false)
{
wordlist[word].insert(s_lineNum);//insert the line number into the setcontainer paired with the word
}
}
word.clear();
lineNum.clear();
if (temp == '\n')
{
s_lineNum++;
}
}
//itWord points to pair< string, set<int> >, the firstmember is word, the second is line number
for (itWord = wordlist.begin(); itWord != wordlist.end(); ++itWord)
{
//set output format
outfile.setf(ios_base::left, ios_base::adjustfield);
outfile.fill('-');
outfile.width(34);
outfile << itWord->first;
outfile << " ";
for (itLine = itWord->second.begin(); itLine !=itWord->second.end(); ++itLine)
{
outfile << *itLine<< ' ';
}
outfile << endl;
}
infile.close();
infile.open(argv[2]);
cout << infile.rdbuf();//streambuf, print the output file
infile.close();
outfile.close();
}
map< int,string > ss;
ss.insert(pair<int,string>(6,"aa"));
ss.insert(pair<int,string>(4,"zz"));
ss.insert(pair<int,string>(5,"gg"));
map< int,string >::iterator pt;
for(pt = ss.begin();pt!=ss.end();++pt)
{
cout<< pt->second<<endl;
}
//system("pause");
return 0;
}
0 0
- C++根据文本建立索引
- Erlang练习:建立文本索引
- 利用DBMS_UTILITY.GET_HASH_VALUE 给文本建立索引
- Lucene之建立索引以及根据索引查询
- solr_对富文本(pdf等)建立索引
- 建立索引
- 建立索引
- 建立索引
- 建立索引
- 索引建立
- 建立索引
- 建立索引
- 建立索引去掉索引
- C语言中如何实现建立和写入文本文档
- eclipse cdt 导入c ++ 工程并建立头头文件 索引
- html抽取文本信息-java版(适合lucene建立索引)
- 根据文本生成位图
- lucene_根据索引搜索
- ## 暑期 后台学习+ C++ Primer 目录
- 勾股定理一日一证连载34
- 算法复杂度速查表
- 欢迎使用CSDN-markdown编辑器
- Leetcode #143 in cpp
- C++根据文本建立索引
- qt 属性动态绑定
- HMM转载系列
- 浮萍自动清理机器人——第一次下水201606
- 开发经验(漫谈)
- Eclipse快捷键大全
- [leetcode] 363. Max Sum of Rectangle No Larger Than K 解题报告
- Crazy Kids
- C语言学习(一)