基于KMP算法的TXT文本查询工具

来源:互联网 发布:talk软件下载 编辑:程序博客网 时间:2024/06/06 20:53
// Line-oriented text query tool.
//
// Loads a text file, indexes every whitespace-separated word by the lines it
// appears on, and answers interactive queries: a query matches every indexed
// word that contains it as a substring (substring search is done with KMP).
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <math.h>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

// Holds the lines of one input file plus a word -> line-numbers index.
class TextQuery {
public:
    // Zero-based index of a line inside the loaded file.
    typedef std::vector<std::string>::size_type line_no;

    // Reads every line of `is` into memory, then indexes the words.
    void read_file(std::ifstream &is) {
        store_file(is);
        build_map();
    }

    // Returns the numbers of all lines holding a word that contains the
    // given text as a substring.
    std::set<line_no> run_query(const std::string &) const;

    // Returns the text of the requested line; throws std::out_of_range when
    // the line number is past the end of the stored file.
    std::string text_line(line_no) const;

private:
    void store_file(std::ifstream &);
    void build_map();

    std::vector<std::string> lines_of_text;             // one entry per input line
    std::map<std::string, std::set<line_no>> word_map;  // word -> lines containing it
};

// Fills `next` with the KMP failure table of the pattern `str`.
//
// next[q] is the index of the last character of the longest proper prefix of
// str[0..q] that is also a suffix of it, or -1 when no such prefix exists.
// `next` must have room for `len` ints. Nothing is written when len <= 0.
void cal_next(const char *str, int *next, int len) {
    if (len <= 0) {
        return;  // empty pattern: there is no next[0] slot to initialise
    }
    next[0] = -1;
    int k = -1;  // end index of the currently matched prefix, -1 for "none"
    for (int q = 1; q < len; ++q) {
        // On a mismatch fall back to the next shorter border; next[k] is
        // always smaller than k, so this loop terminates.
        while (k > -1 && str[k + 1] != str[q]) {
            k = next[k];
        }
        if (str[k + 1] == str[q]) {
            ++k;
        }
        next[q] = k;
    }
}

// Searches `str` (length `slen`) for the first occurrence of `ptr` (length
// `plen`) using the Knuth-Morris-Pratt algorithm.
//
// Returns the zero-based start position of the first match, or -1 when the
// pattern does not occur. An empty pattern matches at position 0.
int KMP(const char *str, int slen, const char *ptr, int plen) {
    if (plen <= 0) {
        return 0;
    }

    // A vector owns the failure table, so it is released on every return
    // path (the table used to be allocated with new[] and never freed).
    std::vector<int> next(static_cast<std::size_t>(plen));
    cal_next(ptr, next.data(), plen);

    int k = -1;  // index of the last pattern character matched so far
    for (int i = 0; i < slen; ++i) {
        // Partial match broken by str[i]: retry with the longest shorter border.
        while (k > -1 && ptr[k + 1] != str[i]) {
            k = next[k];
        }
        if (ptr[k + 1] == str[i]) {
            ++k;
        }
        if (k == plen - 1) {
            // Whole pattern matched, ending at i. Only the first occurrence
            // is reported.
            return i - plen + 1;
        }
    }
    return -1;
}

// Appends every line read from `is` to lines_of_text.
void TextQuery::store_file(ifstream &is) {
    string textline;
    while (getline(is, textline)) {
        lines_of_text.push_back(textline);
    }
}

// Not referenced anywhere in this file; kept so the set of global symbols
// stays unchanged.
int position;

// Builds word_map: each whitespace-separated word of each stored line is
// mapped to the set of line numbers on which it appears.
void TextQuery::build_map() {
    for (line_no line_num = 0; line_num != lines_of_text.size(); ++line_num) {
        istringstream line(lines_of_text[line_num]);
        string word;
        while (line >> word) {
            word_map[word].insert(line_num);
        }
    }
}

// Collects the line numbers of every indexed word that contains `query_word`
// as a substring.
//
// All matching words contribute to the result (previously only the first
// matching word in map order was reported, silently dropping the rest).
// An empty query returns an empty set.
set<TextQuery::line_no> TextQuery::run_query(const string &query_word) const {
    set<line_no> matches;
    if (query_word.empty()) {
        return matches;
    }
    for (const auto &entry : word_map) {
        const string &word = entry.first;
        const int found = KMP(word.c_str(), static_cast<int>(word.length()),
                              query_word.c_str(), static_cast<int>(query_word.length()));
        if (found != -1) {
            matches.insert(entry.second.begin(), entry.second.end());
        }
    }
    return matches;
}

// Returns the stored text of line `line` (zero-based).
// Throws std::out_of_range when `line` is not a valid line number.
string TextQuery::text_line(line_no line) const {
    if (line < lines_of_text.size()) {
        return lines_of_text[line];
    }
    throw std::out_of_range("line number out of range");
}

// Rebinds `in` to the file named `file` and returns `in` so the caller can
// test whether the open succeeded.
ifstream &open_file(ifstream &in, const string &file) {
    in.close();  // drop any file the stream was previously attached to
    in.clear();  // reset error flags (close() on an unopened stream sets failbit)
    in.open(file.c_str());
    return in;
}

// Returns `word` unchanged when cnt == 1, otherwise `word` with the suffix
// `words` appended (e.g. "time" + "s").
string make_plural(std::size_t cnt, const string &word, const string &words) {
    return (cnt == 1) ? word : word + words;
}

// Prints how many lines matched `sought`, followed by each matching line
// prefixed with its one-based line number.
void print_results(const set<TextQuery::line_no> &locs, const string &sought,
                   const TextQuery &file) {
    const set<TextQuery::line_no>::size_type size = locs.size();
    cout << sought << " occurs " << size << " " << make_plural(size, "time", "s") << '\n';
    for (const TextQuery::line_no line : locs) {
        cout << "\t(line" << (line + 1) << ")" << file.text_line(line) << '\n';
    }
}

// Define TEXT_QUERY_NO_MAIN to compile the query utilities into a program
// (for example a test harness) that supplies its own entry point.
#ifndef TEXT_QUERY_NO_MAIN
// Usage: <program> <text-file>. Reads query words from standard input until
// "q" or end of input.
int main(int argc, char **argv) {
    ifstream infile;
    if (argc < 2 || !open_file(infile, argv[1])) {
        cerr << "No input file!" << endl;
        return EXIT_FAILURE;
    }

    TextQuery tq;
    tq.read_file(infile);

    while (true) {
        cout << "enter word to look for , or q to quit:";
        string s;
        cin >> s;
        if (!cin || s == "q") {
            break;
        }
        const set<TextQuery::line_no> locs = tq.run_query(s);
        print_results(locs, s, tq);
    }
    return 0;
}
#endif  // TEXT_QUERY_NO_MAIN
原创粉丝点击