文本查询程序--继承版与非继承版

来源:互联网 发布:vc mfc编程实例五子棋 编辑:程序博客网 时间:2024/06/18 04:44

注:此内容源于对c++primer第五版第十五章文本查询程序的思考。

1.非继承版本

看完这一节内容之后,我的想法是:增加这三个额外的功能之后,如果不使用继承,我该如何完成这个程序?其实只需要在QueryResult类中再添加三个友元,功能分别是逻辑与、逻辑或、逻辑非。为了让程序自动控制查询命令的组合次序(例如 "hair" | "her" & "Alice",其运算次序应该是 "hair" | ("her" & "Alice")),我们可以重载 & | ~ 这三个运算符。我们知道,重载运算符不会改变运算符的优先级和结合律,这就能解决这个问题。至于为什么不用 && || !,原因是前两个运算符最好不要重载(重载之后会失去短路求值的语义),所以这里用 & | ~ 代表与、或、非。当然,我们完全可以定义三个命名函数来表示这三种运算,但这样就得手动控制运算的优先级了,而且函数调用不能写成 "hair" | "her" & "Alice" 这样的连写形式,既不直观,操作还麻烦。废话不多说,看看下面这个例子吧:

// Text query program revisited (non-inheritance version): console entry point.
//
// Word queries are combined by applying the overloaded operators & (and),
// | (or) and ~ (not) directly to QueryResult objects.
//
// NOTE(review): the original started with `#include "stdafx.h"` (the MSVC
// precompiled header), presumably only to obtain _tmain/_TCHAR. <tchar.h> is
// included directly instead. If the project is built with precompiled headers
// enabled, re-add `#include "stdafx.h"` as the very first include. On other
// toolchains, build with -D_tmain=main to get a standard entry point.
#ifdef _MSC_VER
#include <tchar.h>
#else
typedef char _TCHAR;  // stand-in so the MSVC-style entry point compiles elsewhere
#endif

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

class StrBlob;

// Plays the role of an iterator over a StrBlob. It holds only a weak reference,
// so it never keeps the underlying vector alive.
class StrBlobPtr {
public:
    StrBlobPtr();
    StrBlobPtr(StrBlob &a, size_t sz = 0);

    // Returns the element at the current position.
    // Throws runtime_error if the blob is gone, out_of_range if past the end.
    string &deref() const;

    // Advances by one position; throws if already past the end.
    StrBlobPtr &incr();

    // Compares positions only. Const-correct so that temporaries such as
    // blob.end() can appear on either side.
    bool operator!=(const StrBlobPtr &sbp) const { return curr != sbp.curr; }

private:
    shared_ptr<vector<string>> check(size_t i, const string &msg) const;

    weak_ptr<vector<string>> wptr;
    size_t curr;
};

// A vector<string> shared between all copies of the same StrBlob.
class StrBlob {
    friend class StrBlobPtr;

public:
    typedef vector<string>::size_type size_type;

    StrBlob() : data(make_shared<vector<string>>()) {}
    StrBlob(const string *s1, const string *s2)
        : data(make_shared<vector<string>>(s1, s2)) {}

    size_type size() const { return data->size(); }
    bool empty() const { return data->empty(); }
    void push_back(const string &t) { data->push_back(t); }

    void pop_back() {
        check(0, "pop_back on empty strblob");
        data->pop_back();
    }

    // Bounds-checked element access; throws out_of_range.
    string &get(size_t i) {
        check(i, "out of range");
        return (*data)[i];
    }

    // Returning a reference is safe here: the element lives in the shared
    // vector, not in this function's stack frame.
    string &front() {
        check(0, "front on empty strblob");
        return data->front();
    }
    string &front() const {
        check(0, "front on empty strblob");
        return data->front();
    }
    string &back() {
        check(0, "back on empty strblob");
        return data->back();
    }
    string &back() const {
        check(0, "back on empty strblob");
        return data->back();
    }

    StrBlobPtr begin() { return StrBlobPtr(*this); }
    StrBlobPtr end() { return StrBlobPtr(*this, this->size()); }

private:
    // Throws out_of_range(msg) unless i is a valid index.
    void check(size_type i, const string &msg) const {
        if (i >= data->size()) throw out_of_range(msg);
    }

    shared_ptr<vector<string>> data;
};

StrBlobPtr::StrBlobPtr() : curr(0) {}

StrBlobPtr::StrBlobPtr(StrBlob &a, size_t sz) : wptr(a.data), curr(sz) {}

string &StrBlobPtr::deref() const {
    shared_ptr<vector<string>> p = check(curr, "dereference past end");
    return (*p)[curr];
}

StrBlobPtr &StrBlobPtr::incr() {
    check(curr, "increment past of strblobptr");
    ++curr;
    return *this;
}

// Locks the weak pointer and validates index i.
// (The original also printed a debugging separator to cout before throwing;
// the exception alone already carries the information.)
shared_ptr<vector<string>> StrBlobPtr::check(size_t i, const string &msg) const {
    shared_ptr<vector<string>> ret = wptr.lock();
    if (!ret) throw runtime_error("unbound_strblobptr");
    if (i >= ret->size()) throw out_of_range(msg);
    return ret;
}

class QueryResult;

// Reads a text line by line and records, for every word, the set of
// zero-based line numbers on which it occurs.
class TextQuery {
public:
    // Accepts any input stream; an ifstream argument works exactly as before.
    TextQuery(istream &in) : src(make_shared<StrBlob>()) {
        string line;
        while (getline(in, line)) {
            src->push_back(line);
            const size_t line_index = src->size() - 1;

            istringstream tokens(line);
            string token;
            while (tokens >> token) {
                // Keep only the alphabetic characters, so `hair,` and `"Daddy,`
                // index as `hair` and `Daddy`. Case is preserved.
                string key;
                for (char ch : token) {
                    // isalpha needs a value representable as unsigned char.
                    if (isalpha(static_cast<unsigned char>(ch))) key.push_back(ch);
                }
                if (key.empty()) continue;  // punctuation-only token: nothing to index

                // operator[] yields a null pointer the first time a word is seen.
                shared_ptr<set<size_t>> &lines = m[key];
                if (!lines) lines = make_shared<set<size_t>>();
                lines->insert(line_index);
            }
        }
    }

    // Looks up one word; an unknown word yields a result with an empty line set.
    QueryResult query(const string &word) const;

private:
    map<string, shared_ptr<set<size_t>>> m;
    shared_ptr<StrBlob> src;
};

// The outcome of a query: a textual description of the query, the matching
// line numbers, and the text they refer to.
class QueryResult {
    friend QueryResult operator|(const QueryResult &lhs, const QueryResult &rhs);
    friend QueryResult operator&(const QueryResult &lhs, const QueryResult &rhs);
    friend QueryResult operator~(const QueryResult &qr);
    friend ostream &operator<<(ostream &os, const QueryResult &qr);

public:
    // An empty result. The pointers are never null, so every operator and
    // operator<< can be applied to a default-constructed object.
    QueryResult() : sps(make_shared<set<size_t>>()), spv(make_shared<StrBlob>()) {}

    QueryResult(const string &word, shared_ptr<set<size_t>> sps,
                shared_ptr<StrBlob> spv)
        : word(word), sps(sps), spv(spv) {}

private:
    string word;
    shared_ptr<set<size_t>> sps;
    shared_ptr<StrBlob> spv;
};

// Prints "<query> occurs N time(s)" followed by one line per match.
// Everything goes to `os` (the original sent the matching lines to cout).
ostream &operator<<(ostream &os, const QueryResult &qr) {
    os << qr.word << " occurs " << qr.sps->size()
       << (qr.sps->size() > 1 ? " times" : " time") << '\n';
    for (size_t i : *qr.sps) {
        os << "\t(line " << i + 1 << ") " << qr.spv->get(i) << '\n';
    }
    return os;
}

// Union of the two line sets.
QueryResult operator|(const QueryResult &lhs, const QueryResult &rhs) {
    auto line_no = make_shared<set<size_t>>(lhs.sps->begin(), lhs.sps->end());
    line_no->insert(rhs.sps->begin(), rhs.sps->end());
    return QueryResult("(" + lhs.word + " | " + rhs.word + ")", line_no, lhs.spv);
}

// Intersection of the two line sets.
QueryResult operator&(const QueryResult &lhs, const QueryResult &rhs) {
    auto line_no = make_shared<set<size_t>>();
    set_intersection(lhs.sps->begin(), lhs.sps->end(),
                     rhs.sps->begin(), rhs.sps->end(),
                     inserter(*line_no, line_no->begin()));
    return QueryResult("(" + lhs.word + " & " + rhs.word + ")", line_no, lhs.spv);
}

// Complement: every line of the text that is not in qr's line set.
QueryResult operator~(const QueryResult &qr) {
    auto line_no = make_shared<set<size_t>>();
    auto next = qr.sps->begin();
    const auto last = qr.sps->end();
    const size_t total = qr.spv->size();
    for (size_t i = 0; i < total; ++i) {
        // The set is sorted, so one forward pass over it is enough.
        if (next != last && *next == i) {
            ++next;
        } else {
            line_no->insert(i);
        }
    }
    return QueryResult("~(" + qr.word + ")", line_no, qr.spv);
}

QueryResult TextQuery::query(const string &word) const {
    const auto iter = m.find(word);
    if (iter == m.end()) {
        return QueryResult(word, make_shared<set<size_t>>(), src);
    }
    return QueryResult(word, iter->second, src);
}

int _tmain(int argc, _TCHAR *argv[]) {
    (void)argc;
    (void)argv;

    ifstream fin("data.in");
    if (!fin) {
        cout << "操作失败" << endl;
        return 0;
    }

    TextQuery tQery(fin);
    fin.close();  // optional: the destructor would close the file as well

    cout << (tQery.query("fiery") & tQery.query("bird") | ~tQery.query("hair") |
             tQery.query("wind"));
    return 0;
}

测试数据:

Alice Emma has long flowing red hair.Her Daddy says when the wind blows through her hair, it looks almost alive, like a fiery bird in flight. A beautiful fiery bird, he tells her, magical but untamed. "Daddy, shush, there is no such thing," she tells him, at the same time wanting him to tell her more.Shyly, she asks, "I mean, Daddy, is there?"


2.继承版本

关于继承版本,本人对这个设计尚且不是很理解,先写下一些粗浅的理解。

第一,书中声明本例的目的是学习继承,所以没有过多地搭建查询命令的输入部分。例如:我们不让用户输入 hair & bird,而是由学习者自己将其写成Query("hair") & Query("bird")的形式。所以这个继承版本的测试命令不是从控制台输入的。如果想要从命令行输入命令,我的想法是:写一个解析命令的工具类,让其进行优先级的解析,生成相应的查询对象,也就是输入命令字符串,返回查询对象,有点像数据结构里表达式求值的实现。下面的代码没有实现这个功能,当然,也许还有更好的方法来实现这个功能。

第二,继承体系的设计。乍一想,觉得直接从TextQuery中派生出与、或、非查询就够了,但是考虑到TextQuery并没有一个接受查询单词的构造函数,那么对于 "hair" & "bird" | "bed" 这样的命令,我们无法实现TextQuery("hair") & TextQuery("bird") | TextQuery("bed")这样的功能,只能使用解析命令的工具类和函数调用的形式来完成混合查询,无谓地增加了程序的复杂度。可能有的人会想,直接往TextQuery中添加一个这样的构造函数不就行了吗?我觉得现实生活中往往是不允许这样做的,因为TextQuery这个类或者说这个模块可能是团队里的其他成员完成的,对于这些东西,我觉得我们不应该随便去改动,否则可能导致牵一发而动全身的麻烦。那么,如果允许我们修改,我们该如何做呢?其实,即使允许,也不仅仅是添加一个构造函数那么简单,请看下面我的设计思路:

class TextQuery{friend TextQuery operator &(const TextQuery &lhs,const TextQuery &rhs);friend TextQuery operator |(const TextQuery &lhs,const TextQuery &rhs);friend TextQuery operator ~(const TextQuery tq);public:TextQuery(const map<string,shared_ptr<set<size_t>>> &m,const vector<string> *src,const string &word);virtual QueryResult query() const;//此处无需参数,因为函数中并不需要这个参数protected:map<string,shared_ptr<set<size_t>>> m;//单词和单词出现的行vector<string> *src;//存储文件的每一行数据string qword;//查询的单词};class NotQuery:public TextQuery{friend TextQuery operator ~(const TextQuery tq);private:NotQuery(const string &,const TextQuery *);TextQuery *tq;    QueryResult query() const//此处无需参数,因为函数中并不需要这个参数{1.先调用tq的query:QueryResult qresult=tq->query();    2.对qresult求非3.返回结果}};class AndQuery:public TextQuery{friend TextQuery operator&(const TextQuery &lhs,const TextQuery &rhs);private:AndQuery(const string &,const string &,const TextQuery *);TextQuery *tq;QueryResult query() const//此处无需参数,因为函数中并不需要这个参数{1.先调用两次tq的query: QueryResult qresult1=tq->query();     QueryResult qresult2=tq->query();2.对qresult1和qresult2求交集3.返回结果}};class OrQuery:public TextQuery{friend TextQuery operator |(const TextQuery &lhs,const TextQuery &rhs);private:OrQuery(const string &,const string &,const TextQuery *);TextQuery *tq1,*tq2;QueryResult query() const//此处无需参数,因为函数中并不需要这个参数{1.先调用两次tq的query:QueryResult qresult1=tq->query();    QueryResult qresult2=tq->query();2.对qresult1和qresult2求交集3.返回结果}};int _tmain(int argc, _TCHAR* argv[]){    // 调用形式如下: 其中m,src,两个变量需要工具类去解析,这里不累赘TextQuery q= TextQuery(m,src,"hair") & TextQuery(m,src,"bird") | TextQuery(m,src,"bed");return 0;}

从中可以看出,如果希望这样做,将付出巨大的代价,我们原来的TextQuery类被改得面目全非了。那若是我们不改动TextQuery,我们该怎么设计呢?根据继承体系中两个非常重要的原则——“有一个”(has-a)原则和“是一种”(is-a)原则,我们可以推出:查询一个单词是查询的一种,与查询也是查询的一种,同样,非查询、或查询也是查询的一种。所以,我们把“查询”抽象出来作为一个基类,其他各种查询作为其派生类,这四种查询之间是兄弟关系。这就好比那个常见的问题:设计三角形、矩形、球之间的继承体系,在这之中,我们把形状抽象出来作为基类,此处也是一个道理。这也正是课本中类的设计方法。不过,在此之前,我们先看一下在不改动TextQuery的情况下的常规思维吧(为了简化程序、说明原理,下面这个设计只实现逻辑与的功能):

class TextQuery{//这个类和以往的一样public:TextQuery(ifstream &fin):src(make_shared<StrBlob>()){....}QueryResult query(const string &word) const;private:map<string,shared_ptr<set<size_t>>> m;shared_ptr<StrBlob> src;};class Query_Base{public://必须是public,否则用户代码无法访问,且AndQuery 中的两个指针成员也无法访问基类的query和express方法。virtual QueryResult query(const TextQuery &tq) const=0;virtual string express() const=0;//这个方法的具体实现和书本中的一致,这里不多补充virtual ~Query_Base();};class WordQuery:public Query_Base{//这个类执行基础查询public:WordQuery(const string &);private:string qword;QueryResult query(const TextQuery &tq) const{return tq.query(qword);};string express() const;//这个方法的具体实现和书本中的一致,这里不多补充};class AndQuery:public Query_Base{friend Query_Base operator &(const Query_Base &qb,const Query_Base &qb);//此处将出现严重错误,因为我们根本就没法创建Query_Base类对象,这是抽象基类的性质。private:AndQuery( Query_Base *_qb1, Query_Base *_qb2):qb1(_qb1),qb2(_qb2){};Query_Base *qb1,*qb2;QueryResult query(const TextQuery &tq) const//此处无需参数,因为函数中并不需要这个参数{1.先调用两次tq的query:    QueryResult qresult1=qb1->query(tq);    QueryResult qresult2=qb2->query(tq);2.对qresult1和qresult2求交集3.返回结果}};
在这个设计思路中,我们会发现两个问题。首先,抽象基类根本就不能创建对象,这将导致两个问题:用户层次的代码如何使用我们的基类进行查询?类设计者在组合查询Query_Base("hair") & Query_Base("Alice") & Query_Base("bird")时如何返回Query_Base对象?其次,即使我们可以返回Query_Base对象,从我们对&运算符重载的声明形式中也可以看出,返回值会被切割(slicing),只留下基类部分。所以,对于&运算符,我们应该返回这样一个对象:这个对象中含有指向基类的指针或引用。可见上面这个思路是不可行的,于是就有了书中的设计方法:引入一个接口类。这样不仅解决了上面的几个问题,还使得继承体系对于用户来说完全透明(本人对于实现完全透明的好处没什么直接认识,但经常能听到说“使得XXX对于用户来说是透明的”,可见这应该是个好处,相信以后还会碰到的)。设计思路就不多说了,综合上面的几个想法以及书中的讲解就能大概理解,下面是实现代码:


// Text query program (inheritance version): console entry point.
//
// Queries are expression trees of Query_Base nodes (word, not, and, or). The
// Query handle class hides the hierarchy from user code.
//
// NOTE(review): the original started with `#include "stdafx.h"` (the MSVC
// precompiled header), presumably only to obtain _tmain/_TCHAR. <tchar.h> is
// included directly instead. If the project is built with precompiled headers
// enabled, re-add `#include "stdafx.h"` as the very first include. On other
// toolchains, build with -D_tmain=main to get a standard entry point.
#ifdef _MSC_VER
#include <tchar.h>
#else
typedef char _TCHAR;  // stand-in so the MSVC-style entry point compiles elsewhere
#endif

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

class StrBlob;

// Plays the role of an iterator over a StrBlob. It holds only a weak reference,
// so it never keeps the underlying vector alive.
class StrBlobPtr {
public:
    StrBlobPtr();
    StrBlobPtr(StrBlob &a, size_t sz = 0);

    // Returns the element at the current position.
    // Throws runtime_error if the blob is gone, out_of_range if past the end.
    string &deref() const;

    // Advances by one position; throws if already past the end.
    StrBlobPtr &incr();

    // Compares positions only. Const-correct so that temporaries such as
    // blob.end() can appear on either side.
    bool operator!=(const StrBlobPtr &sbp) const { return curr != sbp.curr; }

private:
    shared_ptr<vector<string>> check(size_t i, const string &msg) const;

    weak_ptr<vector<string>> wptr;
    size_t curr;
};

// A vector<string> shared between all copies of the same StrBlob.
class StrBlob {
    friend class StrBlobPtr;

public:
    typedef vector<string>::size_type size_type;

    StrBlob() : data(make_shared<vector<string>>()) {}
    StrBlob(const string *s1, const string *s2)
        : data(make_shared<vector<string>>(s1, s2)) {}

    size_type size() const { return data->size(); }
    bool empty() const { return data->empty(); }
    void push_back(const string &t) { data->push_back(t); }

    void pop_back() {
        check(0, "pop_back on empty strblob");
        data->pop_back();
    }

    // Bounds-checked element access; throws out_of_range.
    string &get(size_t i) {
        check(i, "out of range");
        return (*data)[i];
    }

    // Returning a reference is safe here: the element lives in the shared
    // vector, not in this function's stack frame.
    string &front() {
        check(0, "front on empty strblob");
        return data->front();
    }
    string &front() const {
        check(0, "front on empty strblob");
        return data->front();
    }
    string &back() {
        check(0, "back on empty strblob");
        return data->back();
    }
    string &back() const {
        check(0, "back on empty strblob");
        return data->back();
    }

    StrBlobPtr begin() { return StrBlobPtr(*this); }
    StrBlobPtr end() { return StrBlobPtr(*this, this->size()); }

private:
    // Throws out_of_range(msg) unless i is a valid index.
    void check(size_type i, const string &msg) const {
        if (i >= data->size()) throw out_of_range(msg);
    }

    shared_ptr<vector<string>> data;
};

StrBlobPtr::StrBlobPtr() : curr(0) {}

StrBlobPtr::StrBlobPtr(StrBlob &a, size_t sz) : wptr(a.data), curr(sz) {}

string &StrBlobPtr::deref() const {
    shared_ptr<vector<string>> p = check(curr, "dereference past end");
    return (*p)[curr];
}

StrBlobPtr &StrBlobPtr::incr() {
    check(curr, "increment past of strblobptr");
    ++curr;
    return *this;
}

// Locks the weak pointer and validates index i.
// (The original also printed a debugging separator to cout before throwing;
// the exception alone already carries the information.)
shared_ptr<vector<string>> StrBlobPtr::check(size_t i, const string &msg) const {
    shared_ptr<vector<string>> ret = wptr.lock();
    if (!ret) throw runtime_error("unbound_strblobptr");
    if (i >= ret->size()) throw out_of_range(msg);
    return ret;
}

class QueryResult;

// Reads a text line by line and records, for every word, the set of
// zero-based line numbers on which it occurs.
class TextQuery {
public:
    // Accepts any input stream; an ifstream argument works exactly as before.
    TextQuery(istream &in) : src(make_shared<StrBlob>()) {
        string line;
        while (getline(in, line)) {
            src->push_back(line);
            const size_t line_index = src->size() - 1;

            istringstream tokens(line);
            string token;
            while (tokens >> token) {
                // Keep only the alphabetic characters, so `hair,` and `"Daddy,`
                // index as `hair` and `Daddy`. Case is preserved.
                string key;
                for (char ch : token) {
                    // isalpha needs a value representable as unsigned char.
                    if (isalpha(static_cast<unsigned char>(ch))) key.push_back(ch);
                }
                if (key.empty()) continue;  // punctuation-only token: nothing to index

                // operator[] yields a null pointer the first time a word is seen.
                shared_ptr<set<size_t>> &lines = m[key];
                if (!lines) lines = make_shared<set<size_t>>();
                lines->insert(line_index);
            }
        }
    }

    // Looks up one word; an unknown word yields a result with an empty line set.
    QueryResult query(const string &word) const;

private:
    map<string, shared_ptr<set<size_t>>> m;
    shared_ptr<StrBlob> src;
};

// The outcome of a query: a textual description of the query, the matching
// line numbers, and the text they refer to.
class QueryResult {
    friend ostream &operator<<(ostream &os, const QueryResult &qr);

public:
    // An empty result. The pointers are never null, so begin()/end()/get_file()
    // and operator<< are safe on a default-constructed object.
    QueryResult() : sps(make_shared<set<size_t>>()), spv(make_shared<StrBlob>()) {}

    QueryResult(const string &word, shared_ptr<set<size_t>> sps,
                shared_ptr<StrBlob> spv)
        : word(word), sps(sps), spv(spv) {}

    set<size_t>::iterator begin() const { return sps->begin(); }
    set<size_t>::iterator end() const { return sps->end(); }
    shared_ptr<StrBlob> get_file() const { return spv; }

private:
    string word;
    shared_ptr<set<size_t>> sps;
    shared_ptr<StrBlob> spv;
};

// Prints "<query> occurs N time(s)" followed by one line per match.
// Everything goes to `os` (the original sent the matching lines to cout).
ostream &operator<<(ostream &os, const QueryResult &qr) {
    os << qr.word << " occurs " << qr.sps->size()
       << (qr.sps->size() > 1 ? " times" : " time") << '\n';
    for (size_t i : *qr.sps) {
        os << "\t(line " << i + 1 << ") " << qr.spv->get(i) << '\n';
    }
    return os;
}

QueryResult TextQuery::query(const string &word) const {
    const auto iter = m.find(word);
    if (iter == m.end()) {
        return QueryResult(word, make_shared<set<size_t>>(), src);
    }
    return QueryResult(word, iter->second, src);
}

// Abstract root of the query hierarchy. Everything is private or protected:
// user code only ever goes through the Query handle.
class Query_Base {
    friend class Query;

private:
    // Textual form of the query, e.g. ((hair & bird) | her).
    virtual std::string express() const = 0;
    // Evaluates the query against the given text.
    virtual QueryResult result(const TextQuery &tq) const = 0;

protected:
    virtual ~Query_Base() {}
};

// Leaf node: looks up a single word.
class WordQuery : public Query_Base {
    friend class Query;

private:
    WordQuery(const string &word) : qword(word) {}
    string express() const override { return qword; }
    QueryResult result(const TextQuery &tq) const override { return tq.query(qword); }

    string qword;
};

// Handle class: hides the inheritance hierarchy so that users never see it.
class Query {
    friend Query operator&(const Query &lhs, const Query &rhs);
    friend Query operator|(const Query &lhs, const Query &rhs);
    friend Query operator~(const Query &q);

public:
    // make_shared cannot reach WordQuery's private constructor, so a temporary
    // is built here (Query is a friend) and handed to make_shared to copy.
    Query(const string &word) : sqb(make_shared<WordQuery>(WordQuery(word))) {}

    std::string express() const { return sqb->express(); }
    QueryResult result(const TextQuery &tq) const { return sqb->result(tq); }

private:
    Query(shared_ptr<Query_Base> t_sqb) : sqb(t_sqb) {}

    shared_ptr<Query_Base> sqb;  // pointer to the root node of this query
};

// Complement of another query.
class NotQuery : public Query_Base {
    friend Query operator~(const Query &q);

private:
    NotQuery(const Query &t_q) : q(t_q) {}

    string express() const override { return "~(" + q.express() + ")"; }

    QueryResult result(const TextQuery &tq) const override {
        const QueryResult operand = q.result(tq);
        auto line_no = make_shared<set<size_t>>();
        auto next = operand.begin();
        const auto last = operand.end();
        const size_t total = operand.get_file()->size();
        for (size_t i = 0; i < total; ++i) {
            // The operand's lines are sorted, so one forward pass is enough.
            if (next != last && *next == i) {
                ++next;
            } else {
                line_no->insert(i);
            }
        }
        return QueryResult(express(), line_no, operand.get_file());
    }

    Query q;
};

// Common part of the two-operand queries; still abstract (no result()).
class BinaryQuery : public Query_Base {
protected:
    BinaryQuery(const Query &left, const Query &right, string op)
        : lhs(left), rhs(right), ops(op) {}

    // Spaces around the operator match the format documented on
    // Query_Base::express, e.g. ((hair & bird) | her).
    string express() const override {
        return "(" + lhs.express() + " " + ops + " " + rhs.express() + ")";
    }

    Query lhs, rhs;
    string ops;
};

// Lines matched by both operands.
class AndQuery : public BinaryQuery {
    friend Query operator&(const Query &lhs, const Query &rhs);

private:
    AndQuery(const Query &l, const Query &r) : BinaryQuery(l, r, "&") {}

    QueryResult result(const TextQuery &tq) const override {
        const QueryResult left_qr = lhs.result(tq);
        const QueryResult right_qr = rhs.result(tq);
        auto line_no = make_shared<set<size_t>>();
        set_intersection(left_qr.begin(), left_qr.end(),
                         right_qr.begin(), right_qr.end(),
                         inserter(*line_no, line_no->begin()));
        return QueryResult(express(), line_no, right_qr.get_file());
    }
};

// Lines matched by either operand.
class OrQuery : public BinaryQuery {
    friend Query operator|(const Query &lhs, const Query &rhs);

private:
    OrQuery(const Query &l, const Query &r) : BinaryQuery(l, r, "|") {}

    QueryResult result(const TextQuery &tq) const override {
        const QueryResult left_qr = lhs.result(tq);
        const QueryResult right_qr = rhs.result(tq);
        auto line_no = make_shared<set<size_t>>(left_qr.begin(), left_qr.end());
        line_no->insert(right_qr.begin(), right_qr.end());
        return QueryResult(express(), line_no, right_qr.get_file());
    }
};

Query operator&(const Query &lhs, const Query &rhs) {
    return Query(make_shared<AndQuery>(AndQuery(lhs, rhs)));
}

Query operator|(const Query &lhs, const Query &rhs) {
    return Query(make_shared<OrQuery>(OrQuery(lhs, rhs)));
}

Query operator~(const Query &q) {
    return Query(make_shared<NotQuery>(NotQuery(q)));
}

int _tmain(int argc, _TCHAR *argv[]) {
    (void)argc;
    (void)argv;

    ifstream fin("data.in");
    if (!fin) {
        cout << "操作失败" << endl;
        return 0;
    }

    TextQuery tQery(fin);
    fin.close();  // optional: the destructor would close the file as well

    Query q = Query("hair") | Query("Alice") | ~Query("bird");
    cout << q.express() << endl;
    cout << q.result(tQery);
    return 0;
}

这应该可以作为一个继承体系的设计模板了,这个模板普遍适用于设计像三角形、矩形、球这类继承体系的问题。




0 0