txt 转换为 html 解析文本 自动打上标签

来源:互联网 发布:淘宝上开店怎么找货源 编辑:程序博客网 时间:2024/05/21 21:35

输入文本

$1结构符号说明
$1章
$2节
$3小标题
$4过渡小标题
$5具体知识点,实心点
$6具体知识点,空心点
$1其他符号说明
$5\转义
$5\~\~代码段
~int main(void)
{
return 0;
}~
$5\@\@@重点内容,红色@
$5\?\??疑问,蓝色?
$5\`\``旁白,黄色`
$5\#\##暂未完成,绿色#

效果如下:

结构符号说明

      1. 小标题
        1. 过渡小标题
          1. 具体知识点,实心点
            1. 具体知识点,空心点

    其他符号说明

    1. \转义
    2. ~~代码段
      int main(void){return 0;}...
    3. @@重点内容,红色
    4. ??疑问,蓝色
    5. ``旁白,黄色
    6. ##暂未完成,绿色...


    代码:

    #include <iostream>
    #include <fstream>
    #include <string>
    #include <vector>

    using namespace std;

    // One '$'-delimited segment of the input text, stored as a node of a
    // doubly linked list. The digit that follows '$' becomes `level`
    // (per the input format: 1 chapter, 2 section, 3 subheading,
    // 4 transitional subheading, 5 solid bullet, 6 hollow bullet).
    class StrSeg
    {
    public:
        // `c` is copied into `content`; std::string has value semantics, so
        // this is a deep copy. Initializers follow declaration order.
        StrSeg(int l, const string& c)
            : level(l), taged(false), content(c), pre(NULL), next(NULL) {}

        int level;
        bool taged;      // a node can be visited several times; true once its content is wrapped
        string content;  // text of the segment, progressively rewritten into HTML
        StrSeg* pre;
        StrSeg* next;
    };

    // Path of the input file to convert.
    string FILE_PATH;

    // Returns the inline CSS used on the <ol> that holds items of `level`.
    // Levels outside 2..6 yield a deliberately invalid marker value.
    string styleRule(int level)
    {
        switch (level) {
        case 2:
            return "list-style-type:cjk-ideographic;font-size:20px";
        case 3:
            return "list-style-type:upper-alpha;font-size:20px";
        case 4:
            return "list-style-type:lower-alpha;font-size:20px";
        case 5:
            return "list-style-type:disc;font-family:'kaiti';font-size:15px";
        case 6:
            return "list-style-type:circle;font-family:'kaiti';font-size:15px";
        default:
            return "list-style-type:error!";
        }
    }

    // Rewrites paired marker characters inside one segment, in place.
    //
    // orgContent : text to rewrite.
    // symb       : set of marker characters; any character of the set counts.
    // leftStr    : replaces the opening marker of each pair.
    // rightStr   : replaces the closing marker of each pair.
    //
    // A marker preceded by a backslash is literal: the backslash is removed
    // and the character stays. If the number of unescaped markers is odd, an
    // error is printed and the text is left without any pair substitution
    // (backslashes removed so far stay removed).
    void contentRule(string& orgContent, const string& symb,
                     const string& leftStr, const string& rightStr)
    {
        vector<size_t> marks;  // indices of unescaped markers

        // size_t, not int: the result is compared against string::npos.
        size_t pos = orgContent.find_first_of(symb, 0);
        while (pos != string::npos) {
            // `pos > 0` guards the look-behind when a marker is the first character.
            if (pos > 0 && orgContent[pos - 1] == '\\') {
                // Drop the backslash. The marker slides to pos-1, so resuming
                // at `pos` steps past it.
                orgContent.erase(pos - 1, 1);
                pos = orgContent.find_first_of(symb, pos);
            } else {
                marks.push_back(pos);
                pos = orgContent.find_first_of(symb, pos + 1);
            }
        }

        // "+ 1" keeps the printed figure equal to the historical log, which
        // counted a leading sentinel entry in addition to the markers.
        cout << "symb.size = " << symb.size() << "  poses.size = " << marks.size() + 1
             << " " << orgContent << endl;

        if (marks.empty())
            return;
        if (marks.size() % 2 != 0) {
            cout << "***ERROR!***: contentRule miss match!" << endl;
            return;
        }

        string res;
        size_t cursor = 0;  // first character not yet copied to res
        for (size_t i = 0; i + 1 < marks.size(); i += 2) {
            const size_t open = marks[i];
            const size_t close = marks[i + 1];
            res.append(orgContent, cursor, open - cursor);       // plain text before the pair
            res.append(leftStr);
            res.append(orgContent, open + 1, close - open - 1);  // text between the markers
            res.append(rightStr);
            cursor = close + 1;                                  // skip the closing marker itself
        }
        res.append(orgContent, cursor, string::npos);            // everything after the last pair
        orgContent = res;
    }

    // Releases every node reachable through `next` starting at `head`.
    static void freeList(StrSeg* head)
    {
        while (head != NULL) {
            StrSeg* following = head->next;
            delete head;
            head = following;
        }
    }

    // Reads FILE_PATH and builds the doubly linked list of segments.
    //
    // Text before the first '$' is echoed and discarded. Each later segment
    // is "<digit><text>"; the digit is not validated. The first segment
    // becomes the head, pre-wrapped in <h1> and marked as already tagged.
    // Empty segments (e.g. "$$" or a trailing '$') are skipped.
    //
    // Returns the head, or NULL when the file cannot be opened or holds no
    // segment. The caller owns the list.
    StrSeg* createList()
    {
        ifstream infile(FILE_PATH.c_str());
        if (!infile) {
            cerr << "createList: cannot open " << FILE_PATH << endl;
            return NULL;
        }

        string istr;
        getline(infile, istr, '$');  // bytes are taken as-is; no encoding conversion happens here
        cout << istr << endl;

        if (!getline(infile, istr, '$') || istr.empty()) {
            cerr << "createList: no '$' segment found in " << FILE_PATH << endl;
            return NULL;
        }
        cout << "createList: istr = " << istr << endl;

        StrSeg* res = new StrSeg(istr[0] - '0', "<h1>" + istr.substr(1) + "</h1>");
        res->taged = true;

        // Loop on the read itself rather than on eof(), so a failed final
        // read never reaches substr(1) with an empty string.
        StrSeg* tail = res;
        while (getline(infile, istr, '$')) {
            cout << "createList: istr = " << istr << endl;
            if (istr.empty())
                continue;
            StrSeg* node = new StrSeg(istr[0] - '0', istr.substr(1));
            tail->next = node;
            node->pre = tail;
            tail = node;
        }
        return res;
    }

    // Walks the list forward applying every inline markup rule to each
    // segment, then walks it backward printing the results (debug output).
    void bypassList(StrSeg* head)
    {
        if (head == NULL)
            return;

        cout << "bypassList***************************************************" << endl;
        int count = 1;
        while (true) {
            cout << count << ": " << head->content << endl;

            // Escape angle brackets so text such as "#include <x>" is not
            // parsed as a tag. Level 1 is skipped; the head segment already
            // carries a real <h1> tag at this point.
            if (head->level != 1)
                contentRule(head->content, "<>", "&lt;", "&gt;");
            contentRule(head->content, "@", "<span style=\"color:red;\">", "</span>");        // @key point@, red
            contentRule(head->content, "`", "<span style=\"color:Chocolate;\">", "</span>");  // `aside`
            contentRule(head->content, "?", "<span style=\"color:blue;\">", "</span>");       // ?question?, blue
            contentRule(head->content, "#", "<span style=\"color:green;\">", "...</span>");   // #unfinished#, green
            contentRule(head->content, "~", "<pre name=\"code\" class=\"cpp\">", "...</pre>"); // ~code block~

            if (head->next == NULL)
                break;
            head = head->next;
            count++;
        }

        cout << "***************************************************" << endl;
        while (true) {
            cout << count << ": " << head->content << endl;
            if (head->pre == NULL)
                break;
            head = head->pre;
            count--;
        }
    }

    // Unlinks `curSeg` from the list and frees it. Either neighbour may be NULL.
    void deleteNode(StrSeg* curSeg)
    {
        if (curSeg == NULL)
            return;
        if (curSeg->pre != NULL)
            curSeg->pre->next = curSeg->next;
        if (curSeg->next != NULL)
            curSeg->next->pre = curSeg->pre;
        delete curSeg;
    }

    // Folds the flat list into nested HTML, depth first: a node absorbs the
    // node after it, as a sibling <li> when the levels are equal or as a
    // nested <ol> when the follower is deeper, and the absorbed node is
    // deleted.
    //
    // NOTE(review): the level-1 branch wraps the content in <h1> every time it
    // runs, and createList already wraps the head, so a head followed directly
    // by a deeper level ends up with nested <h1>. Left as-is; confirm intent.
    void dfsCompact(StrSeg* curSeg)
    {
        if (curSeg == NULL)
            return;

        // End of list, or the follower belongs to a shallower branch: this is
        // a leaf. Wrap it once and hand control back to the caller.
        if (curSeg->next == NULL || curSeg->level > curSeg->next->level) {
            if (!curSeg->taged) {
                curSeg->content = "<li>" + curSeg->content + "</li>";
                curSeg->taged = true;
            }
            cout << "res = " << curSeg->content << endl;
            return;
        }

        // Otherwise let the follower finish compacting itself first.
        dfsCompact(curSeg->next);

        // Same level: append the follower as a sibling.
        if (curSeg->next != NULL && curSeg->level == curSeg->next->level) {
            string res;
            if (curSeg->taged) {
                res = curSeg->content + curSeg->next->content;
            } else {
                res = "<li>" + curSeg->content + "</li>" + curSeg->next->content;
                curSeg->taged = true;
            }
            curSeg->content = res;
            cout << "res = " << curSeg->content << endl;
            deleteNode(curSeg->next);
            return;
        }

        // Deeper follower: nest it in an <ol>, then retry with the new follower.
        if (curSeg->next != NULL && curSeg->level < curSeg->next->level) {
            const string nested = "<ol style=\"" + styleRule(curSeg->next->level) + "\">"
                                  + curSeg->next->content + "</ol>";
            string res;
            if (curSeg->level == 1) {
                // chapters ($1) become headings instead of list items
                res = "<h1>" + curSeg->content + "</h1>" + nested;
            } else if (curSeg->taged) {
                res = curSeg->content + nested;
            } else {
                res = "<li>" + curSeg->content + nested + "</li>";
                curSeg->taged = true;
            }
            curSeg->content = res;
            cout << "res = " << curSeg->content << endl;
            deleteNode(curSeg->next);
            if (curSeg->next != NULL)
                dfsCompact(curSeg);
            return;
        }
    }

    // Converts the input text to ./test_result.html.
    // argv[1], when given, overrides the default input path.
    // Returns 0 on success, 1 when the input or the output cannot be used.
    int main(int argc, char* argv[])
    {
        FILE_PATH = (argc > 1) ? argv[1] : "txt_format_to_html readme.txt";

        StrSeg* head = createList();
        if (head == NULL)
            return 1;

        bypassList(head);
        dfsCompact(head);

        ofstream outfile("./test_result.html");
        if (!outfile) {
            cerr << "main: cannot open ./test_result.html for writing" << endl;
            freeList(head);
            return 1;
        }
        outfile << "<meta http-equiv=\"Content-Type\" content=\"text/html\"; charset=\"unicode\">";
        outfile << head->content;

        freeList(head);
        return 0;
    }


  • 0 0