txt 转换为 html 解析文本 自动打上标签
来源:互联网 发布:淘宝上开店怎么找货源 编辑:程序博客网 时间:2024/05/21 21:35
输入文本:
$1结构符号说明
$1章
$2节
$3小标题
$4过渡小标题
$5具体知识点,实心点
$6具体知识点,空心点
$1其他符号说明
$5\转义
$5\~\~代码段
~int main(void)
{
return 0;
}~
$5\@\@@重点内容,红色@
$5\?\??疑问,蓝色?
$5\`\``旁白,黄色`
$5\#\##暂未完成,绿色#
效果如下:
结构符号说明
章
- 节
- 小标题
- 过渡小标题
- 具体知识点,实心点
- 具体知识点,空心点
- 具体知识点,实心点
- 过渡小标题
- 小标题
其他符号说明
- \转义
- ~~代码段
int main(void){return 0;}...
- @@重点内容,红色
- ??疑问,蓝色
- ``旁白,黄色
- ##暂未完成,绿色...
代码:
// Converts a lightly marked-up text file into an HTML outline.
//
// Input format (everything before the first '$' is ignored):
//   $<digit><text>   starts a new segment; <digit> is its nesting level.
//                    Level 1 is rendered as an <h1> heading, deeper levels as
//                    nested <ol> lists styled by styleRule().
// Inline markup inside a segment (a preceding '\' makes the symbol literal):
//   @...@  red span        ?...?  blue span       `...`  chocolate span
//   #...#  green span      ~...~  <pre> code block
//
// The result is written to ./test_result.html.

#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

// One "$<level><text>" segment of the input, kept as a node of a doubly
// linked list. dfsCompact() folds the list into a single node holding the
// final HTML.
class StrSeg {
public:
    StrSeg(int l, string c) : level(l), taged(false), content(c), pre(NULL), next(NULL) {}

    int level;       // nesting level parsed from the digit after '$'
    bool taged;      // true once content is wrapped in <h1>/<li>; a node can be
                     // visited more than once, so this prevents double wrapping
    string content;  // text of the segment, later the HTML built from it
    StrSeg* pre;
    StrSeg* next;
};

// Path of the input file; set by main() and read by createList().
string FILE_PATH;

// Returns the inline CSS used for the <ol> that holds items of `level`.
// Levels outside 2..6 yield a recognisable "error" style instead of failing.
string styleRule(int level) {
    switch (level) {
    case 2:
        return "list-style-type:cjk-ideographic;font-size:20px";
    case 3:
        return "list-style-type:upper-alpha;font-size:20px";
    case 4:
        return "list-style-type:lower-alpha;font-size:20px";
    case 5:
        return "list-style-type:disc;font-family:'kaiti';font-size:15px";
    case 6:
        return "list-style-type:circle;font-family:'kaiti';font-size:15px";
    default:
        return "list-style-type:error!";
    }
}

// Rewrites delimiter-enclosed spans of `orgContent` in place.
//
// orgContent  text to process; modified in place.
// symb        set of delimiter characters (any one of them counts).
// leftStr     replacement for the opening delimiter of each pair.
// rightStr    replacement for the closing delimiter of each pair.
//
// A delimiter directly preceded by '\' is treated as literal text and the
// backslash is removed. The remaining delimiters are paired in order of
// appearance. If their number is odd, an error is reported and only the
// backslash removal is applied.
//
// NOTE(review): matching is byte-wise. In an encoding whose trailing bytes can
// fall in the ASCII range, a multi-byte character could be mistaken for a
// delimiter or a backslash - confirm the input encoding.
void contentRule(string& orgContent, string symb, string leftStr, string rightStr) {
    // Positions of every unescaped delimiter, in ascending order.
    vector<size_t> marks;
    size_t pos = orgContent.find_first_of(symb, 0);
    while (pos != string::npos) {
        if (pos > 0 && orgContent[pos - 1] == '\\') {
            // Dropping the backslash shifts the delimiter to pos-1, so `pos`
            // already points just past it and the search resumes from there.
            orgContent.erase(pos - 1, 1);
            pos = orgContent.find_first_of(symb, pos);
        } else {
            marks.push_back(pos);
            pos = orgContent.find_first_of(symb, pos + 1);
        }
    }

    if (marks.empty())
        return;
    if (marks.size() % 2 != 0) {
        cerr << "***ERROR!***: contentRule miss match!" << endl;
        return;
    }

    string res;
    size_t copied = 0;  // index of the first byte not yet written to res
    for (size_t i = 0; i + 1 < marks.size(); i += 2) {
        const size_t openPos = marks[i];
        const size_t closePos = marks[i + 1];
        res.append(orgContent, copied, openPos - copied);  // text before the pair
        res.append(leftStr);
        res.append(orgContent, openPos + 1, closePos - openPos - 1);
        res.append(rightStr);
        copied = closePos + 1;  // skip the closing delimiter itself
    }
    res.append(orgContent, copied, string::npos);  // text after the last pair
    orgContent = res;
}

// Reads FILE_PATH and builds the doubly linked list of segments.
//
// Returns the first node, or NULL when the file cannot be opened or contains
// no '$' segment. Empty segments (e.g. "$$" or a trailing '$') are skipped.
// The caller owns the returned nodes.
StrSeg* createList() {
    ifstream infile(FILE_PATH.c_str());
    if (!infile) {
        cerr << "createList: cannot open \"" << FILE_PATH << "\"" << endl;
        return NULL;
    }

    string segment;
    // Whatever precedes the first '$' is not part of any segment.
    getline(infile, segment, '$');

    StrSeg* head = NULL;
    StrSeg* tail = NULL;
    // Testing the stream after the read (rather than eof() before it) also
    // stops cleanly on read errors.
    while (getline(infile, segment, '$')) {
        if (segment.empty())
            continue;
        StrSeg* node = new StrSeg(segment[0] - '0', segment.substr(1));
        if (tail == NULL) {
            head = node;
        } else {
            tail->next = node;
            node->pre = tail;
        }
        tail = node;
    }
    return head;
}

// Applies every inline markup rule to each segment of the list.
void bypassList(StrSeg* head) {
    for (StrSeg* seg = head; seg != NULL; seg = seg->next) {
        // NOTE(review): the replacements here equal the delimiters, so paired
        // angle brackets pass through unchanged. These literals were presumably
        // HTML entities before the code was published - confirm with the author.
        if (seg->level != 1)
            contentRule(seg->content, "<>", "<", ">");
        contentRule(seg->content, "@", "<span style=\"color:red;\">", "</span>");        // emphasis
        contentRule(seg->content, "`", "<span style=\"color:Chocolate;\">", "</span>");  // aside
        contentRule(seg->content, "?", "<span style=\"color:blue;\">", "</span>");       // open question
        contentRule(seg->content, "#", "<span style=\"color:green;\">", "...</span>");   // unfinished
        contentRule(seg->content, "~", "<pre name=\"code\" class=\"cpp\">", "...</pre>");  // code
    }
}

// Unlinks `curSeg` from the list and frees it. Either neighbour may be NULL.
void deleteNode(StrSeg* curSeg) {
    if (curSeg == NULL)
        return;
    if (curSeg->pre != NULL)
        curSeg->pre->next = curSeg->next;
    if (curSeg->next != NULL)
        curSeg->next->pre = curSeg->pre;
    delete curSeg;
}

// Wraps a segment's own text once: <h1> for level 1, <li> for everything else.
static void tagSelf(StrSeg* seg) {
    if (seg->taged)
        return;
    if (seg->level == 1)
        seg->content = "<h1>" + seg->content + "</h1>";
    else
        seg->content = "<li>" + seg->content + "</li>";
    seg->taged = true;
}

// Returns `child`'s HTML wrapped in the <ol> matching its level.
static string subList(const StrSeg* child) {
    return "<ol style=\"" + styleRule(child->level) + "\">" + child->content + "</ol>";
}

// Folds the list starting at `curSeg` into nested HTML, depth first.
//
// On return, curSeg->content holds the HTML for curSeg, all following
// segments of the same level, and everything nested below them. Those
// segments are removed from the list, so curSeg->next is either NULL or a
// segment with a smaller level. curSeg itself is never deleted.
void dfsCompact(StrSeg* curSeg) {
    if (curSeg == NULL)
        return;

    StrSeg* following = curSeg->next;

    // End of input, or the next segment belongs to an outer level: this
    // segment is a leaf of the current branch.
    if (following == NULL || curSeg->level > following->level) {
        tagSelf(curSeg);
        return;
    }

    // Fold everything that hangs off the next segment first.
    dfsCompact(following);

    if (curSeg->level == following->level) {
        // Sibling: `following` already carries all later siblings, so the
        // two simply sit side by side.
        tagSelf(curSeg);
        curSeg->content += following->content;
        deleteNode(following);
        return;
    }

    // `following` is deeper: it becomes a nested list of this segment.
    const string nested = subList(following);
    if (curSeg->taged) {
        // Already wrapped during an earlier pass; just attach the extra list.
        curSeg->content += nested;
    } else {
        if (curSeg->level == 1)
            curSeg->content = "<h1>" + curSeg->content + "</h1>" + nested;
        else
            curSeg->content = "<li>" + curSeg->content + nested + "</li>";
        curSeg->taged = true;
    }
    deleteNode(following);

    // More segments may follow (siblings, or children of a different depth).
    if (curSeg->next != NULL)
        dfsCompact(curSeg);
}

// Frees every node from `head` to the end of the list.
static void freeList(StrSeg* head) {
    while (head != NULL) {
        StrSeg* following = head->next;
        delete head;
        head = following;
    }
}

// Usage: program [input-file]
// Without an argument the built-in default file name is used.
int main(int argc, char* argv[]) {
    FILE_PATH = (argc > 1) ? argv[1] : "txt_format_to_html readme.txt";

    StrSeg* head = createList();
    if (head == NULL) {
        cerr << "main: nothing to convert" << endl;
        return 1;
    }

    bypassList(head);
    dfsCompact(head);

    ofstream outfile("./test_result.html");
    if (!outfile) {
        cerr << "main: cannot write ./test_result.html" << endl;
        freeList(head);
        return 1;
    }
    // NOTE(review): the charset part sits outside the content attribute and
    // "unicode" may not match the real file encoding - confirm before changing.
    outfile << "<meta http-equiv=\"Content-Type\" content=\"text/html\"; charset=\"unicode\">";
    outfile << head->content;

    freeList(head);
    return 0;
}
0 0
- txt 转换为 html 解析文本 自动打上标签
- JAVA 过滤标签将html内容转换为文本
- JAVA 过滤标签将html内容转换为文本
- [Python]HTML转换为TXT的脚本
- Python 将HTML转换为TXT
- [Python]HTML转换为TXT的脚本
- 【HTML】让<pre>标签文本自动换行
- html pre标签使文本自动换行
- selenium webdriver获取不到标签文本及将html特殊符号&转换为正常字符的方法
- html标签转化为纯文本格式
- 文本添加格式转换为html类!
- 文本转换为HTML文字编码
- PHP html 转换为纯文本
- js将html转换为纯文本
- php转换html格式为文本格式
- Unity解析Txt文本
- 将lrc批量转换为txt,同时删除时间标签
- word文档转换为PDF、jpg、HTML、txt、swf
- /sbin/insserv: No such file or directory
- poj 1015 动态规划 难
- 基于Linux的USB子系统学习 --- <基础知识与USB协议概述> ing
- dos命令总结大全
- 黑马程序员——Java集合框架—Set
- txt 转换为 html 解析文本 自动打上标签
- 双链表的基本操作
- 谬论之程序员的眼光看世界
- Java-Map集合
- HDU 1114 (dp 完全背包)
- poj 1273Drainage Ditches(简单网络流(用dinic))
- throw er; // Unhandled 'error' event
- 云计算平台管理的三大利器Nagios、Ganglia和Splunk
- Fiddler 教程