验证 HTML 文档是否合法:一个在 Ubuntu 下用 Eclipse 构建的 C++ 工程

来源:互联网 发布:天书残卷圣物进阶数据 编辑:程序博客网 时间:2024/04/29 16:06

  能够判断标签是否成对闭合(用栈实现),以及标签之间的嵌套关系是否合法(用树实现)。


HtmlDoc.h


#ifndef HTMLDOC_H_#define HTMLDOC_H_#include<iostream>#include<string>#include"list.h"#include"stack.h"#include<stdio.h>#include<stdlib.h>using namespace std;struct TreeNode{ string nodeName;//node name List<TreeNode*> *childList;//child list node};class HtmlDocument {private:TreeNode *treeRoot;//html节点  对象Stack<TreeNode*>* stack;bool checkAndPushStack(string headNodeName);///遍历树看是否合法如果合法放进栈里bool checkStackTop(string tailNodeName);///和栈顶元素比较如果相等就移除栈顶元素,否则返回falseTreeNode * traverseTree(TreeNode* treeNode, string nodeName);//遍历整个树结构public:HtmlDocument();~HtmlDocument();void release(TreeNode*);bool isLegal(string path);};#endif /* HTMLDOC_H_ */
HtmlDoc.cpp


#include "HtmlDoc.h"HtmlDocument::HtmlDocument(){this->stack = new Stack<TreeNode*>();//建一个存放树节点的栈    //---------以下代码是够造一个树结构 目地--------------------//TreeNode *html = new TreeNode();this->treeRoot = html;html->nodeName = "html";List<TreeNode*> *htmlChildList= new List<TreeNode*>();html->childList = htmlChildList;TreeNode *head = new TreeNode();head->nodeName = "head";List<TreeNode*> *headChildList= new List<TreeNode*>();head->childList = headChildList;htmlChildList->add(head);TreeNode *body = new TreeNode();body->nodeName = "body";List<TreeNode*> *bodyChildList= new List<TreeNode*>();body->childList = bodyChildList;htmlChildList->add(body);TreeNode *title = new TreeNode();title->nodeName = "title";List<TreeNode*> *titleChildList= new List<TreeNode*>();title->childList = titleChildList;headChildList->add(title);TreeNode *h1 = new TreeNode();h1->nodeName = "h1";h1->childList = NULL;bodyChildList->add(h1);TreeNode *p = new TreeNode();p->nodeName = "p";p->childList = NULL;bodyChildList->add(p);TreeNode *center = new TreeNode();center->nodeName = "center";List<TreeNode*> *centerChildList= new List<TreeNode*>();center->childList = centerChildList;bodyChildList->add(center);//leftTreeNode *left = new TreeNode();left->nodeName = "left";left->childList = NULL;bodyChildList->add(left);TreeNode *form = new TreeNode();form->nodeName = "form";form->childList = new List<TreeNode*>();bodyChildList->add(form);//leftTreeNode *table = new TreeNode();table->nodeName = "table";List<TreeNode*> *tableChildList= new List<TreeNode*>();table->childList = tableChildList;bodyChildList->add(table);//leftTreeNode *tr = new TreeNode();tr->nodeName = "tr";List<TreeNode*> *trChildList= new List<TreeNode*>();tr->childList = trChildList;tableChildList->add(tr);//leftTreeNode *td = new TreeNode();td->nodeName = "td";td->childList = new List<TreeNode*>();trChildList->add(td);//left}HtmlDocument::~HtmlDocument(){if(this->stack!=NULL){delete stack;}release(this->treeRoot);}/** * 释放空间 */void 
HtmlDocument::release(TreeNode* tn){if(tn == NULL || tn->childList == NULL || tn->childList->getLength() == 0){delete tn;return;}for(int i =0; i < tn->childList->getLength(); i++){release(tn->childList->getValue(i));}}/** * 判断html文件内容是否合法 */bool HtmlDocument::isLegal(string path){FILE * sfp;int ti = 0;int li = 0;if((sfp=fopen(path.c_str(),"r"))==NULL)        /*以只读方式打开*/{      printf("HTML File cannot be opened\n");      exit(1);    }char buff[50];char str;//读取头标签的内容,存入缓冲区    while(!feof(sfp))                        /*判断是否文件尾,不是则循环*/    {      str = fgetc(sfp);                 /*读出数据送缓冲区*/      if(str == '<' || ti != 0){      if(str=='/'){      ti = 0;      //continue;      }else{      buff[ti] = str;      ti++;      }      }      if(str == '>' && ti !=0 ){      buff[ti] = str;      char *cNode;      cNode = new char[ti];      for(int j =1;j<ti-1;j++){      cNode[j-1] = buff[j];      }      string hTab(cNode);      delete cNode;//      cout << "头标签:"<<hTab << endl;      //------------------------去遍历树返------------------------//      if(!this->checkAndPushStack(hTab)){      fclose(sfp);      ///cout << "head" << endl;      return false;      }      ti=0;      }      //---------------------------取尾节点------------------    if(str == '<' || li != 0){    if(str != '/' && li == 1){//不是尾标签,是头标签    li=0;}else{buff[li] = str;li++;}  }  if(str == '>' && li != 0){  buff[li] = str;  char *cNode;  cNode = new char[li-1];  for(int j =2;j<li-1;j++){  cNode[j-2] = buff[j];  }  string tTab(cNode);  delete cNode;  //cout << "尾标签:"<<tTab << endl;  //--------------去查寻栈看是否合法合法的话移除栈顶的值---------------// if(!this->checkStackTop(tTab)){ fclose(sfp); return false; }  li=0;  }}    fclose(sfp);return true;}//检查传入节点名字 是否是栈顶的孩子节点 如果是的话返回true else falsebool HtmlDocument::checkAndPushStack(string headNodeName){TreeNode* stackTopNode = this->stack->getTop();//cout << count++ << ":"<<headNodeName<<"父节点:"<< (stackTopNode == NULL?"NULL":stackTopNode->nodeName)<<endl;if(stackTopNode == NULL){TreeNode* 
node = this->traverseTree(this->treeRoot, headNodeName);if(node!=NULL){this->stack->push(node);}return true;}else{cout << "TopStack:" <<stackTopNode->nodeName<< stackTopNode->childList->getLength() <<headNodeName<<endl;TreeNode* node = this->traverseTree(stackTopNode, headNodeName);if(node==NULL){return false;}else{this->stack->push(node);return true;}}return false;}//遍历整个树结构根据给定的父节点和孩子节点的名字获取孩子节点TreeNode* HtmlDocument::traverseTree(TreeNode* treeNode, string nodeName){//html   headif(treeNode == NULL){return NULL;}if(treeNode->nodeName.compare(nodeName) == 0){return treeNode;}if(treeNode->childList==NULL || treeNode->childList->getLength() == 0){return NULL;}//cout << "OOOOOOOOOOO" << endl;//cout <<treeNode->childList->getValue(0)->nodeName<<"---"<<treeNode->nodeName << treeNode->childList->getValue(1)->nodeName<<endl;for(int i =0; i < treeNode->childList->getLength(); i++){if(treeNode->childList->getValue(i)->nodeName.compare(nodeName) == 0){cout << "找到节点了:" <<treeNode->childList->getValue(i)->nodeName<<"---" << nodeName<<endl;return treeNode->childList->getValue(i);}cout << treeNode->childList->getValue(i)->nodeName<<"不能和"<<nodeName<<"匹配"<<endl;this->traverseTree(treeNode->childList->getValue(i), nodeName);}return NULL;}//和栈顶元素比较如果相等就移除栈顶元素,否则返回falsebool HtmlDocument::checkStackTop(string tialNodeName){cout << this->stack->getTop()->nodeName<<"‘’‘’‘’" << tialNodeName << endl;if(this->stack->getTop()->nodeName.compare(tialNodeName)==0){cout <<"POP:" <<this->stack->getTop()->nodeName<<endl;this->stack->pop();return true;}return false;}

list.h

#ifndef list_HH
#define list_HH

#include <iostream>
#include <stdlib.h>

using namespace std;

/** One element of the singly linked list. */
template <typename type>
struct Node
{
    type value;
    Node *next;
};

/**
 * Minimal singly linked list with index-based access.
 *
 * Fixes compared with the previous version:
 *  - nodes are released with delete (they are created with new), not free();
 *  - the destructor no longer reads a node's next pointer after freeing it;
 *  - an empty list holds no node, so nothing leaks for empty lists and
 *    add() works again after the last element has been removed;
 *  - getValue() returns type() instead of NULL when the index is invalid,
 *    which is a null pointer for pointer types and 0 for arithmetic types.
 */
template <typename type>
class List
{
private:
    int len;           // index of the last element; -1 when the list is empty
    Node<type> *head;  // first element; NULL when the list is empty

public:
    List();
    ~List();
    bool add(type);
    bool remove(int index);
    type getValue(int index);
    int getIndex(type value);
    int getLength();
    void traverse();
};

/** Creates an empty list. */
template <typename type>
List<type>::List()
{
    head = NULL;
    len = -1;
}

/**
 * Appends value at the end of the list.
 * @return true on success, false if creating the node threw.
 */
template <typename type>
bool List<type>::add(type value)
{
    Node<type> *node = NULL;
    try
    {
        node = new Node<type>();
        node->value = value;
    }
    catch (...)
    {
        delete node; // safe on NULL; avoids leaking a half-built node
        return false;
    }
    node->next = NULL;

    if (head == NULL)
    {
        head = node;
    }
    else
    {
        Node<type> *last = head;
        while (last->next != NULL)
        {
            last = last->next;
        }
        last->next = node;
    }
    ++len;
    return true;
}

/**
 * Removes the element at index.
 * @return false when index is out of range, true otherwise.
 */
template <typename type>
bool List<type>::remove(int index)
{
    if (index > len || index < 0)
    {
        return false;
    }

    Node<type> *victim = head;
    if (index == 0)
    {
        head = victim->next;
    }
    else
    {
        Node<type> *before = head;
        for (int i = 1; i < index; ++i)
        {
            before = before->next;
        }
        victim = before->next;
        before->next = victim->next;
    }
    delete victim;
    --len;
    return true;
}

/** Returns the number of stored elements. */
template <typename type>
int List<type>::getLength()
{
    return len + 1;
}

/**
 * Returns the element at index, or a value-initialised type() when index
 * is out of range.
 */
template <typename type>
type List<type>::getValue(int index)
{
    if (index < 0 || index > len)
    {
        return type();
    }
    Node<type> *cur = head;
    for (int i = 0; i < index; ++i)
    {
        cur = cur->next;
    }
    return cur->value;
}

/** Returns the index of the first element equal to value, or -1. */
template <typename type>
int List<type>::getIndex(type value)
{
    int pos = 0;
    for (Node<type> *cur = head; cur != NULL; cur = cur->next, ++pos)
    {
        if (cur->value == value)
        {
            return pos;
        }
    }
    return -1;
}

/**
 * Prints every element followed by ", " and ends the line.
 * Prints nothing at all for an empty list.
 */
template <typename type>
void List<type>::traverse()
{
    if (head == NULL)
    {
        return;
    }
    for (Node<type> *cur = head; cur != NULL; cur = cur->next)
    {
        cout << cur->value << ", ";
    }
    cout << endl;
}

/** Frees every node; the stored values themselves are not touched. */
template <typename type>
List<type>::~List()
{
    Node<type> *cur = head;
    while (cur != NULL)
    {
        Node<type> *following = cur->next; // read before the node is freed
        delete cur;
        cur = following;
    }
    head = NULL;
    len = -1;
}

#endif

stack.h

#ifndef STACK_HH
#define STACK_HH

#include <iostream>
#include <stdlib.h>

using namespace std;

/** One element of the doubly linked chain behind the stack. */
template <typename type>
struct StackNode
{
    type value;
    StackNode *next;
    StackNode *previous;
};

/**
 * Stack stored as a doubly linked chain running from head to tail.
 *
 * Normally elements are pushed and popped at head. After reverse() the
 * working end is tail instead, so the element order appears reversed;
 * calling reverse() again switches back.
 *
 * Fixes compared with the previous version:
 *  - head and tail are initialised by the constructor;
 *  - nodes are released with delete (they are created with new), not free();
 *  - pop() clears the neighbour's link to the removed node and resets both
 *    ends when the stack becomes empty, so no dangling pointers remain;
 *  - the destructor drains until isEmpty() instead of until pop() returns a
 *    falsy value, which leaked everything below a stored 0 / NULL;
 *  - pop() and getTop() return type() instead of NULL on an empty stack;
 *  - push() no longer tests its node for NULL after already using it.
 */
template <typename type>
class Stack
{
private:
    int len;                 // number of elements minus one; -1 when empty
    StackNode<type> *top;    // current working end (head or tail)
    StackNode<type> *head;
    StackNode<type> *tail;
    bool isReverse;          // true when the working end is tail

public:
    Stack();
    ~Stack();
    bool isEmpty();
    bool push(type value);
    type pop();
    type getTop();
    bool reverse();
    bool isReversed();
    int getLength();
};

/** Creates an empty, non-reversed stack. */
template <typename type>
Stack<type>::Stack()
{
    top = NULL;
    head = NULL;
    tail = NULL;
    len = -1;
    isReverse = false;
}

/** Returns true when the stack holds no element. */
template <typename type>
bool Stack<type>::isEmpty()
{
    return this->len == -1 || this->top == NULL;
}

/**
 * Pushes value onto the working end.
 * @return always true; a failed allocation propagates as an exception.
 */
template <typename type>
bool Stack<type>::push(type value)
{
    StackNode<type> *vNode = new StackNode<type>();
    vNode->value = value;
    vNode->next = NULL;
    vNode->previous = NULL;

    if (isEmpty())
    {
        this->top = vNode;
        this->head = vNode;
        this->tail = vNode;
        this->len++;
        return true;
    }

    if (this->isReverse)
    {
        // reversed: grow at the tail
        vNode->previous = this->tail;
        this->tail->next = vNode;
        this->tail = vNode;
        this->top = this->tail;
    }
    else
    {
        // normal: grow at the head
        vNode->next = this->head;
        this->head->previous = vNode;
        this->head = vNode;
        this->top = this->head;
    }
    this->len++;
    return true;
}

/**
 * Removes and returns the element at the working end.
 * @return the removed value, or type() when the stack is empty.
 */
template <typename type>
type Stack<type>::pop()
{
    if (isEmpty())
    {
        return type();
    }

    StackNode<type> *victim = this->top;
    type result = victim->value;

    if (this->isReverse)
    {
        this->tail = victim->previous;
        if (this->tail != NULL)
        {
            this->tail->next = NULL;
        }
        this->top = this->tail;
    }
    else
    {
        this->head = victim->next;
        if (this->head != NULL)
        {
            this->head->previous = NULL;
        }
        this->top = this->head;
    }
    delete victim;
    this->len--;

    if (this->len == -1)
    {
        // Last element gone: make sure neither end still refers to it.
        this->top = NULL;
        this->head = NULL;
        this->tail = NULL;
    }
    return result;
}

/** Returns the element at the working end, or type() when empty. */
template <typename type>
type Stack<type>::getTop()
{
    if (isEmpty())
    {
        return type();
    }
    return this->top->value;
}

/**
 * Switches the working end between head and tail.
 * @return false when the stack is empty (nothing is changed), else true.
 */
template <typename type>
bool Stack<type>::reverse()
{
    if (this->len == -1)
    {
        return false;
    }
    if (this->isReverse)
    {
        this->top = this->head;
        this->isReverse = false;
    }
    else
    {
        this->top = this->tail;
        this->isReverse = true;
    }
    return true;
}

/** Returns true while the working end is tail. */
template <typename type>
bool Stack<type>::isReversed()
{
    return this->isReverse;
}

/** Returns the number of stored elements. */
template <typename type>
int Stack<type>::getLength()
{
    return this->len + 1;
}

/** Frees every node; the stored values themselves are not touched. */
template <typename type>
Stack<type>::~Stack()
{
    while (!isEmpty())
    {
        pop();
    }
}

#endif

Test.cpp

#include <iostream>#include "HtmlDoc.h"using namespace std;int main(){HtmlDocument *hd = new HtmlDocument();cout << "HTML 是否合法:"<< (hd->isLegal("/home/long/Desktop/html.html")?"此html文档合法":"此html文档不合法") <<endl;delete hd;return 0;}


原创粉丝点击