解决QtWebKit中QWebElement无法取到TextNode的问题

来源:互联网 发布:网络男女歌手唱的歌 编辑:程序博客网 时间:2024/06/05 17:51
    QWebPage* page = new QWebPage();    this->webpage = page->mainFrame();//    this->webpage->load(url);    this->webpage->setHtml("<font color='red' style='size:14px;'> text in font . <a href='#'>testlink</font>");    QWebElementCollection list = this->webpage->findAllElements("*");    qDebug() << "elements count : " << list.count() << endl;    QWebElement e;    for(int i = 0; i < list.count(); i++){        e = list.at(i);        displayNode(e);    }

以上测试代码,会在console打印出 HEAD BODY FONT A。

发现无法取得html dom tree 中的 Text类型的节点。

研读源码:

in qwebelement.cpp(以下函数的实现位于 .cpp 文件中,而不是 QWebElement.h)

// Builds a QWebElementCollection from this element (as the query context)
// and the given selector string, and returns it by value.
QWebElementCollection QWebElement::findAll(const QString &selectorQuery) const
{
    QWebElementCollection matches(*this, selectorQuery);
    return matches;
}

// Runs `query` against contextElement's underlying node and stores the
// resulting private object in the shared data pointer `d`.
QWebElementCollection::QWebElementCollection(const QWebElement &contextElement, const QString &query)
{
    // create() returns 0 when there is no context node or no node list,
    // in which case `d` ends up wrapping a null pointer.
    QWebElementCollectionPrivate* priv =
        QWebElementCollectionPrivate::create(contextElement.m_element, query);
    d = QExplicitlySharedDataPointer<QWebElementCollectionPrivate>(priv);
}

// Runs `query` against `context` via Node::querySelectorAll() and wraps the
// resulting node list in a newly allocated QWebElementCollectionPrivate.
//
// Returns 0 when `context` is null or when the query yields no node list.
// The caller receives the raw pointer returned by `new`.
QWebElementCollectionPrivate* QWebElementCollectionPrivate::create(const PassRefPtr<Node> &context, const QString &query)
{
    if (!context)
        return 0;

    // Selector matching is delegated entirely to WebKit.
    // The exception code is passed in but never inspected afterwards; only
    // the null check on the returned list is used to detect failure.
    ExceptionCode exception = 0;
    RefPtr<NodeList> nodes = context->querySelectorAll(query, exception);
    if (!nodes)
        return 0;

    QWebElementCollectionPrivate* priv = new QWebElementCollectionPrivate;
    priv->m_result = nodes;
    return priv;
}


WebCore/dom/Node.h 中对文本类型的节点(TEXT_NODE)是有定义的,也就是说 DOM 树中确实存在文本节点,只是在执行 selector 查询的时候,QtWebKit 将该类型的节点过滤掉了。

    // Numeric codes identifying the kind of a DOM node.
    // NOTE(review): the values look like the W3C DOM nodeType constants --
    // confirm before renumbering anything.
    enum NodeType {
        ELEMENT_NODE = 1,
        ATTRIBUTE_NODE = 2,
        TEXT_NODE = 3,                    // the node kind this article is about
        CDATA_SECTION_NODE = 4,
        ENTITY_REFERENCE_NODE = 5,
        ENTITY_NODE = 6,
        PROCESSING_INSTRUCTION_NODE = 7,
        COMMENT_NODE = 8,
        DOCUMENT_NODE = 9,
        DOCUMENT_TYPE_NODE = 10,
        DOCUMENT_FRAGMENT_NODE = 11,
        NOTATION_NODE = 12,
        XPATH_NAMESPACE_NODE = 13
    };

Node.cpp

PassRefPtr<NodeList> Node::querySelectorAll(const String& selectors, ExceptionCode& ec){    ................................................    return createSelectorNodeList(this, querySelectorList);}


SelectorNodeList.cpp

// Collects the nodes under rootNode that match at least one selector in
// querySelectorList and returns them as a StaticNodeList.
//
// Two paths:
//  - a shortcut that resolves a single id selector through
//    Document::getElementById() when the conditions below hold;
//  - otherwise a walk that starts at rootNode->firstChild() and advances
//    with traverseNextNode(rootNode), testing element nodes only. Nodes for
//    which isElementNode() is false (text nodes included) are never added.
PassRefPtr<StaticNodeList> createSelectorNodeList(Node* rootNode, const CSSSelectorList& querySelectorList)
{
    Vector<RefPtr<Node> > nodes;
    Document* document = rootNode->document();
    CSSSelector* onlySelector = querySelectorList.hasOneSelector() ? querySelectorList.first() : 0;
    bool strictParsing = !document->inCompatMode();

    CSSStyleSelector::SelectorChecker selectorChecker(document, strictParsing);

    // The id shortcut applies only in strict mode, for a root that is in the
    // document, with exactly one selector of kind Id whose value is not
    // shared by several elements.
    const bool canLookUpById = strictParsing
        && rootNode->inDocument()
        && onlySelector
        && onlySelector->m_match == CSSSelector::Id
        && !document->containsMultipleElementsWithId(onlySelector->m_value);

    if (canLookUpById) {
        Element* element = document->getElementById(onlySelector->m_value);
        // The hit must lie under rootNode unless rootNode is the document.
        const bool withinRoot = element
            && (rootNode->isDocumentNode() || element->isDescendantOf(rootNode));
        if (withinRoot && selectorChecker.checkSelector(onlySelector, element))
            nodes.append(element);
        return StaticNodeList::adopt(nodes);
    }

    for (Node* node = rootNode->firstChild(); node; node = node->traverseNextNode(rootNode)) {
        if (!node->isElementNode())
            continue;

        Element* candidate = static_cast<Element*>(node);

        // Advance to the first selector that matches; a non-null result
        // means the candidate matched and is recorded exactly once.
        CSSSelector* selector = querySelectorList.first();
        while (selector && !selectorChecker.checkSelector(selector, candidate))
            selector = CSSSelectorList::next(selector);
        if (selector)
            nodes.append(node);
    }

    return StaticNodeList::adopt(nodes);
}

最终将问题锁定在

// Excerpt: the condition in the traversal loop that the article singles out.
if (n->isElementNode()) {

// Tells whether a node built with the given construction type is an element.
// Only CreateElement and CreateElementZeroRefCount yield true; CreateText,
// CreateContainer and CreateOther yield false.
inline bool Node::isElement(ConstructionType type)
{
    switch (type) {
    case CreateElement:
    case CreateElementZeroRefCount:
        return true;
    case CreateContainer:
    case CreateOther:
    case CreateText:
        return false;
    }
    // Reached only when `type` matches none of the cases above.
    ASSERT_NOT_REACHED();
    return false;
}

也就是说,以 CreateText 方式构造的文本节点,isElementNode() 返回 false,因此在执行 selectors 的时候被过滤掉了。


最终解决方案:


修改qwebelement.cpp

// Returns the text of the wrapped node, or an empty QString when there is
// no node.
//
// Patched (by alex) so that nodes which are not HTML elements -- text nodes
// in particular -- also return their text. The code before the patch was:
/*
    if (!m_element || !m_element->isHTMLElement())
        return QString();
    return static_cast<HTMLElement*>(m_element)->innerText();
*/
QString QWebElement::toPlainText() const
{
    if (!m_element)
        return QString();

    // HTML elements keep using innerText(); everything else falls back to
    // textContent(true).
    return m_element->isHTMLElement()
        ? static_cast<HTMLElement*>(m_element)->innerText()
        : m_element->textContent(true);
}

修改SelectorNodeList.cpp

        for (Node* n = rootNode->firstChild(); n; n = n->traverseNextNode(rootNode)) {            if (n->isElementNode()) {                Element* element = static_cast<Element*>(n);                for (CSSSelector* selector = querySelectorList.first(); selector; selector = CSSSelectorList::next(selector)) {                    if (selectorChecker.checkSelector(selector, element)) {                        nodes.append(n);                        break;                    }                   }               }               /*add by alex*/            if (n->isTextNode()) {                nodes.append(n);            }               /*end*/        }

make 

make install

build and run project


output :

elem is :  "#text"  /  "font text "


补充,20130725

最近遇到需要给text node 赋值的问题, 需要做改动:

修改qwebelement.cpp

void QWebElement::setPlainText(const QString &text){/** alex    if (!m_element || !m_element->isHTMLElement())        return;    ExceptionCode exception = 0;    static_cast<HTMLElement*>(m_element)->setInnerText(text, exception);*/    if (!m_element)          return;      if (m_element->isHTMLElement()) {        ExceptionCode exception = 0;        static_cast<HTMLElement*>(m_element)->setInnerText(text, exception);                return;    }        ExceptionCode exception = 0;    m_element->setTextContent(text, exception); }


原创粉丝点击