仿chrome实现获取元素xpath

来源:互联网 发布:js无限下拉页面瀑布流 编辑:程序博客网 时间:2024/06/06 07:02

最近做的一个项目是自动化爬虫系统，其中包括一个前端 Chrome 插件，插件中仿照 Chrome 实现了获取元素 XPath 的功能。


1.代码

// Reuse the xh namespace when it already exists; otherwise create it so this
// snippet can be loaded on its own.
var xh = xh || {};

/**
 * Decides whether a sibling counts as the same kind of element as the primary
 * one when computing positional indexes: same tag name and, if the primary
 * element has an id, the same id.
 *
 * @param {Element} primaryEl - Element whose XPath step is being built.
 * @param {Element} siblingEl - Sibling element to compare against.
 * @returns {boolean} True when both elements share tag name (and id, if any).
 */
xh.elementsShareFamily = function(primaryEl, siblingEl) {
  // Optional stricter rule, disabled because Chrome's own XPath ignores class
  // names:
  // (!primaryEl.className || primaryEl.className === siblingEl.className) &&
  return (primaryEl.tagName === siblingEl.tagName &&
          (!primaryEl.id || primaryEl.id === siblingEl.id));
};

/**
 * Computes the 1-based position of an element among the siblings that share
 * its family (see xh.elementsShareFamily).
 *
 * @param {Element} el - Element to locate among its siblings.
 * @returns {number} The 1-based index, or 0 when no other sibling shares the
 *     family, in which case no positional predicate is needed.
 */
xh.getElementIndex = function(el) {
  var index = 1;
  var sib;
  // Every matching element before `el` pushes its position up by one.
  for (sib = el.previousSibling; sib; sib = sib.previousSibling) {
    if (sib.nodeType === Node.ELEMENT_NODE && xh.elementsShareFamily(el, sib)) {
      index++;
    }
  }
  if (index > 1) {
    return index;
  }
  // `el` is first: it only needs an explicit [1] if a later sibling matches.
  for (sib = el.nextSibling; sib; sib = sib.nextSibling) {
    if (sib.nodeType === Node.ELEMENT_NODE && xh.elementsShareFamily(el, sib)) {
      return 1;
    }
  }
  return 0;
};

/**
 * Builds an absolute XPath for an element by walking up through its element
 * ancestors, in the style of Chrome's "Copy XPath".
 *
 * @param {Element} el - Element to describe; a falsy or non-element value
 *     yields an empty string.
 * @returns {string} XPath such as /html/body/div[@id='main']/img/@src.
 */
xh.makeQueryForElement = function(el) {
  // XPath 1.0 string literals have no escape syntax, so choose a quote
  // character the value does not contain, or fall back to concat().
  function quoteXPathLiteral(value) {
    if (value.indexOf('\'') === -1) {
      return '\'' + value + '\'';
    }
    if (value.indexOf('"') === -1) {
      return '"' + value + '"';
    }
    return 'concat(\'' + value.split('\'').join('\', "\'", \'') + '\')';
  }

  var query = '';
  for (; el && el.nodeType === Node.ELEMENT_NODE; el = el.parentNode) {
    var component = el.tagName.toLowerCase();
    var index = xh.getElementIndex(el);
    if (el.id) {
      component += '[@id=' + quoteXPathLiteral(el.id) + ']';
    }
    // Disabled on purpose: Chrome's XPath does not include class names.
    // else if (el.className) {
    //   component += '[@class=\'' + el.className + '\']';
    // }
    if (index >= 1) {
      component += '[' + index + ']';
    }
    // If the last tag is an img, the user probably wants img/@src.
    if (query === '' && el.tagName.toLowerCase() === 'img') {
      component += '/@src';
    }
    query = '/' + component + query;
  }
  return query;
};

调用时只需调用 xh.makeQueryForElement 方法即可。代码中被注释掉的部分会把标签的 className 也作为 XPath 的一部分；由于 Chrome 生成的 XPath 不带 className 信息，所以将其注释掉了。

2.效果



    可以看到，根列表下的 XPath 就是通过模拟 Chrome 的方式获取的，下面的内容则是通过该 XPath 解析出来的。


3.附录

xpath解析内容函数

// Reuse the xh namespace when it already exists; otherwise create it so this
// snippet can be loaded on its own.
var xh = xh || {};

/**
 * Evaluates an XPath expression against the current document and flattens the
 * result into display text.
 *
 * @param {string} query - XPath expression to evaluate.
 * @returns {Array} A two-element array [text, nodeCount]. For boolean, number
 *     and string results, text is the value and nodeCount is 1. For node sets,
 *     text is the whitespace-normalized text of each node joined by
 *     '[newline]' and nodeCount is the number of nodes ('[NULL]' and 0 when
 *     the set is empty). An invalid expression yields
 *     ['[INVALID XPATH EXPRESSION]', 0].
 */
xh.evaluateQuery = function(query) {
  var xpathResult = null;
  var str = '';
  var nodeCount = 0;
  try {
    xpathResult = document.evaluate(query, document, null,
                                    XPathResult.ANY_TYPE, null);
  } catch (e) {
    // document.evaluate throws on malformed expressions; report the failure
    // to the caller as text instead of propagating the exception.
    str = '[INVALID XPATH EXPRESSION]';
    nodeCount = 0;
  }
  if (!xpathResult) {
    return [str, nodeCount];
  }
  if (xpathResult.resultType === XPathResult.BOOLEAN_TYPE) {
    str = xpathResult.booleanValue ? '1' : '0';
    nodeCount = 1;
  } else if (xpathResult.resultType === XPathResult.NUMBER_TYPE) {
    str = xpathResult.numberValue.toString();
    nodeCount = 1;
  } else if (xpathResult.resultType === XPathResult.STRING_TYPE) {
    str = xpathResult.stringValue;
    nodeCount = 1;
  } else if (xpathResult.resultType ===
             XPathResult.UNORDERED_NODE_ITERATOR_TYPE) {
    for (var node = xpathResult.iterateNext(); node;
         node = xpathResult.iterateNext()) {
      if (str !== '') {
        str += '[newline]';
      }
      // textContent is null for Document nodes (e.g. the query "/"), so fall
      // back to an empty string rather than calling replace() on null.
      var text = node.textContent || '';
      str += text.replace(/[\r\n\t]/g, ' ').replace(/[ ]{2,}/g, ' ');
      nodeCount++;
    }
    if (nodeCount === 0) {
      str = '[NULL]';
    }
  } else {
    // Since we pass XPathResult.ANY_TYPE to document.evaluate(), we should
    // never get back a result type not handled above.
    str = '[INTERNAL ERROR]';
    nodeCount = 0;
  }
  return [str, nodeCount];
};


阅读全文
0 0
原创粉丝点击