让你的Jsoup支持Xpath

来源:互联网 发布:shell编程题 编辑:程序博客网 时间:2024/05/22 17:05

Xpath是专业的xml结构化文档的查询语言,语法功能强大,本文不涉及xpath语法教程。

jsoup 是一款Java 的HTML解析器,可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API,可通过DOM,CSS以及类似于jQuery的操作方法来取出和操作数据,但是选取某个元素时还是没有xpath那么简单直接,而且xpath带了很多选择库。

然而遗憾的是,jsoup并不支持xpath,于是博主就写了一个让jsoup支持xpath的工具类,希望能帮助到有需要的朋友!

下图是测试效果


下面贴上源码:

package com.lhh.parse;

import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Comment;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import com.sun.org.apache.xerces.internal.dom.ElementImpl;

/**
 * XPath helpers for jsoup.
 *
 * <p>Each query copies the jsoup tree into a W3C DOM document (see {@link #fromJsoup}) and
 * evaluates the XPath expression against that copy with the JDK's {@code javax.xml.xpath} API.
 * Elements handed back as jsoup objects are therefore detached copies, not nodes of the
 * original jsoup document.
 *
 * <p>The shared {@link XPath}, {@link TransformerFactory} and {@link DocumentBuilderFactory}
 * instances are only used while holding {@link #LOCK}.
 *
 * @author liuhh
 */
@SuppressWarnings("restriction")
public class JsoupParserUtils {

    protected final static DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    private final static Logger log = LoggerFactory.getLogger(JsoupParserUtils.class);
    private final static XPath xPath = XPathFactory.newInstance().newXPath();
    protected static TransformerFactory tf = TransformerFactory.newInstance();
    private static final Lock LOCK = new ReentrantLock();

    /**
     * Counts the nodes selected by an XPath expression.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression that yields a node-set
     * @return number of selected nodes; 0 when nothing matches or evaluation fails
     */
    public static int getEleChildNum(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            Object res = parse(ele, xpath, XPathConstants.NODESET);
            if (res instanceof NodeList) {
                return ((NodeList) res).getLength();
            }
        } catch (RuntimeException e) {
            log.error("根据xpath:{},获取子节点个数出现错误,错误原因:{}", xpath, e.getMessage(), e);
        }
        return 0;
    }

    /**
     * Evaluates an XPath expression as a boolean, e.g. to test whether a node exists.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression
     * @return the boolean result; {@code false} when evaluation fails
     */
    public static boolean exists(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            Object res = parse(ele, xpath, XPathConstants.BOOLEAN);
            if (res instanceof Boolean) {
                return (Boolean) res;
            }
        } catch (RuntimeException e) {
            log.error("检查xpath:{},是否存在时出现错误,!{}", xpath, e.getMessage(), e);
        }
        return false;
    }

    /**
     * Returns the first node selected by an XPath expression as a Xerces {@link ElementImpl}.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression
     * @return the matching element, or {@code null} when there is no match, the match is not an
     *         {@code ElementImpl}, or evaluation fails
     */
    public static ElementImpl getW3cElementImpl(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            Object res = parse(ele, xpath, XPathConstants.NODE);
            if (res instanceof ElementImpl) {
                return (ElementImpl) res;
            }
        } catch (RuntimeException e) {
            log.error("根据xpath:{},得到w3c的Element对象出现错误,原因:{}", xpath, e.getMessage(), e);
        }
        return null;
    }

    /**
     * Returns the first element selected by an XPath expression as a jsoup element.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression
     * @return a detached jsoup copy of the matching element, or {@code null} when there is no
     *         element match or evaluation fails
     */
    public static org.jsoup.nodes.Element getJsoupElement(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            Object res = parse(ele, xpath, XPathConstants.NODE);
            // Any DOM element is convertible; no need to insist on the JDK-internal ElementImpl.
            if (res instanceof Element) {
                return toJsoupElement((Element) res);
            }
        } catch (RuntimeException e) {
            log.error("根据xpath:{},得到jsoup的Element对象出现错误,原因:{}", xpath, e.getMessage(), e);
        }
        return null;
    }

    /**
     * Returns every element selected by an XPath expression as jsoup elements.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression that yields a node-set
     * @return detached jsoup copies of the matching elements (non-element nodes and elements
     *         that fail to convert are skipped), or {@code null} when the node-set is empty or
     *         evaluation fails
     */
    public static Elements getJsoupElements(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            NodeList nodeList = getNodeList(ele, xpath);
            if (nodeList == null || nodeList.getLength() == 0) {
                return null;
            }
            Elements elements = new Elements();
            for (int i = 0, len = nodeList.getLength(); i < len; i++) {
                Node node = nodeList.item(i);
                if (node instanceof Element) {
                    org.jsoup.nodes.Element converted = toJsoupElement(node);
                    // A null entry would make later Elements operations throw, so drop it.
                    if (converted != null) {
                        elements.add(converted);
                    }
                }
            }
            return elements;
        } catch (RuntimeException e) {
            log.error("根据xpath:{},得到jsoup的Element对象出现错误,原因:{}", xpath, e.getMessage(), e);
        }
        return null;
    }

    /**
     * Evaluates an XPath expression against a jsoup element and returns the W3C node list.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression that yields a node-set
     * @return the selected nodes, or {@code null} when evaluation fails
     */
    public static NodeList getNodeList(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            Object res = parse(ele, xpath, XPathConstants.NODESET);
            if (res instanceof NodeList) {
                return (NodeList) res;
            }
        } catch (RuntimeException e) {
            log.error(e.getMessage(), e);
        }
        return null;
    }

    /**
     * Returns the text selected by an XPath expression (a text node, an attribute, ...).
     *
     * <p>When exactly one node matches, its XPath string-value is returned. When several nodes
     * match, their node values are concatenated, with every {@code "\r\n"} replaced by
     * {@code "."}; nodes without a node value (such as elements) contribute nothing.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression that yields a node-set
     * @return the text, or {@code null} when nothing matches or evaluation fails
     */
    public static String getXpathString(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            // Convert the jsoup tree once and reuse it for both evaluations below.
            final Node root = fromJsoup(ele);
            final Object matched = parse(root, xpath, XPathConstants.NODESET);
            final NodeList nodes = matched instanceof NodeList ? (NodeList) matched : null;
            final int count = nodes == null ? 0 : nodes.getLength();
            if (count == 0) {
                return null;
            }
            if (count == 1) {
                Object res = parse(root, xpath, XPathConstants.STRING);
                return res == null ? null : res.toString();
            }
            StringBuilder joined = new StringBuilder();
            for (String text : nodeValues(nodes)) {
                if (text != null) {
                    joined.append(text.replace("\r\n", "."));
                }
            }
            return joined.toString();
        } catch (RuntimeException e) {
            log.error("根据xpath:{}查询字符串时出现错误:{}", xpath, e.getMessage(), e);
        }
        return null;
    }

    /**
     * Returns the node value of every node selected by an XPath expression.
     *
     * @param ele   jsoup element (or document) to query
     * @param xpath XPath expression that yields a node-set
     * @return one entry per selected node ({@code null} entries for nodes without a node value),
     *         or {@code null} when nothing matches or evaluation fails
     */
    public static List<String> getXpathListString(final org.jsoup.nodes.Element ele, final String xpath) {
        try {
            Object res = parse(ele, xpath, XPathConstants.NODESET);
            if (res instanceof NodeList && ((NodeList) res).getLength() > 0) {
                return nodeValues((NodeList) res);
            }
        } catch (RuntimeException e) {
            log.error("根据xpath:{}查询字符串列表时出现错误:{}", xpath, e.getMessage(), e);
        }
        return null;
    }

    /** Collects {@link Node#getNodeValue()} of every node in the list, in order. */
    private static List<String> nodeValues(final NodeList nodes) {
        final int length = nodes.getLength();
        List<String> values = new ArrayList<>(length);
        for (int i = 0; i < length; i++) {
            Node node = nodes.item(i);
            values.add(node == null ? null : node.getNodeValue());
        }
        return values;
    }

    /**
     * Converts a jsoup element to a W3C DOM and evaluates an XPath expression against it.
     *
     * @param doc      jsoup element (or document) to query
     * @param xPathStr XPath expression
     * @param qName    desired result type, one of the {@link XPathConstants}
     * @return the evaluation result, or {@code null} when an argument is missing or evaluation
     *         fails
     */
    public static Object parse(final org.jsoup.nodes.Element doc, final String xPathStr, final QName qName) {
        Node node = fromJsoup(doc);
        return parse(node, xPathStr, qName);
    }

    /**
     * Evaluates an XPath expression against a W3C DOM node.
     *
     * @param doc      context node
     * @param xPathStr XPath expression
     * @param qName    desired result type, one of the {@link XPathConstants}
     * @return the evaluation result, or {@code null} when an argument is missing or evaluation
     *         fails (the failure is logged at WARN level)
     */
    public static Object parse(final Node doc, final String xPathStr, final QName qName) {
        if (doc == null) {
            log.warn("解析文档为null!");
            return null;
        }
        if (StringUtils.isBlank(xPathStr)) {
            log.warn("解析的Xpath路径为空!");
            return null;
        }
        if (null == qName) {
            log.warn("解析类型为null!");
            return null;
        }
        // Acquire the lock before entering try, so finally never unlocks a lock we do not hold.
        LOCK.lock();
        try {
            return xPath.evaluate(xPathStr, doc, qName);
        } catch (Exception e) {
            log.warn("解析Xpath:{},出现错误,解析类型:{},错误原因:{}!", xPathStr, qName, e.getMessage());
            return null;
        } finally {
            LOCK.unlock();
        }
    }

    /**
     * Converts a Xerces element back into a jsoup element.
     *
     * @param elementImpl W3C element to convert
     * @return a detached jsoup copy of the element, or {@code null} when conversion fails
     */
    public static org.jsoup.nodes.Element getJsoupEle(final ElementImpl elementImpl) {
        return toJsoupElement(elementImpl);
    }

    /**
     * Serializes a W3C element and re-parses it with jsoup's XML parser.
     *
     * <p>The XML parser is used because the HTML parser relocates or drops elements that are
     * not valid directly inside {@code <body>} (for example {@code td}, {@code tr},
     * {@code title}, {@code head}), which would make such elements impossible to return.
     */
    private static org.jsoup.nodes.Element toJsoupElement(final Node w3cElement) {
        try {
            String xml = getW3cDocString(w3cElement);
            org.jsoup.nodes.Document parsed = Jsoup.parse(xml, "", Parser.xmlParser());
            // children() holds element children only, so the leading XML declaration is skipped.
            Elements roots = parsed.children();
            return roots.isEmpty() ? null : roots.first();
        } catch (Exception e) {
            log.error("根据ElementImpl得到Jsoup的Element出现错误,错误原因:{}", e.getMessage(), e);
            return null;
        }
    }

    /**
     * Converts a W3C document to a jsoup document by serializing it and parsing the result
     * with jsoup's HTML parser.
     *
     * @param doc W3C document
     * @return the parsed jsoup document
     * @throws Exception if serialization fails
     */
    public static org.jsoup.nodes.Document fromW3C(final Document doc) throws Exception {
        return Jsoup.parse(getW3cDocString(doc));
    }

    /**
     * Copies a jsoup element into a new W3C DOM document.
     *
     * <p>For a jsoup {@code Document} only its first element child is copied; a document
     * without element children yields an empty W3C document. For any other element, that
     * element becomes the root of the new document.
     *
     * @param in jsoup element or document
     * @return the new W3C document, or {@code null} when {@code in} is {@code null} or no
     *         {@link DocumentBuilder} could be created
     */
    public static Node fromJsoup(final org.jsoup.nodes.Element in) {
        if (null == in) {
            return null;
        }
        final DocumentBuilder builder;
        // DocumentBuilderFactory is not guaranteed thread-safe; guard it like the other shared
        // JAXP objects.
        LOCK.lock();
        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            log.error("Unable to create a DocumentBuilder: {}", e.getMessage(), e);
            return null;
        } finally {
            LOCK.unlock();
        }
        final Document out = builder.newDocument();

        org.jsoup.nodes.Element root = in;
        if (in instanceof org.jsoup.nodes.Document) {
            // Look at element children only: a doctype or comment is a child node but cannot
            // serve as the root element.
            Elements topLevel = in.children();
            if (topLevel.isEmpty()) {
                return out;
            }
            root = topLevel.first();
        }
        new NodeTraversor(new W3CBuilder(out)).traverse(root);
        return out;
    }

    /**
     * Serializes a W3C node to a string using a default {@link Transformer}.
     *
     * @param doc node to serialize
     * @return the serialized markup
     * @throws IllegalStateException if the transformation fails
     * @throws Exception             if closing the underlying writer fails
     */
    public static String getW3cDocString(final Node doc) throws Exception {
        try (StringWriter writer = new StringWriter()) {
            DOMSource domSource = new DOMSource(doc);
            StreamResult result = new StreamResult(writer);
            LOCK.lock();
            try {
                Transformer transformer = tf.newTransformer();
                transformer.transform(domSource, result);
                return writer.toString();
            } finally {
                LOCK.unlock();
            }
        } catch (TransformerException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Copies the attributes of a jsoup node onto a W3C element.
     *
     * <p>HTML tolerates attribute names that are not valid XML names; the DOM rejects those
     * with a {@link DOMException}. Such attributes are skipped so that one malformed attribute
     * does not abort the conversion of the whole document.
     *
     * @param source jsoup node to read attributes from
     * @param el     W3C element to write attributes to
     */
    public static void copyAttributes(final org.jsoup.nodes.Node source, final Element el) {
        for (Attribute attribute : source.attributes()) {
            try {
                el.setAttribute(attribute.getKey(), attribute.getValue());
            } catch (DOMException e) {
                log.debug("Skipping attribute '{}': not a valid XML attribute name", attribute.getKey());
            }
        }
    }
}

/**
 * jsoup {@link NodeVisitor} that mirrors the visited jsoup tree into a W3C {@link Document}.
 * Elements, text nodes, comments and data nodes are copied; other node types are ignored.
 */
class W3CBuilder implements NodeVisitor {

    private final Document doc;
    /** Element currently being filled; {@code null} until the first element is visited. */
    private Element dest;

    public W3CBuilder(Document doc) {
        this.doc = doc;
    }

    @Override
    public void head(final org.jsoup.nodes.Node source, int depth) {
        if (source instanceof org.jsoup.nodes.Element) {
            org.jsoup.nodes.Element sourceEl = (org.jsoup.nodes.Element) source;
            Element el = doc.createElement(sourceEl.tagName());
            JsoupParserUtils.copyAttributes(sourceEl, el);
            if (dest == null) {
                // First element visited becomes the document root.
                doc.appendChild(el);
            } else {
                dest.appendChild(el);
            }
            dest = el;
        } else if (source instanceof org.jsoup.nodes.TextNode) {
            org.jsoup.nodes.TextNode sourceText = (org.jsoup.nodes.TextNode) source;
            appendToCurrent(doc.createTextNode(sourceText.getWholeText()));
        } else if (source instanceof org.jsoup.nodes.Comment) {
            org.jsoup.nodes.Comment sourceComment = (org.jsoup.nodes.Comment) source;
            appendToCurrent(doc.createComment(sourceComment.getData()));
        } else if (source instanceof org.jsoup.nodes.DataNode) {
            // Script/style payload is carried over as plain text.
            org.jsoup.nodes.DataNode sourceData = (org.jsoup.nodes.DataNode) source;
            appendToCurrent(doc.createTextNode(sourceData.getWholeData()));
        }
    }

    @Override
    public void tail(final org.jsoup.nodes.Node source, int depth) {
        // Leaving an element: step back up, but never above the root element.
        if (source instanceof org.jsoup.nodes.Element && dest != null
                && dest.getParentNode() instanceof Element) {
            dest = (Element) dest.getParentNode();
        }
    }

    /** Appends a non-element node to the current element; ignored before any element exists. */
    private void appendToCurrent(final Node child) {
        if (dest != null) {
            dest.appendChild(child);
        }
    }
}
测试类

package com.lhh.parse;

import java.io.IOException;
import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Manual smoke test for {@link JsoupParserUtils}: downloads a news page and prints the result
 * of a few XPath queries. Requires network access.
 */
public class JsoupParserUtilsTest {

    public static void main(String[] args) throws IOException {
        String url = "http://mil.news.sina.com.cn/china/2016-09-29/doc-ifxwmamy9955666.shtml";
        Document doc = Jsoup.parse(new URL(url), 10000);

        String titleXpath = "//*[@id='main_title']/text()";
        String timeXpath = "//*[@id='page-tools']/span/span[position() = 1]";

        // position() must be called as a function; a bare "position" is a child-element name
        // test, which would make the predicate compare NaN instead of the node position.
        System.out.println(JsoupParserUtils.exists(doc, "/html/body/div[position()>1000000]"));
        System.out.println(JsoupParserUtils.getXpathString(doc, titleXpath));

        Element element = JsoupParserUtils.getJsoupElement(doc, timeXpath);
        if (element == null) {
            // getJsoupElement returns null when nothing matches (e.g. the page layout changed).
            System.out.println("No element matched: " + timeXpath);
            return;
        }
        System.out.println(element.text());
        System.out.println(element.attr("class"));
    }
}



0 0
原创粉丝点击