htmlunit爬取js异步加载后的页面

来源：互联网 | 发布：淘宝信用贷款额度降低 | 编辑：程序博客网 | 时间：2024/06/05 04:09

直接上代码：

一、 index.html
页面加载完成后，通过 jQuery 异步请求后台接口，并把返回的数据以 JSON 字符串的形式写入 id 为 content 的 div 中。

<html>
<head>
    <!-- jQuery is loaded from the same directory as this page. -->
    <script type="text/javascript" src="./jquery.min.js"></script>
</head>
<body>
<h2>Hello World!</h2>
<!-- Empty placeholder; it is filled in by the script below after the backend responds. -->
<div id="content"></div>
<script type="text/javascript">
    // When the DOM is ready, POST to the backend endpoint and show the
    // response, serialized as a JSON string, inside #content.
    $(function () {
        var showResult = function (data) {
            var serialized = JSON.stringify(data);
            $("#content").text(serialized);
        };
        $.post("/evh/test/testList", {}, showResult);
    });
</script>
</body>
</html>

二、TestController.java
/test/testList 接口通过 TestMapper 从后台数据库查询数据并返回。

package com.everhomes.proxy.controller;

import javax.annotation.Resource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import com.everhomes.proxy.mapper.TestMapper;

/**
 * REST controller mapped under {@code /test}.
 *
 * <p>Exposes a single endpoint that delegates to {@link TestMapper}, plus an
 * exception handler that logs any {@link Exception} and reports it as text.
 */
@RestController
@RequestMapping("/test")
public class TestController {

    private static final Logger LOG = LoggerFactory.getLogger(TestController.class);

    /** Mapper the {@code testList} endpoint delegates to; injected by the container. */
    @Resource
    private TestMapper testMapper;

    /**
     * Handles requests mapped to {@code testList} under this controller.
     *
     * @return whatever {@code testMapper.testList()} returns
     */
    @RequestMapping("testList")
    public Object testList() {
        Object result = testMapper.testList();
        return result;
    }

    /**
     * Logs the given exception and turns it into a plain-text response.
     *
     * @param e the exception being handled
     * @return the text {@code "error: "} followed by {@code e.toString()}
     */
    @ExceptionHandler(Exception.class)
    public Object exception(Exception e) {
        LOG.error("error: ", e);
        String description = e.toString();
        return "error: " + description;
    }
}

三、Crawler.java

package com.everhomes.generate;

import java.io.File;
import java.io.IOException;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Loads a page with HtmlUnit, waits for its background JavaScript to finish,
 * and saves the resulting DOM as XML.
 */
public class Crawler {

    /**
     * Directory (with trailing separator) the rendered page is written to.
     * The original snippet referenced DIRECTORY without declaring it; the
     * current working directory is used here as a default. Adjust as needed.
     */
    private static final String DIRECTORY = System.getProperty("user.dir") + File.separator;

    /**
     * Fetches {@code http://localhost:8080/evh/index.html} with JavaScript
     * enabled and writes the rendered DOM to {@code cc.html} in {@link #DIRECTORY}.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched
     * @throws InterruptedException declared for compatibility with the original signature
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        // try-with-resources closes the client even if fetching or saving fails.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setRedirectEnabled(true);
            // Do not abort the crawl when a script on the page throws.
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setTimeout(50000);

            HtmlPage rootPage = webClient.getPage("http://localhost:8080/evh/index.html");

            // Give the page's asynchronous request up to 10 seconds to complete
            // so that the DOM contains the data filled in by JavaScript.
            webClient.waitForBackgroundJavaScript(10000);

            // NOTE(review): FileUtils is not imported here; presumably a project
            // helper in this package that writes the text to the given path. Confirm.
            FileUtils.createFile(DIRECTORY + "cc.html", rootPage.asXml());
        }
    }
}

四、pom.xml
添加相关依赖。

    <!-- Apache Commons Lang 2.x. -->
    <dependency>
        <groupId>commons-lang</groupId>
        <artifactId>commons-lang</artifactId>
        <version>2.6</version>
    </dependency>
    <!-- JavaScript engine used by HtmlUnit. The version is kept equal to the
         htmlunit version below: a directly declared dependency overrides the
         one htmlunit pulls in transitively, so a different number here would
         pair the library with a mismatched engine. -->
    <dependency>
        <groupId>net.sourceforge.htmlunit</groupId>
        <artifactId>htmlunit-core-js</artifactId>
        <version>2.25</version>
    </dependency>
    <!-- HtmlUnit itself (headless browser with JavaScript support). -->
    <dependency>
        <groupId>net.sourceforge.htmlunit</groupId>
        <artifactId>htmlunit</artifactId>
        <version>2.25</version>
    </dependency>

阅读全文

0 0