htmlunit爬取js异步加载后的页面
来源:互联网 发布:淘宝信用贷款额度降低 编辑:程序博客网 时间:2024/06/05 04:09
直接上代码:
一、 index.html
页面加载完成后，通过 jQuery 向后台接口 /evh/test/testList 发起异步 POST 请求，并把返回的数据以 JSON 字符串的形式填充到 id 为 content 的 div 中。
<html>
<head>
  <script type="text/javascript" src="./jquery.min.js"></script>
</head>
<body>
  <h2>Hello World!</h2>
  <!-- Empty placeholder; the script below fills it when the asynchronous request completes. -->
  <div id="content"></div>
  <script type="text/javascript">
    // $(fn) is jQuery's shorthand for $(document).ready(fn).
    $(function () {
      // POST an empty payload; the response data is passed through JSON.stringify
      // and rendered as plain text inside #content.
      $.post("/evh/test/testList", {}, function (payload) {
        var serialized = JSON.stringify(payload);
        $("#content").text(serialized);
      });
    });
  </script>
</body>
</html>
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
二、TestController.java
/test/testList 接口通过 TestMapper 从后台数据库获取数据并返回（页面中请求的路径 /evh/test/testList 里的 /evh 为应用的上下文路径）。
package com.everhomes.proxy.controller;

import javax.annotation.Resource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import com.everhomes.proxy.mapper.TestMapper;

/**
 * REST controller mapped under {@code /test}.
 *
 * <p>Exposes a single endpoint that returns whatever {@link TestMapper#testList()} yields, plus a
 * catch-all exception handler that logs the failure and answers with its textual description.
 */
@RestController
@RequestMapping("/test")
public class TestController {

    private static final Logger LOG = LoggerFactory.getLogger(TestController.class);

    /** Mapper injected by the container; presumably backed by a database query (not visible here). */
    @Resource
    private TestMapper testMapper;

    /**
     * Handles requests to {@code /test/testList}.
     *
     * @return the result of {@code testMapper.testList()}, unmodified
     */
    @RequestMapping("testList")
    public Object testList() {
        final Object rows = testMapper.testList();
        return rows;
    }

    /**
     * Handles any {@link Exception} raised by a handler method of this controller.
     *
     * @param cause the exception that was raised
     * @return the string {@code "error: "} followed by {@code cause.toString()}
     */
    @ExceptionHandler(Exception.class)
    public Object exception(Exception cause) {
        LOG.error("error: ", cause);
        final String description = cause.toString();
        return "error: " + description;
    }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
三、Crawler.java
package com.everhomes.generate;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Loads a page with HtmlUnit, lets its background JavaScript run, and saves the resulting DOM as
 * XML to {@code cc.html}.
 *
 * <p>Changes compared with the snippet as originally published:
 * <ul>
 *   <li>{@code BrowserVersion} was used without being imported; the import is now present.</li>
 *   <li>The undeclared {@code DIRECTORY} constant and the unshown {@code FileUtils.createFile}
 *       helper are replaced by {@link Files#write} into a directory taken from the first
 *       command-line argument (defaulting to the working directory).</li>
 *   <li>The {@link WebClient} is managed by try-with-resources so it is closed even when loading
 *       or writing fails.</li>
 *   <li>The already-imported {@link NicelyResynchronizingAjaxController} is actually installed.</li>
 * </ul>
 */
public class Crawler {

    /** Address of the page whose rendered DOM is captured. */
    private static final String PAGE_URL = "http://localhost:8080/evh/index.html";

    /** Name of the file the rendered DOM is written to. */
    private static final String OUTPUT_FILE_NAME = "cc.html";

    /** Upper bound, in milliseconds, for connecting to and reading from the server. */
    private static final int HTTP_TIMEOUT_MILLIS = 50000;

    /** Upper bound, in milliseconds, to wait for background JavaScript jobs to finish. */
    private static final long JS_WAIT_MILLIS = 10000;

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the output directory (default: working directory)
     * @throws IOException if the page cannot be loaded or the output file cannot be written
     * @throws InterruptedException kept in the signature for compatibility with the original
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        Path outputDir = Paths.get(args.length > 0 ? args[0] : ".");

        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setRedirectEnabled(true);
            // Script errors in the page should not abort the crawl.
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setTimeout(HTTP_TIMEOUT_MILLIS);
            // Re-synchronizes AJAX calls issued from the main thread so the DOM is more likely
            // to be complete by the time it is serialized.
            webClient.setAjaxController(new NicelyResynchronizingAjaxController());

            HtmlPage rootPage = webClient.getPage(PAGE_URL);
            // Give the asynchronous $.post callback time to update the DOM.
            webClient.waitForBackgroundJavaScript(JS_WAIT_MILLIS);

            Files.createDirectories(outputDir);
            // NOTE(review): written as UTF-8; confirm this matches the encoding declared in the
            // XML prolog produced by asXml() for the crawled page.
            Files.write(
                    outputDir.resolve(OUTPUT_FILE_NAME),
                    rootPage.asXml().getBytes(StandardCharsets.UTF_8));
        }
    }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
四、pom.xml
添加相关依赖。
<!-- NOTE(review): commons-lang 2.x is not referenced by the code shown here; confirm it is still needed. -->
<dependency>
    <groupId>commons-lang</groupId>
    <artifactId>commons-lang</artifactId>
    <version>2.6</version>
</dependency>
<!-- JavaScript engine used by HtmlUnit. The version is kept identical to htmlunit below:
     pinning an older engine (previously 2.23) overrides the one htmlunit resolves transitively
     and can cause runtime incompatibilities. -->
<dependency>
    <groupId>net.sourceforge.htmlunit</groupId>
    <artifactId>htmlunit-core-js</artifactId>
    <version>2.25</version>
</dependency>
<dependency>
    <groupId>net.sourceforge.htmlunit</groupId>
    <artifactId>htmlunit</artifactId>
    <version>2.25</version>
</dependency>
阅读全文
0 0
- htmlunit爬取js异步加载后的页面
- htmlunit爬取js异步加载后的页面
- 【Jsoup】配合 htmlunit 爬取异步加载的网页
- 【Jsoup】配合 htmlunit 爬取异步加载的网页
- HtmlUnit爬取页面列表链接
- HtmlUnit爬取动态数据(js相关)
- js 委派(元素异步加载到页面后点击事件)
- htmlunit爬取数据
- 利用htmlunit和jsoup来实现爬取js的动态网页
- 学习htmlunit获取动态网页加载后的代码
- HtmlUnit 获取登录后的页面信息失败
- htmlunit抓取js执行后的网页源码
- htmlunit爬取Ajax动态生成的网页获取不到生成后的结果的问题的解决
- Htmlunit之爬取网页
- WebCollector爬取JS加载的数据
- WebCollector爬取JS加载的数据
- 页面优化——js异步加载
- html页面异步加载js文件
- HashMap巩固二:多线程问题
- 最近点对
- Fork and Join(Java并发编程的思路)
- Eclipse Jetty插件安装
- Hive HBase Integration
- htmlunit爬取js异步加载后的页面
- Word 常用易忘点记录
- 成为个体高手的三个习惯
- 欢迎使用CSDN-markdown编辑器
- LeetCode【3】Longest Substring Without Repeating Characters
- SMTP协议与JavaMail相关类SMTPTransport的学习
- 两个简单程序告诉你学习速率对于BP神经网络收敛的影响
- Markdown语法实例
- 数据脱敏——基于Java自定义注解实现日志字段脱敏