爬取二手房信息v2
来源:互联网 发布:淘宝分销店 编辑:程序博客网 时间:2024/04/27 15:36
项目开源地址 转载请注明出处。
本版改动:(1)去除重复数据:根据 id 只获取最新数据。问题:发现第三方网站本身也存在重复发布的数据,会导致连续 insert;预解决方案:比较 id 时加 3 可以去除重复。(2)实时通知:使用 WebSocket(socket)推送,避免不停刷新页面。前端说明:arr.push() 把元素放到数组末尾,arr.unshift() 把元素放到数组开头。
WebSocket 初始化类
/**
 * WebSocket/STOMP configuration: registers the SockJS endpoint that browsers
 * connect to and enables the simple message broker for "/topic" destinations.
 *
 * Created by daitian on 2017/6/29.
 */
@Configuration
@EnableWebSocketMessageBroker
public class WsConfig extends AbstractWebSocketMessageBrokerConfigurer {

    /**
     * Registers the "/roomInfo" STOMP endpoint with SockJS enabled.
     *
     * @param registry the endpoint registry supplied by Spring
     */
    @Override
    public void registerStompEndpoints(StompEndpointRegistry registry) {
        registry.addEndpoint("/roomInfo").withSockJS();
    }

    /**
     * Enables the simple broker for destinations prefixed with "/topic".
     *
     * @param config the broker registry supplied by Spring
     */
    @Override
    public void configureMessageBroker(MessageBrokerRegistry config) {
        config.enableSimpleBroker("/topic");
    }
}
注入发送消息模板
@AutowiredSimpMessagingTemplate template;
58工作类
/**
 * Scheduled crawler for the 58.com second-hand housing list page.
 *
 * <p>Every poll it reads the list page, keeps the listings whose id is greater
 * than the watermark stored under the Redis key "id58", fetches each listing's
 * detail page for the phone number, pushes the record onto the Redis list
 * "roominfo" and sends it to the "/topic/rooms" WebSocket destination.
 *
 * Created by daitian on 2017/5/31.
 */
@Component
public class Task58Test {
    @Autowired
    JedisCluster jedisCluster;
    @Autowired
    SimpMessagingTemplate template;

    /** List page that is polled for new listings. */
    String one = "http://ty.58.com/ershoufang/0/";
    /** Detail page URL template; "11111" is replaced by the listing id. */
    String ones = "http://ty.58.com/ershoufang/11111x.shtml";

    /**
     * Polls the list page every 10 seconds and publishes listings that are
     * newer than the stored watermark. Any exception is printed and the poll
     * is abandoned; the next scheduled run starts over.
     */
    @Scheduled(fixedRate = 10000)
    public void tongcheng() {
        try {
            // Fetch the newest listings.
            Document document = Jsoup.connect(one).get();
            Elements listings = document.getElementsByClass("house-list-wrap").select("li");

            // Read the watermark ONCE per poll. Re-reading and overwriting it
            // inside the loop would make every later item on the same page
            // compare against an id that was just raised, silently dropping
            // items whose id lies between the old and the new watermark.
            // A missing key (first run) means "everything on the page is new".
            String lastId = jedisCluster.get("id58");
            String highestId = lastId;
            // Guards against the same id appearing twice on one page.
            java.util.Set<String> seenIds = new java.util.HashSet<String>();
            try {
                for (Element next : listings) {
                    // Extract the listing id from the "logr" attribute.
                    String newId = next.attr("logr")
                            .replaceAll("[\\w]+[\\d]+_([\\d]+)_[\\d]{0,2}_[\\d]{0,2}_.+", "$1");
                    if (newId.isEmpty() || !isNewer(newId, lastId) || !seenIds.add(newId)) {
                        continue;
                    }

                    Elements baseInfo = next.select("p[class=baseinfo]");
                    RoomInfo58 roomInfo58 = new RoomInfo58();
                    roomInfo58.setId(newId);
                    roomInfo58.setComefrom("58同城");
                    roomInfo58.setCreatetime(DateKit.getDateTime());
                    roomInfo58.setTitle(next.select("h2").text());
                    roomInfo58.setStyle(baseInfo.get(0).text());
                    roomInfo58.setPosition(baseInfo.get(1).text());
                    roomInfo58.setName(next.select("span[class=jjrname-outer]").text());
                    roomInfo58.setPrice(next.select("p[class=sum]").text());
                    roomInfo58.setArea(next.select("p[class=unit]").text());
                    roomInfo58.setSendtime(next.select("div[class=time]").text());
                    String url = ones.replace("11111", newId);
                    roomInfo58.setUrl(url);

                    // The phone number is only available on the detail page.
                    Document doc = Jsoup.connect(url).get();
                    roomInfo58.setPhone(doc.select("p[class=phone-num]").text());
                    // TODO: fall back to the address when no phone number is present.
                    // TODO: allow blocking listings from specific phone numbers.

                    String s = roomInfo58.toString();
                    // Same list key that is trimmed below ("iroominfo" was a typo:
                    // the trim and the sibling crawler both use "roominfo").
                    jedisCluster.lpush("roominfo", s);
                    template.convertAndSend("/topic/rooms", s);

                    if (isNewer(newId, highestId)) {
                        highestId = newId;
                    }
                }
            } finally {
                // Persist the watermark even when a later item failed, so the
                // items already published are not published again next poll.
                if (highestId != null && !highestId.equals(lastId)) {
                    jedisCluster.set("id58", highestId);
                }
            }
            jedisCluster.ltrim("roominfo", 0, 10000);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns true when {@code candidate} sorts after {@code reference}, or
     * when there is no reference yet. Ids are compared as strings, exactly as
     * before; NOTE(review): this assumes ids have a fixed width — confirm.
     */
    private static boolean isNewer(String candidate, String reference) {
        return reference == null || candidate.compareTo(reference) > 0;
    }
}
房天下工作类
/** * Created by daitian on 2017/6/1. */@Componentpublic class TaskFangTest { @Autowired JedisCluster jedisCluster; @Autowired SimpMessagingTemplate template; String fang = "http://esf.taiyuan.fang.com/house/a211-h316/"; String fangs = "http://esf.taiyuan.fang.com/"; @Scheduled(fixedRate = 10000) public void fang() { try { //获取最新消息 Document document = Jsoup.connect(fang).get(); Elements element = document.getElementsByClass("houseList").select("dl"); String newId, s, oldId; for (Iterator<Element> iterator = element.iterator(); iterator.hasNext(); ) { Element next = (Element) iterator.next(); newId = next.select("dt[class=img rel floatl]").select("a").attr("href").replaceAll("/chushou/([\\w]+)\\.htm", "$1"); oldId = jedisCluster.get("idf"); if (newId.compareTo(oldId) > 0) { jedisCluster.set("idf", newId); } else { continue; } RoomInfo58 roomInfo58 = new RoomInfo58(); //如果id存在 continue roomInfo58.setId(newId); roomInfo58.setComefrom("房天下"); roomInfo58.setCreatetime(DateKit.getDateTime()); roomInfo58.setTitle("" + next.select("p[class=title]").text()); roomInfo58.setStyle("" + next.select("p[class=mt12]").text() + next.select("div[class=area alignR]").select("p").first().text().replaceAll("�O", "m2")); roomInfo58.setPosition("" + next.select("p[class=mt10]").text()); roomInfo58.setPrice(next.select("p[class=mt5 alignR]").text()); roomInfo58.setArea(next.select("p[class=danjia alignR mt5]").text().replaceAll("�O", "m2")); roomInfo58.setSendtime("新上"); String url = fangs + "/chushou/" + newId + ".htm"; roomInfo58.setUrl(url); Document doc = Jsoup.connect(url).get(); Elements nexts = doc.getElementsByClass("bookTel"); roomInfo58.setPhone(nexts.select("strong").text()); roomInfo58.setName("" + nexts.select("a").text().replaceAll("业主", "")); s = roomInfo58.toString(); jedisCluster.lpush("roominfo", s); template.convertAndSend("/topic/rooms", s); } } catch (IOException e) { e.printStackTrace(); } }}
静态页面
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=980">
    <title>试试行不行</title>
    <script src="js/mobile-util.js"></script>
    <link rel="stylesheet" type="text/css" href="css/base.css">
    <link rel="stylesheet" type="text/css" href="css/reset.css"/>
    <script type="application/javascript" src="js/sockjs.js"></script>
    <script type="application/javascript" src="js/stomp.js"></script>
    <script src="js/vue.js" type="text/javascript" charset="utf-8"></script>
    <script src="js/axios.js"></script>
    <script type="text/javascript">
        // Opens the SockJS/STOMP connection and prepends every listing that
        // arrives on /topic/rooms to the Vue list. Called from body onload.
        function connect() {
            var socket = new SockJS('/roomInfo');
            var stompClient = Stomp.over(socket);
            stompClient.connect({}, function (frame) {
                stompClient.subscribe('/topic/rooms', function (response) {
                    /* Live update: the newest listing goes to the top of the list. */
                    app.lists.unshift(JSON.parse(response.body));
                    // TODO: play a notification sound.
                });
            });
        }
    </script>
</head>
<body onload="connect()">
<ul id="app-1">
    <li v-for="item in lists">
        <!-- Pass the URL as an expression; mustache syntax is not evaluated inside directives. -->
        <div @click="a(item.url)">
            <div class="title">{{ item.title }}</div>
            <div class="id"><p>编号:</p>{{ item.id }}</div>
            <div class="style"><p>格局:</p>{{ item.style }}</div>
            <div class="position"><p>位置:</p>{{ item.position }}</div>
            <div class="xinxi">
                <span class="name"><P>姓名:</P>{{ item.name }}</span>
                <span class="phone"><P>电话:</P>{{ item.phone }}</span>
            </div>
            <div class="jiage">
                <span class="area"><P>单价:</P>{{ item.area }}</span>
                <span class="price"><P>总价:</P>{{ item.price }}</span>
            </div>
            <div class="timeinfo">
                <span class="comefrom"><P>来源:</P>{{ item.comefrom }}</span>
                <span class="time"><P>时间:</P>{{ item.createtime }}</span>
            </div>
        </div>
    </li>
</ul>
<!-- Loaded after #app-1 exists so that the Vue instance can mount on it. -->
<script src="js/myjs.js"></script>
</body>
</html>
myjs.js(页面中以 js/myjs.js 引入)
// Root Vue instance for the listing page. It is kept in the global `app`
// because the page's WebSocket handler prepends new items to `app.lists`.
var app = new Vue({
    el: '#app-1',
    data: {
        // Listings currently shown.
        lists: []
    },
    created: function () {
        this.fetchData();
    },
    methods: {
        // Loads the initial listings from the backend.
        fetchData: function () {
            var self = this;
            axios.get('http://daitiantian001.cn/findall')
                .then(function (response) {
                    self.lists = response.data;
                })
                .catch(function (error) {
                    // Without a handler a failed request is an unhandled
                    // rejection and the empty list gives no hint why.
                    console.error('Failed to load listings', error);
                });
        },
        // Click handler for a listing. Currently a no-op: the redirect to the
        // listing URL is disabled.
        a: function (url) {
            // console.log(url)
            // window.location=url;
        }
    }
});
引入socket依赖
<!-- Spring Boot starter for WebSocket support. -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-websocket</artifactId>
</dependency>
阅读全文
0 0
- 爬取二手房信息v2
- 爬取二手房信息
- 简单Python爬取链接二手房信息
- 使用网络爬虫爬取新浪二手房网站中的西安二手房信息
- 爬取58同城的二手房信息
- bs4+phantomjs爬取安居客二手房信息
- scrapy安居客二手房爬取
- python爬取上海链家网二手房数据
- python链家网二手房数据爬取
- 如何用Python爬取分析北京二手房数据?
- python爬虫爬取链家二手房信息
- 【Python爬虫系列】Python 爬取上海链家二手房数据
- 爬取招聘信息
- 大学排名信息爬取
- 题目:北京市二手房交易信息录入程序
- Python数据爬虫,爬链家的二手房信息
- scrapy实战(一)-------------爬取链家网的二手房信息
- 数据分析:pandas分析链家网二手房信息
- 解决java compiler level does not match the version of the installed java project facet /faceted projec
- java中synchronized关键字的用法
- Android布局--相对布局,RTL,用代码实现布局
- Git详解之七 自定义Git
- Spring 框架:org.springframework.beans.factory.BeanCreationException/NotWritablePropertyException
- 爬取二手房信息v2
- 个人软件整理
- 汇编语言 程序设计练习题 答案
- poj1679 The Unique MST(次小生成树)
- 运算符 |、||、&、&&、异或、~、!、<<、
- JavaScript学习总结(14)——12个令人惊叹的JavaScript技巧
- CF831B-Keyboard Layouts
- Java实现基于Redis的分布式锁
- 限制oracle字段长度