爬取二手房信息v2

来源:互联网 发布:淘宝分销店 编辑:程序博客网 时间:2024/04/27 15:36

项目开源地址 转载请注明出处。

1. 去除重复数据:根据 id 只获取最新数据。问题:发现第三方网站本身也存在重复发布的数据,会导致连续 insert。预解决方案:比较 id 时加 3 可以去除重复。 2. 实时通知:使用 socket 推送,避免不停刷新页面。前端:arr.push() 把元素放入数组后面,arr.unshift() 把元素放入数组前面。

WebSocket 初始化类

/**
 * WebSocket message-broker configuration.
 *
 * <p>Registers the STOMP endpoint {@code /roomInfo} with SockJS enabled and
 * turns on the simple broker for destinations under {@code /topic}.
 *
 * <p>Created by daitian on 2017/6/29.
 */
@Configuration
@EnableWebSocketMessageBroker
public class WsConfig extends AbstractWebSocketMessageBrokerConfigurer {

    /** Enables the simple broker for the {@code /topic} destination prefix. */
    @Override
    public void configureMessageBroker(MessageBrokerRegistry brokerRegistry) {
        brokerRegistry.enableSimpleBroker("/topic");
    }

    /** Exposes {@code /roomInfo} as a STOMP endpoint with SockJS enabled. */
    @Override
    public void registerStompEndpoints(StompEndpointRegistry endpointRegistry) {
        endpointRegistry.addEndpoint("/roomInfo").withSockJS();
    }
}

注入发送消息模板

@AutowiredSimpMessagingTemplate template;

58工作类

/** * Created by daitian on 2017/5/31. */@Componentpublic class Task58Test {    @Autowired    JedisCluster jedisCluster;    @Autowired    SimpMessagingTemplate template;    String one = "http://ty.58.com/ershoufang/0/";    String ones = "http://ty.58.com/ershoufang/11111x.shtml";    @Scheduled(fixedRate = 10000)    public void tongcheng() {        try {            //获取最新消息            Document document = Jsoup.connect(one).get();            Elements element = document.getElementsByClass("house-list-wrap");            String phone, newId, s, oldId, sendtime;            Elements li = element.select("li");            for (Iterator<Element> iterator = li.iterator(); iterator.hasNext(); ) {                Element next = (Element) iterator.next();                RoomInfo58 roomInfo58 = new RoomInfo58();                newId = next.attr("logr").replaceAll("[\\w]+[\\d]+_([\\d]+)_[\\d]{0,2}_[\\d]{0,2}_.+", "$1");//.substring(19, 33);//正则获取id                sendtime = next.select("div[class=time]").text();                //获取最小值,如果newId大于 最小值 加入集合, 控制集合大小 就可以做到无重复.无丢失.                
//TODO 根据Id获取最新数据 id58                oldId = jedisCluster.get("id58");                if (newId.compareTo(oldId) > 0) {                    jedisCluster.set("id58", newId);                } else {                    continue;                }                //如果id存在 continue                roomInfo58.setId(newId);                roomInfo58.setComefrom("58同城");                roomInfo58.setCreatetime(DateKit.getDateTime());                roomInfo58.setTitle("" + next.select("h2").text());                roomInfo58.setStyle("" + next.select("p[class=baseinfo]").get(0).text());                roomInfo58.setPosition("" + next.select("p[class=baseinfo]").get(1).text());                roomInfo58.setName("" + next.select("span[class=jjrname-outer]").text());                roomInfo58.setPrice(next.select("p[class=sum]").text());                roomInfo58.setArea(next.select("p[class=unit]").text());                roomInfo58.setSendtime(sendtime);                String url = ones.replace("11111", newId);                roomInfo58.setUrl(url);                //获取手机号                Document doc = Jsoup.connect(url).get();                phone = doc.select("p[class=phone-num]").text();                roomInfo58.setPhone(phone);                //TODO 如果手机号是null 放地址                //pnumber=jedisCluster.sadd("phones", phone); 屏蔽某人发的信息                s = roomInfo58.toString();                jedisCluster.lpush("iroominfo", s);                template.convertAndSend("/topic/rooms", s);            }            jedisCluster.ltrim("roominfo", 0, 10000);        } catch (Exception e) {            e.printStackTrace();        }    }}

房天下工作类

/** * Created by daitian on 2017/6/1. */@Componentpublic class TaskFangTest {    @Autowired    JedisCluster jedisCluster;    @Autowired    SimpMessagingTemplate template;    String fang = "http://esf.taiyuan.fang.com/house/a211-h316/";    String fangs = "http://esf.taiyuan.fang.com/";    @Scheduled(fixedRate = 10000)    public void fang() {        try {            //获取最新消息            Document document = Jsoup.connect(fang).get();            Elements element = document.getElementsByClass("houseList").select("dl");            String newId, s, oldId;            for (Iterator<Element> iterator = element.iterator(); iterator.hasNext(); ) {                Element next = (Element) iterator.next();                newId = next.select("dt[class=img rel floatl]").select("a").attr("href").replaceAll("/chushou/([\\w]+)\\.htm", "$1");                oldId = jedisCluster.get("idf");                if (newId.compareTo(oldId) > 0) {                    jedisCluster.set("idf", newId);                } else {                    continue;                }                RoomInfo58 roomInfo58 = new RoomInfo58();                //如果id存在 continue                roomInfo58.setId(newId);                roomInfo58.setComefrom("房天下");                roomInfo58.setCreatetime(DateKit.getDateTime());                roomInfo58.setTitle("" + next.select("p[class=title]").text());                roomInfo58.setStyle("" + next.select("p[class=mt12]").text() + next.select("div[class=area alignR]").select("p").first().text().replaceAll("�O", "m2"));                roomInfo58.setPosition("" + next.select("p[class=mt10]").text());                roomInfo58.setPrice(next.select("p[class=mt5 alignR]").text());                roomInfo58.setArea(next.select("p[class=danjia alignR mt5]").text().replaceAll("�O", "m2"));                roomInfo58.setSendtime("新上");                String url = fangs + "/chushou/" + newId + ".htm";                roomInfo58.setUrl(url);                Document doc = 
Jsoup.connect(url).get();                Elements nexts = doc.getElementsByClass("bookTel");                roomInfo58.setPhone(nexts.select("strong").text());                roomInfo58.setName("" + nexts.select("a").text().replaceAll("业主", ""));                s = roomInfo58.toString();                jedisCluster.lpush("roominfo", s);                template.convertAndSend("/topic/rooms", s);            }        } catch (IOException e) {            e.printStackTrace();        }    }}

静态页面

<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=980">
    <title>试试行不行</title>
    <script src="js/mobile-util.js"></script>
    <link rel="stylesheet" type="text/css" href="css/base.css">
    <link rel="stylesheet" type="text/css" href="css/reset.css"/>
    <script type="application/javascript" src="js/sockjs.js"></script>
    <script type="application/javascript" src="js/stomp.js"></script>
    <script src="js/vue.js" type="text/javascript" charset="utf-8"></script>
    <script src="js/axios.js"></script>
    <script type="text/javascript">
        // Opens the SockJS/STOMP connection and prepends every message received on
        // /topic/rooms to the list rendered by the Vue instance `app`.
        function connect() {
            var socket = new SockJS('/roomInfo');
            var stompClient = Stomp.over(socket);
            stompClient.connect({}, function (frame) {
                stompClient.subscribe('/topic/rooms', function (response) {
                    // Live update: newest entry goes to the front of the list.
                    app.lists.unshift(JSON.parse(response.body));
                    // TODO: play a notification sound.
                });
            });
        }
    </script>
</head>
<body onload="connect()">
<ul id="app-1">
    <li v-for="item in lists">
        <!-- Pass the URL as an expression; mustache syntax is not interpolated inside directives. -->
        <div @click="a(item.url)">
            <div class="title">{{ item.title }}</div>
            <div class="id"><p>编号:</p>{{ item.id }}</div>
            <div class="style"><p>格局:</p>{{ item.style }}</div>
            <div class="position"><p>位置:</p>{{ item.position }}</div>
            <div class="xinxi">
                <span class="name"><P>姓名:</P>{{ item.name }}</span>
                <span class="phone"><P>电话:</P>{{ item.phone }}</span>
            </div>
            <div class="jiage">
                <span class="area"><P>单价:</P>{{ item.area }}</span>
                <span class="price"><P>总价:</P>{{ item.price }}</span>
            </div>
            <div class="timeinfo">
                <span class="comefrom"><P>来源:</P>{{ item.comefrom }}</span>
                <span class="time"><P>时间:</P>{{ item.createtime }}</span>
            </div>
        </div>
    </li>
</ul>
<!-- Loaded after #app-1 so the element exists when the Vue instance is created. -->
<script src="js/myjs.js"></script>
</body>
</html>

myjs.js(即页面中引用的 js/myjs.js)

// Root Vue instance bound to #app-1. It is kept in the global `app` variable;
// `lists` holds the records rendered by the page.
var app = new Vue({
    el: '#app-1',
    data: {
        lists: []
    },
    created: function () {
        // Load the initial list as soon as the instance is created.
        this.fetchData();
    },
    methods: {
        // Requests the list from the /findall endpoint and stores it in `lists`.
        fetchData: function () {
            var self = this;
            axios.get('http://daitiantian001.cn/findall')
                .then(function (response) {
                    // Keep `lists` an array even if the payload is something else,
                    // so array operations on it keep working.
                    self.lists = Array.isArray(response.data) ? response.data : [];
                })
                .catch(function (error) {
                    // Surface the failure instead of leaving an unhandled rejection.
                    console.error('Failed to load room list:', error);
                });
        },
        // Click handler for a list entry; navigation is currently disabled.
        a: function (url) {
            // console.log(url)
            // window.location=url;
        }
    }
});

引入 WebSocket 依赖

<!-- Spring Boot WebSocket starter. No version is declared here;
     presumably it is managed by the parent POM - confirm. -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-websocket</artifactId>
</dependency>
原创粉丝点击