node.js 抓取全国区域代码、邮政编码、电话区号

来源:互联网 发布:python 定义矩阵 编辑:程序博客网 时间:2024/04/28 05:52

        最近因为项目需求,需要用到全国各地区的区域代码,但网上下载不到最新的数据。正好看到一个网站可以按省份逐级查询,所以就自己用 Node.js 写了一个简单的抓取程序:

var http = require("http");
var cheerio = require("cheerio");
var BufferHelper = require('bufferhelper');
var iconv = require('iconv-lite');
var fs = require("fs");

// Root of the site being crawled; every href found on a page is appended to it.
var url = "http://www.diqudaima.com/";

// CSV file that receives the header line and one line per scraped row.
var OUTPUT_FILE = "code.txt";

/**
 * Fetch a page over HTTP and hand its GBK-decoded body to `callback`.
 *
 * The callback is invoked only for a 200 response that ends normally.
 * Non-200 responses and request errors are logged and the callback is
 * not called, so that branch of the crawl simply stops.
 *
 * @param {string} url - Absolute URL to request.
 * @param {function(string)} callback - Receives the decoded response body.
 */
function download(url, callback) {
    http.get(url, function (res) {
        if (res.statusCode !== 200) {
            console.log("请求失败 " + url + " 状态码: " + res.statusCode);
            // Drain the response so the socket is released.
            res.resume();
            return;
        }
        var bufferHelper = new BufferHelper();
        res.on('data', function (chunk) {
            bufferHelper.concat(chunk);
        });
        res.on("end", function () {
            // Decode only after all chunks are joined, so a multi-byte GBK
            // character split across two chunks is not corrupted.
            callback(iconv.decode(bufferHelper.toBuffer(), 'GBK'));
        });
    }).on("error", function (err) {
        console.log("下载失败 " + url + ": " + err);
    });
}

/**
 * Create (or truncate) the output file and write the CSV header line.
 *
 * Done synchronously so the header is on disk before any row is appended.
 * Throws if the file cannot be written.
 */
function createTxtFile() {
    fs.writeFileSync(OUTPUT_FILE, "省份,城市,县/区,地区编码,邮编,电话区号\n");
}

/**
 * Find every element matching `selector` in the page, take the link inside
 * it, and download the linked page with `handler` as the callback.
 *
 * @param {string} data - HTML of the page to scan; falsy input is ignored.
 * @param {string} selector - Selector for the elements that contain links.
 * @param {function(string)} handler - Callback passed to download().
 */
function followLinks(data, selector, handler) {
    if (!data) {
        return;
    }
    var $ = cheerio.load(data);
    $(selector).each(function (i, e) {
        var link = $(e).find("a");
        var href = link.attr("href");
        if (!href) {
            // No link in this item; skip it rather than request "...undefined".
            return;
        }
        var newUrl = url + href;
        console.log("获取" + link.text() + "Url: " + newUrl);
        download(newUrl, handler);
    });
}

/**
 * Handle a page downloaded from the index: follow each link under
 * "div.all>ul>li" and process the result with getArea().
 *
 * @param {string} data - HTML of the page.
 */
function getCity(data) {
    followLinks(data, "div.all>ul>li", getArea);
}

// Running count of rows reported as written (successes and failures alike).
var num = 0;

/**
 * Extract rows from a page and append them to the output file.
 *
 * The row prefix is taken from the "div.title>span" text, split on "-->"
 * (segments 1 and 2). Each "ul>li" item's text has ":" replaced by a space
 * and is split on spaces; tokens 0, 2, 4 and 6 become the remaining columns.
 * All rows of a page are appended in a single write.
 *
 * @param {string} data - HTML of the page; falsy input is ignored.
 */
function getArea(data) {
    if (!data) {
        return;
    }
    var $ = cheerio.load(data);

    var provinceName = "";
    $("div.title>span").each(function (i, e) {
        var parts = $(e).text().split("-->");
        provinceName = parts[1] + "," + parts[2];
    });

    var rows = [];
    $("ul>li").each(function (i, e) {
        // NOTE(review): items with fewer than 7 tokens yield "undefined"
        // columns, exactly as before — confirm whether they should be skipped.
        var strArr = $(e).text().replace(/:/g, " ").split(" ");
        var row = provinceName + "," + strArr[0] + "," + strArr[2] + "," + strArr[4] + "," + strArr[6];
        rows.push(row);
        console.log(row);
    });

    if (rows.length === 0) {
        return;
    }

    // One append per page instead of one per row: avoids opening the file
    // thousands of times concurrently and keeps a page's rows contiguous.
    fs.appendFile(OUTPUT_FILE, rows.join("\n") + "\n", function (err) {
        rows.forEach(function () {
            if (err) {
                console.log("写入第" + (++num) + "条记录失败! " + err);
            } else {
                console.log("写入第" + (++num) + "条记录成功!");
            }
        });
    });
}

// Entry point: write the header, then crawl from the index page.
createTxtFile();
console.time("Start!");
// The process exits once all pending requests and writes have finished,
// so this reports the total crawl time.
process.on("exit", function () {
    console.timeEnd("Start!");
});
download(url, function (data) {
    followLinks(data, "div.Count>ul>li", getCity);
});


0 0
原创粉丝点击