nodejs 采集

来源:互联网 发布:js元素添加样式类 编辑:程序博客网 时间:2024/05/21 17:06
var http = require("http");
var fs = require("fs");
var cheerio = require("cheerio");

/**
 * Download a single image over HTTP and store it in the images/ directory.
 *
 * The file name is the last 12 characters of the URL. The images/ directory
 * is not created here; writing fails (and throws) if it does not exist.
 *
 * @param {string} src - Absolute http:// URL of the image.
 */
function downloadImage(src) {
    http.get(src, function (res) {
        // Do not store error pages or redirect bodies as image files.
        if (res.statusCode !== 200) {
            res.resume(); // drain the response so the socket is released
            console.log("Got error: unexpected status " + res.statusCode + " for " + src);
            return;
        }

        // Collect raw Buffer chunks instead of building a 'binary' string.
        var chunks = [];
        res.on('data', function (chunk) {
            chunks.push(chunk);
        }).on('end', function () {
            var imageName = src.slice(-12);
            fs.writeFile('images/' + imageName, Buffer.concat(chunks), function (err) {
                if (err) throw err;
                console.log('图片已保存');
            });
        });
    }).on('error', function (e) {
        console.log("Got error: " + e.message);
    });
}

/**
 * Extract text and images from a fetched HTML page.
 *
 * The text of every ".content" element is written to 1.txt, one entry per
 * line (CRLF terminated), replacing any previous file content. Every image
 * inside a ".thumb" element whose URL points at pic.xxxxx.com is downloaded
 * into images/.
 *
 * @param {string} doc - HTML source of the page.
 */
function downloadText(doc) {
    var $ = cheerio.load(doc);

    // Gather all texts first and write the file once: truncating and
    // appending per element raced against each other and could lose entries.
    var texts = [];
    $(".content").each(function (idx, obj) {
        texts.push($(obj).text());
    });

    if (texts.length > 0) {
        fs.writeFile('1.txt', texts.join("\r\n") + "\r\n", function (err) {
            if (err) throw err;
            texts.forEach(function (text) {
                console.log(text);
                console.log("\r");
            });
        });
    }

    $(".thumb").find("img").each(function (idx, obj) {
        var src = $(obj).attr("src");
        // Dots are escaped so only the literal host name matches.
        if (/pic\.xxxxx\.com/.test(src)) {
            downloadImage(src);
        }
    });
}

/**
 * Fetch the front page of www.xxxxx.com with a browser-like User-Agent and
 * hand the HTML to downloadText once the response has been fully received.
 */
function start() {
    http.get({
        hostname: 'www.xxxxx.com',
        port: 80,
        path: '/',
        headers: {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36"
        }
    }, function (res) {
        // Decode as a stream so multi-byte characters split across chunks
        // are not corrupted.
        // NOTE(review): assumes the page is UTF-8 encoded - confirm for the target site.
        res.setEncoding('utf8');

        var html = "";
        res.on('data', function (data) {
            html += data;
        }).on('end', function () {
            downloadText(html);
        });
    }).on('error', function (e) {
        console.log("Got error: " + e.message);
    });
}

start();




0 0
原创粉丝点击