cheerio爬取网页数据，存储到MySQL数据库

来源：互联网 | 发布：mac 磁盘恢复 | 编辑：程序博客网 | 时间：2024/06/06 03:17

最近在做物流项目成本分析，需要爬取柴油价格数据，使用到了cheerio，cheerio实现了jQuery核心的一个子集。以下为爬取代码。

// getHtml.js — fetch the HTML source of a page over HTTP.
const http = require("http");

/**
 * Download the body of `url` with an HTTP GET request.
 *
 * @param {string} url - Address of the page to fetch (plain http).
 * @param {function(?string): void} callback - Called exactly once with the
 *     response body as a string, or with `null` when the request fails, the
 *     response stream errors, or the server answers with a non-2xx status.
 */
function gethtml(url, callback) {
    let finished = false;

    // Several failure events can fire for one request; make sure the caller
    // is only ever notified once.
    const finish = (result) => {
        if (finished) {
            return;
        }
        finished = true;
        callback(result);
    };

    http.get(url, (res) => {
        if (res.statusCode < 200 || res.statusCode >= 300) {
            // An error/redirect page is not the content the caller asked for.
            // Drain the response so the socket is released.
            res.resume();
            finish(null);
            return;
        }

        // Decode as UTF-8 at the stream level so a multi-byte character that
        // straddles two chunks is not corrupted by string concatenation.
        res.setEncoding("utf8");

        let data = "";
        res.on("data", (chunk) => {
            data += chunk;
        });
        res.on("end", () => {
            finish(data);
        });
        res.on("error", () => {
            finish(null);
        });
    }).on("error", () => {
        finish(null);
    });
}

exports.gethtml = gethtml;

// getData.js — scrape diesel prices with cheerio and store them in MySQL.
const cheerio = require('cheerio');
const mysql = require('mysql'); // used below; the original never required it
const server = require('./getHtml');

const url = 'http://ny.gold600.com/';

/**
 * Remove every whitespace character from a piece of scraped text.
 *
 * @param {string} text - Raw text taken from a table cell.
 * @returns {string} The text without any whitespace.
 */
function compact(text) {
    return text.replace(/\s/g, '');
}

/**
 * Pull the price rows out of the page.
 *
 * Reads the odd-numbered rows of `.oilTable`: the city comes from the row's
 * link text, the price from the 5th cell and the time from the 6th cell.
 * Rows without a city or without a numeric price are skipped, because they
 * cannot be stored in the numeric price column.
 *
 * @param {string} html - HTML source of the price page.
 * @returns {Array<[string, number, string]>} One `[city, price, time]` entry per usable row.
 */
function extractRows(html) {
    const $ = cheerio.load(html);
    const rows = [];

    $('.oilTable tbody tr:nth-child(odd)').each(function (index, element) {
        const row = $(element);
        // .eq(n) keeps the cheerio wrapper, so .text() is available directly.
        const cells = row.find('td');
        const city = compact(row.find('a').text()); // city name
        const price = Number.parseFloat(compact(cells.eq(4).text())); // price
        const time = compact(cells.eq(5).text()); // time of the quote

        if (city === '' || !Number.isFinite(price)) {
            return;
        }
        rows.push([city, price, time]);
    });

    return rows;
}

/**
 * Replace the contents of `priceList` with the given rows.
 *
 * @param {Array<[string, number, string]>} rows - Non-empty list of `[city, price, time]` entries.
 */
function saveRows(rows) {
    const connection = mysql.createConnection({
        host: '127.0.0.1',
        user: 'root',
        password: '******',
        database: 'price'
    });

    // Surface failures of the preparatory statements instead of ignoring them.
    const failOnError = function (error) {
        if (error) throw error;
    };

    connection.connect();
    connection.query('DELETE FROM priceList', failOnError);
    connection.query('ALTER TABLE priceList AUTO_INCREMENT=1', failOnError);
    // Scraped text must never be spliced into the SQL string. A nested array
    // bound to a single `?` is expanded by the driver into an escaped
    // multi-row VALUES list.
    connection.query(
        'INSERT INTO priceList(city,price,time) VALUES ?',
        [rows],
        function (error, results) {
            if (error) throw error;
            console.log('The solution is:', results);
        }
    );
    // end() lets the queued statements finish before closing the connection.
    connection.end();
}

server.gethtml(url, function (html) {
    if (!html) {
        console.log("error");
        return;
    }

    const rows = extractRows(html);
    console.log(rows);

    if (rows.length === 0) {
        // Nothing was scraped (the page layout may have changed); keep the
        // existing table contents rather than wiping them.
        console.log("error");
        return;
    }

    saveRows(rows);
});

需要注意的是：对 $(selector).find(selector) 的结果使用下标（如 [4]）取出的是原生元素节点，需要再次使用 $() 进行包裹（或改用 .eq(n)），才能使用 cheerio 提供的方法，如 text()。

阅读全文

1 0