nodejs 简单爬虫(一)

来源:互联网 发布:淘宝的淘气值怎么提升 编辑:程序博客网 时间:2024/06/05 02:33

package.json:
{  "name": "crawler",  "version": "0.0.0",  "private": true,  "scripts": {    "start": "node ./bin/www"  },  "dependencies": {    "express": "~4.9.0",    "body-parser": "~1.8.1",    "cookie-parser": "~1.3.3",    "morgan": "~1.3.0",    "serve-favicon": "~2.1.3",    "debug": "~2.0.0",    "ejs": "~0.8.5",    "cheerio": "~0.18.0",    "request": "~2.51.0"  }}

关于cheerio

这里准备爬取 https://cnodejs.org/ 首页上的帖子标题和链接。

要抓取的目标元素是这样的(每个帖子对应一个 class 为 topic_title 的 a 标签):


<a class="topic_title" href="/topic/5493e6c59b158a790e21dc5e" title="新手请教node.js不断回调会吃光内存吗?">      新手请教node.js不断回调会吃光内存吗?    </a>

routes/index.js

var express = require('express');
var request = require('request');
var cheerio = require('cheerio');

var router = express.Router();

/** Page whose topic links are scraped. */
var TARGET_URL = 'https://cnodejs.org/';

/**
 * GET home page.
 *
 * Fetches TARGET_URL, collects the `title` and `href` attributes of every
 * `a.topic_title` element in the returned HTML, and sends them to the client
 * as an array of `{ title, href }` objects.
 *
 * Responds with HTTP 502 and an `{ error }` object when the upstream request
 * fails or returns a non-2xx status, instead of trying to parse a missing or
 * unrelated body.
 */
router.get('/', function (req, res) {
  request.get({ url: TARGET_URL }, function (err, response, body) {
    if (err) {
      console.error(err);
      // Stop here: `body` is undefined on failure, and parsing it would throw
      // inside this callback and leave the client without a response.
      res.status(502).send({ error: 'Failed to fetch ' + TARGET_URL });
      return;
    }

    if (response.statusCode < 200 || response.statusCode >= 300) {
      // An upstream error page is not the topic list; do not scrape it.
      res.status(502).send({
        error: 'Unexpected status ' + response.statusCode + ' from ' + TARGET_URL
      });
      return;
    }

    var $ = cheerio.load(body);
    var items = [];

    $('a.topic_title').each(function (idx, element) {
      var $element = $(element);
      items.push({
        title: $element.attr('title'),
        href: $element.attr('href')
      });
    });

    res.send(items);
  });
});

module.exports = router;
访问一下看看爬到的东西



0 0