如何用Node.js编写长尾关键词的简单爬虫?
- 内容介绍
- 文章标签
- 相关推荐
本文共计231个文字,预计阅读时间需要1分钟。
// server.js
//
// Minimal Express application: serves static files from the `www` directory
// and exposes GET /jokes, which fetches a remote page with superagent and
// loads the returned HTML into cheerio.
const express = require('express');
const request = require('superagent');
const cheerio = require('cheerio');

const app = express();

// Static assets are served from the `www` directory.
app.use(express.static('www'));

/**
 * GET /jokes
 *
 * Fetches https://www.example.com and loads the HTML into cheerio.
 * Responds with HTTP 500 and a plain-text message when the fetch fails,
 * otherwise with a plain-text confirmation.
 */
app.get('/jokes', (req, res) => {
  request.get('https://www.example.com').end((err, response) => {
    if (err) {
      return res.status(500).send('Error fetching jokes');
    }

    // `$` is the cheerio handle on the fetched document; the actual
    // extraction step is left as a placeholder in this snippet.
    const $ = cheerio.load(response.text);

    // Process and send the jokes
    res.send('Jokes fetched and processed');
  });
});
// server.js
//
// Express server that scrapes joke entries from a remote page and returns
// them as JSON from GET /jokes. Static files are served from `www`.
const express = require('express');
const request = require('superagent');
// cheerio is used to query the fetched HTML.
const cheerio = require('cheerio');

const app = express();

app.use(express.static('www'));

/**
 * GET /jokes
 *
 * Fetches the target page with a mobile User-Agent, collects one record per
 * `main article` element and responds with
 * `{ code: 'success', message: '查询成功', data: [...] }`.
 * When the fetch fails, responds with HTTP 500 and an empty `data` array
 * instead of dereferencing a missing response.
 */
app.get('/jokes', (req, res) => {
  request
    // NOTE(review): the URL has no scheme; confirm superagent resolves it as intended.
    .get('www.qiushibaike.com/')
    .set(
      'User-Agent',
      'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
    )
    .end((err, response) => {
      // Guard against network/HTTP failures: without this check a failed
      // request throws inside the callback when `.text` is read.
      if (err || !response) {
        return res.status(500).json({
          code: 'error',
          message: '查询失败',
          data: [],
        });
      }

      const $ = cheerio.load(response.text);
      const arr = [];

      $('main')
        .find('article')
        .each((i, el) => {
          const $article = $(el);
          arr.push({
            author: $article.find('.username').text(),
            image: $article.find('.avatar').css('background-image'),
            age: $article.find('.age').text(),
            text: $article.find('.text').text(),
            godname: $article.find('.comment .item .username').text(),
            godWord: $article.find('.comment .item .text').text(),
            href: $article.find('.text').attr('href'),
          });
        });

      res.json({
        code: 'success',
        message: '查询成功',
        data: arr,
      });
    });
});

app.listen(3000, () => {
  console.log('node is ok');
});

// Attachments listed by the article: 效果展示.png, 爬虫代码整合.rar, 爬虫代码整合.rar
本文共计231个文字,预计阅读时间需要1分钟。
// server.js
//
// Minimal Express application: serves static files from the `www` directory
// and exposes GET /jokes, which fetches a remote page with superagent and
// loads the returned HTML into cheerio.
const express = require('express');
const request = require('superagent');
const cheerio = require('cheerio');

const app = express();

// Static assets are served from the `www` directory.
app.use(express.static('www'));

/**
 * GET /jokes
 *
 * Fetches https://www.example.com and loads the HTML into cheerio.
 * Responds with HTTP 500 and a plain-text message when the fetch fails,
 * otherwise with a plain-text confirmation.
 */
app.get('/jokes', (req, res) => {
  request.get('https://www.example.com').end((err, response) => {
    if (err) {
      return res.status(500).send('Error fetching jokes');
    }

    // `$` is the cheerio handle on the fetched document; the actual
    // extraction step is left as a placeholder in this snippet.
    const $ = cheerio.load(response.text);

    // Process and send the jokes
    res.send('Jokes fetched and processed');
  });
});
// server.js
//
// Express server that scrapes joke entries from a remote page and returns
// them as JSON from GET /jokes. Static files are served from `www`.
const express = require('express');
const request = require('superagent');
// cheerio is used to query the fetched HTML.
const cheerio = require('cheerio');

const app = express();

app.use(express.static('www'));

/**
 * GET /jokes
 *
 * Fetches the target page with a mobile User-Agent, collects one record per
 * `main article` element and responds with
 * `{ code: 'success', message: '查询成功', data: [...] }`.
 * When the fetch fails, responds with HTTP 500 and an empty `data` array
 * instead of dereferencing a missing response.
 */
app.get('/jokes', (req, res) => {
  request
    // NOTE(review): the URL has no scheme; confirm superagent resolves it as intended.
    .get('www.qiushibaike.com/')
    .set(
      'User-Agent',
      'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
    )
    .end((err, response) => {
      // Guard against network/HTTP failures: without this check a failed
      // request throws inside the callback when `.text` is read.
      if (err || !response) {
        return res.status(500).json({
          code: 'error',
          message: '查询失败',
          data: [],
        });
      }

      const $ = cheerio.load(response.text);
      const arr = [];

      $('main')
        .find('article')
        .each((i, el) => {
          const $article = $(el);
          arr.push({
            author: $article.find('.username').text(),
            image: $article.find('.avatar').css('background-image'),
            age: $article.find('.age').text(),
            text: $article.find('.text').text(),
            godname: $article.find('.comment .item .username').text(),
            godWord: $article.find('.comment .item .text').text(),
            href: $article.find('.text').attr('href'),
          });
        });

      res.json({
        code: 'success',
        message: '查询成功',
        data: arr,
      });
    });
});

app.listen(3000, () => {
  console.log('node is ok');
});

// Attachments listed by the article: 效果展示.png, 爬虫代码整合.rar, 爬虫代码整合.rar

