本文介绍如何使用 Node.js 内置的 http 模块爬取某个 URL 页面，并通过 cheerio 模块解析页面中的数据。
1 爬取某个URL页面
// Fetch a single page over HTTP and print its raw HTML to stdout.
var http = require('http');

// URL of the page to crawl.
var url = '...';

http.get(url, function (res) {
  var html = '';

  // Decode at the stream level: without this, each chunk is a Buffer that is
  // stringified on its own, so a multi-byte UTF-8 character split across two
  // chunks would be corrupted.
  res.setEncoding('utf8');

  // Accumulate the body as it arrives.
  res.on('data', function (chunk) {
    html += chunk;
  });

  // The whole response has been received; print it.
  res.on('end', function () {
    console.log(html);
  });
}).on('error', function (err) {
  // Report the underlying cause instead of discarding it.
  console.log('获取页面数据出错!', err.message);
});
2 通过cheerio解析数据
首先安装 cheerio 模块：npm install cheerio
然后在代码中引入 cheerio 模块：var cheerio = require('cheerio')
// Fetch a page over HTTP, extract the payment-method names and icon file
// names from it with cheerio, and print the result.
var http = require('http');
var cheerio = require('cheerio');

// URL of the page to crawl.
var url = '...';

http.get(url, function (res) {
  var html = '';

  // Decode at the stream level: without this, each chunk is a Buffer that is
  // stringified on its own, so a multi-byte UTF-8 character split across two
  // chunks would be corrupted.
  res.setEncoding('utf8');

  // Accumulate the body as it arrives.
  res.on('data', function (chunk) {
    html += chunk;
  });

  // The whole response has been received; parse and print it.
  res.on('end', function () {
    var html_obj = filterData(html);
    printInfo(html_obj);
  });
}).on('error', function (err) {
  // Report the underlying cause instead of discarding it.
  console.log('获取页面数据出错!', err.message);
});

/**
 * Extract payment information from the page markup.
 *
 * Looks inside elements matching `.payModeContent` and collects the text of
 * every `.payName` element and, for every `.payIcon img`, the second piece of
 * its `src` attribute when split on 'images/'.
 *
 * @param {string} html - Markup of the fetched page.
 * @returns {{payType: string[], payIcon: Array<string|undefined>}} Collected
 *   names and icon file names; an icon entry is `undefined` when the `src`
 *   does not contain 'images/'.
 */
function filterData(html) {
  var $ = cheerio.load(html);
  var payObj = $('.payModeContent');

  // .text() always yields a string, so .map() never drops an entry here.
  var pay_type_array = payObj.find('.payName').map(function () {
    return $(this).text();
  }).get();

  var pay_icon_array = [];
  payObj.find('.payIcon img').each(function () {
    var src = $(this).attr('src');
    // An <img> without a src attribute used to throw a TypeError and abort
    // the whole parse; skip it instead.
    if (typeof src !== 'string') {
      return;
    }
    pay_icon_array.push(src.split('images/')[1]);
  });

  return {
    payType: pay_type_array,
    payIcon: pay_icon_array
  };
}

/**
 * Print the extracted information to stdout.
 *
 * @param {Object} obj - Value to print (here, the result of filterData).
 */
function printInfo(obj) {
  console.log(obj);
}
结果:
{ payType: [ '微信支付方式', '支付宝钱包支付', '储蓄卡支付' ],
payIcon: [ 'weichatIcon.png', 'alipayIcon.png', 'depositcardpayIcon.png' ] }