首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >Nightmare.js和Vo.js -提前退出循环

Nightmare.js和Vo.js -提前退出循环
EN

Stack Overflow用户
提问于 2016-10-13 17:19:50
回答 1查看 671关注 0票数 0

我正在使用Nightmare.js构建一个屏幕抓取器,并使用Vo.js来控制流程。我有一个来自CSV的邮政编码数组,遍历邮政编码,执行搜索,跟踪搜索结果中的每个链接,并抓取该页面。然而,它似乎只使用第一个邮政编码执行搜索,似乎提前退出循环。有什么想法吗?下面是我的代码:

代码语言:javascript
复制
var Nightmare = require('nightmare'),
    vo = require('vo'),
    fs = require('fs'),
    parse = require('csv-parse'),
    csvWriter = require('csv-write-stream'),
    nightmare = Nightmare(),
    writer = csvWriter(),
    path = process.argv[2]

var searchByPostcode = function*(postcode) {
  var result = yield nightmare
    .goto(URL)
    .select('#body_umbBodyContent_BranchSearch_1_ddlRadius', 20)
    .type('input[id=body_umbBodyContent_BranchSearch_1_txtLocation]', postcode)
    .click('#body_umbBodyContent_BranchSearch_1_btnSearch')
    .wait('.resultsarea .result')
    .evaluate(function () {
      var agents = []
      var results = $('.result a, .alternate_result a')
      urls = results.map(function(r) { return BASE_URL + $(this).attr('href') })
      return urls
    })

  return result
}

var getDetail = function*(url) {
  var result = yield nightmare
    .goto(url)
    .wait('.wizard')
    .evaluate(function() {
      var company = $("div.divlabel:contains('Company:')").next('div').find('a').attr('href')
      var name = $('h1.tint').text().trim()
      var address = $('#address_container div:nth-child(2)').text().trim()
      var website = $("div.divlabel:contains('Website:')").next('div').find('a').attr('href')
      var tel = $("div.divlabel:contains('Telephone:')").next('div').text().trim()
      var email = $("div.divlabel:contains('E-Mail:')").next('div').find('a').text().trim()

      return {
        url: document.URL,
        company: company,
        name: name,
        address: address,
        website: website,
        tel: tel,
        email: email
      }
    })

  return result
}

var run = function*() {
  var agents = []
  var postcodes = fs.readFileSync(path, 'utf8').split('\n')

  for (var i = 0, len = postcodes.length; i < len; i++) {
    console.log(postcodes[i])
    var urls = yield searchByPostcode(postcodes[i])
    console.log(urls)

    for (var i = 0, len = urls.length; i < len; i++) {
      var agent = yield getDetail(urls[i])
      if (agent.email == '' & agent.company != '') {
        company = yield getDetail('http://www.arla.co.uk/' + agent.company)
        agent.website = agent.website == undefined ? company.website : agent.website
        agent.email = agent.email == '' ? company.email : agent.email
        agent.tel = agent.tel == '' ? company.tel : agent.tel
      }
      agents.push(agent)
    }

  }

  yield nightmare.end();

  return agents
}

vo(run)()
  .then(function(agents) {
    writer.pipe(fs.createWriteStream('results.csv'))
    for (var i = 0, len = agents.length; i < len; i++) {
      writer.write(agents[i])
    }
    writer.end()
  })
  .catch(e => console.error(e))
EN

回答 1

Stack Overflow用户

发布于 2016-10-14 16:22:54

是啊,结果证明我是个笨蛋。我将循环中的两个索引计数器都设置为i,因此在第一次搜索后,i将设置为url计数,因此一旦解析完所有url,就会退出循环。哦!

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/40016981

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档