我在 Puppeteer 上运行脚本,相关组件均已更新到最新版本。
预期的流程是:访问网站(URL 形如 url/{搜索项}),并依次处理搜索名称列表。然后,对于每个搜索项对应的搜索结果页面,获取每条商品列表的名称、价格和图片 URL。目前脚本报错,提示找不到选择器。非常感谢任何帮助!
网站数据的布局如下:
<div class="items-box-content">
<section class="items-box">
<a href="https://listingurl">
<figure class="items-box-photo">
<img data-src="https://imageurl.jpg" class=" lazyloaded" src="https://imageurl.jpg">
</figure>
<div class="items-box-main">
<h3 class="items-box-name"> listing name </h3>
<div class="items-box-figure">
<div class="items-price font-4"> $29.95 </div> // item's price
</h3>
</div>
我目前的代码如下(会抛出错误):
// Asker's original script, kept exactly as posted. For each search term it opens
// a page, collects elements matching div[class*="items-box"], and tries to read
// image src, title, price and link from each one. The comments below flag the
// problems that are visible in this snippet and in the HTML sample shown with it.
const puppeteer = require('puppeteer');
const searches = ["a", "b", "c"]; //appended to url
(async () => {
const browser = await puppeteer.launch({ headless: false });
let results =[];
for (const search of searches) {
try {
// NOTE: `page` is assigned without const/let, so it is not a local binding.
page = await browser.newPage();
await page.goto(`https://weburl/?keyword=${search}`);
// Scrolls the first matching div into view; throws if querySelector returns null.
await page.evaluate(() => { document.querySelector('div[class*="items-box"]').scrollIntoView();});
// [class*="items-box"] is a substring match, so in the sample markup it also
// matches div.items-box-content, div.items-box-main and div.items-box-figure.
let elements = await page.$$('div[class*="items-box"]');
for (let element of elements) {
// NOTE: the attribute value is missing its closing double quote. Also, in the
// sample markup "items-box-photo" is the class of the <figure>, not of the <img>.
let listImg = await element.$eval(('img[class="items-box-photo]'), img => img.getAttribute('src'));
// NOTE: `d` and `h` are not tag names used in the sample markup (it uses div / h3).
let listTitle = await element.$eval(('d[class="items-box-main"] > h[class="items-box-name"]'), node => node.innerText.trim());
// NOTE: [class="items-price"] is an exact match; the sample class is "items-price font-4".
let listPrice = await element.$eval(('d[class="items-box-figure"] > d[class="items-price"]'), node => node.innerText.trim());
// NOTE: the <a> in the sample markup has no class attribute.
let listUrl = await element.$eval(('d[class="items-box-content"] > a[class*="items-box"]'), node => node.getAttribute('href'));
results.push({
listImg,
listTitle,
listPrice,
listUrl
})
// NOTE: this return is inside the inner loop, so the async function returns
// after the first element of the first search term.
return results;
}
} finally {
// NOTE: `page.close` is referenced but never called (no parentheses).
await page.close
}
}
})();引发的错误是
(node:5168) UnhandledPromiseRejectionWarning: Error: Error: failed to find element matching selector "img[class="items-box-photo]"
发布于 2019-04-28 12:11:03
问题就在错误消息(Error: failed to find element matching selector ...)中。
选择器在以下几行中是错误的:
let listImg = await element.$eval(('img[class="items-box-photo]'), img => img.getAttribute('src'));
let listTitle = await element.$eval(('d[class="items-box-main"] > h[class="items-box-name"]'), node => node.innerText.trim());
let listPrice = await element.$eval(('d[class="items-box-figure"] > d[class="items-price"]'), node => node.innerText.trim());
let listUrl = await element.$eval(('d[class="items-box-content"] > a[class*="items-box"]'), node => node.getAttribute('href'));根据您提供的HTML代码,这些代码应该是:
let listImg = await element.$eval('img.lazyloaded', img => img.getAttribute('src'));
let listTitle = await element.$eval('h3.items-box-name', node => node.innerText.trim());
let listPrice = await element.$eval('div.items-price', node => node.innerText.trim());
let listUrl = await element.$eval('div.items-box-content a', node => node.getAttribute('href'));
注意:按类查询元素时,正确的做法不是使用 [class=...],而是使用类选择器 `.`(例如 img.lazyloaded)。
发布于 2019-04-28 06:48:31
我用测试/调试更新了您的代码。
const puppeteer = require('puppeteer');
const searches = ["a"];
(async () => {
const browser = await puppeteer.launch({ headless: false });
function delay(timeout) {
return new Promise((resolve) => {
setTimeout(resolve, timeout);
});
}
let results = [];
for (const search of searches) {
try {
page = await browser.newPage();
await page.goto(`https:url/`);
await page.evaluate(() => { document.querySelector('section[class*="items-box"]').scrollIntoView(); });
let elements = await page.$$('section[class*="items-box"]');
console.log(elements.length)
console.log('wait 6 seconds')
await delay(6000);
for (let element of elements) {
// await delay(6000);
let listImg = await element.$eval(('img'), img => img.getAttribute('src'));
let listTitle = await element.$eval(('h3[class="items-box-name font-2"]'), node => node.innerText.trim());
let listPrice = await element.$eval(('div[class="items-box-price font-5"]'), node => node.innerText.trim());
let listUrl = await element.$eval(('div[class="items-box-content clearfix"] a'), node => node.getAttribute('href'));
results.push({
listImg,
listTitle,
listPrice,
listUrl
});
}
debugger;
} catch (error) {
console.log(error)
} finally {
//await page.close
await browser.close
}
}
console.log(results)
return results;
})();更新内容:
return results 的位置:原来写在 for 循环内部,现在移到了循环之后:
for(){
return results;
}
=>
for(){
}
return results;
querySelector 的选择器改为:section[class*="items-box"]
img // "element" 中只有一个 img 标签
h3[class="items-box-name font-2"] // 去掉了外层元素的选择器
div[class="items-box-figure"] > div[class="items-price font-4"]
=> div[class="items-box-price font-5"] // 类名更新了?在我这边是 items-box-price
div[class="items-box-content clearfix"] a
try catch finally:
catch 可以让脚本在某一步出错崩溃后,仍然继续处理下一步。
https://stackoverflow.com/questions/55887523
复制相似问题