我一直在开发网页抓取器(scraper)。现在我已经创建了两个抓取器,它们各自生成一个对象数组。
现在,我已经设法抓取数据,并把它们放在各自的数组中;我还创建了parseData模块,把这两个对象数组发送给它。现在我想创建一个最终的对象数组,它将被发送到DB以插入数据。
下面是我试图合并这两个数组的地方:
const result = []
exports.parseData = (apartments) => {
result.push(apartments);
console.log(result);
};这并没有给出我所期望的输出。我预计两个数组将合并,而不是推入一个数组。我明白为什么会发生这种事-因为推法
这两项产出分别是:
apartments in olxScraper
[
{
id: '41676279',
link: 'www.link.com',
description: 'Description of apartment',
price: 387000,
picture: 'link to picture',
squaremeters: 86,
pricepersquaremeter: 4500
},
{
//same object as before but different data
}
]
apartments in santScraper
[
{
id: '3524',
link: 'www.linkSecondScraper.com',
description: 'Description',
price: 150000,
picture: 'picture',
squaremeters: 55,
pricepersquaremeter: 2727
},
{
//same object as before but different data
}
]
它们是结构相同的数组:其中的对象具有相同的键值对。唯一的区别是数组中对象的数量。
那么合并这两者的正确方法是什么呢?我只想要一个数组,其中包含这两个数组中的对象。
这里也是创建数组的地方:
const parsing = require('../data-functions/parseData');
const axios = require('axios');
const cheerio = require('cheerio');
exports.olxScraper = (count) => {
const url = `https://www.olx.ba/pretraga?vrsta=samoprodaja&kategorija=23&sort_order=desc&kanton=9&sacijenom=sacijenom&stranica=${count}`;
const apartments = [];
const getRawData = async () => {
try {
await axios.get(url).then((res) => {
const $ = cheerio.load(res.data);
$('div[id="rezultatipretrage"] > div')
.not('div[class="listitem artikal obicniArtikal i index"]')
.not('div[class="obicniArtikal"]')
.each((index, element) => {
$('span[class="prekrizenacijena"]').remove();
const getLink = $(element)
.find('div[class="naslov"] > a')
.attr('href');
const getDescription = $(element)
.find('div[class="naslov"] > a > p')
.text();
const getPrice = $(element)
.find('div[class="datum"] > span')
.text()
.replace(/\.| ?KM$/g, '')
.replace(' ', '');
const getPicture = $(element)
.find('div[class="slika"] > img')
.attr('src');
apartments[index] = {
id: getLink.substring(27, 35),
link: getLink,
description: getDescription,
price: parseFloat(getPrice),
picture: getPicture,
};
});
const fetchSquaremeters = Promise.all(
apartments.map((item) => {
return axios.get(item.link).then((response) => {
const $ = cheerio.load(response.data);
const getSquaremeters = $('div[class="df2 "]')
.first()
.text()
.replace('m2', '')
.replace(',', '.')
.split('-')[0];
item.squaremeters = parseFloat(getSquaremeters);
item.pricepersquaremeter = Math.round(
parseFloat(item.price) / parseFloat(getSquaremeters)
);
});
})
);
fetchSquaremeters.then(() => {
//console.log(olxApartments);
parsing.parseData(apartments);
});
});
} catch (error) {
console.log(error);
}
};
getRawData();
};const parsing = require('../data-functions/parseData');
const axios = require('axios');
const cheerio = require('cheerio');
exports.santScraper = (count) => {
const url = `https://www.sant.ba/pretraga/prodaja-1/tip-2/cijena_min-20000/stranica-${count}`;
const apartments= [];
const getRawData = async () => {
try {
await axios.get(url).then((response) => {
const $ = cheerio.load(response.data);
$('div[class="col-xxs-12 col-xss-6 col-xs-6 col-sm-6 col-lg-4"]').each(
(index, element) => {
const getLink = $(element).find('a[class="re-image"]').attr('href');
const getDescription = $(element).find('a[class="title"]').text();
const getPrice = $(element)
.find('div[class="prices"] > h3[class="price"]')
.text()
.replace(/\.| ?KM$/g, '')
.replace(',', '.');
const getPicture = $(element).find('img').attr('data-original');
const getSquaremeters = $(element)
.find('span[class="infoCount"]')
.first()
.text()
.replace(',', '.')
.split('m')[0];
const pricepersquaremeter =
parseFloat(getPrice) / parseFloat(getSquaremeters);
apartments[index] = {
id: getLink.substring(42, 46),
link: getLink,
description: getDescription,
price: Math.round(getPrice),
picture: getPicture,
squaremeters: Math.round(getSquaremeters),
pricepersquaremeter: Math.round(pricepersquaremeter),
};
}
);
parsing.parseData(apartments);
});
} catch (error) {
console.log(console.log(error));
}
};
getRawData();
};编辑还有一个问题:我需要以某种方式告诉这个函数等待两个数组的到来,因为它首先单独推送一个数组,然后当第二个数组进入时,它们都被推入。
[
[
{//objects of sant}
]
]
[
[
{//objects of sant}
],
[
{//objects of olx}
]
]
发布于 2021-02-16 08:44:35
您的parseData函数需要2个参数。
exports.parseData = (olxApartments, santApartments) => {
const result = [...olxApartments, ...santApartments];
console.log(result);
};但是,在调用函数的两种情况下,只传递一个参数。
// Each call supplies a single array, so it is bound to parseData's first parameter.
parsing.parseData(olxApartments);
parsing.parseData(santApartments);因此,您将得到这个错误,因为在parseData函数中,santApartments是未定义的,因此不可迭代。两次调用该函数时,只向其传递一个参数,然后将其作为olxApartments引用。
parseData不会保存这些参数的值。如果要合并olxApartments和santApartments,则需要在该函数外层的作用域中创建一个数组,并将相关数据推入该数组。
根据问题添加信息。下面的代码可能对您有所帮助:
// Shared accumulator: every scraper's results end up in this one flat array.
const apartmentsData = [];

/**
 * Appends each apartment object to the shared list.
 * Spreading the argument keeps the list flat instead of nesting the array.
 *
 * @param {Array<object>} apartments - Apartment objects from one source.
 */
function parseData(apartments) {
  apartmentsData.push(...apartments);
}

/** Stand-in for the OLX scraper: builds sample data and hands it to parseData. */
function getOlxApartments() {
  const olxApartments = [{ id: "41676279" }, { id: "41676280" }];
  parseData(olxApartments);
}

/** Stand-in for the Sant scraper: builds sample data and hands it to parseData. */
function getSantApartments() {
  const santApartments = [{ id: "3524" }, { id: "3521" }];
  parseData(santApartments);
}

getOlxApartments();
getSantApartments();
console.log(apartmentsData);
// Array(4)
// 0: {id: "41676279"}
// 1: {id: "41676280"}
// 2: {id: "3524"}
// 3: {id: "3521"}
https://stackoverflow.com/questions/66221035
复制相似问题