我一直试图从这个网站上提取数据:https://www.webuycars.co.za/buy-a-car。我已经查看了dev tools>network>xhr的回复,但我试图接收更多的数据,而不仅仅是从车辆的第一页的结果。到目前为止,这是代码:
import json
import scrapy
class carSpider(scrapy.Spider):
name = 'car'
body = {"to":24,"size":24,"type":"All","filter_type":"all","subcategory":None,"q":"audi","Make":None,"Roadworthy":None,"Auctions":[],"Model":None,"Variant":None,"DealerKey":None,"FuelType":None,"BodyType":None,"Gearbox":None,"AxleConfiguration":None,"Colour":None,"FinanceGrade":None,"Priced_Amount_Gte":0,"Priced_Amount_Lte":0,"MonthlyInstallment_Amount_Gte":0,"MonthlyInstallment_Amount_Lte":0,"auctionDate":None,"auctionEndDate":None,"auctionDurationInSeconds":None,"Kilometers_Gte":0,"Kilometers_Lte":0,"Priced_Amount_Sort":"","Bid_Amount_Sort":"","Kilometers_Sort":"","Year_Sort":"","Auction_Date_Sort":"","Auction_Lot_Sort":"","Year":[],"Price_Update_Date_Sort":"","Online_Auction_Date_Sort":"","Online_Auction_In_Progress":""}
def start_requests(self):
yield scrapy.Request(
url='https://website-elastic-api.webuycars.co.za/api/search',
callback=self.parse,
body=json.dumps(self.body),
method="POST")
def parse(self, response):
response = json.loads(response.body)
for resp in response['data']:
yield {
'Title': resp['OnlineDescription']
}这是我收到的数据:
2022-05-04 22:21:42 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2020 Nissan Almera 1.5 Acenta Auto'}
2022-05-04 22:21:42 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Citroen C3 Aircross 1.2T Puretech Sine Auto'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Toyota Hilux 2.4 Gd-6 RB Raider Pick Up Double Cab'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2013 Hyundai i10 1.25 Gls/fluid Auto'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2019 SYM Symphony JET 14 200'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2019 Nissan Micra 1.2 Active Visia'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2021 Suzuki Super Carry 1.2i Pick Up Single Cab'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Suzuki AN UB 125 (burgman)'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Honda XRL XR 125l'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Toyota Hilux 2.4 Gd-6 RB Raider Pick Up Double Cab'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Land Rover Defender 110 D300 SE X-Dynamic (221 KW)'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Big Boy TSR 250'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2019 Renault Kwid 1.0 Dynamique 5-Door'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2013 Tata Indigo 1.4 Manza Ignis'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2018 Datsun GO 1.2 LUX'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2021 Renault Kiger 1.0 Energy ZEN'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2020 Crosby Adventure Bike 400cc'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2012 Jeep Compass 2.0 LTD'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2021 Crosby Adventure Bike 400cc'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Renault Kwid 1.0 Climber 5-Door'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2019 Suzuki Swift 1.2 GLX'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2022 Volkswagen Polo Classic GP 1.4 Comfortline'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2020 Renault Kwid 1.0 Climber 5-Door Auto'}
2022-05-04 22:21:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://website-elastic-api.webuycars.co.za/api/search>
{'Title': '2019 Yamaha YZ 450 FX'}但是,我一直试图查询audi,如body变量(“q”:“audi”)中代码的第7行所示。无论我如何尝试,我总是收到同样的数据,这都是从第一页。我想我在scrapy.Request中解析我的身体是有问题的,但我不确定。我尝试过不同的主体格式,比如让它成为一个字符串,直接从响应中复制和粘贴有效负载,也不解析它为json格式,而是一个字符串。任何帮助都将不胜感激。
发布于 2022-05-04 21:21:18
实际上,您希望从多个页面获取数据,这意味着要进行分页,如果是,那么您可以遵循下一个工作解决方案。
import json
import scrapy
from scrapy.crawler import CrawlerProcess
class CarsSpider(scrapy.Spider):
name = 'car'
body = {"to":24,"size":24,"type":"All","filter_type":"all","subcategory":None,"q":"","Make":None,"Roadworthy":None,"Auctions":[],"Model":None,"Variant":None,"DealerKey":None,"FuelType":None,"BodyType":None,"Gearbox":None,"AxleConfiguration":None,"Colour":None,"FinanceGrade":None,"Priced_Amount_Gte":0,"Priced_Amount_Lte":0,"MonthlyInstallment_Amount_Gte":0,"MonthlyInstallment_Amount_Lte":0,"auctionDate":None,"auctionEndDate":None,"auctionDurationInSeconds":None,"Kilometers_Gte":0,"Kilometers_Lte":0,"Priced_Amount_Sort":"","Bid_Amount_Sort":"","Kilometers_Sort":"","Year_Sort":"","Auction_Date_Sort":"","Auction_Lot_Sort":"","Year":[],"Price_Update_Date_Sort":"","Online_Auction_Date_Sort":"","Online_Auction_In_Progress":""}
def start_requests(self):
yield scrapy.Request(
url='https://website-elastic-api.webuycars.co.za/api/search',
callback=self.parse,
body=json.dumps(self.body),
method="POST",
headers= {
"content-type": "application/json",
"User-Agent":"mozilla/5.0"
}
)
def parse(self, response):
response = json.loads(response.body)
for item in range(0,6528,24):
response['total']['value']=item
for resp in response['data']:
yield {
'Title': resp['OnlineDescription']
}
if __name__ == "__main__":
process =CrawlerProcess()
process.crawl()
process.start()输出:
'downloader/response_status_count/200': 1,
'item_scraped_count': 6528,https://stackoverflow.com/questions/72119300
复制相似问题