# Scrape Metacritic user reviews for Pokemon Sword (Switch), page by page, and
# append each review's author, date, rating and text to the parallel lists in
# `review_dict` (expected to be defined earlier with the keys 'name', 'date',
# 'rating' and 'review').

# Request headers are loop-invariant, so build them once. A plain 'Mozilla/5.0'
# User-Agent is reportedly required for the server to include the
# `review_content` markup in its response.
headers = {'User-agent': 'Mozilla/5.0'}

for page in range(0, 26):  # Remember to update the number of pages
    url = 'https://www.metacritic.com/game/switch/pokemon-sword/user-reviews?page=' + str(page)
    # Pass the dict defined above; the earlier version passed `user_agent`,
    # a name that is not defined in this snippet.
    response = requests.get(url, headers=headers)
    # NOTE: consider pausing between requests (e.g. a random sleep of a few
    # seconds) to avoid hammering the server.
    soup = BeautifulSoup(response.text, 'html.parser')

    for review in soup.find_all('div', class_='review_content'):
        name_div = review.find('div', class_='name')
        if name_div is None:
            # NOTE(review): this abandons the rest of the page at the first
            # entry without a name block; confirm `continue` is not intended.
            break

        # Extract every field before appending anything, so a malformed review
        # cannot leave the parallel lists with different lengths.
        name = name_div.find('a').text
        date = review.find('div', class_='date').text
        rating = review.find('div', class_='review_grade').find_all('div')[0].text

        # Long reviews carry their full text in an expanded blurb span; short
        # reviews only have the plain span inside the review body.
        expanded = review.find('span', class_='blurb blurb_expanded')
        if expanded is not None:
            text = expanded.text
        else:
            text = review.find('div', class_='review_body').find('span').text

        review_dict['name'].append(name)
        review_dict['date'].append(date)
        review_dict['rating'].append(rating)
        review_dict['review'].append(text)

# This code comes from https://towardsdatascience.com/web-scraping-metacritic-reviews-using-beautifulsoup-63801bbe200e
我试图从某个网站抓取所有的评论，但无法获取到“review”（评论）内容。
发布于 2020-05-26 05:26:25
更改 User-Agent 请求头，使其与您链接的示例中的请求头一致。
headers = {'User-agent': 'Mozilla/5.0'}
如果没有它，页面请求返回的 HTML 中就不会包含 review_content 类。尝试 print(soup)，您将看到两次 HTML 响应之间的差异。看起来服务器会根据这个指定的请求头返回不同的响应。
https://stackoverflow.com/questions/62010802
复制相似问题