在这个类中,我为一个加密货币网站制作了一个webscraper,以获取每种货币的名称和价格。也许我也可以使用API,但我认为最好是通过刮擦获得一些知识。
import requests
from bs4 import BeautifulSoup
import json
class Coins:
    """Scrape coin names and prices from the Coinbase price page.

    Typical usage: get_response() -> get_data() -> make_dict() -> save_data().
    State is shared between the steps via instance attributes.
    """

    def __init__(self):
        # Page to scrape and the accumulator for scraped name/price strings.
        self.url = "https://www.coinbase.com/price"
        self.new_list = []

    # Check if I get a 200 status code and if so, create instance from bs4
    def get_response(self):
        """Fetch the page and, on success, build a BeautifulSoup tree.

        A requests.Response is truthy for 2xx/3xx status codes, which is
        what the bare `if self.response` test relies on.
        """
        self.response = requests.get(self.url)
        if self.response:
            print("Access granted")
            self.soup = BeautifulSoup(self.response.text, 'lxml')
        else:
            print("Error")

    # Select the class and get the data
    def get_data(self):
        """Collect alternating name/price strings into self.new_list.

        The page emits three matched elements per coin; the third
        (x % 3 == 2) is auxiliary data we do not want, so only the name
        and price cells are kept.
        """
        # Reset first so repeated calls do not accumulate duplicates.
        self.new_list = []
        info = self.soup.find_all(class_="Header__StyledHeader-sc-1q6y56a-0 hZxUBM TextElement__Spacer-sc-18l8wi5-0 hpeTzd")
        for x, e in enumerate(info):
            if x % 3 != 2:
                self.new_list.append(e.text)
        return self.new_list

    # From the information that I got and appended into a list, make a dict
    def make_dict(self):
        """Pair consecutive entries of new_list as {name: price}."""
        self.my_dict = {self.new_list[x]: self.new_list[x + 1]
                        for x in range(0, len(self.new_list), 2)}
        return self.my_dict

    # Save the data into a json file
    def save_data(self):
        """Write the scraped prices (make_dict result) to data.json."""
        with open('data.json', 'w') as output:
            json.dump(self.my_dict, output, indent=2)
if __name__ == '__main__':
test = Coins()
test.get_response()
test.get_data()
test.make_dict()
test.save_data()我关心的一个问题是:我尝试访问方法中的变量,使用self是个好主意吗?例如,在方法save_data()中。我想在get_data()中传递函数json.dump(),但是它没有成功。因此,我在make_dict()中将变量更改为self.my_dict,以便在save_data()方法中访问它,结果得到了结果,但这是一个好的实践还是根本就没有呢?
这是我第一次实际使用类(class)来组织代码。我很感激任何能帮助我提高的反馈意见。
发布于 2019-10-22 03:59:05
以下是另一个实现:
#!/usr/bin/env python3
from bs4 import BeautifulSoup
from pprint import pprint
import re
import requests
class Coins:
table_re = re.compile('^AssetTable__Table-')
def __init__(self, url="https://www.coinbase.com/price"):
self.url = url
self.session = requests.Session()
# Check if I get a 200 status code and if so, create instance from bs4
def _get_response(self) -> BeautifulSoup:
response = self.session.get(self.url)
response.raise_for_status()
return BeautifulSoup(response.text, 'lxml')
# Select the class and get the data
def get_data(self) -> dict:
soup = self._get_response()
table = soup.find('table', attrs={'class': self.table_re})
prices = {}
for row in table.tbody.find_all('tr'):
cells = row.find_all('td')
if len(cells) < 6:
continue
name = cells[1].find('h4').text
price = cells[2].find('h4').text
prices[name] = price
return prices注意:
关于打印 "Access granted":除了一些常见的小问题之外,数据检索方法应当只在失败时抛出异常,而不应在成功时向 stdout 打印内容。这类进度提示最好留给上层的调用代码来处理。https://codereview.stackexchange.com/questions/230994
复制相似问题