我写了一个简单的图像刮刀桌面墙纸项目由福克斯是黑色。该网站是一个奇妙的(但,显然已经死了)博客充满了精彩的图片。然而,没有办法下载一整束图像,因此这个简单的刮板。
我要把代码放上去接受检查。代码样式和/或实现改进的领域有哪些?我很感谢你的反馈。
#!/usr/bin/python
import os
import requests
from bs4 import BeautifulSoup as bs
MAX_PAGES = 1 # Max. number of pages is 41
SAVE_DIRECTORY = 'fox_backgrounds'
BASE_URL = "http://www.thefoxisblack.com/category/the-desktop-wallpaper-project/page"
RESOLUTIONS = [
"1280x800", "1440x900", "1680x1050", "1920x1200", "2560x1440",
"iphone", "iphone-5", "iphone6", "iphone-6-plus", "iphone6plus",
"ipad",
]
def fetch_url(url):
return requests.get(url).text
def clip_url(href):
return href[href.rfind('/'):]
def save_image(href):
print(f"Downloading: {clip_url(href)}")
request = requests.get(href)
with open(f"fox_backgrounds{clip_url(href)}", 'wb') as output:
output.write(request.content)
def get_images_from_page(url):
html = fetch_url(url)
soup = bs(html, "html.parser")
for link in soup.find_all("a", class_="btn_download"):
href = link["href"]
for resolution in RESOLUTIONS:
if resolution in href:
save_image(href)
def make_dir():
os.makedirs(SAVE_DIRECTORY, exist_ok=True)
def get_backgrounds():
make_dir()
for page in range(0, MAX_PAGES):
get_images_from_page(BASE_URL + str(page + 1))
def main():
get_backgrounds()
if __name__ == '__main__':
main()发布于 2019-07-10 14:27:51
建议:
#!/usr/bin/python
import os
import requests
from bs4 import BeautifulSoup as bs
from pathlib import Path
from shutil import copyfileobj
MAX_PAGES = 1 # Max. number of pages is 41
SAVE_DIRECTORY = Path('fox_backgrounds')
BASE_URL = 'http://www.thefoxisblack.com/category/the-desktop-wallpaper-project/page'
RESOLUTIONS = {
'1280x800', '1440x900', '1680x1050', '1920x1200', '2560x1440',
'iphone', 'iphone-5', 'iphone6', 'iphone-6-plus', 'iphone6plus',
'ipad'
}
def fetch_url(url):
return requests.get(url).text
def clip_part(href):
return href.rpartition('/')[-1]
def save_image(href):
part = clip_part(href)
print(f' Downloading: {part}')
fn = SAVE_DIRECTORY / part
with requests.get(href, stream=True) as response, \
open(fn, 'wb') as output:
copyfileobj(response.raw, output)
def get_images_from_page(url):
html = fetch_url(url)
soup = bs(html, 'html.parser')
for link in soup.find_all('a', class_='btn_download'):
href = link['href']
if any(href.endswith(f'-{res}.jpg') for res in RESOLUTIONS):
save_image(href)
else:
print(f'Unknown resolution {href}')
def make_dir():
os.makedirs(SAVE_DIRECTORY, exist_ok=True)
def get_backgrounds():
make_dir()
for page in range(1, MAX_PAGES+1):
print(f'Fetching page {page}...')
get_images_from_page(f'{BASE_URL}{page}')
def main():
get_backgrounds()
if __name__ == '__main__':
main()评论:
SAVE_DIRECTORY,但随后将其用于创建而不是文件写入。RESOLUTIONS应该是一个集合,或者是元组。clip_url有点误导;它返回一个URL部件,而不是一个完整的URL。RESOLUTIONS检查有点令人费解。也许这是个验证步骤?但是,如果这是一个验证步骤,您将无声地失败,而不是打印警告。而且,即使在找到正确的分辨率之后,也要继续迭代。我重写了这个文件,只检查了当前的分辨率,并且对文件名中显示的位置也要小心一点。range(0, ...)是多余的,但是对于您的用例,您最好还是使用range(1。rpartition与您编写的基本相同,但不需要任何花哨的数组切片。clip_path打两次电话下面的版本合理地使用了生成器,这样迭代函数只需要知道它们的迭代,而不需要知道内部业务逻辑。
此外,对于站点上的许多文件,您的分辨率检查需要不区分大小写;并且站点有gif和png图像以及jpg。你错过了一些决议和一些替代的iPhone拼写。我不认为这是值得做一个决议检查,特别是在这些边缘的情况下,但我留下了它。
#!/usr/bin/python
import os
import requests
from bs4 import BeautifulSoup as bs
from pathlib import Path
from shutil import copyfileobj
MAX_PAGES = 1 # Max. number of pages is 41
SAVE_DIRECTORY = Path('fox_backgrounds')
BASE_URL = 'http://www.thefoxisblack.com/category/the-desktop-wallpaper-project/page'
RESOLUTIONS = {
'1280x800', '1440x900', '1680x1050', '1920x1200', '2560x1440', '3840x2400',
'iphone', 'iphone5', 'iphone-5', 'iphone6', 'iphone-6-plus', 'iphone6plus', 'iphone6-plus',
'ipad'
}
def clip_part(href):
return href.rpartition('/')[-1]
def save_image(href):
part = clip_part(href)
print(f' Downloading: {part}')
fn = SAVE_DIRECTORY / part
with requests.get(href, stream=True) as response, \
open(fn, 'wb') as output:
copyfileobj(response.raw, output)
def urls_from_page(url):
soup = bs(requests.get(url).text, 'html.parser')
for link in soup.find_all('a', class_='btn_download'):
href = link['href']
if any(href.lower().contains(f'-{res}.') for res in RESOLUTIONS):
yield href
else:
print(f'Unknown resolution {href}')
def make_dir():
os.makedirs(SAVE_DIRECTORY, exist_ok=True)
def all_urls():
for page in range(1, MAX_PAGES+1):
print(f'Fetching page {page}...')
yield from urls_from_page(f'{BASE_URL}{page}')
def main():
make_dir()
for url in all_urls():
save_image(url)
if __name__ == '__main__':
main()https://codereview.stackexchange.com/questions/223858
复制相似问题