icrawler非常适合下载图片。但是，我想不出如何保存图像的源URL以便注明出处。我计划用这些图像制作幻灯片，并在图像中嵌入二维码以注明出处。我正在使用的代码（修改后）如下所示：
#!/usr/bin/env python3
from icrawler.builtin import GoogleImageCrawler
import os
import time
import random
from PIL import Image
from PIL.ExifTags import TAGS
import json
wordlist = open("/home/scott/unity-projects/spelling_bee/spelling-game-python-prototype/good_words.txt").read().splitlines()
pausecounter=0
for word in wordlist:
with open('/home/scott/unity-projects/spelling_bee/spelling-game-python-prototype/data/add.json') as f:
data = json.load(f)
example = data[0]["meanings"][0]["definitions"][0]["example"]
pausecounter = pausecounter + 1;
pathish="/home/scott/unity-projects/spelling_bee/spelling-game-python-prototype/images/"
pathish2="/home/scott/unity-projects/spelling_bee/spelling-game-python-prototype/images-compressed/"
pathish3="/home/scott/unity-projects/spelling_bee/spelling-game-python-prototype/images-videoed/"
if (os.path.exists(pathish3+word+'.mp4')):
print("found word: "+word)
time.sleep(0.033)
continue
searchphrase = word+" "+example
google_Crawler = GoogleImageCrawler(storage = {'root_dir': r'images'})
google_Crawler.crawl(keyword = searchphrase, max_num = 15, filters = dict(
license='commercial,modify'))
for number in range(1,16):
filename=pathish+str.zfill(str(number), 6)
newfilename=pathish+word+"_"+str.zfill(str(number), 6)
compressedpath="/home/scott/unity-projects/spelling_bee/spelling-game-python-prototype/images-compressed/"+word+"_"+str.zfill(str(number), 6)+".jpg"
compressedpathqmp4="/home/scott/unity-projects/spelling_bee/spelling-game-python-prototype/images-videoed/"+word+".mp4"
print(newfilename)
print(filename)
if (os.path.exists(filename+'.jpg')):
os.rename(filename+'.jpg', newfilename+'.jpg')
os.system("convert "+newfilename+".jpg -resize 512x512> -compose Copy -gravity center -extent 512x512 -quality 98 "+compressedpath)
if (os.path.exists(filename+'.png')):
os.rename(filename+'.png', newfilename+'.png')
os.system("convert "+newfilename+".png -resize 512x512> -background white -alpha remove -alpha off -compose Copy -gravity center -extent 512x512 -quality 98 "+compressedpath)
os.system("ffmpeg -i images-compressed/"+word+"_%06d.jpg -vf \"scale=512:512,setpts=80*PTS\" -crf 38 -profile:v high -level:v 5.1 -pix_fmt yuv420p "+compressedpathqmp4)
time.sleep(random.random()*pausecounter)
if pausecounter > 10:
        pausecounter = 0
对于保存的每个图像，我想将其源URL保存在一个单独的文件中。
发布于 2021-12-21 22:37:33
我参考文档里的例子拼凑出了下面的代码。我不是Python专家，所以无法完全解释它的工作原理，但它确实解决了我的问题。希望您可以通过变量名理解思路。唯一的缺陷是它会多生成一个URL文件。
#!/usr/bin/env python3
import base64
from icrawler import ImageDownloader
from icrawler.builtin import GoogleImageCrawler
from six.moves.urllib.parse import urlparse
class getAttribution(ImageDownloader):
    """Downloader that records each image's source URL in a sidecar
    .txt file next to the downloaded image, for later attribution."""

    def get_filename(self, task, default_ext):
        """Return the filename for a crawl task, prefixed with the
        current search word, and write the image's source URL to
        test-delete/<word>_<filename>.txt as a side effect."""
        # `word` is a module-level variable assigned before crawl() runs;
        # it groups output files by search term.
        global word
        # Parse once (the original parsed the same URL three times) and
        # rebuild scheme://netloc/path, dropping any query/fragment.
        parts = urlparse(task['file_url'])
        url = parts[0] + "://" + parts[1] + parts[2]
        filename = super(getAttribution, self).get_filename(
            task, default_ext)
        print(url + " " + filename)
        # Context manager guarantees the sidecar file is closed even if
        # write() raises (the original relied on a bare open/close pair).
        with open("test-delete/" + word + "_" + filename + ".txt", "w") as out:
            out.write(url)
        return word + "_" + filename
# Wire the attribution-recording downloader into the crawler and run one
# crawl; images and their URL sidecar files land in test-delete/.
google_crawler = GoogleImageCrawler(
    downloader_cls=getAttribution,
    downloader_threads=1,
    storage={'root_dir': 'test-delete'})
word = 'sand'
google_crawler.crawl(word, max_num=12,
                     filters=dict(license='commercial,modify'))
# source: https://stackoverflow.com/questions/70384322
复制相似问题