如何使用 Python 中的 Scrapy 从网站获取图像 URL?请帮帮我,这是我的代码:
from scrapy.spiders import CrawlSpider, Rule
#from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
from scrapy.contrib.linkextractors import LinkExtractor
from scrapy.item import Item, Field
class MyItem(Item):
    """Scrapy item holding the URLs gathered from a single crawled page."""

    # The spider in this file stores a list of URL strings under this key.
    url = Field()
class someSpider(CrawlSpider):
    """Crawl bambeeq.com and emit one item per visited page.

    Every link found on a page is followed (``follow=True``) and each
    response is handed to :meth:`parse_obj`, which collects link URLs
    into a ``MyItem``.
    """

    name = 'crawltest'
    allowed_domains = ['bambeeq.com']
    start_urls = ['http://www.bambeeq.com/']
    # An empty ``allow`` places no restriction on which links are followed.
    rules = (Rule(LinkExtractor(allow=()), callback='parse_obj', follow=True),)

    def parse_obj(self, response):
        """Build an item listing the links extracted from ``response``.

        Args:
            response: The page response passed in by the crawl rule.

        Returns:
            A ``MyItem`` whose ``'url'`` field is a list of link URLs.
        """
        item = MyItem()
        item['url'] = []
        # ``deny`` reuses the allowed-domain strings as URL patterns, so links
        # whose URL matches 'bambeeq.com' are left out of the result.
        extractor = LinkExtractor(allow=(), deny=self.allowed_domains)
        for link in extractor.extract_links(response):
            item['url'].append(link.url)
            # NOTE(review): a link extractor yields hyperlink targets, not
            # image sources, and MyItem declares no 'image' field, so the
            # earlier ``item['image'].append(link.img)`` attempt cannot work.
        return item
# Posted 2016-03-10 00:57:32
您提取的是链接('a' 元素),而不是图像('img' 元素)。试试这个:
# iterate over the list of images
for image in response.xpath('//img/@src').extract():
# make each one into a full URL and add to item[]
item['url'].append(response.urljoin(image))
yield itemhttps://stackoverflow.com/questions/35896008
复制相似问题