我的蜘蛛: autospd.py
class AutospdSpider(scrapy.Spider):
name = 'autospd'
start_urls = ['http://news.dayoo.com/guangzhou/150960_2.shtml']
dt_ft = "%Y-%m-%d %H:%M"
def parse(self, response):
list_objs = response.css("div.dy-list>div")
for li in list_objs:
loader = AutopjtItemLoader(item=AutopjtItem(), selector=li, context=self.dt_ft)
print(loader.context.items()) #please see print-1
loader.nested_css("h2>a").add_css("title", "::text")
loader.nested_css("h2>a").add_css("url", "::attr(href)")
loader.nested_css("div.txt-area>div.news-time").add_xpath("pub_time", "string()")
yield loader.load_item()打印-1:dict_items(‘context’,‘%Y-%m- %H:%M'),(’选择器‘,\r\n '>),(’响应‘,无),('item',{})
items.py
def func(value, loader_context):
print(loader_context.items()) # please see print-2
# ft = loader_context.get("context")
# time_dt = datetime.strptime(value, ft)
return value
class AutopjtItemLoader(ItemLoader):
default_output_processor = TakeFirst()
pub_time_in = MapCompose(func)
class AutopjtItem(scrapy.Item):
title = scrapy.Field()
url = scrapy.Field()
pub_time = scrapy.Field()打印-2:[(“选择器”,2019-06-12 08:59< '>),(“响应”,无),(“项目”,{})]
为什么在loader_context中没有“上下文”?
发布于 2019-06-20 01:24:10
def nested_xpath(self, xpath, **context):
selector = self.selector.xpath(xpath)
context.update(selector=selector)
subloader = self.__class__(
item=self.item, parent=self, **context
)
return subloader
def nested_css(self, css, **context):
selector = self.selector.css(css)
context.update(selector=selector)
subloader = self.__class__(
item=self.item, parent=self, **context
)
return subloader从scrapy的源代码中,如果使用nested_css或nested_xpath,则必须添加上下文。例:
loader.nested_css("div.txt-area>div.news-time", dt_ft=self.dt_ft).add_xpath("pub_time", "string()")https://stackoverflow.com/questions/56661794
复制相似问题