我正在尝试继承 Scrapy 的 XPathSelector 并对其进行修补,使其支持 CSS3 选择器。
XPathSelector的定义如下:
class XPathSelector(object_ref):
__slots__ = ['doc', 'xmlNode', 'expr', '__weakref__']
def __init__(self, response=None, text=None, node=None, parent=None, expr=None):
if parent is not None:
self.doc = parent.doc
...

我继承了 XPathSelector 并重写了 __init__:
class CSSSelector(XPathSelector):
def __init__(self, *args, **kwargs):
translator = kwargs.get('translator', 'html').lower()
if 'translator' in kwargs:
del kwargs['translator']
super(XPathSelector, self).__init__(*args, **kwargs)

当我尝试使用 CSSSelector 时,访问 doc、xmlNode 和 expr 都会抛出 AttributeError。手动把这些槽(slot)添加到 CSSSelector 中也无济于事。
继承一个使用了 __slots__ 的类,正确的做法是什么?
我的完整代码在这里:
"""
Extends `XPathSelector` to allow CSS3 selectors via the `cssselect` library.
"""
from cssselect import HTMLTranslator, GenericTranslator
from scrapy.selector import XPathSelector, XPathSelectorList
__all__ = ['CSSSelector', 'CSSSelectorList']
class CSSSelector(XPathSelector):
    """XPath selector that additionally understands CSS3 selectors.

    CSS expressions are translated to XPath by a ``cssselect`` translator and
    then evaluated against the node wrapped by this selector.

    Accepts the same arguments as ``XPathSelector`` plus one optional keyword:

    translator
        ``'html'`` (default), ``'xhtml'`` or ``'xml'`` (case-insensitive).
        Selects which ``cssselect`` translator converts CSS to XPath.

    Raises ``ValueError`` when ``translator`` is not one of those names.
    """

    # List only the slot this subclass adds. 'doc', 'xmlNode' and 'expr' are
    # already slots of XPathSelector; re-declaring them here would shadow the
    # base-class descriptors, which the Python data model leaves undefined.
    __slots__ = ['translator']

    def __init__(self, *args, **kwargs):
        # Remove our own keyword before delegating: the base __init__ does
        # not accept a 'translator' argument.
        translator = kwargs.pop('translator', 'html').lower()
        super(CSSSelector, self).__init__(*args, **kwargs)

        if translator == 'html':
            self.translator = HTMLTranslator()
        elif translator == 'xhtml':
            self.translator = HTMLTranslator(xhtml=True)
        elif translator == 'xml':
            self.translator = GenericTranslator()
        else:
            raise ValueError("Invalid translator: %s. Valid translators are 'html' (default), 'xhtml' and 'xml'." % translator)

    def _child(self, node, xpath):
        """Wrap ``node`` in a new selector that shares this one's translator."""
        child = self.__class__(node=node, parent=self, expr=xpath)
        # Without this, children would always fall back to the default
        # 'html' translator, whatever the parent was built with.
        child.translator = self.translator
        return child

    def _select_xpath(self, xpath):
        """Evaluate ``xpath`` with this selector's node as the context node.

        Returns a ``CSSSelectorList`` holding one selector per result. The
        list is empty when the wrapped node has no ``xpathEval`` attribute.

        Raises ``ValueError`` when libxml2 rejects the expression.
        """
        # NOTE(review): `unicode_to_str` and `libxml2` are not imported in the
        # visible part of this file -- confirm they are in scope.
        if not hasattr(self.xmlNode, 'xpathEval'):
            return CSSSelectorList([])

        self.doc.xpathContext.setContextNode(self.xmlNode)
        xpath = unicode_to_str(xpath, 'utf-8')
        try:
            xpath_result = self.doc.xpathContext.xpathEval(xpath)
        except libxml2.xpathError:
            raise ValueError("Invalid XPath: %s" % xpath)

        # A result without __iter__ is wrapped as a single-item list so both
        # cases go through the same code path.
        if hasattr(xpath_result, '__iter__'):
            nodes = xpath_result
        else:
            nodes = [xpath_result]
        return CSSSelectorList([self._child(node, xpath) for node in nodes])

    def select(self, selector):
        """Translate the CSS ``selector`` to XPath and evaluate it."""
        xpath = self.translator.css_to_xpath(selector)
        return self._select_xpath(xpath)

    def attribute(self, name):
        """Select the attribute called ``name`` on this selector's node."""
        # 'self::@name' is not valid XPath: an axis name cannot be combined
        # with the '@' abbreviation. '@name' is the abbreviated form of
        # 'attribute::name'.
        return self._select_xpath('@' + name)

    def text(self):
        """Select the text-node children of this selector's node."""
        # 'self::text()' only matches when the context node is itself a text
        # node, so it yields nothing for an element; 'text()' selects the
        # element's child text nodes.
        return self._select_xpath('text()')
class CSSSelectorList(XPathSelectorList):
    """List of selectors with element-wise ``attribute`` and ``text`` helpers."""

    def attribute(self, name):
        """Return a list holding ``x.attribute(name)`` for every item."""
        return [x.attribute(name) for x in self]

    def text(self, name=None):
        """Return a list holding ``x.text()`` for every item.

        ``name`` is ignored. It was a required but unused parameter; it is
        kept as an optional one so existing calls that pass it still work.
        """
        return [x.text() for x in self]

# (Question text) The class can be instantiated without any problem:
>>> css_selector = CSSSelector(response)

但随后到处都出现 AttributeError:
>>> css_selector.select('title')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-150-d21b0f17d4cc> in <module>()
----> 1 css_selector.select('title')
<ipython-input-147-c855c7eaf9fa> in select(self, selector)
57
58
---> 59 return self._select_xpath(xpath)
60
61
<ipython-input-147-c855c7eaf9fa> in _select_xpath(self, xpath)
34
35 def _select_xpath(self, xpath):
---> 36 if hasattr(self.xmlNode, 'xpathEval'):
37 self.doc.xpathContext.setContextNode(self.xmlNode)
38 xpath = unicode_to_str(xpath, 'utf-8')
AttributeError: xmlNode

发布于 2013-01-06 22:47:12
使用 __slots__ 本身没有任何问题。问题在于您没有在子类中调用 XPathSelector 的 __init__。
而不是super(XPathSelector, self),应该有super(CSSSelector, self)
class CSSSelector(XPathSelector):
def __init__(self, *args, **kwargs):
# ...
super(CSSSelector, self).__init__(*args, **kwargs)

另请参阅一个关于 Python 中 super() 方法的优秀讨论。
UPD。
值得注意的是,如果继承一个使用了 __slots__ 的类,通常也应该在子类中定义 __slots__,哪怕只是一个空的 __slots__。否则,每个实例仍然会创建一个 __dict__,使基类的 __slots__ 实际上失去意义。引自 Python 参考手册:
__slots__ 声明的作用仅限于定义它的类。因此,子类仍会拥有 __dict__,除非它们也定义了 __slots__(其中必须只包含新增槽的名称)。
https://stackoverflow.com/questions/14187571
复制相似问题