This article collects typical usage examples of the Python method scrapy.utils.misc.arg_to_iter. If you are asking yourself how misc.arg_to_iter works, how to call it, or what it looks like in practice, the curated code examples below should help. You can also explore further usage examples from the module it belongs to, scrapy.utils.misc.
The following 15 code examples of misc.arg_to_iter are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
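Before working through the examples, it helps to recall what arg_to_iter itself does: it normalizes an argument into something that can always be iterated over. Below is a minimal sketch of that contract as I understand it (corner-case handling may differ slightly between Scrapy versions):

from scrapy.utils.misc import arg_to_iter

# None becomes an empty iterable
assert list(arg_to_iter(None)) == []
# lists, tuples and generators pass through unchanged
assert list(arg_to_iter(['a', 'b'])) == ['a', 'b']
# single values -- including strings and dicts -- are wrapped in a one-element list
assert list(arg_to_iter('hello')) == ['hello']
assert list(arg_to_iter({'key': 1})) == [{'key': 1}]
assert list(arg_to_iter(42)) == [42]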
Example 1: get_value

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def get_value(self, value, *processors, **kw):
    regex = kw.get('re', None)
    if regex:
        value = arg_to_iter(value)
        value = flatten(extract_regex(regex, x) for x in value)

    for proc in processors:
        if value is None:
            break
        _proc = proc
        proc = wrap_loader_context(proc, self.context)
        try:
            value = proc(value)
        except Exception as e:
            raise ValueError("Error with processor %s value=%r error='%s: %s'" %
                             (_proc.__class__.__name__, value,
                              type(e).__name__, str(e)))
    return value
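The method above is ItemLoader.get_value: arg_to_iter lets the re keyword work on a scalar value and on a list of values alike. A small usage sketch follows; the processor import path is assumed to be scrapy.loader.processors, which moved to the itemloaders package in newer Scrapy:

from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst

loader = ItemLoader()
# the single string is wrapped by arg_to_iter before the regex is applied,
# so scalar and list inputs behave the same way
value = loader.get_value('name: foo', TakeFirst(), str.upper, re='name: (.+)')
# value == 'FOO'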
Example 2: __call__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __call__(self, value, loader_context=None):
    values = arg_to_iter(value)
    if loader_context:
        context = ChainMap(loader_context, self.default_loader_context)
    else:
        context = self.default_loader_context
    wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
    for func in wrapped_funcs:
        next_values = []
        for v in values:
            try:
                next_values += arg_to_iter(func(v))
            except Exception as e:
                raise ValueError("Error in MapCompose with "
                                 "%s value=%r error='%s: %s'" %
                                 (str(func), value, type(e).__name__,
                                  str(e)))
        values = next_values
    return values
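This is MapCompose.__call__: arg_to_iter is used twice, once to normalize the incoming value and once to flatten whatever each function returns. A brief usage sketch under the same import-path assumption as above:

from scrapy.loader.processors import MapCompose

clean = MapCompose(str.strip, str.lower)
print(clean([' Foo ', ' BAR ']))  # ['foo', 'bar']
print(clean(' Single '))          # ['single'] -- a scalar input is normalized too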
Example 3: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, link_extractor, allow, deny, allow_domains, deny_domains,
             restrict_xpaths, canonicalize, deny_extensions, restrict_css, restrict_text):
    self.link_extractor = link_extractor

    self.allow_res = [x if isinstance(x, _re_type) else re.compile(x)
                      for x in arg_to_iter(allow)]
    self.deny_res = [x if isinstance(x, _re_type) else re.compile(x)
                     for x in arg_to_iter(deny)]

    self.allow_domains = set(arg_to_iter(allow_domains))
    self.deny_domains = set(arg_to_iter(deny_domains))

    self.restrict_xpaths = tuple(arg_to_iter(restrict_xpaths))
    self.restrict_xpaths += tuple(map(self._csstranslator.css_to_xpath,
                                      arg_to_iter(restrict_css)))

    self.canonicalize = canonicalize
    if deny_extensions is None:
        deny_extensions = IGNORED_EXTENSIONS
    self.deny_extensions = {'.' + e for e in arg_to_iter(deny_extensions)}
    self.restrict_text = [x if isinstance(x, _re_type) else re.compile(x)
                          for x in arg_to_iter(restrict_text)]
Example 4: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
             tags=('a', 'area'), attrs=('href',), canonicalize=False,
             unique=True, process_value=None, deny_extensions=None, restrict_css=(),
             strip=True, restrict_text=None):
    tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
    tag_func = lambda x: x in tags
    attr_func = lambda x: x in attrs
    lx = LxmlParserLinkExtractor(
        tag=tag_func,
        attr=attr_func,
        unique=unique,
        process=process_value,
        strip=strip,
        canonicalized=canonicalize
    )

    super(LxmlLinkExtractor, self).__init__(lx, allow=allow, deny=deny,
        allow_domains=allow_domains, deny_domains=deny_domains,
        restrict_xpaths=restrict_xpaths, restrict_css=restrict_css,
        canonicalize=canonicalize, deny_extensions=deny_extensions,
        restrict_text=restrict_text)
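Because every filtering argument is passed through arg_to_iter, the public LinkExtractor built on top of these classes accepts either a single pattern/domain or a sequence of them. A small illustration (the patterns and domains are invented for the example):

from scrapy.linkextractors import LinkExtractor

# single values are accepted...
le_single = LinkExtractor(allow=r'/article/\d+', allow_domains='example.com')
# ...and so are sequences
le_multi = LinkExtractor(allow=[r'/article/\d+', r'/post/\d+'],
                         deny_domains=('ads.example.com', 'cdn.example.com'))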
Example 5: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
             tags=('a', 'area'), attrs=('href',), canonicalize=False, unique=True,
             process_value=None, deny_extensions=None, restrict_css=(),
             strip=True, restrict_text=()):
    warnings.warn(
        "SgmlLinkExtractor is deprecated and will be removed in future releases. "
        "Please use scrapy.linkextractors.LinkExtractor",
        ScrapyDeprecationWarning, stacklevel=2,
    )

    tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
    tag_func = lambda x: x in tags
    attr_func = lambda x: x in attrs

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', ScrapyDeprecationWarning)
        lx = BaseSgmlLinkExtractor(tag=tag_func, attr=attr_func,
                                   unique=unique, process_value=process_value, strip=strip,
                                   canonicalized=canonicalize)

    super(SgmlLinkExtractor, self).__init__(lx, allow=allow, deny=deny,
        allow_domains=allow_domains, deny_domains=deny_domains,
        restrict_xpaths=restrict_xpaths, restrict_css=restrict_css,
        canonicalize=canonicalize, deny_extensions=deny_extensions,
        restrict_text=restrict_text)
Example 6: another_process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def another_process_item(self, result, item, info):
    """
    Custom process_item function that also handles Request results.
    """
    assert isinstance(result, (Item, Request)), \
        "WoaiduBookFile pipeline's item_completed must return Item or Request, got %s" % \
        (type(result))
    if isinstance(result, Item):
        return result
    elif isinstance(result, Request):
        dlist = [self._process_request(r, info) for r in arg_to_iter(result)]
        dfd = DeferredList(dlist, consumeErrors=1)
        dfd.addCallback(self.item_completed, item, info)
        # XXX: This may cause one item to be returned multiple times, depending on how
        # many times the download URL fails. That is acceptable: when an error is raised,
        # the item is no longer processed by further pipeline components, and when every
        # URL download fails we can drop the item whose book_file or book_file_url is empty.
        return dfd.addCallback(self.another_process_item, item, info)
    else:
        raise NofilesDrop
Example 7: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
    if 'meta' not in spider.name:
        return item
    info = self.spiderinfo
    requests = arg_to_iter(self.get_media_requests(item, info))
    dlist = [self._process_request(r, info) for r in requests]
    dfd = DeferredList(dlist, consumeErrors=1)
    return dfd.addCallback(self.item_completed, item, info)
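Here get_media_requests may return a single Request, a list of Requests, or None, and arg_to_iter lets process_item treat all three cases the same way. A hypothetical pipeline subclass sketch (the file_url item field is an assumption, not part of Scrapy):

import scrapy
from scrapy.pipelines.files import FilesPipeline

class SingleFilePipeline(FilesPipeline):
    def get_media_requests(self, item, info):
        # returning a bare Request (not wrapped in a list) is fine:
        # process_item normalizes it with arg_to_iter
        return scrapy.Request(item['file_url'])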
Example 8: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
    info = self.spiderinfo
    requests = arg_to_iter(self.get_media_requests(item, info))
    dlist = [self._process_request(r, info, item, spider) for r in requests]
    dfd = DeferredList(dlist, consumeErrors=1)
    return dfd.addCallback(self.item_completed, item, info)
Example 9: _add_value

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def _add_value(self, field_name, value):
    value = arg_to_iter(value)
    processed_value = self._process_input_value(field_name, value)
    if processed_value:
        self._values[field_name] += arg_to_iter(processed_value)
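_add_value is what backs ItemLoader.add_value: both the raw value and the processed value are normalized with arg_to_iter, so repeated calls keep appending to the same field. A short usage sketch (ProductItem is a hypothetical item defined only for this example):

import scrapy
from scrapy.loader import ItemLoader

class ProductItem(scrapy.Item):
    name = scrapy.Field()
    tags = scrapy.Field()

loader = ItemLoader(item=ProductItem())
loader.add_value('name', 'Widget')          # scalar -> stored as ['Widget']
loader.add_value('tags', ['red', 'small'])  # list   -> extended as-is
loader.add_value('tags', 'sale')            # repeated calls accumulate
# loader.load_item() -> {'name': ['Widget'], 'tags': ['red', 'small', 'sale']}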
Example 10: _get_xpathvalues

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def _get_xpathvalues(self, xpaths, **kw):
    self._check_selector_method()
    xpaths = arg_to_iter(xpaths)
    return flatten(self.selector.xpath(xpath).getall() for xpath in xpaths)
Example 11: _get_cssvalues

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def _get_cssvalues(self, csss, **kw):
    self._check_selector_method()
    csss = arg_to_iter(csss)
    return flatten(self.selector.css(css).getall() for css in csss)
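Both helpers accept a single expression or a list of expressions thanks to arg_to_iter, which is what makes ItemLoader.add_xpath and ItemLoader.add_css flexible. A small sketch (the HTML and the TextItem class are invented for the example):

import scrapy
from scrapy import Selector
from scrapy.loader import ItemLoader

class TextItem(scrapy.Item):
    words = scrapy.Field()

html = '<div><p class="a">one</p><p class="b">two</p></div>'
loader = ItemLoader(item=TextItem(), selector=Selector(text=html))
loader.add_css('words', 'p.a::text')                 # a single CSS expression
loader.add_css('words', ['p.a::text', 'p.b::text'])  # or a list of expressions
# loader.get_collected_values('words') -> ['one', 'one', 'two']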
Example 12: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
    info = self.spiderinfo
    requests = arg_to_iter(self.get_media_requests(item, info))
    dlist = [self._process_request(r, info) for r in requests]
    dfd = DeferredList(dlist, consumeErrors=1)
    return dfd.addCallback(self.item_completed, item, info)
Example 13: iterate_spider_output

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def iterate_spider_output(result):
    return arg_to_iter(result)
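iterate_spider_output is a thin alias over arg_to_iter: it lets the framework iterate uniformly over whatever a spider callback returns, whether that is None, a single item or request, or an iterable. A self-contained illustration:

from scrapy.utils.misc import arg_to_iter

def iterate_spider_output(result):
    return arg_to_iter(result)

assert list(iterate_spider_output(None)) == []                                     # no output
assert list(iterate_spider_output({'title': 'foo'})) == [{'title': 'foo'}]         # a single item
assert list(iterate_spider_output([{'a': 1}, {'b': 2}])) == [{'a': 1}, {'b': 2}]   # already a list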
Example 14: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
             tags=('a', 'area'), attrs=('href',), canonicalize=True, unique=True, process_value=None,
             deny_extensions=None):
    tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
    tag_func = lambda x: x in tags
    attr_func = lambda x: x in attrs
    lx = LxmlParserLinkExtractor(tag=tag_func, attr=attr_func,
                                 unique=unique, process=process_value)

    super(LxmlLinkExtractor, self).__init__(lx, allow, deny,
        allow_domains, deny_domains, restrict_xpaths, canonicalize,
        deny_extensions)
Example 15: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
    """
    Custom process_item function, so it will also handle Request results.
    """
    info = self.spiderinfo
    requests = arg_to_iter(self.get_media_requests(item, info))
    dlist = [self._process_request(r, info) for r in requests]
    dfd = DeferredList(dlist, consumeErrors=1)
    return dfd.addCallback(self.item_completed, item, info)