

Python misc.arg_to_iter Method Code Examples

This article collects typical usage examples of the Python method scrapy.utils.misc.arg_to_iter. If you are wondering what misc.arg_to_iter does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples for the containing module, scrapy.utils.misc.


The following presents 15 code examples of the misc.arg_to_iter method, sorted by popularity by default.
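
Before reading the examples, it helps to recall what arg_to_iter itself does: it normalizes an argument into something that can always be iterated, treating None as empty and single values as one-element lists. The short sketch below illustrates that behavior; the exact set of types treated as "single values" (dict, str, bytes) reflects the Scrapy versions these examples were taken from and may differ slightly in other releases.

from scrapy.utils.misc import arg_to_iter

# None becomes an empty list, so "no value" can be looped over safely.
assert list(arg_to_iter(None)) == []

# Single non-iterable values are wrapped in a one-element list.
assert list(arg_to_iter(42)) == [42]

# Strings, bytes and dicts are treated as single values, not iterated over.
assert list(arg_to_iter('foo')) == ['foo']
assert list(arg_to_iter({'k': 'v'})) == [{'k': 'v'}]

# Real iterables (lists, tuples, generators, ...) are passed through unchanged.
assert list(arg_to_iter(['a', 'b'])) == ['a', 'b']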

Example 1: get_value

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def get_value(self, value, *processors, **kw):
        regex = kw.get('re', None)
        if regex:
            value = arg_to_iter(value)
            value = flatten(extract_regex(regex, x) for x in value)

        for proc in processors:
            if value is None:
                break
            _proc = proc
            proc = wrap_loader_context(proc, self.context)
            try:
                value = proc(value)
            except Exception as e:
                raise ValueError("Error with processor %s value=%r error='%s: %s'" %
                                 (_proc.__class__.__name__, value,
                                  type(e).__name__, str(e)))
        return value 
Author: wistbean, Project: learn_python3_spider, Lines of code: 20, Source file: __init__.py
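
As a usage sketch (not part of the snippet above): ItemLoader.get_value can be called directly with processors and an optional re keyword, and arg_to_iter is what lets a single string or a list of strings be handled uniformly before the regex is applied. The import paths below assume the pre-2.0 scrapy.loader.processors layout used by this project.

from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst

loader = ItemLoader()
# 're' extracts with a regex, TakeFirst picks the first match, str.upper post-processes it.
loader.get_value(u'name: foo', TakeFirst(), str.upper, re='name: (.+)')
# -> 'FOO'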

Example 2: __call__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __call__(self, value, loader_context=None):
        values = arg_to_iter(value)
        if loader_context:
            context = ChainMap(loader_context, self.default_loader_context)
        else:
            context = self.default_loader_context
        wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
        for func in wrapped_funcs:
            next_values = []
            for v in values:
                try:
                    next_values += arg_to_iter(func(v))
                except Exception as e:
                    raise ValueError("Error in MapCompose with "
                                     "%s value=%r error='%s: %s'" %
                                     (str(func), value, type(e).__name__,
                                      str(e)))
            values = next_values
        return values 
Author: wistbean, Project: learn_python3_spider, Lines of code: 21, Source file: processors.py
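
A brief usage sketch (assuming the processor is exported as scrapy.loader.processors.MapCompose in this Scrapy generation): each function is applied to every value, and arg_to_iter flattens whatever each function returns, so a function may return a single value, a list of values, or None to drop a value.

from scrapy.loader.processors import MapCompose

# Each function is applied to every value in turn.
proc = MapCompose(str.strip, str.upper)
proc(['  hello ', 'world'])      # -> ['HELLO', 'WORLD']

# A function may also return a list (flattened via arg_to_iter) or None (the value is dropped).
split_words = MapCompose(str.split)
split_words('hello world')       # -> ['hello', 'world']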

Example 3: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, link_extractor, allow, deny, allow_domains, deny_domains,
                 restrict_xpaths, canonicalize, deny_extensions, restrict_css, restrict_text):

        self.link_extractor = link_extractor

        self.allow_res = [x if isinstance(x, _re_type) else re.compile(x)
                          for x in arg_to_iter(allow)]
        self.deny_res = [x if isinstance(x, _re_type) else re.compile(x)
                         for x in arg_to_iter(deny)]

        self.allow_domains = set(arg_to_iter(allow_domains))
        self.deny_domains = set(arg_to_iter(deny_domains))

        self.restrict_xpaths = tuple(arg_to_iter(restrict_xpaths))
        self.restrict_xpaths += tuple(map(self._csstranslator.css_to_xpath,
                                          arg_to_iter(restrict_css)))

        self.canonicalize = canonicalize
        if deny_extensions is None:
            deny_extensions = IGNORED_EXTENSIONS
        self.deny_extensions = {'.' + e for e in arg_to_iter(deny_extensions)}
        self.restrict_text = [x if isinstance(x, _re_type) else re.compile(x)
                              for x in arg_to_iter(restrict_text)] 
Author: wistbean, Project: learn_python3_spider, Lines of code: 25, Source file: __init__.py

Example 4: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
                 tags=('a', 'area'), attrs=('href',), canonicalize=False,
                 unique=True, process_value=None, deny_extensions=None, restrict_css=(),
                 strip=True, restrict_text=None):
        tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
        tag_func = lambda x: x in tags
        attr_func = lambda x: x in attrs
        lx = LxmlParserLinkExtractor(
            tag=tag_func,
            attr=attr_func,
            unique=unique,
            process=process_value,
            strip=strip,
            canonicalized=canonicalize
        )

        super(LxmlLinkExtractor, self).__init__(lx, allow=allow, deny=deny,
                                                allow_domains=allow_domains, deny_domains=deny_domains,
                                                restrict_xpaths=restrict_xpaths, restrict_css=restrict_css,
                                                canonicalize=canonicalize, deny_extensions=deny_extensions,
                                                restrict_text=restrict_text) 
Author: wistbean, Project: learn_python3_spider, Lines of code: 23, Source file: lxmlhtml.py
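
For reference, a small usage sketch (using the public scrapy.linkextractors.LinkExtractor alias; the patterns and selectors are made up for illustration): because the base class runs every argument through arg_to_iter, allow, deny, deny_domains, restrict_xpaths and restrict_css each accept either a single value or a sequence of values.

from scrapy.linkextractors import LinkExtractor

# A single pattern string and a tuple of patterns are both accepted.
lx_single = LinkExtractor(allow=r'/category/\d+', restrict_css='div.nav')
lx_many = LinkExtractor(allow=(r'/category/\d+', r'/page/\d+'),
                        deny_domains='ads.example.com')

# links = lx_many.extract_links(response)  # given a scrapy Response object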

Example 5: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
                 tags=('a', 'area'), attrs=('href',), canonicalize=False, unique=True,
                 process_value=None, deny_extensions=None, restrict_css=(),
                 strip=True, restrict_text=()):
        warnings.warn(
            "SgmlLinkExtractor is deprecated and will be removed in future releases. "
            "Please use scrapy.linkextractors.LinkExtractor",
            ScrapyDeprecationWarning, stacklevel=2,
        )

        tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
        tag_func = lambda x: x in tags
        attr_func = lambda x: x in attrs

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', ScrapyDeprecationWarning)
            lx = BaseSgmlLinkExtractor(tag=tag_func, attr=attr_func,
                                       unique=unique, process_value=process_value, strip=strip,
                                       canonicalized=canonicalize)

        super(SgmlLinkExtractor, self).__init__(lx, allow=allow, deny=deny,
                                                allow_domains=allow_domains, deny_domains=deny_domains,
                                                restrict_xpaths=restrict_xpaths, restrict_css=restrict_css,
                                                canonicalize=canonicalize, deny_extensions=deny_extensions,
                                                restrict_text=restrict_text) 
Author: wistbean, Project: learn_python3_spider, Lines of code: 27, Source file: sgml.py

Example 6: another_process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def another_process_item(self, result, item, info):
        """
            Custom process_item-style handler, so it can also deal with a Request result.
        """

        assert isinstance(result, (Item, Request)), \
            "WoaiduBookFile pipeline' item_completed must return Item or Request, got %s" % \
            (type(result))
        if isinstance(result, Item):
            return result
        elif isinstance(result, Request):
            dlist = [self._process_request(r, info) for r in arg_to_iter(result)]
            dfd = DeferredList(dlist, consumeErrors=1)
            dfd.addCallback(self.item_completed, item, info)
            # XXX: This may cause the same item to be returned multiple times, depending on
            # how many times a download URL fails. That is acceptable: when an error is raised
            # the item is no longer processed by further pipeline components, and when every
            # URL fails we can drop the item whose book_file or book_file_url is empty.
            return dfd.addCallback(self.another_process_item, item, info)
        else:
            raise NofilesDrop 
Author: openslack, Project: openslack-crawler, Lines of code: 24, Source file: file.py

Example 7: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
        if 'meta' not in spider.name:
            return item
        info = self.spiderinfo
        requests = arg_to_iter(self.get_media_requests(item, info))
        dlist = [self._process_request(r, info) for r in requests]
        dfd = DeferredList(dlist, consumeErrors=1)
        return dfd.addCallback(self.item_completed, item, info) 
Author: csuldw, Project: AntSpider, Lines of code: 10, Source file: pipelines.py
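
The arg_to_iter call here is what lets get_media_requests return a single Request, a list of Requests, or None. A hypothetical subclass illustrating that flexibility (FilesPipeline is Scrapy's standard files pipeline; the file_url field name is made up for this sketch):

from scrapy import Request
from scrapy.pipelines.files import FilesPipeline

class SingleFilePipeline(FilesPipeline):
    def get_media_requests(self, item, info):
        # Returning a bare Request, or None when there is nothing to fetch,
        # works because process_item wraps the result with arg_to_iter().
        url = item.get('file_url')
        return Request(url) if url else None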

Example 8: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
        info = self.spiderinfo
        requests = arg_to_iter(self.get_media_requests(item, info))
        dlist = [self._process_request(r, info, item, spider) for r in requests]
        dfd = DeferredList(dlist, consumeErrors=1)
        return dfd.addCallback(self.item_completed, item, info) 
Author: xgfone, Project: snippet, Lines of code: 8, Source file: pipelines.py

Example 9: _add_value

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def _add_value(self, field_name, value):
        value = arg_to_iter(value)
        processed_value = self._process_input_value(field_name, value)
        if processed_value:
            self._values[field_name] += arg_to_iter(processed_value) 
Author: wistbean, Project: learn_python3_spider, Lines of code: 7, Source file: __init__.py

Example 10: _get_xpathvalues

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def _get_xpathvalues(self, xpaths, **kw):
        self._check_selector_method()
        xpaths = arg_to_iter(xpaths)
        return flatten(self.selector.xpath(xpath).getall() for xpath in xpaths) 
Author: wistbean, Project: learn_python3_spider, Lines of code: 6, Source file: __init__.py

Example 11: _get_cssvalues

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def _get_cssvalues(self, csss, **kw):
        self._check_selector_method()
        csss = arg_to_iter(csss)
        return flatten(self.selector.css(css).getall() for css in csss) 
Author: wistbean, Project: learn_python3_spider, Lines of code: 6, Source file: __init__.py

Example 12: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
        info = self.spiderinfo
        requests = arg_to_iter(self.get_media_requests(item, info))
        dlist = [self._process_request(r, info) for r in requests]
        dfd = DeferredList(dlist, consumeErrors=1)
        return dfd.addCallback(self.item_completed, item, info) 
Author: wistbean, Project: learn_python3_spider, Lines of code: 8, Source file: media.py

Example 13: iterate_spider_output

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def iterate_spider_output(result):
    return arg_to_iter(result) 
Author: wistbean, Project: learn_python3_spider, Lines of code: 4, Source file: spider.py
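
Since iterate_spider_output simply delegates to arg_to_iter, a spider callback may return a single item or Request, an iterable of them, or None, and downstream code can always iterate over the result. A small illustration (the callbacks and dict items below are made up):

from scrapy.utils.spider import iterate_spider_output

def callback_single(response=None):
    return {'title': 'only one item'}           # a single dict item

def callback_many(response=None):
    yield {'title': 'a'}                        # a generator of items
    yield {'title': 'b'}

list(iterate_spider_output(callback_single()))  # -> [{'title': 'only one item'}]
list(iterate_spider_output(callback_many()))    # -> [{'title': 'a'}, {'title': 'b'}]
list(iterate_spider_output(None))                # -> []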

Example 14: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
                 tags=('a', 'area'), attrs=('href',), canonicalize=True, unique=True, process_value=None,
                 deny_extensions=None):
        tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
        tag_func = lambda x: x in tags
        attr_func = lambda x: x in attrs
        lx = LxmlParserLinkExtractor(tag=tag_func, attr=attr_func,
                                     unique=unique, process=process_value)

        super(LxmlLinkExtractor, self).__init__(lx, allow, deny,
                                                allow_domains, deny_domains, restrict_xpaths, canonicalize,
                                                deny_extensions) 
Author: openslack, Project: openslack-crawler, Lines of code: 14, Source file: lxmlhtml.py

Example 15: process_item

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import arg_to_iter [as alias]
def process_item(self, item, spider):
        """
            Custom process_item, so it also handles the Request results returned by get_media_requests.
        """
        info = self.spiderinfo
        requests = arg_to_iter(self.get_media_requests(item, info))
        dlist = [self._process_request(r, info) for r in requests]
        dfd = DeferredList(dlist, consumeErrors=1)
        return dfd.addCallback(self.item_completed, item, info) 
Author: openslack, Project: openslack-crawler, Lines of code: 11, Source file: file.py


Note: The scrapy.utils.misc.arg_to_iter examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors. Please consult each project's license before using or redistributing the code, and do not reproduce this article without permission.