当前位置: 首页>>代码示例>>Python>>正文


Python exceptions.NotConfigured异常类代码示例

本文整理汇总了Python中scrapy.exceptions.NotConfigured方法的典型用法代码示例。如果您正苦于以下问题:Python exceptions.NotConfigured方法的具体用法?Python exceptions.NotConfigured怎么用?Python exceptions.NotConfigured使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scrapy.exceptions的用法示例。


在下文中一共展示了exceptions.NotConfigured方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def parse(self, response):
        """Dispatch feed parsing to the node iterator named by ``self.iterator``.

        The subclass must define ``parse_node``; otherwise NotConfigured is
        raised. An unrecognised iterator name raises NotSupported.
        """
        if not hasattr(self, 'parse_node'):
            raise NotConfigured('You must define parse_node method in order to scrape this XML feed')

        response = self.adapt_response(response)
        iterator = self.iterator
        if iterator == 'iternodes':
            nodes = self._iternodes(response)
        elif iterator in ('xml', 'html'):
            # Both selector-backed iterators differ only in the parser type.
            selector = Selector(response, type=iterator)
            self._register_namespaces(selector)
            nodes = selector.xpath('//%s' % self.itertag)
        else:
            raise NotSupported('Unsupported node iterator')

        return self.parse_nodes(response, nodes)
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:21,代码来源:feed.py

示例2: from_settings

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def from_settings(cls, settings, crawler=None):
        """Build the middleware manager from *settings*.

        Components whose construction raises NotConfigured are skipped
        (logged as disabled when the exception carries a message); the
        remainder are instantiated in settings order.
        """
        middlewares = []
        enabled = []
        for clspath in cls._get_mwlist_from_settings(settings):
            try:
                mw = create_instance(load_object(clspath), settings, crawler)
            except NotConfigured as e:
                if e.args:
                    logger.warning("Disabled %(clsname)s: %(eargs)s",
                                   {'clsname': clspath.split('.')[-1],
                                    'eargs': e.args[0]},
                                   extra={'crawler': crawler})
            else:
                middlewares.append(mw)
                enabled.append(clspath)

        logger.info("Enabled %(componentname)ss:\n%(enabledlist)s",
                    {'componentname': cls.component_name,
                     'enabledlist': pprint.pformat(enabled)},
                    extra={'crawler': crawler})
        return cls(*middlewares)
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:24,代码来源:middleware.py

示例3: __init__

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def __init__(self, crawler):
        """Set up the telnet console extension for *crawler*.

        Raises NotConfigured when TELNETCONSOLE_ENABLED is off, or when it
        is on but the required twisted.conch modules failed to import.
        """
        settings = crawler.settings
        if not settings.getbool('TELNETCONSOLE_ENABLED'):
            raise NotConfigured
        if not TWISTED_CONCH_AVAILABLE:
            raise NotConfigured(
                'TELNETCONSOLE_ENABLED setting is True but required twisted '
                'modules failed to import:\n' + _TWISTED_CONCH_TRACEBACK)
        self.crawler = crawler
        self.noisy = False
        self.portrange = [int(p) for p in settings.getlist('TELNETCONSOLE_PORT')]
        self.host = settings['TELNETCONSOLE_HOST']
        self.username = settings['TELNETCONSOLE_USERNAME']
        password = settings['TELNETCONSOLE_PASSWORD']
        if not password:
            # No configured password: generate a random one and log it so
            # the operator can still connect.
            password = binascii.hexlify(os.urandom(8)).decode('utf8')
            logger.info('Telnet Password: %s', password)
        self.password = password

        crawler.signals.connect(self.start_listening, signals.engine_started)
        crawler.signals.connect(self.stop_listening, signals.engine_stopped)
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:22,代码来源:telnet.py

示例4: __init__

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def __init__(self, settings):
        """Configure feed export from *settings*.

        Raises NotConfigured when FEED_URI is unset, or when the URI
        scheme / output format has no supported storage or exporter.
        """
        self.settings = settings
        self.urifmt = settings['FEED_URI']
        if not self.urifmt:
            raise NotConfigured
        self.format = settings['FEED_FORMAT'].lower()
        self.export_encoding = settings['FEED_EXPORT_ENCODING']
        # Storages must be loaded before the support checks below.
        self.storages = self._load_components('FEED_STORAGES')
        self.exporters = self._load_components('FEED_EXPORTERS')
        if not self._storage_supported(self.urifmt):
            raise NotConfigured
        if not self._exporter_supported(self.format):
            raise NotConfigured
        self.store_empty = settings.getbool('FEED_STORE_EMPTY')
        self._exporting = False
        self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None
        # FEED_EXPORT_INDENT unset means "no indentation".
        raw_indent = settings.get('FEED_EXPORT_INDENT')
        self.indent = settings.getint('FEED_EXPORT_INDENT') if raw_indent is not None else None
        uripar = settings['FEED_URI_PARAMS']
        self._uripar = load_object(uripar) if uripar else lambda x, y: None
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:23,代码来源:feedexport.py

示例5: _load_handler

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_handler(self, scheme, skip_lazy=False):
        """Instantiate, cache and return the download handler for *scheme*.

        Returns None when the handler is lazy and *skip_lazy* is set, when
        it raises NotConfigured, or when loading fails for any other
        reason; failures are recorded in ``self._notconfigured``.
        """
        path = self._schemes[scheme]
        try:
            handler_cls = load_object(path)
            if skip_lazy and getattr(handler_cls, 'lazy', True):
                return None
            handler = handler_cls(self._crawler.settings)
        except NotConfigured as ex:
            self._notconfigured[scheme] = str(ex)
            return None
        except Exception as ex:
            logger.error('Loading "%(clspath)s" for scheme "%(scheme)s"',
                         {"clspath": path, "scheme": scheme},
                         exc_info=True, extra={'crawler': self._crawler})
            self._notconfigured[scheme] = str(ex)
            return None
        self._handlers[scheme] = handler
        return handler
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:21,代码来源:__init__.py

示例6: from_crawler

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def from_crawler(cls, crawler):
        """Create the rotating-proxy middleware from crawler settings.

        Proxies are read from the file at ROTATING_PROXY_LIST_PATH (one
        per line, blanks skipped) when set, otherwise from the
        ROTATING_PROXY_LIST setting. Raises NotConfigured when neither
        yields any proxies.
        """
        settings = crawler.settings
        path = settings.get('ROTATING_PROXY_LIST_PATH', None)
        if path is None:
            proxies = settings.getlist('ROTATING_PROXY_LIST')
        else:
            with codecs.open(path, 'r', encoding='utf8') as f:
                proxies = [line.strip() for line in f if line.strip()]
        if not proxies:
            raise NotConfigured()
        mw = cls(
            proxy_list=proxies,
            logstats_interval=settings.getfloat('ROTATING_PROXY_LOGSTATS_INTERVAL', 30),
            stop_if_no_proxies=settings.getbool('ROTATING_PROXY_CLOSE_SPIDER', False),
            max_proxies_to_try=settings.getint('ROTATING_PROXY_PAGE_RETRY_TIMES', 5),
            backoff_base=settings.getfloat('ROTATING_PROXY_BACKOFF_BASE', 300),
            backoff_cap=settings.getfloat('ROTATING_PROXY_BACKOFF_CAP', 3600),
            crawler=crawler,
        )
        for handler, signal in ((mw.engine_started, signals.engine_started),
                                (mw.engine_stopped, signals.engine_stopped)):
            crawler.signals.connect(handler, signal=signal)
        return mw
开发者ID:TeamHG-Memex,项目名称:scrapy-rotating-proxies,代码行数:26,代码来源:middlewares.py

示例7: _load_jsonschema_validator

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_jsonschema_validator(cls, schema):
        """Return a JSONSchemaValidator for *schema*.

        String schemas are first resolved via get_schema_from; anything
        that does not resolve to a dict raises NotConfigured.
        """
        if isinstance(schema, six.string_types):
            schema = get_schema_from(schema)
        if isinstance(schema, dict):
            return JSONSchemaValidator(schema)
        raise NotConfigured(
            "Invalid schema, jsonschemas must be defined as:\n"
            "- a python dict.\n"
            "- an object path to a python dict.\n"
            "- an object path to a JSON string.\n"
            "- a path to a JSON file."
        )
开发者ID:scrapinghub,项目名称:spidermon,代码行数:14,代码来源:pipelines.py

示例8: _load_schematics_validator

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_schematics_validator(cls, model_path):
        """Load *model_path* and wrap it in a SchematicsValidator.

        Raises NotConfigured when the loaded object is not a subclass of
        schematics.models.Model.
        """
        model_class = load_object(model_path)
        if issubclass(model_class, Model):
            return SchematicsValidator(model_class)
        raise NotConfigured(
            "Invalid model, models must subclass schematics.models.Model"
        )
开发者ID:scrapinghub,项目名称:spidermon,代码行数:9,代码来源:pipelines.py

示例9: skip_if_no_boto

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def skip_if_no_boto():
    """Skip the calling test when botocore/boto is unavailable.

    is_botocore() raises NotConfigured when the library is missing;
    convert that into SkipTest so the test is skipped, not failed.
    """
    try:
        is_botocore()
    except NotConfigured as exc:
        raise SkipTest(exc)
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:7,代码来源:test.py

示例10: project_data_dir

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def project_data_dir(project='default'):
    """Return the current project data dir, creating it if it doesn't exist.

    The directory is taken from the *project* option in the datadir
    section of the config when present, otherwise it defaults to a
    '.scrapy' directory next to the closest scrapy.cfg.

    Raises NotConfigured when called outside a project, or when no
    scrapy.cfg can be found to infer the data dir from.
    """
    if not inside_project():
        raise NotConfigured("Not inside a project")
    cfg = get_config()
    if cfg.has_option(DATADIR_CFG_SECTION, project):
        d = cfg.get(DATADIR_CFG_SECTION, project)
    else:
        scrapy_cfg = closest_scrapy_cfg()
        if not scrapy_cfg:
            raise NotConfigured("Unable to find scrapy.cfg file to infer project data dir")
        d = abspath(join(dirname(scrapy_cfg), '.scrapy'))
    # exist_ok avoids the race between a separate exists() check and
    # makedirs() when two processes create the data dir concurrently.
    os.makedirs(d, exist_ok=True)
    return d
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:17,代码来源:project.py

示例11: _load_components

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_components(self, setting_prefix):
        """Map component keys under *setting_prefix* to loaded objects.

        Entries whose object raises NotConfigured while loading are
        silently dropped from the result.
        """
        conf = without_none_values(self.settings.getwithbase(setting_prefix))
        components = {}
        for key, objpath in conf.items():
            try:
                components[key] = load_object(objpath)
            except NotConfigured:
                pass
        return components
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:11,代码来源:feedexport.py

示例12: _storage_supported

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _storage_supported(self, uri):
        """Return True when *uri* names a usable feed storage.

        Logs an error and returns None (falsy) for an unknown scheme or
        for a storage whose construction raises NotConfigured.
        """
        scheme = urlparse(uri).scheme
        if scheme not in self.storages:
            logger.error("Unknown feed storage scheme: %(scheme)s",
                         {'scheme': scheme})
            return None
        try:
            self._get_storage(uri)
        except NotConfigured as e:
            logger.error("Disabled feed storage scheme: %(scheme)s. "
                         "Reason: %(reason)s",
                         {'scheme': scheme, 'reason': str(e)})
            return None
        return True
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:15,代码来源:feedexport.py

示例13: __init__

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def __init__(self, crawler):
        """Set up fixture recording for *crawler*'s spider.

        Raises ValueError when this middleware is not listed in
        SPIDER_MIDDLEWARES, and NotConfigured when AUTOUNIT_ENABLED is
        off. Clears any previously recorded fixtures for the spider.
        """
        settings = crawler.settings
        spider = crawler.spider

        if not any(
            self.__class__.__name__ in s
            for s in settings.getwithbase('SPIDER_MIDDLEWARES').keys()
        ):
            raise ValueError(
                '%s must be in SPIDER_MIDDLEWARES' % (
                    self.__class__.__name__,))
        if not settings.getbool('AUTOUNIT_ENABLED'):
            raise NotConfigured('scrapy-autounit is not enabled')
        if settings.getint('CONCURRENT_REQUESTS') > 1:
            # logger.warn is a deprecated alias of logger.warning
            logger.warning(
                'Recording with concurrency > 1! '
                'Data races in shared object modification may create broken '
                'tests.'
            )

        # The setting is clamped so at least 10 fixtures are kept per callback.
        self.max_fixtures = max(
            settings.getint('AUTOUNIT_MAX_FIXTURES_PER_CALLBACK', default=10),
            10,
        )

        self.base_path = settings.get(
            'AUTOUNIT_BASE_PATH',
            default=os.path.join(get_project_dir(), 'autounit')
        )
        create_dir(self.base_path, exist_ok=True)
        clear_fixtures(self.base_path, sanitize_module_name(spider.name))

        self.fixture_counters = {}
开发者ID:scrapinghub,项目名称:scrapy-autounit,代码行数:37,代码来源:middleware.py

示例14: from_crawler

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def from_crawler(cls, crawler):
        """Build the crawl-once middleware from crawler settings.

        Raises NotConfigured when CRAWL_ONCE_ENABLED is set to False
        (it defaults to True).
        """
        settings = crawler.settings
        if not settings.getbool('CRAWL_ONCE_ENABLED', True):
            raise NotConfigured()
        db_path = data_path(settings.get('CRAWL_ONCE_PATH', 'crawl_once'),
                            createdir=True)
        default = settings.getbool('CRAWL_ONCE_DEFAULT', default=False)
        mw = cls(db_path, crawler.stats, default)
        crawler.signals.connect(mw.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(mw.spider_closed, signal=signals.spider_closed)
        return mw
开发者ID:TeamHG-Memex,项目名称:scrapy-crawl-once,代码行数:13,代码来源:middlewares.py

示例15: test_not_configured

# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def test_not_configured():
    """The middleware must refuse to load when CRAWL_ONCE_ENABLED is False."""
    disabled_crawler = get_crawler(settings_dict={'CRAWL_ONCE_ENABLED': False})
    with pytest.raises(NotConfigured):
        CrawlOnceMiddleware.from_crawler(disabled_crawler)
开发者ID:TeamHG-Memex,项目名称:scrapy-crawl-once,代码行数:6,代码来源:test_middleware.py


注:本文中的scrapy.exceptions.NotConfigured方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。