本文整理汇总了Python中scrapy.exceptions.NotConfigured异常的典型用法代码示例。如果您正苦于以下问题:Python exceptions.NotConfigured的具体用法?Python exceptions.NotConfigured怎么用?Python exceptions.NotConfigured使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该异常所在模块scrapy.exceptions的用法示例。
在下文中一共展示了exceptions.NotConfigured异常的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def parse(self, response):
    """Split a feed response into nodes and hand them to ``parse_nodes``.

    The response is first passed through ``adapt_response``. Nodes are then
    produced according to ``self.iterator``:

    - ``'iternodes'``: ``self._iternodes(response)``
    - ``'xml'`` / ``'html'``: a ``Selector`` of that type, with namespaces
      registered, queried for ``//<itertag>``

    Raises:
        NotConfigured: if the spider has no ``parse_node`` attribute.
        NotSupported: if ``self.iterator`` is none of the values above.
    """
    if not hasattr(self, 'parse_node'):
        raise NotConfigured('You must define parse_node method in order to scrape this XML feed')

    response = self.adapt_response(response)
    iterator = self.iterator

    if iterator == 'iternodes':
        return self.parse_nodes(response, self._iternodes(response))

    if iterator not in ('xml', 'html'):
        raise NotSupported('Unsupported node iterator')

    # The xml and html iterators differ only in the selector type.
    selector = Selector(response, type=iterator)
    self._register_namespaces(selector)
    return self.parse_nodes(response, selector.xpath('//%s' % self.itertag))
示例2: from_settings
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def from_settings(cls, settings, crawler=None):
    """Build an instance of ``cls`` from the components listed in settings.

    Every class path returned by ``cls._get_mwlist_from_settings`` is loaded
    and instantiated with ``create_instance``. A component that raises
    ``NotConfigured`` is skipped; if the exception carries a message, a
    warning naming the component is logged. The paths that were instantiated
    are logged at info level, and the instances are passed positionally to
    ``cls``.
    """
    instances = []
    enabled_paths = []
    for path in cls._get_mwlist_from_settings(settings):
        try:
            component_cls = load_object(path)
            instance = create_instance(component_cls, settings, crawler)
        except NotConfigured as exc:
            # A NotConfigured without arguments disables the component silently.
            if exc.args:
                short_name = path.split('.')[-1]
                logger.warning(
                    "Disabled %(clsname)s: %(eargs)s",
                    {'clsname': short_name, 'eargs': exc.args[0]},
                    extra={'crawler': crawler},
                )
        else:
            instances.append(instance)
            enabled_paths.append(path)

    logger.info(
        "Enabled %(componentname)ss:\n%(enabledlist)s",
        {
            'componentname': cls.component_name,
            'enabledlist': pprint.pformat(enabled_paths),
        },
        extra={'crawler': crawler},
    )
    return cls(*instances)
示例3: __init__
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def __init__(self, crawler):
    """Configure the telnet console from the crawler's settings.

    Reads the TELNETCONSOLE_PORT, TELNETCONSOLE_HOST, TELNETCONSOLE_USERNAME
    and TELNETCONSOLE_PASSWORD settings. When no password is configured, a
    random 16-hex-digit one is generated and logged at info level. Finally
    ``start_listening`` / ``stop_listening`` are connected to the
    ``engine_started`` / ``engine_stopped`` signals.

    Raises:
        NotConfigured: if TELNETCONSOLE_ENABLED is false, or if
            ``TWISTED_CONCH_AVAILABLE`` is false.
    """
    settings = crawler.settings
    if not settings.getbool('TELNETCONSOLE_ENABLED'):
        raise NotConfigured
    if not TWISTED_CONCH_AVAILABLE:
        raise NotConfigured(
            'TELNETCONSOLE_ENABLED setting is True but required twisted '
            'modules failed to import:\n' + _TWISTED_CONCH_TRACEBACK)

    self.crawler = crawler
    self.noisy = False
    self.portrange = list(map(int, settings.getlist('TELNETCONSOLE_PORT')))
    self.host = settings['TELNETCONSOLE_HOST']
    self.username = settings['TELNETCONSOLE_USERNAME']

    password = settings['TELNETCONSOLE_PASSWORD']
    if not password:
        # No password configured: generate one and log it.
        password = binascii.hexlify(os.urandom(8)).decode('utf8')
        logger.info('Telnet Password: %s', password)
    self.password = password

    signal_manager = self.crawler.signals
    signal_manager.connect(self.start_listening, signals.engine_started)
    signal_manager.connect(self.stop_listening, signals.engine_stopped)
示例4: __init__
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def __init__(self, settings):
    """Configure the feed exporter from ``settings``.

    Reads FEED_URI, FEED_FORMAT (lower-cased), FEED_EXPORT_ENCODING,
    FEED_STORE_EMPTY, FEED_EXPORT_FIELDS, FEED_EXPORT_INDENT and
    FEED_URI_PARAMS, and loads the FEED_STORAGES / FEED_EXPORTERS components.

    Raises:
        NotConfigured: if FEED_URI is empty, or if the storage for the URI
            or the exporter for the format is not supported.
    """
    # self.settings must be assigned first: _load_components reads it.
    self.settings = settings

    self.urifmt = settings['FEED_URI']
    if not self.urifmt:
        raise NotConfigured
    self.format = settings['FEED_FORMAT'].lower()
    self.export_encoding = settings['FEED_EXPORT_ENCODING']

    self.storages = self._load_components('FEED_STORAGES')
    self.exporters = self._load_components('FEED_EXPORTERS')
    # Short-circuits like the original: the exporter is only checked when
    # the storage check has passed.
    if not (self._storage_supported(self.urifmt)
            and self._exporter_supported(self.format)):
        raise NotConfigured

    self.store_empty = settings.getbool('FEED_STORE_EMPTY')
    self._exporting = False
    self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None

    indent_is_set = settings.get('FEED_EXPORT_INDENT') is not None
    self.indent = settings.getint('FEED_EXPORT_INDENT') if indent_is_set else None

    uripar_path = settings['FEED_URI_PARAMS']
    if uripar_path:
        self._uripar = load_object(uripar_path)
    else:
        self._uripar = lambda x, y: None
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_handler(self, scheme, skip_lazy=False):
    """Instantiate and register the download handler for ``scheme``.

    Args:
        scheme: key into ``self._schemes`` giving the handler's object path.
        skip_lazy: when true, a handler class whose ``lazy`` attribute is
            truthy (or missing) is not instantiated.

    Returns:
        The handler instance, which is also stored in ``self._handlers``.
        ``None`` if the handler was skipped as lazy or failed to load; on
        failure the error text is recorded in ``self._notconfigured``.
    """
    handler_path = self._schemes[scheme]
    try:
        handler_cls = load_object(handler_path)
        if skip_lazy and getattr(handler_cls, 'lazy', True):
            return None
        handler = handler_cls(self._crawler.settings)
    except NotConfigured as exc:
        self._notconfigured[scheme] = str(exc)
        return None
    except Exception as exc:
        # Unexpected failures are logged with a traceback.
        logger.error('Loading "%(clspath)s" for scheme "%(scheme)s"',
                     {"clspath": handler_path, "scheme": scheme},
                     exc_info=True, extra={'crawler': self._crawler})
        self._notconfigured[scheme] = str(exc)
        return None

    self._handlers[scheme] = handler
    return handler
示例6: from_crawler
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def from_crawler(cls, crawler):
    """Create the rotating-proxy middleware from the crawler's settings.

    The proxy list is read from the UTF-8 file named by
    ROTATING_PROXY_LIST_PATH (one proxy per line, blank lines ignored) when
    that setting is present, otherwise from ROTATING_PROXY_LIST. The new
    instance's ``engine_started`` / ``engine_stopped`` methods are connected
    to the signals of the same name.

    Raises:
        NotConfigured: if the resulting proxy list is empty.
    """
    settings = crawler.settings
    list_path = settings.get('ROTATING_PROXY_LIST_PATH', None)
    if list_path is None:
        proxies = settings.getlist('ROTATING_PROXY_LIST')
    else:
        with codecs.open(list_path, 'r', encoding='utf8') as fp:
            stripped_lines = (line.strip() for line in fp)
            proxies = [entry for entry in stripped_lines if entry]

    if not proxies:
        raise NotConfigured()

    middleware = cls(
        proxy_list=proxies,
        logstats_interval=settings.getfloat('ROTATING_PROXY_LOGSTATS_INTERVAL', 30),
        stop_if_no_proxies=settings.getbool('ROTATING_PROXY_CLOSE_SPIDER', False),
        max_proxies_to_try=settings.getint('ROTATING_PROXY_PAGE_RETRY_TIMES', 5),
        backoff_base=settings.getfloat('ROTATING_PROXY_BACKOFF_BASE', 300),
        backoff_cap=settings.getfloat('ROTATING_PROXY_BACKOFF_CAP', 3600),
        crawler=crawler,
    )
    crawler.signals.connect(middleware.engine_started,
                            signal=signals.engine_started)
    crawler.signals.connect(middleware.engine_stopped,
                            signal=signals.engine_stopped)
    return middleware
示例7: _load_jsonschema_validator
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_jsonschema_validator(cls, schema):
    """Return a ``JSONSchemaValidator`` for ``schema``.

    A string ``schema`` is first resolved through ``get_schema_from``; any
    other value is used as given.

    Raises:
        NotConfigured: if the resolved schema is not a dict.
    """
    if isinstance(schema, six.string_types):
        resolved = get_schema_from(schema)
    else:
        resolved = schema

    if isinstance(resolved, dict):
        return JSONSchemaValidator(resolved)

    raise NotConfigured(
        "Invalid schema, jsonschemas must be defined as:\n"
        "- a python dict.\n"
        "- an object path to a python dict.\n"
        "- an object path to a JSON string.\n"
        "- a path to a JSON file."
    )
示例8: _load_schematics_validator
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_schematics_validator(cls, model_path):
    """Return a ``SchematicsValidator`` for the model at ``model_path``.

    Args:
        model_path: object path handed to ``load_object``.

    Returns:
        A ``SchematicsValidator`` wrapping the loaded model class.

    Raises:
        NotConfigured: if the loaded object is not a class, or is a class
            that does not subclass ``Model``.
    """
    model_class = load_object(model_path)
    # issubclass() raises TypeError when its first argument is not a class
    # (e.g. the path points at a function or an instance). Check for a class
    # first so that case is reported as the intended NotConfigured.
    is_model = isinstance(model_class, type) and issubclass(model_class, Model)
    if not is_model:
        raise NotConfigured(
            "Invalid model, models must subclass schematics.models.Model"
        )
    return SchematicsValidator(model_class)
示例9: skip_if_no_boto
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def skip_if_no_boto():
    """Raise ``SkipTest`` when ``is_botocore()`` raises ``NotConfigured``.

    The ``NotConfigured`` exception is passed to ``SkipTest`` as its argument.
    """
    try:
        is_botocore()
    except NotConfigured as exc:
        raise SkipTest(exc)
示例10: project_data_dir
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def project_data_dir(project='default'):
    """Return the data directory of the current project, creating it if missing.

    The directory is taken from the ``project`` option of the
    ``DATADIR_CFG_SECTION`` config section when that option exists; otherwise
    it is the ``.scrapy`` directory next to the closest ``scrapy.cfg`` file.

    Raises:
        NotConfigured: when called outside a project, or when no
            ``scrapy.cfg`` file can be found to infer the directory from.
    """
    if not inside_project():
        raise NotConfigured("Not inside a project")

    config = get_config()
    if not config.has_option(DATADIR_CFG_SECTION, project):
        cfg_file = closest_scrapy_cfg()
        if not cfg_file:
            raise NotConfigured("Unable to find scrapy.cfg file to infer project data dir")
        data_dir = abspath(join(dirname(cfg_file), '.scrapy'))
    else:
        data_dir = config.get(DATADIR_CFG_SECTION, project)

    if not exists(data_dir):
        os.makedirs(data_dir)
    return data_dir
示例11: _load_components
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _load_components(self, setting_prefix):
    """Load the objects configured under ``setting_prefix``.

    Reads ``self.settings.getwithbase(setting_prefix)``, drops entries whose
    value is ``None``, and loads each remaining value with ``load_object``.
    Entries whose loading raises ``NotConfigured`` are left out.

    Returns:
        dict mapping each setting key to its loaded object.
    """
    components = {}
    configured = without_none_values(self.settings.getwithbase(setting_prefix))
    for key, object_path in configured.items():
        try:
            components[key] = load_object(object_path)
        except NotConfigured:
            # Skip components that are not configured.
            continue
    return components
示例12: _storage_supported
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def _storage_supported(self, uri):
    """Report whether a feed storage can be built for ``uri``.

    Args:
        uri: feed URI whose scheme selects the storage.

    Returns:
        True when the URI scheme is a key of ``self.storages`` and
        ``self._get_storage(uri)`` succeeds. False otherwise; each False
        outcome is logged as an error. (The failure paths previously fell
        through and returned ``None`` implicitly.)
    """
    scheme = urlparse(uri).scheme
    if scheme not in self.storages:
        logger.error("Unknown feed storage scheme: %(scheme)s",
                     {'scheme': scheme})
        return False

    try:
        self._get_storage(uri)
    except NotConfigured as e:
        logger.error("Disabled feed storage scheme: %(scheme)s. "
                     "Reason: %(reason)s",
                     {'scheme': scheme, 'reason': str(e)})
        return False
    return True
示例13: __init__
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def __init__(self, crawler):
    """Set up fixture recording for the crawler's spider.

    Reads AUTOUNIT_MAX_FIXTURES_PER_CALLBACK (never lower than 10) and
    AUTOUNIT_BASE_PATH (default: ``<project dir>/autounit``), creates the
    base directory, clears the existing fixtures for the spider and resets
    the fixture counters. A warning is logged when CONCURRENT_REQUESTS is
    greater than 1.

    Raises:
        ValueError: if no key of SPIDER_MIDDLEWARES contains this class's
            name.
        NotConfigured: if AUTOUNIT_ENABLED is false.
    """
    settings = crawler.settings
    spider = crawler.spider

    middleware_name = self.__class__.__name__
    if not any(
        middleware_name in s
        for s in settings.getwithbase('SPIDER_MIDDLEWARES').keys()
    ):
        raise ValueError(
            '%s must be in SPIDER_MIDDLEWARES' % (middleware_name,))

    if not settings.getbool('AUTOUNIT_ENABLED'):
        raise NotConfigured('scrapy-autounit is not enabled')

    if settings.getint('CONCURRENT_REQUESTS') > 1:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning(
            'Recording with concurrency > 1! '
            'Data races in shared object modification may create broken '
            'tests.'
        )

    # Enforce a floor of 10 fixtures per callback.
    self.max_fixtures = max(
        settings.getint('AUTOUNIT_MAX_FIXTURES_PER_CALLBACK', default=10),
        10,
    )

    self.base_path = settings.get(
        'AUTOUNIT_BASE_PATH',
        default=os.path.join(get_project_dir(), 'autounit')
    )
    create_dir(self.base_path, exist_ok=True)
    clear_fixtures(self.base_path, sanitize_module_name(spider.name))

    self.fixture_counters = {}
示例14: from_crawler
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def from_crawler(cls, crawler):
    """Create the crawl-once middleware from the crawler's settings.

    The instance is built from the data path for CRAWL_ONCE_PATH (default
    ``'crawl_once'``, created if needed), the crawler's stats and
    CRAWL_ONCE_DEFAULT (default False). Its ``spider_opened`` /
    ``spider_closed`` methods are connected to the signals of the same name.

    Raises:
        NotConfigured: if CRAWL_ONCE_ENABLED (default True) is false.
    """
    settings = crawler.settings
    if not settings.getbool('CRAWL_ONCE_ENABLED', True):
        raise NotConfigured()

    storage_path = data_path(
        settings.get('CRAWL_ONCE_PATH', 'crawl_once'),
        createdir=True,
    )
    crawl_once_default = settings.getbool('CRAWL_ONCE_DEFAULT', default=False)

    middleware = cls(storage_path, crawler.stats, crawl_once_default)
    crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
    crawler.signals.connect(middleware.spider_closed, signal=signals.spider_closed)
    return middleware
示例15: test_not_configured
# 需要导入模块: from scrapy import exceptions [as 别名]
# 或者: from scrapy.exceptions import NotConfigured [as 别名]
def test_not_configured():
    """``from_crawler`` raises NotConfigured when CRAWL_ONCE_ENABLED is False."""
    disabled_settings = {'CRAWL_ONCE_ENABLED': False}
    crawler = get_crawler(settings_dict=disabled_settings)
    with pytest.raises(NotConfigured):
        CrawlOnceMiddleware.from_crawler(crawler)