This article collects typical usage examples of the Python method scrapy.utils.misc.load_object. If you are asking yourself what misc.load_object does, how to call it, or where to find working examples, the curated code samples below should help. You can also read further about the containing module, scrapy.utils.misc.
The following presents 15 code examples of misc.load_object, sorted by popularity by default.
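As a quick orientation before the examples: load_object takes a dotted import path and returns whatever object that path names (a class, a function, or a module-level variable). A minimal sketch of the basic call, using objects that ship with Scrapy:

from scrapy.utils.misc import load_object

# Resolve a class from its dotted path, then instantiate it as usual.
DupeFilterClass = load_object('scrapy.dupefilters.RFPDupeFilter')
dupefilter = DupeFilterClass()

# Functions resolve the same way.
to_bytes = load_object('scrapy.utils.python.to_bytes')
assert to_bytes('abc') == b'abc'

load_object raises ValueError for a path without dots, and ImportError or NameError when the module or attribute cannot be found, which is why several examples below wrap the call in try/except.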
Example 1: open

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def open(self, spider):
    self.spider = spider

    try:
        self.queue = load_object(self.queue_cls)(
            server=self.server,
            spider=spider,
            key=self.queue_key % {'spider': spider.name},
            serializer=self.serializer,
        )
    except TypeError as e:
        raise ValueError("Failed to instantiate queue class '%s': %s"
                         % (self.queue_cls, e))

    try:
        self.df = load_object(self.dupefilter_cls)(
            server=self.server,
            key=self.dupefilter_key % {'spider': spider.name},
            debug=spider.settings.getbool('DUPEFILTER_DEBUG'),
        )
    except TypeError as e:
        raise ValueError("Failed to instantiate dupefilter class '%s': %s"
                         % (self.dupefilter_cls, e))

    if self.flush_on_start:
        self.flush()
    if len(self.queue):
        spider.log("Resuming crawl (%d requests scheduled)" % len(self.queue))
Example 2: get_schema_from

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def get_schema_from(source):
    if is_schema_url(source):
        schema = get_contents(source)
        try:
            return json.loads(schema)
        except Exception as e:
            logger.exception(
                str(e) + "\nCould not parse schema from '{}'".format(source)
            )
    elif source.endswith(".json"):
        with open(source, "r") as f:
            try:
                return json.load(f)
            except Exception as e:
                logger.exception(
                    str(e) + "\nCould not parse schema in '{}'".format(source)
                )
    else:
        schema = load_object(source)
        if isinstance(schema, six.string_types):
            return json.loads(schema)
        return schema
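A hypothetical call for each of the three branches above (the URL, file path, and dotted path are made-up placeholders):

# schema fetched over HTTP and parsed from the response body
schema = get_schema_from('https://example.com/schemas/item.json')
# schema read and parsed from a local .json file
schema = get_schema_from('/path/to/item-schema.json')
# schema resolved via load_object as a dict (or JSON string) defined in code
schema = get_schema_from('myproject.schemas.ITEM_SCHEMA')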
Example 3: from_settings

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
@classmethod
def from_settings(cls, settings, crawler=None):
    mwlist = cls._get_mwlist_from_settings(settings)
    middlewares = []
    enabled = []
    for clspath in mwlist:
        try:
            mwcls = load_object(clspath)
            mw = create_instance(mwcls, settings, crawler)
            middlewares.append(mw)
            enabled.append(clspath)
        except NotConfigured as e:
            if e.args:
                clsname = clspath.split('.')[-1]
                logger.warning("Disabled %(clsname)s: %(eargs)s",
                               {'clsname': clsname, 'eargs': e.args[0]},
                               extra={'crawler': crawler})

    logger.info("Enabled %(componentname)ss:\n%(enabledlist)s",
                {'componentname': cls.component_name,
                 'enabledlist': pprint.pformat(enabled)},
                extra={'crawler': crawler})
    return cls(*middlewares)
Example 4: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def __init__(self, settings):
    self.settings = settings
    self.urifmt = settings['FEED_URI']
    if not self.urifmt:
        raise NotConfigured
    self.format = settings['FEED_FORMAT'].lower()
    self.export_encoding = settings['FEED_EXPORT_ENCODING']
    self.storages = self._load_components('FEED_STORAGES')
    self.exporters = self._load_components('FEED_EXPORTERS')
    if not self._storage_supported(self.urifmt):
        raise NotConfigured
    if not self._exporter_supported(self.format):
        raise NotConfigured
    self.store_empty = settings.getbool('FEED_STORE_EMPTY')
    self._exporting = False
    self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None
    self.indent = None
    if settings.get('FEED_EXPORT_INDENT') is not None:
        self.indent = settings.getint('FEED_EXPORT_INDENT')
    uripar = settings['FEED_URI_PARAMS']
    self._uripar = load_object(uripar) if uripar else lambda x, y: None
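The last line resolves FEED_URI_PARAMS, a dotted path to a function that adjusts the parameters interpolated into FEED_URI. A minimal sketch, assuming the in-place mutation contract of older Scrapy versions (myproject.utils.uri_params is a made-up path):

# settings.py
FEED_URI_PARAMS = 'myproject.utils.uri_params'

# myproject/utils.py
def uri_params(params, spider):
    # expose '%(spider_name)s' as a placeholder in FEED_URI templates
    params['spider_name'] = spider.name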
Example 5: from_crawler

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
@classmethod
def from_crawler(cls, crawler):
    settings = crawler.settings
    dupefilter_cls = load_object(settings['DUPEFILTER_CLASS'])
    dupefilter = create_instance(dupefilter_cls, settings, crawler)
    pqclass = load_object(settings['SCHEDULER_PRIORITY_QUEUE'])
    if pqclass is PriorityQueue:
        warnings.warn("SCHEDULER_PRIORITY_QUEUE='queuelib.PriorityQueue'"
                      " is no longer supported because of API changes; "
                      "please use 'scrapy.pqueues.ScrapyPriorityQueue'",
                      ScrapyDeprecationWarning)
        from scrapy.pqueues import ScrapyPriorityQueue
        pqclass = ScrapyPriorityQueue

    dqclass = load_object(settings['SCHEDULER_DISK_QUEUE'])
    mqclass = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
    logunser = settings.getbool('LOG_UNSERIALIZABLE_REQUESTS',
                                settings.getbool('SCHEDULER_DEBUG'))
    return cls(dupefilter, jobdir=job_dir(settings), logunser=logunser,
               stats=crawler.stats, pqclass=pqclass, dqclass=dqclass,
               mqclass=mqclass, crawler=crawler)
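For reference, the dotted-path settings this classmethod resolves default to Scrapy's stock classes, shown below; a project can point any of them at a custom class and load_object resolves it the same way:

DUPEFILTER_CLASS = 'scrapy.dupefilters.RFPDupeFilter'
SCHEDULER_PRIORITY_QUEUE = 'scrapy.pqueues.ScrapyPriorityQueue'
SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue'
SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'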
Example 6: _load_handler

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def _load_handler(self, scheme, skip_lazy=False):
    path = self._schemes[scheme]
    try:
        dhcls = load_object(path)
        if skip_lazy and getattr(dhcls, 'lazy', True):
            return None
        dh = dhcls(self._crawler.settings)
    except NotConfigured as ex:
        self._notconfigured[scheme] = str(ex)
        return None
    except Exception as ex:
        logger.error('Loading "%(clspath)s" for scheme "%(scheme)s"',
                     {"clspath": path, "scheme": scheme},
                     exc_info=True, extra={'crawler': self._crawler})
        self._notconfigured[scheme] = str(ex)
        return None
    else:
        self._handlers[scheme] = dh
        return dh
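The self._schemes mapping consulted above is built from the DOWNLOAD_HANDLERS settings, which map URI schemes to dotted handler paths. A sketch using Scrapy 1.x's stock handler paths:

DOWNLOAD_HANDLERS_BASE = {
    'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
    'http': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    'https': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    'ftp': 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler',
}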
Example 7: __init__

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def __init__(self, settings):
    self._pool = HTTPConnectionPool(reactor, persistent=True)
    self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
    self._pool._factory.noisy = False

    self._sslMethod = openssl_methods[settings.get('DOWNLOADER_CLIENT_TLS_METHOD')]
    self._contextFactoryClass = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
    # try method-aware context factory
    try:
        self._contextFactory = self._contextFactoryClass(method=self._sslMethod)
    except TypeError:
        # use context factory defaults
        self._contextFactory = self._contextFactoryClass()
        msg = ("'%s' does not accept `method` argument (type OpenSSL.SSL method, "
               "e.g. OpenSSL.SSL.SSLv23_METHOD). Please upgrade your context "
               "factory class to handle it or ignore it." %
               settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
        warnings.warn(msg)

    self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
    self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE')
    self._fail_on_dataloss = settings.getbool('DOWNLOAD_FAIL_ON_DATALOSS')
    self._disconnect_timeout = 1
Example 8: _load_policy_class

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def _load_policy_class(policy, warning_only=False):
    """
    Expect a string with the path to the policy class;
    otherwise, try to interpret the string as a standard value
    from https://www.w3.org/TR/referrer-policy/#referrer-policies
    """
    try:
        return load_object(policy)
    except ValueError:
        try:
            return _policy_classes[policy.lower()]
        except KeyError:
            msg = "Could not load referrer policy %r" % policy
            if not warning_only:
                raise RuntimeError(msg)
            else:
                warnings.warn(msg, RuntimeWarning)
                return None
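A sketch of both lookup paths, assuming _policy_classes in the surrounding module maps the standard token names to policy classes (as in scrapy.spidermiddlewares.referer):

# a dotted path is resolved directly by load_object
policy_cls = _load_policy_class('scrapy.spidermiddlewares.referer.NoReferrerPolicy')

# a standard token from the Referrer-Policy spec falls back to _policy_classes
policy_cls = _load_policy_class('no-referrer')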
Example 9: get_application

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def get_application(config=None):
    """Override the default get_application in Scrapyd."""
    if config is None:
        config = Config()
    # Override http_port with the $PORT environment variable on Heroku,
    # and bind_address with 0.0.0.0 if $PORT exists.
    # Note that http_port has to be a string instead of an int.
    config.cp['scrapyd'].update(
        http_port=os.environ.get('PORT', config.get('http_port')),
        bind_address='0.0.0.0' if os.environ.get('PORT') else config.get('bind_address')
    )
    apppath = config.get('application', 'scrapyd.app.application')
    appfunc = load_object(apppath)
    return appfunc(config)
Example 10: request_from_dict

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def request_from_dict(d, spider=None):
    def _get_method(obj, name):
        name = str(name)
        try:
            return getattr(obj, name)
        except AttributeError:
            raise ValueError("Method %r not found in: %s" % (name, obj))

    cb = d['callback']
    if cb and spider:
        cb = _get_method(spider, cb)
    eb = d['errback']
    if eb and spider:
        eb = _get_method(spider, eb)

    request_cls = load_object(d['_class']) if '_class' in d else Request
    request = request_cls(
        url=to_native_str(d['url']),
        callback=cb,
        errback=eb,
        method=d['method'],
        headers=d['headers'],
        body=d['body'],
        cookies=d['cookies'],
        meta=d['meta'],
        encoding=d['_encoding'],
        priority=d['priority'],
        dont_filter=d['dont_filter'],
        flags=d.get('flags'))
    request._plusmeta = d['_plusmeta']
    return request
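A sketch of the dict shape this function expects; all values are illustrative, '_plusmeta' is specific to this project's request serializer rather than stock Scrapy, and my_spider stands in for a running spider instance:

d = {
    'url': 'https://example.com/page',
    'callback': 'parse_page',          # method name looked up on the spider
    'errback': None,
    'method': 'GET',
    'headers': {},
    'body': b'',
    'cookies': {},
    'meta': {},
    '_encoding': 'utf-8',
    'priority': 0,
    'dont_filter': False,
    'flags': None,
    '_class': 'scrapy.http.Request',   # dotted path resolved via load_object
    '_plusmeta': {},
}
request = request_from_dict(d, spider=my_spider)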
Example 11: get_redis_from_settings

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def get_redis_from_settings(settings):
    params = defaults.REDIS_PARAMS.copy()
    params.update(settings.getdict('REDIS_PARAMS'))
    for source, dest in SETTINGS_PARAMS_MAP.items():
        val = settings.get(source)
        if val:
            params[dest] = val
    if isinstance(params.get('redis_cls'), six.string_types):
        params['redis_cls'] = load_object(params['redis_cls'])
    return connection.get_redis(**params)
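A hedged example of settings that exercise the load_object branch above; myproject.redis.CustomRedis is a made-up client class path:

REDIS_PARAMS = {
    'socket_timeout': 30,
    # a dotted path here is resolved to the actual client class
    'redis_cls': 'myproject.redis.CustomRedis',
}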
Example 12: from_settings

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
@classmethod
def from_settings(cls, settings):
    params = {
        'server': connection.from_settings(settings),
    }
    if settings.get('REDIS_ITEMS_KEY'):
        params['key'] = settings['REDIS_ITEMS_KEY']
    if settings.get('REDIS_ITEMS_SERIALIZER'):
        params['serialize_func'] = load_object(
            settings['REDIS_ITEMS_SERIALIZER']
        )
    return cls(**params)
Example 13: _load_schematics_validator

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
@classmethod
def _load_schematics_validator(cls, model_path):
    model_class = load_object(model_path)
    if not issubclass(model_class, Model):
        raise NotConfigured(
            "Invalid model, models must subclass schematics.models.Model"
        )
    return SchematicsValidator(model_class)
Example 14: load_suite

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def load_suite(self, suite_to_load):
    try:
        suite_class = load_object(suite_to_load)
    except Exception as e:
        raise e  # TO-DO
    if not issubclass(suite_class, MonitorSuite):
        raise Exception  # TO-DO
    return suite_class(crawler=self.crawler)
Example 15: load_expression_suite

# Required import: from scrapy.utils import misc [as alias]
# Or: from scrapy.utils.misc import load_object [as alias]
def load_expression_suite(self, suite_to_load, monitor_class=None):
    if monitor_class:
        monitor_class = load_object(monitor_class)
    else:
        monitor_class = ExpressionsMonitor
    monitor = factory.create_monitor_class_from_dict(
        monitor_dict=suite_to_load, monitor_class=monitor_class
    )
    suite = MonitorSuite(crawler=self.crawler)
    suite.add_monitor(monitor)
    return suite