This article collects typical usage examples of the Python scrapy.settings.Settings class, answering the common questions: what is the Settings class for, and how is it used? Below are 15 curated code examples of the Settings class, sorted by popularity by default.
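Before the examples, here is a minimal self-contained sketch of the core Settings API that the snippets below exercise; TEST_MODE is a made-up key used purely for illustration:

from scrapy.settings import Settings

settings = Settings()
settings.set('TEST_MODE', 'true')            # TEST_MODE is a made-up key, not a real Scrapy setting
assert settings.getbool('TEST_MODE')         # type-aware accessors coerce stored strings
assert settings.getint('RETRY_TIMES') == 2   # a built-in Scrapy default

frozen = settings.copy()   # independent copy of the settings object
frozen.freeze()            # any further set() on the frozen copy raises TypeError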
Example 1: test_from_settings_constructs_middleware_with_the_specified_settings
def test_from_settings_constructs_middleware_with_the_specified_settings():
    # assert_that/is_ are PyHamcrest matchers imported by the test module
    settings = Settings()
    settings.set('HTML_STORAGE', {'test': 'settings'})
    downloader = HtmlStorageMiddleware.from_settings(settings)
    assert_that(downloader.settings, is_({'test': 'settings'}))
Example 2: get_fetch
def get_fetch(log=False):
    # Note: create_crawler/start_crawling/start_reactor appear to target an
    # older Scrapy CrawlerProcess API; they are absent from current releases.
    # Thread and threading are assumed imported at module level.
    settings = Settings()
    settings.set('LOG_ENABLED', log)
    crawler_process = CrawlerProcess(settings)
    crawler = crawler_process.create_crawler()
    crawler_process.start_crawling()
    t = Thread(target=crawler_process.start_reactor)
    t.daemon = True
    t.start()
    shell = Shell(crawler)
    shell.code = 'adsf'  # placeholder value; only consulted if shell.start() were called
    lock = threading.Lock()  # serialize fetches: the shell keeps state in shared vars

    def fetch(url_or_request):
        with lock:
            shell.fetch(url_or_request)
            return shell.vars.get('response')

    return fetch
Example 3: __init__
def __init__(self, store_uri, download_func=None, settings=None):
    if not store_uri:
        raise NotConfigured
    if isinstance(settings, dict) or settings is None:
        settings = Settings(settings)
    cls_name = "FilesPipeline"
    self.store = self._get_store(store_uri)
    resolve = functools.partial(self._key_for_pipe,
                                base_class_name=cls_name,
                                settings=settings)
    self.expires = settings.getint(
        resolve('FILES_EXPIRES'), self.EXPIRES
    )
    if not hasattr(self, "FILES_URLS_FIELD"):
        self.FILES_URLS_FIELD = self.DEFAULT_FILES_URLS_FIELD
    if not hasattr(self, "FILES_RESULT_FIELD"):
        self.FILES_RESULT_FIELD = self.DEFAULT_FILES_RESULT_FIELD
    self.files_urls_field = settings.get(
        resolve('FILES_URLS_FIELD'), self.FILES_URLS_FIELD
    )
    self.files_result_field = settings.get(
        resolve('FILES_RESULT_FIELD'), self.FILES_RESULT_FIELD
    )
    super(FilesPipeline, self).__init__(download_func=download_func)
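The resolve partial above lets a FilesPipeline subclass override a global setting with a class-prefixed one. A minimal sketch of that lookup rule, mirroring what Scrapy's MediaPipeline._key_for_pipe does (the standalone function below is my own illustration, not Scrapy code):

def key_for_pipe(class_name, key, base_class_name, settings):
    # For a subclass MyPipeline asking for 'FILES_EXPIRES', prefer
    # 'MYPIPELINE_FILES_EXPIRES' when that key is explicitly set.
    formatted_key = '%s_%s' % (class_name.upper(), key)
    if class_name == base_class_name or formatted_key not in settings:
        return key
    return formatted_key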
Example 4: __init__
def __init__(self, spidercls, settings=None):
    if isinstance(settings, dict) or settings is None:
        settings = Settings(settings)
    self.spidercls = spidercls
    self.settings = settings.copy()
    self.spidercls.update_settings(self.settings)

    self.signals = SignalManager(self)
    self.stats = load_object(self.settings['STATS_CLASS'])(self)

    handler = LogCounterHandler(self, level=settings.get('LOG_LEVEL'))
    logging.root.addHandler(handler)
    # lambda is assigned to Crawler attribute because this way it is not
    # garbage collected after leaving __init__ scope
    self.__remove_handler = lambda: logging.root.removeHandler(handler)
    self.signals.connect(self.__remove_handler, signals.engine_stopped)

    lf_cls = load_object(self.settings['LOG_FORMATTER'])
    self.logformatter = lf_cls.from_crawler(self)
    self.extensions = ExtensionManager.from_crawler(self)

    self.settings.freeze()
    self.crawling = False
    self.spider = None
    self.engine = None
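For context, this constructor is usually reached indirectly through CrawlerProcess rather than called by hand; a minimal sketch with a placeholder spider:

import scrapy
from scrapy.crawler import CrawlerProcess

class MySpider(scrapy.Spider):
    name = 'my_spider'  # placeholder spider, for illustration only
    start_urls = ['http://example.com']

process = CrawlerProcess({'LOG_LEVEL': 'INFO'})
process.crawl(MySpider)  # builds a Crawler(MySpider, settings) internally
process.start()          # starts the reactor and blocks until crawling finishes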
Example 5: create_root
def create_root(config):
    from scrapy import log
    from scrapy.settings import Settings
    from slyd.crawlerspec import (CrawlerSpecManager,
                                  create_crawler_spec_resource)
    from slyd.bot import create_bot_resource
    import slyd.settings
    from slyd.projects import ProjectsResource

    root = Resource()
    root.putChild("static", File(config['docroot']))

    crawler_settings = Settings()
    crawler_settings.setmodule(slyd.settings)
    spec_manager = CrawlerSpecManager(crawler_settings)

    # add project management at /projects
    projects = ProjectsResource(crawler_settings)
    root.putChild('projects', projects)

    # add crawler at /projects/PROJECT_ID/bot
    log.msg("Slybot specs loading from %s/[PROJECT]" % spec_manager.basedir,
            level=log.DEBUG)
    projects.putChild("bot", create_bot_resource(spec_manager))

    # add spec at /projects/PROJECT_ID/spec
    spec = create_crawler_spec_resource(spec_manager)
    projects.putChild("spec", spec)
    return root
Example 6: create_spec_manager
def create_spec_manager(projects_dir=None):
    """Create a CrawlerSpecManager configured to use test settings"""
    crawler_settings = ScrapySettings()
    crawler_settings.setmodule(test_settings)
    projects_dir = projects_dir or test_settings.SPEC_DATA_DIR
    test_settings.SPEC_FACTORY['PARAMS']['location'] = projects_dir
    return SpecManager(crawler_settings)
Example 7: make_downloader
def make_downloader(save_html_on_codes=[]):
    settings = Settings()
    settings.set('HTML_STORAGE', {
        'gzip_output': True,
        'save_html_on_codes': save_html_on_codes,
    })
    return HtmlStorageMiddleware(settings)
Example 8: FilesPipelineTestCaseCustomSettings
class FilesPipelineTestCaseCustomSettings(unittest.TestCase):

    def setUp(self):
        self.tempdir = mkdtemp()
        self.pipeline = FilesPipeline(self.tempdir)
        self.default_settings = Settings()

    def tearDown(self):
        rmtree(self.tempdir)

    def test_expires(self):
        another_pipeline = FilesPipeline.from_settings(
            Settings({'FILES_STORE': self.tempdir,
                      'FILES_EXPIRES': 42}))
        self.assertEqual(self.pipeline.expires,
                         self.default_settings.getint('FILES_EXPIRES'))
        self.assertEqual(another_pipeline.expires, 42)

    def test_files_urls_field(self):
        another_pipeline = FilesPipeline.from_settings(
            Settings({'FILES_STORE': self.tempdir,
                      'FILES_URLS_FIELD': 'funny_field'}))
        self.assertEqual(self.pipeline.files_urls_field,
                         self.default_settings.get('FILES_URLS_FIELD'))
        self.assertEqual(another_pipeline.files_urls_field, 'funny_field')

    def test_files_result_field(self):
        another_pipeline = FilesPipeline.from_settings(
            Settings({'FILES_STORE': self.tempdir,
                      'FILES_RESULT_FIELD': 'funny_field'}))
        self.assertEqual(self.pipeline.files_result_field,
                         self.default_settings.get('FILES_RESULT_FIELD'))
        self.assertEqual(another_pipeline.files_result_field, 'funny_field')
Example 9: test_getdict_autodegrade_basesettings
def test_getdict_autodegrade_basesettings(self):
    # TEST_DICT is assumed to be defined in this test suite's default settings
    settings = Settings()
    mydict = settings.getdict('TEST_DICT')
    self.assertIsInstance(mydict, dict)
    self.assertEqual(len(mydict), 1)
    self.assertIn('key', mydict)
    self.assertEqual(mydict['key'], 'val')
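Besides plain dict values, getdict will also parse a JSON string, which is how dict settings arrive from the command line; a minimal sketch (MY_DICT is a made-up key):

from scrapy.settings import Settings

settings = Settings({'MY_DICT': '{"key": "val"}'})  # MY_DICT is a made-up key
assert settings.getdict('MY_DICT') == {'key': 'val'}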
Example 10: get_project_settings
def get_project_settings():
    # setmodule accepts a dotted path string and imports the module itself;
    # every upper-case attribute of that module becomes a setting
    scrapy_module = "uris.urispider.settings"
    settings = Settings()
    settings.setmodule(scrapy_module)
    return settings
Example 11: __init__
def __init__(self, spidercls, settings):
    if isinstance(settings, dict):
        settings = Settings(settings)
    self.spidercls = spidercls
    self.settings = settings.copy()

    self.signals = SignalManager(self)
    self.stats = load_object(self.settings['STATS_CLASS'])(self)

    handler = LogCounterHandler(self, level=settings.get('LOG_LEVEL'))
    logging.root.addHandler(handler)
    self.signals.connect(lambda: logging.root.removeHandler(handler),
                         signals.engine_stopped)

    lf_cls = load_object(self.settings['LOG_FORMATTER'])
    self.logformatter = lf_cls.from_crawler(self)
    self.extensions = ExtensionManager.from_crawler(self)

    self.spidercls.update_settings(self.settings)
    self.settings.freeze()
    self.crawling = False
    self.spider = None
    self.engine = None
Example 12: test_autopromote_dicts
def test_autopromote_dicts(self):
    # TEST_DICT is assumed to be defined in this test suite's default settings
    settings = Settings()
    mydict = settings.get('TEST_DICT')
    self.assertIsInstance(mydict, BaseSettings)
    self.assertIn('key', mydict)
    self.assertEqual(mydict['key'], 'val')
    self.assertEqual(mydict.getpriority('key'), 0)
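The getpriority assertion above only makes sense alongside Settings' per-key priority rules: a later set() wins only if its priority is at least as high as the current one. A minimal sketch (SOME_KEY is a made-up name):

from scrapy.settings import Settings

settings = Settings()
settings.set('SOME_KEY', 'low', priority='default')   # priority 0
settings.set('SOME_KEY', 'high', priority='project')  # priority 20, so it wins
assert settings.get('SOME_KEY') == 'high'
assert settings.getpriority('SOME_KEY') == 20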
Example 13: __init__
def __init__(self, spider):
    Process.__init__(self)
    setting = Settings()
    setting.setmodule(s)  # `s` is the project's settings module, imported elsewhere
    # Crawler(settings) plus configure() is the legacy, pre-1.0 Scrapy API
    self.crawler = Crawler(setting)
    self.crawler.configure()
    self.crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
    self.spider = spider
Example 14: _crawl
def _crawl(self):
    settings = Settings()
    settings.set('ITEM_PIPELINES', {
        'app.pipelines.JsonWriterPipeline': 300,
    })
    self.process = CrawlerProcess(settings)
    self.process.crawl(self, self.name, self.start_urls)
    self.process.start()
Example 15: test_contructor_sets_default_settings_values_when_no_settings_are_specified
def test_contructor_sets_default_settings_values_when_no_settings_are_specified(
        setting_name, expected):
    settings = Settings()
    settings.set('HTML_STORAGE', {})
    downloader = HtmlStorageMiddleware(settings)
    assert_that(downloader.__dict__[setting_name], is_(expected))
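Since the test above receives setting_name and expected as arguments, it was presumably driven by a pytest parametrize decorator along these lines (the actual parameter table is elided in the source, so only the shape is shown):

import pytest

@pytest.mark.parametrize('setting_name, expected', [
    # (attribute name, expected default value) pairs would be listed here
])
def test_contructor_sets_default_settings_values_when_no_settings_are_specified(
        setting_name, expected):
    ...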