This article collects typical code examples of the Settings.set method from the Python module scrapy.settings. If you are wondering what Settings.set does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also read further about the class it belongs to, scrapy.settings.Settings.
The following shows 15 code examples of Settings.set, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
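Before the examples, here is a minimal, self-contained sketch of the Settings.set API itself (the optional priority argument and the typed getters are standard Scrapy behaviour; the setting names and values used here are arbitrary):

from scrapy.settings import Settings

settings = Settings()
settings.set('DOWNLOAD_DELAY', 2)                         # stored with the default 'project' priority
settings.set('USER_AGENT', 'my-bot', priority='cmdline')  # a higher priority overrides lower ones
print(settings.getint('DOWNLOAD_DELAY'))                  # -> 2
print(settings.get('USER_AGENT'))                         # -> 'my-bot'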
Example 1: test_from_settings_constructs_middleware_with_the_specified_settings
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def test_from_settings_constructs_middleware_with_the_specified_settings():
    settings = Settings()
    settings.set('HTML_STORAGE', {'test': 'settings'})
    downloader = HtmlStorageMiddleware.from_settings(settings)
    assert_that(downloader.settings, is_({'test': 'settings'}))
Example 2: make_downloader
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def make_downloader(save_html_on_codes=[]):
    settings = Settings()
    settings.set('HTML_STORAGE', {
        'gzip_output': True,
        'save_html_on_codes': save_html_on_codes,
    })
    return HtmlStorageMiddleware(settings)
Example 3: get_fetch
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def get_fetch(log=False):
    settings = Settings()
    settings.set('LOG_ENABLED', log)
    crawler_process = CrawlerProcess(settings)
    crawler = crawler_process.create_crawler()
    crawler_process.start_crawling()
    t = Thread(target=crawler_process.start_reactor)
    t.daemon = True
    t.start()
    shell = Shell(crawler)
    shell.code = 'adsf'
    import threading
    lock = threading.Lock()

    def fetch(url_or_request):
        # serialize fetches: the shell instance is shared between callers
        lock.acquire()
        try:
            shell.fetch(url_or_request)
            response = shell.vars.get('response')
            return response
        finally:
            lock.release()

    return fetch
Example 4: test_contructor_sets_default_settings_values_when_no_settings_are_specified
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def test_contructor_sets_default_settings_values_when_no_settings_are_specified(
        setting_name, expected):
    settings = Settings()
    settings.set('HTML_STORAGE', {})
    downloader = HtmlStorageMiddleware(settings)
    assert_that(downloader.__dict__[setting_name], is_(expected))
Example 5: _crawl
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def _crawl(self):
    settings = Settings()
    settings.set('ITEM_PIPELINES', {
        'app.pipelines.JsonWriterPipeline': 300,
    })
    self.process = CrawlerProcess(settings)
    self.process.crawl(self, self.name, self.start_urls)
    self.process.start()
Example 6: runSpider
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def runSpider(self, spider):
    configure_logging({'LOG_FORMAT': '%(asctime)s [%(name)s] %(levelname)s: %(message)s'})
    settings = Settings()
    settings.set('FEED_URI', 'output.json')
    settings.set('FEED_FORMAT', 'json')
    runner = CrawlerRunner(settings)
    dfd = runner.crawl(spider)
    dfd.addBoth(lambda _: reactor.stop())
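Note that FEED_URI and FEED_FORMAT were deprecated in Scrapy 2.1 in favour of the combined FEEDS setting; a small sketch of the equivalent configuration, assuming a newer Scrapy version:

from scrapy.settings import Settings

settings = Settings()
settings.set('FEEDS', {'output.json': {'format': 'json'}})  # single setting replacing FEED_URI + FEED_FORMAT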
Example 7: start
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def start(self):
    settings = Settings()
    # crawl responsibly
    settings.set("USER_AGENT", "test")
    crawler_obj = Spider()
    crawler = Crawler(crawler_obj, settings)
    # stop reactor when spider closes
    crawler.signals.connect(self.stop, signal=signals.spider_closed)
    crawler.crawl()
Example 8: get_crawler_class
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def get_crawler_class(self, crawler):
    """
    Searches through the modules in self.__crawer_module for a crawler with
    the name passed along.

    :param str crawler: Name of the crawler to load
    :rtype: crawler-class
    """
    settings = Settings()
    settings.set('SPIDER_MODULES', [self.__crawer_module])
    spider_loader = SpiderLoader(settings)
    return spider_loader.load(crawler)
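A related sketch for the same pattern: with SPIDER_MODULES set through Settings.set, SpiderLoader can also enumerate every spider it finds ('myproject.spiders' is a placeholder module path, not from the example above):

from scrapy.settings import Settings
from scrapy.spiderloader import SpiderLoader

settings = Settings()
settings.set('SPIDER_MODULES', ['myproject.spiders'])
loader = SpiderLoader.from_settings(settings)
print(loader.list())  # names of all spiders discovered in the configured modules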
Example 9: qwebkit_settings
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def qwebkit_settings(settings=None):
    if settings is None:
        settings = Settings()
    elif settings.getbool("__WT__"):
        return settings
    else:
        settings = settings.copy()
        settings.frozen = False
    # fill in any WT_* defaults the caller has not set explicitly
    for name in dir(defs):
        if name.startswith("WT_") and settings.get(name) is None:
            settings.set(name, getattr(defs, name))
    settings.set("__WT__", True)
    return settings
Example 10: test_constructor_extracts_expected_settings
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def test_constructor_extracts_expected_settings():
    settings = Settings()
    save_html_on_codes = make_allowed_response_codes_list()
    settings.set('HTML_STORAGE', {
        'gzip_output': True,
        'save_html_on_codes': save_html_on_codes,
    })
    downloader = HtmlStorageMiddleware(settings)
    assert_that(downloader, has_properties(dict(
        gzip_output=True,
        save_html_on_codes=save_html_on_codes,
    )))
Example 11: run
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def run(self):
    dispatcher.connect(self.stop_reactor, signal=signals.spider_closed)
    spider = PriceSpider(self.str)
    testset = Settings()
    testset.set("ITEM_PIPELINES", {
        'tutorial.pipelines.MySQLStorePipeline': 1,
    })
    crawler = Crawler(testset)
    crawler.configure()
    crawler.crawl(spider)
    crawler.start()
    log.start()
    log.msg('Running reactor...')
    reactor.run(installSignalHandlers=0)  # the script will block here until the spider is closed
    log.msg('Reactor stopped.')
Example 12: run
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def run():
    log.start(loglevel=log.DEBUG)
    settings = Settings()
    # crawl responsibly
    settings.set("USER_AGENT", "Gitlaw-ca Scraper (+https://github.com/JasonMWhite/gitlawca-scraper)")
    settings.set("ITEM_PIPELINES", {'gitlawca.scraper.pipelines.LawscraperPipeline': 100})
    crawler = Crawler(settings)
    # stop reactor when spider closes
    crawler.signals.connect(spider_closing, signal=signals.spider_closed)
    crawler.configure()
    crawler.crawl(CanadaLawSpider())
    crawler.start()
    reactor.run()
Example 13: get_new_settings
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def get_new_settings(directory=HTTPCACHE_DIR,
                     backend=DEFAULT,
                     depth=1):
    s = Settings()
    s.set('HTTPCACHE_ENABLED', True)
    s.set('HTTPCACHE_DIR', directory)
    s.set('HTTPCACHE_STORAGE', backend)
    s.set('DEPTH_LIMIT', depth)
    return s
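A minimal usage sketch for the helper above (MySpider is a hypothetical spider class assumed to be defined elsewhere; the directory and depth values are arbitrary):

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(get_new_settings(directory='.httpcache', depth=2))
process.crawl(MySpider)  # MySpider is a placeholder, not part of the example above
process.start()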
Example 14: get_spider_settings
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def get_spider_settings(flask_app_config):
    """
    For the given Flask app config (dict), create a Scrapy Settings object
    with the common settings for each spider/crawler.

    Returns:
        Scrapy settings class instance
    """
    settings = Settings()
    # set the pipelines for exporting to JSON and CSV
    pipelines = {}
    if flask_app_config['EXPORT_JSON']:
        pipelines['arachne.pipelines.ExportJSON'] = 100
    if flask_app_config['EXPORT_CSV']:
        pipelines['arachne.pipelines.ExportCSV'] = 200
    settings.set('ITEM_PIPELINES', pipelines)
    return settings
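A quick usage sketch with a hypothetical Flask-style config dict:

config = {'EXPORT_JSON': True, 'EXPORT_CSV': False}
settings = get_spider_settings(config)
print(settings.getdict('ITEM_PIPELINES'))  # -> {'arachne.pipelines.ExportJSON': 100}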
Example 15: run_spider
# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def run_spider():
    settings = Settings()
    settings.set('ITEM_PIPELINES', {
        '__main__.JsonWriterPipeline': 100,
    })
    # enable remote server certificate verification
    # see http://doc.scrapy.org/en/latest/topics/settings.html#downloader-clientcontextfactory
    settings.set('DOWNLOADER_CLIENTCONTEXTFACTORY',
                 'scrapy.core.downloader.contextfactory.BrowserLikeContextFactory')
    # uncomment the line below to enable logging for debugging
    # configure_logging()
    crawler = Crawler(JenkinsJobSpider, settings)
    crawler.signals.connect(callback, signal=signals.spider_closed)
    crawler.crawl()
    reactor.run()