

Python Settings.set Method Code Examples

This article collects typical usage examples of the Settings.set method from Python's scrapy.settings module. If you have been wondering exactly how Settings.set is used in practice, the curated examples below should help; you can also explore further usage examples of scrapy.settings.Settings.


The following presents 15 code examples of the Settings.set method, sorted by popularity by default.
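Before looking at the examples, here is a minimal sketch of what Settings.set does; the setting names are standard Scrapy settings and the values are purely illustrative:

from scrapy.settings import Settings

settings = Settings()

# set(name, value, priority='project') stores a value at the given priority;
# higher-priority sources (e.g. 'cmdline') override lower ones on read.
settings.set('DOWNLOAD_DELAY', 2.0)
settings.set('RETRY_TIMES', 5, priority='cmdline')

# Typed getters convert the stored value when reading it back.
assert settings.getfloat('DOWNLOAD_DELAY') == 2.0
assert settings.getint('RETRY_TIMES') == 5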

Example 1: test_from_settings_constructs_middleware_with_the_specified_settings

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def test_from_settings_constructs_middleware_with_the_specified_settings():
    settings = Settings()
    settings.set('HTML_STORAGE', {'test': 'settings'})

    downloader = HtmlStorageMiddleware.from_settings(settings)

    assert_that(downloader.settings, is_({'test': 'settings'}))
Author: povilasb, Project: scrapy-html-storage, Lines: 9, Source: test_html_storage_middleware.py

Example 2: make_downloader

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def make_downloader(save_html_on_codes=[]):
    settings = Settings()
    settings.set('HTML_STORAGE', {
        'gzip_output': True,
        'save_html_on_codes': save_html_on_codes
    })
    return HtmlStorageMiddleware(settings)
Author: povilasb, Project: scrapy-html-storage, Lines: 9, Source: test_html_storage_middleware.py

Example 3: get_fetch

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def get_fetch(log=False):
    settings = Settings()
    settings.set('LOG_ENABLED', log)

    crawler_process = CrawlerProcess(settings)
    crawler = crawler_process.create_crawler()
    crawler_process.start_crawling()

    t = Thread(target=crawler_process.start_reactor)
    t.daemon = True
    t.start()

    shell = Shell(crawler)
    shell.code = 'adsf'

    import threading
    lock = threading.Lock()

    def fetch(url_or_request):
        with lock:
            shell.fetch(url_or_request)
            return shell.vars.get('response')

    return fetch
Author: chenhc, Project: laravel, Lines: 30, Source: utils.py

Example 4: test_contructor_sets_default_settings_values_when_no_settings_are_specified

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def test_contructor_sets_default_settings_values_when_no_settings_are_specified(
        setting_name, expected):
    settings = Settings()
    settings.set('HTML_STORAGE', {})

    downloader = HtmlStorageMiddleware(settings)

    assert_that(downloader.__dict__[setting_name], is_(expected))
Author: povilasb, Project: scrapy-html-storage, Lines: 10, Source: test_html_storage_middleware.py

Example 5: _crawl

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
    def _crawl(self):
        settings = Settings()
        settings.set('ITEM_PIPELINES', {
            'app.pipelines.JsonWriterPipeline': 300
        })
        self.process = CrawlerProcess(settings)
        self.process.crawl(self, self.name, self.start_urls)
        self.process.start()
Author: nw4869, Project: flask-scrapy, Lines: 10, Source: crawler.py

Example 6: runSpider

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
    def runSpider(self, spider):
        configure_logging({'LOG_FORMAT': '%(asctime)s [%(name)s] %(levelname)s: %(message)s'})
        settings = Settings()
        settings.set('FEED_URI', 'output.json')
        settings.set('FEED_FORMAT', 'json')

        runner = CrawlerRunner(settings)
        dfd = runner.crawl(spider)
        dfd.addBoth(lambda _: reactor.stop())
Author: Preetwinder, Project: ScrapyStreaming, Lines: 11, Source: scrapystreaming.py
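Note: FEED_URI and FEED_FORMAT were later deprecated in favor of the FEEDS dictionary (Scrapy 2.1+); a rough modern equivalent of the two calls above would be:

settings.set('FEEDS', {'output.json': {'format': 'json'}})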

Example 7: start

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
    def start(self):
        settings = Settings()

        # crawl responsibly
        settings.set("USER_AGENT", "test")
        crawler_obj = Spider()
        crawler = Crawler(crawler_obj, settings)

        # stop reactor when spider closes
        crawler.signals.connect(self.stop, signal=signals.spider_closed)
        crawler.crawl()
Author: c4simba, Project: ParserExample, Lines: 13, Source: spiderCtrl.py

Example 8: get_crawler_class

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
    def get_crawler_class(self, crawler):
        """
        Searches through the modules in self.__crawer_module for a crawler with
        the name passed along.

        :param str crawler: Name of the crawler to load
        :rtype: crawler-class
        """
        settings = Settings()
        settings.set('SPIDER_MODULES', [self.__crawer_module])
        spider_loader = SpiderLoader(settings)
        return spider_loader.load(crawler)
Author: Sayeedsalam, Project: spec-event-data-server, Lines: 14, Source: single_crawler.py

Example 9: qwebkit_settings

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def qwebkit_settings(settings=None):
    if settings is None:
        settings = Settings()
    elif settings.getbool("__WT__"):
        return settings
    else:
        settings = settings.copy()
        settings.frozen = False
    for name in dir(defs):
        if name.startswith("WT_") and settings.get(name) is None:
            settings.set(name, getattr(defs, name))
    settings.set("__WT__", True)
    return settings
Author: StrongZhu, Project: scrapy-webtools, Lines: 15, Source: utils.py
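Two details worth noting in this helper: the __WT__ sentinel makes it idempotent (settings that have already been populated are returned untouched), and copying the caller's Settings before clearing the frozen flag lets the function fill in WT_ defaults even when a running crawler has frozen the original object.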

Example 10: test_constructor_extracts_expected_settings

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def test_constructor_extracts_expected_settings():
    settings = Settings()
    save_html_on_codes = make_allowed_response_codes_list()
    settings.set('HTML_STORAGE', {
        'gzip_output': True,
        'save_html_on_codes': save_html_on_codes
    })

    downloader = HtmlStorageMiddleware(settings)

    assert_that(downloader, has_properties(dict(
        gzip_output=True,
        save_html_on_codes=save_html_on_codes
    )))
Author: povilasb, Project: scrapy-html-storage, Lines: 16, Source: test_html_storage_middleware.py

Example 11: run

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
    def run(self):
        dispatcher.connect(self.stop_reactor, signal=signals.spider_closed)
        spider = PriceSpider(self.str)
        testset = Settings()
        testset.set("ITEM_PIPELINES", {
            'tutorial.pipelines.MySQLStorePipeline': 1
        })
        crawler = Crawler(testset)
        crawler.configure()
        crawler.crawl(spider)
        crawler.start()
        log.start()
        log.msg('Running reactor...')
        reactor.run(installSignalHandlers=0)  # the script will block here until the spider is closed
        log.msg('Reactor stopped.')
Author: RuichaoQiu, Project: gasmanager, Lines: 17, Source: main.py

Example 12: run

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def run():
    log.start(loglevel=log.DEBUG)
    settings = Settings()

    # crawl responsibly
    settings.set("USER_AGENT", "Gitlaw-ca Scraper (+https://github.com/JasonMWhite/gitlawca-scraper)")
    settings.set("ITEM_PIPELINES", {'gitlawca.scraper.pipelines.LawscraperPipeline': 100})
    crawler = Crawler(settings)

    # stop reactor when spider closes
    crawler.signals.connect(spider_closing, signal=signals.spider_closed)

    crawler.configure()
    crawler.crawl(CanadaLawSpider())
    crawler.start()
    reactor.run()
Author: JasonMWhite, Project: gitlawca-scraper, Lines: 18, Source: scrape.py

Example 13: get_new_settings

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def get_new_settings(directory=HTTPCACHE_DIR,
                     backend=DEFAULT,
                     depth=1):
    s = Settings()
    s.set('HTTPCACHE_ENABLED', True)
    s.set('HTTPCACHE_DIR', directory)
    s.set('HTTPCACHE_STORAGE', backend)
    s.set('DEPTH_LIMIT', depth)
    return s
Author: mchichou2015, Project: test-application, Lines: 11, Source: test_crawl.py
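A sketch of how such a factory might be consumed; MySpider is a placeholder for any spider class, not part of the original example:

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(get_new_settings(depth=2))
process.crawl(MySpider)  # MySpider is assumed for illustration
process.start()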

Example 14: get_spider_settings

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def get_spider_settings(flask_app_config):
    """
    For the given spider_pipelines(dict) create a scrapy Settings object with
    the common settings for each spider/crawler.

    Returns:
        Scrapy settings class instance
    """
    settings = Settings()

    # set the pipelines for exporting to JSON and CSV
    pipelines = {}
    if flask_app_config['EXPORT_JSON']:
        pipelines['arachne.pipelines.ExportJSON'] = 100
    if flask_app_config['EXPORT_CSV']:
        pipelines['arachne.pipelines.ExportCSV'] = 200
    settings.set('ITEM_PIPELINES', pipelines)

    return settings
Author: AnkurDedania, Project: arachne, Lines: 21, Source: scrapy_utils.py
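A hypothetical invocation, assuming a Flask config dict that carries the two flags the function reads:

config = {'EXPORT_JSON': True, 'EXPORT_CSV': False}
settings = get_spider_settings(config)
# Only the JSON export pipeline was enabled.
assert settings.getdict('ITEM_PIPELINES') == {'arachne.pipelines.ExportJSON': 100}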

Example 15: run_spider

# Required import: from scrapy.settings import Settings [as alias]
# Or: from scrapy.settings.Settings import set [as alias]
def run_spider():
    settings = Settings()
    settings.set('ITEM_PIPELINES', {
        '__main__.JsonWriterPipeline': 100
    })

    # enable remote server certificate verification
    # see http://doc.scrapy.org/en/latest/topics/settings.html#downloader-clientcontextfactory
    settings.set('DOWNLOADER_CLIENTCONTEXTFACTORY',
                 'scrapy.core.downloader.contextfactory.BrowserLikeContextFactory'
                 )

    # uncomment the line below to enable debug logging
    # configure_logging()

    crawler = Crawler(JenkinsJobSpider, settings)
    crawler.signals.connect(callback, signal=signals.spider_closed)
    crawler.crawl()
    reactor.run()
Author: nxintech, Project: Charlie, Lines: 21, Source: jenkins_spider.py


Note: The scrapy.settings.Settings.set examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects by their original contributors, and copyright remains with the original authors; consult each project's license before using or redistributing the code. Do not reproduce without permission.