

Python settings.Settings Code Examples

This article collects typical usage examples of scrapy.settings.Settings in Python. If you are wondering how settings.Settings is used in practice, or are looking for concrete examples of it in real code, the selected snippets below may help. You can also explore further usage examples from the containing module, scrapy.settings.


Fifteen code examples of settings.Settings are shown below, ordered by popularity.
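
Before turning to the examples, here is a minimal sketch (not taken from any of the projects below) of how a Settings object is typically created, overridden, and queried; the setting names and values are illustrative only:

from scrapy.settings import Settings

settings = Settings({'DOWNLOAD_DELAY': 1.5})              # seed from a plain dict
settings.set('LOG_ENABLED', False, priority='cmdline')    # higher-priority values win
settings.setdict({'RETRY_TIMES': 5}, priority='project')  # bulk update at 'project' priority

print(settings.getfloat('DOWNLOAD_DELAY'))  # 1.5
print(settings.getbool('LOG_ENABLED'))      # False
print(settings.getint('RETRY_TIMES'))       # 5

frozen = settings.copy()
frozen.freeze()  # a frozen copy rejects further modifications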

Example 1: get_project_settings

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def get_project_settings():
    if ENVVAR not in os.environ:
        project = os.environ.get('SCRAPY_PROJECT', 'default')
        init_env(project)

    settings = Settings()
    settings_module_path = os.environ.get(ENVVAR)
    if settings_module_path:
        settings.setmodule(settings_module_path, priority='project')

    # XXX: remove this hack
    pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
    if pickled_settings:
        settings.setdict(pickle.loads(pickled_settings), priority='project')

    # XXX: deprecate and remove this functionality
    env_overrides = {k[7:]: v for k, v in os.environ.items() if
                     k.startswith('SCRAPY_')}
    if env_overrides:
        settings.setdict(env_overrides, priority='project')

    return settings 
Developer: wistbean, Project: learn_python3_spider, Lines: 24, Source file: project.py
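
A common way to consume the Settings object returned by get_project_settings (a sketch, not part of the project above) is to hand it to a CrawlerProcess; the spider name 'myspider' is a hypothetical placeholder:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Load the current project's settings.py (plus any SCRAPY_* environment overrides)
process = CrawlerProcess(get_project_settings())
process.crawl('myspider')  # 'myspider' stands in for a spider registered in the project
process.start()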

Example 2: main

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def main():
    """Main routine for the execution of the Spider"""
    # set up a signal handler to catch scraped items
    def catch_item(sender, item, **kwargs):
        print("Item extracted:", item)
    dispatcher.connect(catch_item, signal=signals.item_passed)

    settings = Settings()
    settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
    settings.set("LOG_ENABLED", False)

    # set up the crawler
    from scrapy.crawler import CrawlerProcess

    crawler = CrawlerProcess(settings)

    # register the spider with the crawler (pass the spider class, not an instance)
    crawler.crawl(EuropythonSpyder)

    # start scrapy
    print("STARTING ENGINE")
    crawler.start()  # start the crawler, which runs the registered spider
    print("ENGINE STOPPED")
Developer: PacktPublishing, Project: Learning-Python-Networking-Second-Edition, Lines: 25, Source file: EuropythonSpyder.py

Example 3: test_get_enabled_status

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def test_get_enabled_status():
    settings = Settings()
    # check for void settings
    assert _get_enabled_status(settings) == (False, False)
    # plugin enabled with settings
    settings.set('PAGE_STORAGE_ENABLED', True)
    assert _get_enabled_status(settings) == (True, False)
    settings.set('PAGE_STORAGE_ENABLED', None)
    # plugin enabled by spider_type
    for spider_type in ['auto', 'portia']:
        os.environ['SHUB_SPIDER_TYPE'] = spider_type
        assert _get_enabled_status(settings) == (True, False)
    os.environ['SHUB_SPIDER_TYPE'] = 'other_spider_type'
    assert _get_enabled_status(settings) == (False, False)
    # plugin enabled on error
    settings.set('PAGE_STORAGE_ON_ERROR_ENABLED', True)
    assert _get_enabled_status(settings) == (False, True) 
Developer: scrapy-plugins, Project: scrapy-pagestorage, Lines: 19, Source file: test_pagestorage.py

Example 4: fetch_url

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def fetch_url(cls, session, msites, platform_id, purpose):
        """Actual method to do fetch url action.

        Parameters
        ----------
            msites : list
                a list of Site model class, contains info to build spiders.
            platform_id : int
                id of platform, bind fetched url with this id.
            purpose : {'update', 'archive'}
                indicate which url to fetch.
        """
        settings = Settings(cls.conf['crawl']['scrapy'])
        settings.set('ITEM_PIPELINES',
                     {'hoaxy.crawl.pipelines.UrlPipeline': 300})
        process = CrawlerProcess(settings)
        sll = cls.conf['logging']['loggers']['scrapy']['level']
        logging.getLogger('scrapy').setLevel(logging.getLevelName(sll))
        for ms in msites:
            for sm in build_spiders_iter(ms, purpose):
                sm['kwargs']['session'] = session
                sm['kwargs']['platform_id'] = platform_id
                process.crawl(sm['cls'], *sm['args'], **sm['kwargs'])
        process.start() 
Developer: IUNetSci, Project: hoaxy-backend, Lines: 26, Source file: crawl.py

Example 5: fetch_html

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def fetch_html(cls, session, url_tuples):
        """Actual method to do fetch html action.

        Parameters
        ----------
            session : object
                a SQLAlchemy session object.
            url_tuples : list
                a list of url tuple (id, raw, status_code).
        """
        settings = Settings(cls.conf['crawl']['scrapy'])
        settings.set('ITEM_PIPELINES',
                     {'hoaxy.crawl.pipelines.HtmlPipeline': 300})
        process = CrawlerProcess(settings)
        sll = cls.conf['logging']['loggers']['scrapy']['level']
        logging.getLogger('scrapy').setLevel(logging.getLevelName(sll))
        logger.warning('Number of url to fetch html is: %s', len(url_tuples))
        process.crawl(
            HtmlSpider,
            session=session,
            url_tuples=url_tuples,
            excluded_domains=cls.conf['crawl']['excluded_domains'])
        process.start() 
Developer: IUNetSci, Project: hoaxy-backend, Lines: 25, Source file: crawl.py

Example 6: start_job

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def start_job(self, job=None, callback_fn=None):
        print(job)
        spider_job = job['spider_job']
        runner = job['runner']
        spider_cls = spider_job['spider_cls']
        spider_settings = spider_job['spider_settings']
        spider_kwargs = spider_job['spider_kwargs']

        def engine_stopped_callback():
            runner.transform_and_index(callback_fn=callback_fn)

        if callback_fn:
            print("""
==========================================================
WARNING: callback_fn is {}
==========================================================
Since start_job is called with callback_fn, make sure you end the reactor if you want the spider process to
stop after the callback function is executed. By default callback_fn=None will close the reactor.

To write a custom callback_fn

def callback_fn():
    print ("Write your own callback logic")
    from twisted.internet import reactor
    reactor.stop()
==========================================================
        """.format(callback_fn))

        spider = Crawler(spider_cls, Settings(spider_settings))
        spider.signals.connect(engine_stopped_callback, signals.engine_stopped)
        self.runner.crawl(spider, **spider_kwargs)
        """
        d = runner.crawl(spider, **spider_kwargs)
        # d.addBoth(engine_stopped_callback)
        """
        reactor.run() 
Developer: invanalabs, Project: invana-bot, Lines: 38, Source file: base.py

Example 7: test_spidermon_aws_credentials_not_set

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def test_spidermon_aws_credentials_not_set():
    settings = Settings()

    (aws_access_key_id, aws_secret_access_key) = get_aws_credentials(settings)

    assert aws_access_key_id is None
    assert aws_secret_access_key is None 
Developer: scrapinghub, Project: spidermon, Lines: 9, Source file: test_settings.py

Example 8: test_spidermon_aws_credentials

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def test_spidermon_aws_credentials(mocker):
    warn_mock = mocker.patch("spidermon.utils.settings.warnings.warn")
    settings = Settings(
        {
            "SPIDERMON_AWS_ACCESS_KEY": "aws_access_key",
            "SPIDERMON_AWS_SECRET_KEY": "aws_secret_key",
        }
    )

    (aws_access_key_id, aws_secret_access_key) = get_aws_credentials(settings)

    assert aws_access_key_id == "aws_access_key"
    assert aws_secret_access_key == "aws_secret_key"
    warn_mock.assert_called_with(mocker.ANY, DeprecationWarning) 
Developer: scrapinghub, Project: spidermon, Lines: 16, Source file: test_settings.py

Example 9: test_spidermon_aws_credentials_scrapy_like

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def test_spidermon_aws_credentials_scrapy_like():
    settings = Settings(
        {
            "SPIDERMON_AWS_ACCESS_KEY_ID": "aws_access_key_id",
            "SPIDERMON_AWS_SECRET_ACCESS_KEY": "aws_secret_access_key",
        }
    )

    (aws_access_key_id, aws_secret_access_key) = get_aws_credentials(settings)

    assert aws_access_key_id == "aws_access_key_id"
    assert aws_secret_access_key == "aws_secret_access_key" 
Developer: scrapinghub, Project: spidermon, Lines: 14, Source file: test_settings.py

Example 10: test_spidermon_aws_credentials_fall_back_to_scrapy

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def test_spidermon_aws_credentials_fall_back_to_scrapy():
    settings = Settings(
        {
            "AWS_ACCESS_KEY_ID": "scrapy_aws_access_key_id",
            "AWS_SECRET_ACCESS_KEY": "scrapy_aws_secret_access_key",
        }
    )

    (aws_access_key_id, aws_secret_access_key) = get_aws_credentials(settings)

    assert aws_access_key_id == "scrapy_aws_access_key_id"
    assert aws_secret_access_key == "scrapy_aws_secret_access_key" 
Developer: scrapinghub, Project: spidermon, Lines: 14, Source file: test_settings.py

Example 11: test_spidermon_aws_credentials_are_preferred_over_scrapy_ones

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def test_spidermon_aws_credentials_are_preferred_over_scrapy_ones():
    settings = Settings(
        {
            "AWS_ACCESS_KEY_ID": "scrapy_aws_access_key_id",
            "AWS_SECRET_ACCESS_KEY": "scrapy_aws_secret_access_key",
            "SPIDERMON_AWS_ACCESS_KEY_ID": "spidermon_aws_access_key_id",
            "SPIDERMON_AWS_SECRET_ACCESS_KEY": "spidermon_aws_secret_access_key",
        }
    )

    (aws_access_key_id, aws_secret_access_key) = get_aws_credentials(settings)

    assert aws_access_key_id == "spidermon_aws_access_key_id"
    assert aws_secret_access_key == "spidermon_aws_secret_access_key" 
Developer: scrapinghub, Project: spidermon, Lines: 16, Source file: test_settings.py

Example 12: settings

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def settings(request):
    """ Default scrapy-poet settings """
    s = dict(
        # collect scraped items to .collected_items attribute
        ITEM_PIPELINES={
            'tests.utils.CollectorPipeline': 100,
        },
        DOWNLOADER_MIDDLEWARES={
            'scrapy_poet.InjectionMiddleware': 543,
        },
    )
    return Settings(s) 
Developer: scrapinghub, Project: scrapy-poet, Lines: 14, Source file: conftest.py

Example 13: __init__

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def __init__(self, spidercls, settings=None):
        if isinstance(spidercls, Spider):
            raise ValueError(
                'The spidercls argument must be a class, not an object')

        if isinstance(settings, dict) or settings is None:
            settings = Settings(settings)

        self.spidercls = spidercls
        self.settings = settings.copy()
        self.spidercls.update_settings(self.settings)

        self.signals = SignalManager(self)
        self.stats = load_object(self.settings['STATS_CLASS'])(self)

        handler = LogCounterHandler(self, level=self.settings.get('LOG_LEVEL'))
        logging.root.addHandler(handler)

        d = dict(overridden_settings(self.settings))
        logger.info("Overridden settings: %(settings)r", {'settings': d})

        if get_scrapy_root_handler() is not None:
            # scrapy root handler already installed: update it with new settings
            install_scrapy_root_handler(self.settings)
        # lambda is assigned to Crawler attribute because this way it is not
        # garbage collected after leaving __init__ scope
        self.__remove_handler = lambda: logging.root.removeHandler(handler)
        self.signals.connect(self.__remove_handler, signals.engine_stopped)

        lf_cls = load_object(self.settings['LOG_FORMATTER'])
        self.logformatter = lf_cls.from_crawler(self)
        self.extensions = ExtensionManager.from_crawler(self)

        self.settings.freeze()
        self.crawling = False
        self.spider = None
        self.engine = None 
Developer: wistbean, Project: learn_python3_spider, Lines: 39, Source file: crawler.py

Example 14: configure_logging

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def configure_logging(settings=None, install_root_handler=True):
    """
    Initialize logging defaults for Scrapy.

    :param settings: settings used to create and configure a handler for the
        root logger (default: None).
    :type settings: dict, :class:`~scrapy.settings.Settings` object or ``None``

    :param install_root_handler: whether to install root logging handler
        (default: True)
    :type install_root_handler: bool

    This function does:

    - Route warnings and twisted logging through Python standard logging
    - Assign DEBUG and ERROR level to Scrapy and Twisted loggers respectively
    - Route stdout to log if LOG_STDOUT setting is True

    When ``install_root_handler`` is True (default), this function also
    creates a handler for the root logger according to given settings
    (see :ref:`topics-logging-settings`). You can override default options
    using ``settings`` argument. When ``settings`` is empty or None, defaults
    are used.
    """
    if not sys.warnoptions:
        # Route warnings through python logging
        logging.captureWarnings(True)

    observer = twisted_log.PythonLoggingObserver('twisted')
    observer.start()

    dictConfig(DEFAULT_LOGGING)

    if isinstance(settings, dict) or settings is None:
        settings = Settings(settings)

    if settings.getbool('LOG_STDOUT'):
        sys.stdout = StreamLogger(logging.getLogger('stdout'))

    if install_root_handler:
        install_scrapy_root_handler(settings) 
Developer: wistbean, Project: learn_python3_spider, Lines: 43, Source file: log.py

Example 15: __init__

# Required import: from scrapy import settings [as alias]
# Or: from scrapy.settings import Settings [as alias]
def __init__(self, store_uri, download_func=None, settings=None):
        if not store_uri:
            raise NotConfigured

        if isinstance(settings, dict) or settings is None:
            settings = Settings(settings)

        cls_name = "FilesPipeline"
        self.store = self._get_store(store_uri)
        resolve = functools.partial(self._key_for_pipe,
                                    base_class_name=cls_name,
                                    settings=settings)
        self.expires = settings.getint(
            resolve('FILES_EXPIRES'), self.EXPIRES
        )
        if not hasattr(self, "FILES_URLS_FIELD"):
            self.FILES_URLS_FIELD = self.DEFAULT_FILES_URLS_FIELD
        if not hasattr(self, "FILES_RESULT_FIELD"):
            self.FILES_RESULT_FIELD = self.DEFAULT_FILES_RESULT_FIELD
        self.files_urls_field = settings.get(
            resolve('FILES_URLS_FIELD'), self.FILES_URLS_FIELD
        )
        self.files_result_field = settings.get(
            resolve('FILES_RESULT_FIELD'), self.FILES_RESULT_FIELD
        )

        super(FilesPipeline, self).__init__(download_func=download_func, settings=settings) 
Developer: wistbean, Project: learn_python3_spider, Lines: 29, Source file: files.py


Note: The scrapy.settings.Settings examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective developers, and the source code copyright remains with the original authors. Please consult each project's license before distributing or using the code; do not reproduce without permission.