Python signals.spider_closed Method Code Examples

This article collects typical usage examples of Python's scrapy.signals.spider_closed. If you are wondering what signals.spider_closed does, how to use it, or what real-world code built on it looks like, the curated examples below should help. You can also explore further usage examples of the containing module, scrapy.signals.


The following presents 15 code examples of the signals.spider_closed method, sorted by popularity by default.
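Before the individual examples, here is a minimal, self-contained sketch of the canonical pattern most of them follow: connect a handler to signals.spider_closed from the spider's from_crawler classmethod, and do cleanup in the handler. The spider name and URL below are placeholders, not taken from any of the projects cited later.

import scrapy
from scrapy import signals


class DemoSpider(scrapy.Spider):
    name = "demo"                         # placeholder name
    start_urls = ["https://example.com"]  # placeholder URL

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super().from_crawler(crawler, *args, **kwargs)
        # Ask the crawler to call spider.spider_closed when the spider shuts down.
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def parse(self, response):
        yield {"url": response.url}

    def spider_closed(self, spider):
        # Runs once at shutdown: a good place to flush buffers or close files.
        self.logger.info("spider closed: %s", spider.name)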

Example 1: update_collection_set

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def update_collection_set(cls, item, response, spider):
        # if cls.entry == "COLLECTION":
        cls.collection_set.add(item["pid"].split('_')[0])
        cls.process = len(cls.collection_set) - cls.init_colletion_set_size
        # for debug only
        if cls.process > cls.maxsize:
            if cls.entry == "COLLECTION":
                with open("./.trace", "wb") as f:
                    pickle.dump(cls.collection_set, f)

            # store .json file
            f = open("data_{0}.json".format('_'.join(cf.get('SRH', 'TAGS').split(" "))), 'w')
            data = [item.__dict__() for item in cls.data]
            json.dump(data, f)

            print("Crawling complete, got {0} data".format(len(cls.data)))
            f.close()
            os.abort()
            # raise CloseSpider
            # cls.signalManger.send_catch_log(signal=signals.spider_closed) 
Author: vicety, Project: Pixiv-Crawler, Lines: 22, Source: pixiv-beta.py
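Example 1 ends the crawl with os.abort(), which kills the process immediately and skips Scrapy's shutdown sequence; the commented-out alternative, raising CloseSpider, stops the crawl gracefully so that spider_closed handlers still run. A sketch of that gentler approach, with an illustrative counter and threshold that are not part of the Pixiv-Crawler project:

import scrapy
from scrapy.exceptions import CloseSpider


class LimitedSpider(scrapy.Spider):
    name = "limited"                      # placeholder name
    start_urls = ["https://example.com"]  # placeholder URL
    max_items = 100                       # illustrative threshold

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.item_count = 0

    def parse(self, response):
        self.item_count += 1
        if self.item_count >= self.max_items:
            # Unlike os.abort(), this shuts the engine down cleanly,
            # so the spider_closed signal is still emitted.
            raise CloseSpider("item limit reached")
        yield {"url": response.url}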

Example 2: _open_webdriver

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def _open_webdriver(self): # this method also doubles as a way to restart the webdriver
        try: self.spider_closed()
        except: pass
        from selenium import webdriver
        option = webdriver.ChromeOptions()
        extset = ['enable-automation', 'ignore-certificate-errors']
        ignimg = "profile.managed_default_content_settings.images"
        mobile = {'deviceName':'Galaxy S5'}
        option.add_argument("--disable-infobars")                       # 旧版本关闭“chrome正受到自动测试软件的控制”信息
        option.add_experimental_option("excludeSwitches", extset)       # 新版本关闭“chrome正受到自动测试软件的控制”信息
        option.add_experimental_option("useAutomationExtension", False) # 新版本关闭“请停用以开发者模式运行的扩展程序”信息
        # option.add_experimental_option('mobileEmulation', mobile)     # 是否使用手机模式打开浏览器
        # option.add_experimental_option("prefs", {ignore_image: 2})    # 开启浏览器时不加载图片(headless模式该配置无效)
        # option.add_argument('--start-maximized')                      # 开启浏览器时是否最大化(headless模式该配置无效)
        # option.add_argument('--headless')                             # 无界面打开浏览器
        # option.add_argument('--window-size=1920,1080')                # 无界面打开浏览器时候只能用这种方式实现最大化
        # option.add_argument('--disable-gpu')                          # 禁用 gpu 硬件加速
        # option.add_argument("--auto-open-devtools-for-tabs")          # 开启浏览器时候是否打开开发者工具(F12)
        # option.add_argument("--user-agent=Mozilla/5.0 HELL")          # 修改 UA 信息
        # option.add_argument('--proxy-server=http://127.0.0.1:8888')   # 增加代理
        self.webdriver = webdriver.Chrome(chrome_options=option) 
Author: cilame, Project: vrequest, Lines: 23, Source: middlewares.py
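The method above lives in a downloader middleware (middlewares.py), so it only runs once the middleware is enabled in the project settings. A sketch of that wiring; the module path and class name are placeholders, not the ones used in vrequest:

# settings.py -- illustrative; replace the path with your middleware class
DOWNLOADER_MIDDLEWARES = {
    "myproject.middlewares.SeleniumMiddleware": 543,
}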

Example 3: __init__

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def __init__(self, crawler):
        self.crawler = crawler

        self.close_on = {
            'timeout': crawler.settings.getfloat('CLOSESPIDER_TIMEOUT'),
            'itemcount': crawler.settings.getint('CLOSESPIDER_ITEMCOUNT'),
            'pagecount': crawler.settings.getint('CLOSESPIDER_PAGECOUNT'),
            'errorcount': crawler.settings.getint('CLOSESPIDER_ERRORCOUNT'),
            }

        if not any(self.close_on.values()):
            raise NotConfigured

        self.counter = defaultdict(int)

        if self.close_on.get('errorcount'):
            crawler.signals.connect(self.error_count, signal=signals.spider_error)
        if self.close_on.get('pagecount'):
            crawler.signals.connect(self.page_count, signal=signals.response_received)
        if self.close_on.get('timeout'):
            crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
        if self.close_on.get('itemcount'):
            crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed) 
Author: wistbean, Project: learn_python3_spider, Lines: 26, Source: closespider.py

Example 4: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(pixivSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(cls.update_collection_set, signal=signals.item_scraped)
        return spider

    # allowed_domains = [] 
Author: vicety, Project: Pixiv-Crawler, Lines: 9, Source: pixiv-beta.py

Example 5: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        # store .trace file
        if self.entry == "COLLECTION":
            with open("./.trace", "wb") as f:
                pickle.dump(self.collection_set, f)

        # store .json file
        f = open("data_{0}.json".format('_'.join(cf.get('SRH', 'TAGS').split(" "))), 'w')
        data = [item.__dict__() for item in self.data]
        json.dump(data, f)

        print("Crawling complete, got {0} data".format(len(self.data)))
        f.close() 
Author: vicety, Project: Pixiv-Crawler, Lines: 15, Source: pixiv-beta.py

Example 6: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        return s 
Author: cilame, Project: vrequest, Lines: 7, Source: middlewares.py

Example 7: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self):
        if getattr(self, 'webdriver', None): self.webdriver.quit() 
Author: cilame, Project: vrequest, Lines: 4, Source: middlewares.py

Example 8: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        ext = cls(
            crawler=crawler,
            spider_opened_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_OPEN_MONITORS"
            ),
            spider_closed_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_CLOSE_MONITORS"
            ),
            engine_stopped_suites=crawler.settings.getlist(
                "SPIDERMON_ENGINE_STOP_MONITORS"
            ),
            spider_opened_expression_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_OPEN_EXPRESSION_MONITORS"
            ),
            spider_closed_expression_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_CLOSE_EXPRESSION_MONITORS"
            ),
            engine_stopped_expression_suites=crawler.settings.getlist(
                "SPIDERMON_ENGINE_STOP_EXPRESSION_MONITORS"
            ),
            expressions_monitor_class=crawler.settings.get(
                "SPIDERMON_EXPRESSIONS_MONITOR_CLASS"
            ),
            periodic_suites=crawler.settings.getdict("SPIDERMON_PERIODIC_MONITORS"),
        )
        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(ext.engine_stopped, signal=signals.engine_stopped)
        return ext 
Author: scrapinghub, Project: spidermon, Lines: 32, Source: extensions.py
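The from_crawler above reads its monitor suites from project settings. A sketch of how those settings might look, using Spidermon's documented extension path; the monitor suite path itself is a placeholder:

# settings.py -- the suite path is a placeholder for your own monitor suite
SPIDERMON_ENABLED = True
EXTENSIONS = {
    "spidermon.contrib.scrapy.extensions.Spidermon": 500,
}
SPIDERMON_SPIDER_CLOSE_MONITORS = (
    "myproject.monitors.SpiderCloseMonitorSuite",
)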

Example 9: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        self._run_suites(spider, self.spider_closed_suites)
        for task in self.periodic_tasks[spider]:
            task.stop() 
Author: scrapinghub, Project: spidermon, Lines: 6, Source: extensions.py

Example 10: test_spider_closed_connect_signal

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def test_spider_closed_connect_signal(mocker, spidermon_enabled_settings):
    spider_closed_method = mocker.patch.object(Spidermon, "spider_closed")

    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(
        signal=signals.spider_closed, spider=spider, reason=None
    )

    assert spider_closed_method.called, "spider_closed not called" 
Author: scrapinghub, Project: spidermon, Lines: 12, Source: test_spidermon_signal_connect.py

Example 11: test_spider_closed_suites_should_run

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def test_spider_closed_suites_should_run(get_crawler, suites):
    """The suites defined at spider_closed_suites should be loaded and run """
    crawler = get_crawler()
    spidermon = Spidermon(
        crawler, spider_opened_suites=suites, spider_closed_suites=suites
    )
    spidermon.spider_closed_suites[0].run = mock.MagicMock()
    spidermon.spider_opened(crawler.spider)
    spidermon.spider_closed(crawler.spider)
    assert spidermon.spider_closed_suites[0].__class__.__name__ == "Suite01"
    spidermon.spider_closed_suites[0].run.assert_called_once_with(mock.ANY) 
Author: scrapinghub, Project: spidermon, Lines: 13, Source: test_extension.py

Example 12: test_spider_closed_suites_should_run_from_signal

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def test_spider_closed_suites_should_run_from_signal(get_crawler, suites):
    """The suites defined at SPIDERMON_SPIDER_CLOSE_MONITORS setting should be loaded and run """
    settings = {"SPIDERMON_SPIDER_CLOSE_MONITORS": suites}
    crawler = get_crawler(settings)
    spidermon = Spidermon.from_crawler(crawler)
    spidermon.spider_closed_suites[0].run = mock.MagicMock()
    crawler.signals.send_catch_log(signal=signals.spider_closed, spider=crawler.spider)
    spidermon.spider_closed_suites[0].run.assert_called_once_with(mock.ANY) 
Author: scrapinghub, Project: spidermon, Lines: 10, Source: test_extension.py

Example 13: __init__

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
# (This legacy snippet also needs a dispatcher import, e.g. from scrapy.xlib.pydispatch
#  import dispatcher in older Scrapy versions; the original import is not shown.)
def __init__(self):
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed) 
Author: mrorii, Project: tabebot, Lines: 5, Source: pipelines.py

Example 14: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        [e.finish_exporting() for e in self.exporters.values()]
        [f.close() for f in self.files.values()] 
Author: mrorii, Project: tabebot, Lines: 5, Source: pipelines.py

Example 15: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        return s 
Author: richshaw2015, Project: oh-my-rss, Lines: 8, Source: middlewares.py


Note: the scrapy.signals.spider_closed examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors, who retain copyright; consult each project's license before redistributing or reusing the code. Do not reproduce this article without permission.