

Python signals.spider_closed Code Examples

This article collects typical usage examples of Python's scrapy.signals.spider_closed. If you are wondering what signals.spider_closed does, how to use it, or what working code looks like, the curated examples below should help. You can also browse further usage examples for the containing module, scrapy.signals.

Below are 29 code examples of signals.spider_closed, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
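One note before the examples: strictly speaking, spider_closed is a signal object rather than a method; what the examples below have in common is connecting a handler method to it. As a reference point, here is a minimal, self-contained sketch of that canonical pattern (the spider name and URL are illustrative placeholders, not taken from any project below): hook the handler up in from_crawler, then release resources when the signal fires.

import scrapy
from scrapy import signals


class ExampleSpider(scrapy.Spider):
    # Hypothetical spider, used only to illustrate the signal wiring.
    name = "example"
    start_urls = ["http://quotes.toscrape.com"]

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super().from_crawler(crawler, *args, **kwargs)
        # Scrapy fires spider_closed with (spider, reason) once the crawl ends.
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider, reason):
        # Typical place to flush buffers, close files, or quit a webdriver.
        self.logger.info("spider closed (reason=%s)", reason)

    def parse(self, response):
        for text in response.css("div.quote span.text::text").getall():
            yield {"text": text}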

Example 1: update_collection_set

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def update_collection_set(cls, item, response, spider):
        # if cls.entry == "COLLECTION":
        cls.collection_set.add(item["pid"].split('_')[0])
        cls.process = len(cls.collection_set) - cls.init_colletion_set_size
        # for debug only
        if cls.process > cls.maxsize:
            if cls.entry == "COLLECTION":
                with open("./.trace", "wb") as f:
                    pickle.dump(cls.collection_set, f)

            # store .json file
            f = open("data_{0}.json".format('_'.join(cf.get('SRH', 'TAGS').split(" "))), 'w')
            data = [item.__dict__() for item in cls.data]
            json.dump(data, f)

            print("Crawling complete, got {0} data".format(len(cls.data)))
            f.close()
            os.abort()
            # raise CloseSpider
            # cls.signalManger.send_catch_log(signal=signals.spider_closed) 
Author: vicety, Project: Pixiv-Crawler, Lines: 22, Source file: pixiv-beta.py


Example 2: _open_webdriver

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def _open_webdriver(self):  # this method also serves to restart the webdriver
        try:
            self.spider_closed()
        except Exception:
            pass
        from selenium import webdriver
        option = webdriver.ChromeOptions()
        extset = ['enable-automation', 'ignore-certificate-errors']
        ignimg = "profile.managed_default_content_settings.images"
        mobile = {'deviceName': 'Galaxy S5'}
        option.add_argument("--disable-infobars")                       # older Chrome: hide the "Chrome is being controlled by automated test software" bar
        option.add_experimental_option("excludeSwitches", extset)       # newer Chrome: hide the "Chrome is being controlled by automated test software" bar
        option.add_experimental_option("useAutomationExtension", False) # newer Chrome: suppress the "Please disable developer mode extensions" prompt
        # option.add_experimental_option('mobileEmulation', mobile)     # open the browser in mobile-emulation mode
        # option.add_experimental_option("prefs", {ignimg: 2})          # do not load images at startup (ignored in headless mode)
        # option.add_argument('--start-maximized')                      # maximize the window at startup (ignored in headless mode)
        # option.add_argument('--headless')                             # run the browser without a visible window
        # option.add_argument('--window-size=1920,1080')                # in headless mode this is the only way to get a full-size viewport
        # option.add_argument('--disable-gpu')                          # disable GPU hardware acceleration
        # option.add_argument("--auto-open-devtools-for-tabs")          # open DevTools (F12) when the browser starts
        # option.add_argument("--user-agent=Mozilla/5.0 HELL")          # override the User-Agent string
        # option.add_argument('--proxy-server=http://127.0.0.1:8888')   # route traffic through a proxy
        self.webdriver = webdriver.Chrome(chrome_options=option)  # note: Selenium 4 renamed chrome_options to options
Author: cilame, Project: vrequest, Lines: 23, Source file: middlewares.py


Example 3: __init__

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def __init__(self, crawler):
        self.crawler = crawler

        self.close_on = {
            'timeout': crawler.settings.getfloat('CLOSESPIDER_TIMEOUT'),
            'itemcount': crawler.settings.getint('CLOSESPIDER_ITEMCOUNT'),
            'pagecount': crawler.settings.getint('CLOSESPIDER_PAGECOUNT'),
            'errorcount': crawler.settings.getint('CLOSESPIDER_ERRORCOUNT'),
            }

        if not any(self.close_on.values()):
            raise NotConfigured

        self.counter = defaultdict(int)

        if self.close_on.get('errorcount'):
            crawler.signals.connect(self.error_count, signal=signals.spider_error)
        if self.close_on.get('pagecount'):
            crawler.signals.connect(self.page_count, signal=signals.response_received)
        if self.close_on.get('timeout'):
            crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
        if self.close_on.get('itemcount'):
            crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed) 
Author: wistbean, Project: learn_python3_spider, Lines: 26, Source file: closespider.py


Example 4: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(pixivSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(cls.update_collection_set, signal=signals.item_scraped)
        return spider

    # allowed_domains = [] 
Author: vicety, Project: Pixiv-Crawler, Lines: 9, Source file: pixiv-beta.py


Example 5: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        # store .trace file
        if self.entry == "COLLECTION":
            with open("./.trace", "wb") as f:
                pickle.dump(self.collection_set, f)

        # store .json file
        f = open("data_{0}.json".format('_'.join(cf.get('SRH', 'TAGS').split(" "))), 'w')
        data = [item.__dict__() for item in self.data]
        json.dump(data, f)

        print("Crawling complete, got {0} data".format(len(self.data)))
        f.close() 
Author: vicety, Project: Pixiv-Crawler, Lines: 15, Source file: pixiv-beta.py


Example 6: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        return s 
Author: cilame, Project: vrequest, Lines: 7, Source file: middlewares.py


Example 7: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self):
        if getattr(self, 'webdriver', None): self.webdriver.quit() 
Author: cilame, Project: vrequest, Lines: 4, Source file: middlewares.py


Example 8: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        ext = cls(
            crawler=crawler,
            spider_opened_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_OPEN_MONITORS"
            ),
            spider_closed_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_CLOSE_MONITORS"
            ),
            engine_stopped_suites=crawler.settings.getlist(
                "SPIDERMON_ENGINE_STOP_MONITORS"
            ),
            spider_opened_expression_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_OPEN_EXPRESSION_MONITORS"
            ),
            spider_closed_expression_suites=crawler.settings.getlist(
                "SPIDERMON_SPIDER_CLOSE_EXPRESSION_MONITORS"
            ),
            engine_stopped_expression_suites=crawler.settings.getlist(
                "SPIDERMON_ENGINE_STOP_EXPRESSION_MONITORS"
            ),
            expressions_monitor_class=crawler.settings.get(
                "SPIDERMON_EXPRESSIONS_MONITOR_CLASS"
            ),
            periodic_suites=crawler.settings.getdict("SPIDERMON_PERIODIC_MONITORS"),
        )
        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(ext.engine_stopped, signal=signals.engine_stopped)
        return ext 
Author: scrapinghub, Project: spidermon, Lines: 32, Source file: extensions.py


Example 9: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        self._run_suites(spider, self.spider_closed_suites)
        for task in self.periodic_tasks[spider]:
            task.stop() 
Author: scrapinghub, Project: spidermon, Lines: 6, Source file: extensions.py


Example 10: test_spider_closed_connect_signal

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def test_spider_closed_connect_signal(mocker, spidermon_enabled_settings):
    spider_closed_method = mocker.patch.object(Spidermon, "spider_closed")

    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    spider = Spider.from_crawler(crawler, "example.com")
    crawler.signals.send_catch_log(
        signal=signals.spider_closed, spider=spider, reason=None
    )

    assert spider_closed_method.called, "spider_closed not called" 
Author: scrapinghub, Project: spidermon, Lines: 12, Source file: test_spidermon_signal_connect.py


Example 11: test_spider_closed_suites_should_run

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def test_spider_closed_suites_should_run(get_crawler, suites):
    """The suites defined at spider_closed_suites should be loaded and run."""
    crawler = get_crawler()
    spidermon = Spidermon(
        crawler, spider_opened_suites=suites, spider_closed_suites=suites
    )
    spidermon.spider_closed_suites[0].run = mock.MagicMock()
    spidermon.spider_opened(crawler.spider)
    spidermon.spider_closed(crawler.spider)
    assert spidermon.spider_closed_suites[0].__class__.__name__ == "Suite01"
    spidermon.spider_closed_suites[0].run.assert_called_once_with(mock.ANY) 
Author: scrapinghub, Project: spidermon, Lines: 13, Source file: test_extension.py


Example 12: test_spider_closed_suites_should_run_from_signal

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def test_spider_closed_suites_should_run_from_signal(get_crawler, suites):
    """The suites defined in the SPIDERMON_SPIDER_CLOSE_MONITORS setting should be loaded and run."""
    settings = {"SPIDERMON_SPIDER_CLOSE_MONITORS": suites}
    crawler = get_crawler(settings)
    spidermon = Spidermon.from_crawler(crawler)
    spidermon.spider_closed_suites[0].run = mock.MagicMock()
    crawler.signals.send_catch_log(signal=signals.spider_closed, spider=crawler.spider)
    spidermon.spider_closed_suites[0].run.assert_called_once_with(mock.ANY) 
Author: scrapinghub, Project: spidermon, Lines: 10, Source file: test_extension.py


Example 13: __init__

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def __init__(self):
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed) 
Author: mrorii, Project: tabebot, Lines: 5, Source file: pipelines.py


Example 14: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        # finish every exporter and close its file
        for e in self.exporters.values():
            e.finish_exporting()
        for f in self.files.values():
            f.close()
Author: mrorii, Project: tabebot, Lines: 5, Source file: pipelines.py


Example 15: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        return s 
Author: richshaw2015, Project: oh-my-rss, Lines: 8, Source file: middlewares.py


Example 16: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        # spider.browser appears to flag whether this spider uses a browser;
        # the middleware-owned self.browser is the instance that gets quit
        if spider.browser:
            self.browser.quit()
Author: richshaw2015, Project: oh-my-rss, Lines: 5, Source file: middlewares.py


Example 17: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline 
Author: zhanghe06, Project: news_spider, Lines: 7, Source file: exporter_csv.py


Example 18: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_csv = self.files.pop(spider)
        file_csv.close() 
Author: zhanghe06, Project: news_spider, Lines: 6, Source file: exporter_csv.py


Example 19: _set_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def _set_crawler(self, crawler):
        self.crawler = crawler
        self.settings = crawler.settings
        crawler.signals.connect(self.close, signals.spider_closed) 
Author: wistbean, Project: learn_python3_spider, Lines: 6, Source file: __init__.py


Example 20: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(o.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(o.item_dropped, signal=signals.item_dropped)
        crawler.signals.connect(o.response_received, signal=signals.response_received)
        return o 
Author: wistbean, Project: learn_python3_spider, Lines: 10, Source file: corestats.py


Example 21: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider, reason):
        finish_time = datetime.datetime.utcnow()
        elapsed_time = finish_time - self.stats.get_value('start_time')
        elapsed_time_seconds = elapsed_time.total_seconds()
        self.stats.set_value('elapsed_time_seconds', elapsed_time_seconds, spider=spider)
        self.stats.set_value('finish_time', finish_time, spider=spider)
        self.stats.set_value('finish_reason', reason, spider=spider) 
Author: wistbean, Project: learn_python3_spider, Lines: 9, Source file: corestats.py


Example 22: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        if not crawler.settings.getbool('MEMDEBUG_ENABLED'):
            raise NotConfigured
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o 
Author: wistbean, Project: learn_python3_spider, Lines: 8, Source file: memdebug.py


Example 23: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider, reason):
        gc.collect()
        self.stats.set_value('memdebug/gc_garbage_count', len(gc.garbage), spider=spider)
        for cls, wdict in six.iteritems(live_refs):
            if not wdict:
                continue
            self.stats.set_value('memdebug/live_refs/%s' % cls.__name__, len(wdict), spider=spider) 
Author: wistbean, Project: learn_python3_spider, Lines: 9, Source file: memdebug.py


Example 24: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        spider_stats = self.stats.get_stats(spider)
        body = "Global stats\n\n"
        body += "\n".join("%-50s : %s" % i for i in self.stats.get_stats().items())
        body += "\n\n%s stats\n\n" % spider.name
        body += "\n".join("%-50s : %s" % i for i in spider_stats.items())
        return self.mail.send(self.recipients, "Scrapy stats for: %s" % spider.name, body) 
Author: wistbean, Project: learn_python3_spider, Lines: 9, Source file: statsmailer.py


Example 25: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        task = getattr(self, 'task', False)
        if task and task.active():
            task.cancel() 
Author: wistbean, Project: learn_python3_spider, Lines: 6, Source file: closespider.py


Example 26: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        jobdir = job_dir(crawler.settings)
        if not jobdir:
            raise NotConfigured

        obj = cls(jobdir)
        crawler.signals.connect(obj.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(obj.spider_opened, signal=signals.spider_opened)
        return obj 
Author: wistbean, Project: learn_python3_spider, Lines: 11, Source file: spiderstate.py


Example 27: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider):
        if self.jobdir:
            with open(self.statefn, 'wb') as f:
                pickle.dump(spider.state, f, protocol=2) 
Author: wistbean, Project: learn_python3_spider, Lines: 6, Source file: spiderstate.py


Example 28: spider_closed

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def spider_closed(self, spider, reason):
        if self.task and self.task.running:
            self.task.stop() 
Author: wistbean, Project: learn_python3_spider, Lines: 5, Source file: logstats.py


Example 29: from_crawler

# Required import: from scrapy import signals [as alias]
# Or: from scrapy.signals import spider_closed [as alias]
def from_crawler(cls, crawler):
        o = cls(crawler.settings)
        o.crawler = crawler
        crawler.signals.connect(o.open_spider, signals.spider_opened)
        crawler.signals.connect(o.close_spider, signals.spider_closed)
        crawler.signals.connect(o.item_scraped, signals.item_scraped)
        return o 
Author: wistbean, Project: learn_python3_spider, Lines: 9, Source file: feedexport.py



Note: The scrapy.signals.spider_closed examples in this article were collected from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by the community; copyright of the source code remains with the original authors. Refer to each project's license before distributing or using the code, and do not republish without permission.