本文整理汇总了 Python 中 scrapy.signals.spider_closed 信号的典型用法代码示例。如果您正苦于以下问题:Python signals.spider_closed 的具体用法?Python signals.spider_closed 怎么用?Python signals.spider_closed 使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该信号所在模块 scrapy.signals 的用法示例。
在下文中一共展示了 signals.spider_closed 的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: update_collection_set
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def update_collection_set(cls, item, response, spider):
    """Record the scraped item's id prefix and abort once the quota is exceeded.

    The part of ``item["pid"]`` before the first underscore is added to
    ``cls.collection_set``, and ``cls.process`` is set to the number of ids
    gained relative to ``cls.init_colletion_set_size``. When that number
    exceeds ``cls.maxsize`` the collected data is written to disk and the
    process is aborted.

    NOTE(review): the original indentation was lost; the nesting used here
    (trace dump only for the "COLLECTION" entry, JSON dump and abort inside
    the quota check) is a reconstruction -- confirm against the upstream file.

    Args:
        item: Scraped item; ``item["pid"]`` must be a string.
        response: Not used by this handler.
        spider: Not used by this handler.
    """
    # if cls.entry == "COLLECTION":
    cls.collection_set.add(item["pid"].split('_')[0])
    cls.process = len(cls.collection_set) - cls.init_colletion_set_size
    # For debugging only.
    if cls.process > cls.maxsize:
        if cls.entry == "COLLECTION":
            with open("./.trace", "wb") as f:
                pickle.dump(cls.collection_set, f)
        # Store the .json file. The payload is built before the file is
        # opened, and the context manager closes the handle even when
        # json.dump raises.
        tags = '_'.join(cf.get('SRH', 'TAGS').split(" "))
        # NOTE(review): ``__dict__`` is called like a method here; presumably
        # the item class defines it that way -- verify.
        data = [record.__dict__() for record in cls.data]
        with open("data_{0}.json".format(tags), 'w') as f:
            json.dump(data, f)
        print("Crawling complete, got {0} data".format(len(cls.data)))
        # os.abort() ends the process immediately; nothing after it runs.
        os.abort()
        # raise CloseSpider
        # cls.signalManger.send_catch_log(signal=signals.spider_closed)
示例2: _open_webdriver
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def _open_webdriver(self):
    """Start a Chrome webdriver and store it on ``self.webdriver``.

    This function doubles as the "restart webdriver" routine: it first calls
    ``self.spider_closed()`` to shut down any previous driver, ignoring
    failures, and then launches a new browser.
    """
    import logging

    # Best-effort shutdown of a previous driver. Only ordinary errors are
    # suppressed, so KeyboardInterrupt/SystemExit still propagate.
    try:
        self.spider_closed()
    except Exception:
        logging.getLogger(__name__).debug(
            "ignoring error while closing previous webdriver", exc_info=True
        )
    from selenium import webdriver
    option = webdriver.ChromeOptions()
    extset = ['enable-automation', 'ignore-certificate-errors']
    # ``ignimg`` and ``mobile`` are only used by the optional settings that
    # are commented out below.
    ignimg = "profile.managed_default_content_settings.images"
    mobile = {'deviceName': 'Galaxy S5'}
    # Older Chrome: hide the "Chrome is being controlled by automated test software" bar.
    option.add_argument("--disable-infobars")
    # Newer Chrome: hide the "Chrome is being controlled by automated test software" bar.
    option.add_experimental_option("excludeSwitches", extset)
    # Newer Chrome: hide the "disable developer mode extensions" prompt.
    option.add_experimental_option("useAutomationExtension", False)
    # option.add_experimental_option('mobileEmulation', mobile)  # open the browser in mobile mode
    # option.add_experimental_option("prefs", {ignimg: 2})       # do not load images (no effect in headless mode)
    # option.add_argument('--start-maximized')                   # start maximized (no effect in headless mode)
    # option.add_argument('--headless')                          # run without a UI
    # option.add_argument('--window-size=1920,1080')             # the only way to "maximize" in headless mode
    # option.add_argument('--disable-gpu')                       # disable GPU hardware acceleration
    # option.add_argument("--auto-open-devtools-for-tabs")       # open DevTools (F12) on start
    # option.add_argument("--user-agent=Mozilla/5.0 HELL")       # override the user agent
    # option.add_argument('--proxy-server=http://127.0.0.1:8888')  # route through a proxy
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    self.webdriver = webdriver.Chrome(options=option)
示例3: __init__
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def __init__(self, crawler):
    """Read the CLOSESPIDER_* limits and subscribe to the signals they need.

    Args:
        crawler: Crawler whose ``settings`` provide the limits and whose
            ``signals`` manager receives the handler connections.

    Raises:
        NotConfigured: If none of the four limits has a truthy value.
    """
    self.crawler = crawler
    settings = crawler.settings
    self.close_on = {
        'timeout': settings.getfloat('CLOSESPIDER_TIMEOUT'),
        'itemcount': settings.getint('CLOSESPIDER_ITEMCOUNT'),
        'pagecount': settings.getint('CLOSESPIDER_PAGECOUNT'),
        'errorcount': settings.getint('CLOSESPIDER_ERRORCOUNT'),
    }
    if not any(self.close_on.values()):
        raise NotConfigured
    self.counter = defaultdict(int)

    # (limit name, handler attribute, signal); a handler is connected only
    # when its limit is enabled.
    conditional_hooks = (
        ('errorcount', 'error_count', signals.spider_error),
        ('pagecount', 'page_count', signals.response_received),
        ('timeout', 'spider_opened', signals.spider_opened),
        ('itemcount', 'item_scraped', signals.item_scraped),
    )
    for limit, handler_name, sig in conditional_hooks:
        if self.close_on.get(limit):
            crawler.signals.connect(getattr(self, handler_name), signal=sig)

    # The close handler is connected regardless of which limits are set.
    crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
示例4: from_crawler
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def from_crawler(cls, crawler, *args, **kwargs):
    """Create the spider through the parent ``from_crawler`` and connect its signal handlers.

    The instance's ``spider_closed`` is connected to the ``spider_closed``
    signal, and the class-level ``update_collection_set`` to ``item_scraped``.

    Returns:
        The spider instance produced by the parent implementation.
    """
    instance = super(pixivSpider, cls).from_crawler(crawler, *args, **kwargs)
    connect = crawler.signals.connect
    connect(instance.spider_closed, signal=signals.spider_closed)
    connect(cls.update_collection_set, signal=signals.item_scraped)
    return instance
# allowed_domains = []
示例5: spider_closed
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def spider_closed(self, spider):
    """Persist the crawl results when the spider closes.

    For the "COLLECTION" entry the collected id set is pickled to
    ``./.trace``. The gathered data is then dumped to a
    ``data_<tags>.json`` file, where ``<tags>`` is the ``SRH``/``TAGS``
    config value with spaces replaced by underscores.

    NOTE(review): the original indentation was lost; the JSON dump is assumed
    to run for every entry type, not only "COLLECTION" -- confirm.

    Args:
        spider: The spider being closed; not used by this handler.
    """
    # Store the .trace file.
    if self.entry == "COLLECTION":
        with open("./.trace", "wb") as f:
            pickle.dump(self.collection_set, f)
    # Store the .json file. The payload is built before the file is opened,
    # and the context manager closes the handle even when json.dump raises.
    tags = '_'.join(cf.get('SRH', 'TAGS').split(" "))
    # NOTE(review): ``__dict__`` is called like a method here; presumably the
    # item class defines it that way -- verify.
    data = [record.__dict__() for record in self.data]
    with open("data_{0}.json".format(tags), 'w') as f:
        json.dump(data, f)
    print("Crawling complete, got {0} data".format(len(self.data)))
示例6: from_crawler
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def from_crawler(cls, crawler):
    """Instantiate ``cls`` and connect its open/close handlers to the crawler's signals.

    Returns:
        The new instance, with ``spider_opened`` and ``spider_closed``
        connected to the signals of the same names.
    """
    instance = cls()
    # Handler attribute names match the signal names, so one list drives both.
    for name in ("spider_opened", "spider_closed"):
        crawler.signals.connect(getattr(instance, name), signal=getattr(signals, name))
    return instance
示例7: spider_closed
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def spider_closed(self):
    """Quit the webdriver held on ``self.webdriver``, if there is one.

    Safe to call repeatedly: the reference is cleared after quitting, so a
    later call (for example when restarting the driver) does not try to quit
    the same driver twice.
    """
    driver = getattr(self, 'webdriver', None)
    if driver:
        try:
            driver.quit()
        finally:
            # Drop the reference even if quit() fails; the driver is unusable either way.
            self.webdriver = None
示例8: from_crawler
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def from_crawler(cls, crawler):
    """Build the extension from the crawler's SPIDERMON_* settings and connect its handlers.

    Returns:
        The extension instance, connected to the ``spider_opened``,
        ``spider_closed`` and ``engine_stopped`` signals.
    """
    settings = crawler.settings

    # Constructor keyword -> name of the list-valued setting that feeds it.
    list_settings = {
        "spider_opened_suites": "SPIDERMON_SPIDER_OPEN_MONITORS",
        "spider_closed_suites": "SPIDERMON_SPIDER_CLOSE_MONITORS",
        "engine_stopped_suites": "SPIDERMON_ENGINE_STOP_MONITORS",
        "spider_opened_expression_suites": "SPIDERMON_SPIDER_OPEN_EXPRESSION_MONITORS",
        "spider_closed_expression_suites": "SPIDERMON_SPIDER_CLOSE_EXPRESSION_MONITORS",
        "engine_stopped_expression_suites": "SPIDERMON_ENGINE_STOP_EXPRESSION_MONITORS",
    }
    init_kwargs = {arg: settings.getlist(key) for arg, key in list_settings.items()}
    init_kwargs["expressions_monitor_class"] = settings.get(
        "SPIDERMON_EXPRESSIONS_MONITOR_CLASS"
    )
    init_kwargs["periodic_suites"] = settings.getdict("SPIDERMON_PERIODIC_MONITORS")

    ext = cls(crawler=crawler, **init_kwargs)

    connect = crawler.signals.connect
    connect(ext.spider_opened, signal=signals.spider_opened)
    connect(ext.spider_closed, signal=signals.spider_closed)
    connect(ext.engine_stopped, signal=signals.engine_stopped)
    return ext
示例9: spider_closed
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def spider_closed(self, spider):
    """Run the spider-closed suites, then stop the periodic tasks registered for ``spider``.

    Args:
        spider: The spider being closed; also the key into
            ``self.periodic_tasks``.
    """
    self._run_suites(spider, self.spider_closed_suites)
    # The close signal can arrive for a spider that has no periodic tasks
    # registered; treat that as "nothing to stop" instead of raising KeyError.
    for task in self.periodic_tasks.get(spider, ()):
        task.stop()
示例10: test_spider_closed_connect_signal
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def test_spider_closed_connect_signal(mocker, spidermon_enabled_settings):
    """Sending the ``spider_closed`` signal must invoke ``Spidermon.spider_closed``."""
    handler_mock = mocker.patch.object(Spidermon, "spider_closed")
    crawler = get_crawler(settings_dict=spidermon_enabled_settings)
    target_spider = Spider.from_crawler(crawler, "example.com")

    signal_kwargs = {"spider": target_spider, "reason": None}
    crawler.signals.send_catch_log(signal=signals.spider_closed, **signal_kwargs)

    assert handler_mock.called, "spider_closed not called"
示例11: test_spider_closed_suites_should_run
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def test_spider_closed_suites_should_run(get_crawler, suites):
    """Suites passed as ``spider_closed_suites`` are loaded and run on ``spider_closed``."""
    crawler = get_crawler()
    extension = Spidermon(
        crawler,
        spider_opened_suites=suites,
        spider_closed_suites=suites,
    )
    # Replace the first close suite's ``run`` so the call can be observed.
    first_suite = extension.spider_closed_suites[0]
    first_suite.run = mock.MagicMock()

    extension.spider_opened(crawler.spider)
    extension.spider_closed(crawler.spider)

    assert first_suite.__class__.__name__ == "Suite01"
    first_suite.run.assert_called_once_with(mock.ANY)
示例12: test_spider_closed_suites_should_run_from_signal
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def test_spider_closed_suites_should_run_from_signal(get_crawler, suites):
    """Suites from the SPIDERMON_SPIDER_CLOSE_MONITORS setting run when the close signal fires."""
    crawler = get_crawler({"SPIDERMON_SPIDER_CLOSE_MONITORS": suites})
    extension = Spidermon.from_crawler(crawler)
    # Replace the first close suite's ``run`` so the call can be observed.
    first_suite = extension.spider_closed_suites[0]
    first_suite.run = mock.MagicMock()

    crawler.signals.send_catch_log(
        signal=signals.spider_closed,
        spider=crawler.spider,
    )

    first_suite.run.assert_called_once_with(mock.ANY)
示例13: __init__
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def __init__(self):
    """Connect this object's ``spider_opened``/``spider_closed`` handlers through ``dispatcher``."""
    # Handler attribute names match the signal names, so one list drives both.
    for name in ("spider_opened", "spider_closed"):
        dispatcher.connect(getattr(self, name), signal=getattr(signals, name))
示例14: spider_closed
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def spider_closed(self, spider):
    """Finish every exporter and close every output file.

    Args:
        spider: The spider being closed; not used by this handler.

    Raises:
        Whatever an exporter's ``finish_exporting`` or a file's ``close``
        raises. The files are still closed when an exporter fails.
    """
    # Plain loops: these calls are made for their side effects only.
    try:
        for exporter in self.exporters.values():
            exporter.finish_exporting()
    finally:
        # Close the files even if an exporter failed, so handles do not leak.
        for handle in self.files.values():
            handle.close()
示例15: from_crawler
# 需要导入模块: from scrapy import signals [as 别名]
# 或者: from scrapy.signals import spider_closed [as 别名]
def from_crawler(cls, crawler):
    """Create an instance and connect its open/close handlers to the crawler's signals.

    This method is used by Scrapy to create your spiders.

    Returns:
        The new instance, with ``spider_opened`` and ``spider_closed``
        connected to the signals of the same names.
    """
    middleware = cls()
    connect = crawler.signals.connect
    connect(middleware.spider_opened, signal=signals.spider_opened)
    connect(middleware.spider_closed, signal=signals.spider_closed)
    return middleware