本文整理汇总了 Python 中 scrapy.contrib.exporter.JsonItemExporter 类的典型用法代码示例。如果您正苦于以下问题：Python JsonItemExporter 类的具体用法？Python JsonItemExporter 怎么用？Python JsonItemExporter 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 JsonItemExporter 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: SaveNewItems
class SaveNewItems(object):
    """Item pipeline that writes scraped items to per-spider JSON files.

    While the spider runs, each processed item is exported to the spider's
    ``<name>_new_items.json`` file. When the spider closes, a second file,
    ``<name>_items.json``, is written from ``incomingData`` and both files
    are finalised.
    """

    def __init__(self):
        """Subscribe the pipeline to the spider opened/closed signals."""
        # NOTE(review): ``files`` is not read anywhere in this class as shown;
        # kept in case external code relies on the attribute.
        self.files = []
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def process_item(self, item, spider):
        """Export ``item`` to the new-items file, report it, and pass it on.

        Returns the same ``item`` so later pipelines can keep processing it.
        """
        self.new_file_exporter.export_item(item)
        # Parenthesised single-argument form prints identically whether
        # ``print`` is a statement or a function.
        print("Save " + item["title"][0])
        return item

    def spider_opened(self, spider):
        """Open the new-items file for ``spider`` and start exporting to it."""
        self.new_item_file = open("storage\\%s_new_items.json" % spider.name, "w")
        self.new_file_exporter = JsonItemExporter(self.new_item_file)
        self.new_file_exporter.start_exporting()

    def spider_closed(self, spider):
        """Write the full items file, then finalise the new-items file.

        The new-items exporter is finished and its file closed even when
        writing the full items file fails, so the handle opened in
        ``spider_opened`` is never leaked.
        """
        try:
            # The ``with`` block closes ``items_file``; no explicit close
            # is needed.
            with open("storage\\%s_items.json" % spider.name, "w") as items_file:
                self.exporter = JsonItemExporter(items_file)
                self.exporter.start_exporting()
                # NOTE(review): ``incomingData`` is not defined in this
                # block; presumably a module-level collection -- confirm.
                for item in incomingData:
                    self.exporter.export_item(item)
                self.exporter.finish_exporting()
        finally:
            try:
                self.new_file_exporter.finish_exporting()
            finally:
                self.new_item_file.close()
示例2: ExportJSON
class ExportJSON(ExportData):
    """Export scraped items to ``exports/json/<spider name>.json``."""

    def spider_opened(self, spider):
        """Open the spider's JSON output file and start an exporter on it."""
        target_path = 'exports/json/%s.json' % spider.name
        handle = open(target_path, 'w+b')
        # Remember the handle per spider so it can be looked up later.
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()
示例3: __init__
def __init__(self):
    """Open ``books2.json`` and start a JSON exporter writing UTF-8 to it.

    Also prepares a ``json.JSONEncoder`` that leaves non-ASCII characters
    unescaped.
    """
    # Keep the handle on the instance (instead of a throw-away local that
    # shadowed the builtin ``file``) so the owner can close it later.
    self.file = codecs.open('books2.json', 'w+b', encoding='utf-8')
    self.exporter = JsonItemExporter(self.file)
    self.exporter.encoding = 'utf-8'
    self.exporter.start_exporting()
    self.encoder = json.JSONEncoder(ensure_ascii=False)
示例4: open_spider
def open_spider(self, spider):
    """Copy layout settings from ``spider`` and start exporting to ``test.json``."""
    self.cols = spider.cols
    self.start_urls = spider.start_urls

    output = open('test.json', 'w+b')
    self.file = output
    self.exporter = JsonItemExporter(output)
    self.exporter.start_exporting()
示例5: spider_opened
def spider_opened(self, spider):
    """Open the output file that matches ``spider`` and start exporting to it.

    The ``timesnews`` spider writes to ``TodaysToiScrapedItems.json``; every
    other spider writes to ``TodaysHtScrapedItems.json``.
    """
    if spider.name == 'timesnews':
        filename = 'TodaysToiScrapedItems.json'
    else:
        filename = 'TodaysHtScrapedItems.json'

    output = open(filename, 'w+b')
    self.files[spider] = output
    self.exporter = JsonItemExporter(output)
    self.exporter.start_exporting()
示例6: spider_closed
def spider_closed(self, spider):
    """Write the full items file, then finalise the new-items file.

    The new-items exporter is finished and its file closed even when
    writing the full items file fails, so that handle is never leaked.
    """
    try:
        # The ``with`` block closes ``items_file``; no explicit close is
        # needed.
        with open("storage\\%s_items.json" % spider.name, "w") as items_file:
            self.exporter = JsonItemExporter(items_file)
            self.exporter.start_exporting()
            # NOTE(review): ``incomingData`` is not defined in this block;
            # presumably a module-level collection -- confirm.
            for item in incomingData:
                self.exporter.export_item(item)
            self.exporter.finish_exporting()
    finally:
        try:
            self.new_file_exporter.finish_exporting()
        finally:
            self.new_item_file.close()
示例7: engine_started
def engine_started(self):
    """Open ``result.json`` and start a JSON exporter for the current mode.

    The mode is the first entry of the ``CommandLineParameter`` crawler
    setting; it selects which fields are exported and is logged afterwards.
    """
    self.json_file = open("result.json", "w")

    mode = self.spider._crawler.settings["CommandLineParameter"][0]
    selected_fields = self.fields_to_export[mode]

    self.json_exporter = JsonItemExporter(self.json_file, fields_to_export=selected_fields)
    self.json_exporter.start_exporting()

    log.msg(message="ManningPipeline, engine_started, mode=%s" % mode)
示例8: process_item
def process_item(self, item, spider):
    """Export ``item``, creating the exporter when the first item arrives.

    Returns the same ``item`` so later pipelines can keep processing it.
    """
    if self.first_item:
        self.first_item = False
        output = open('%s_items.json' % spider.name, 'wb')
        # Question: the Scrapy item exporter writes Chinese text to the JSON
        # file as unicode escapes -- how can it be written as real characters?
        # http://stackoverflow.com/questions/18337407/saving-utf-8-texts-in-json-dumps-as-utf8-not-as-u-escape-sequence
        # The answer there is to set JSONEncoder's ensure_ascii to False, so
        # ensure_ascii=False is passed along when constructing
        # scrapy.contrib.exporter.JsonItemExporter.
        exporter = JsonItemExporter(output, ensure_ascii=False)
        self.exporter = exporter
        self.exporter.start_exporting()

    self.exporter.export_item(item)
    return item
示例9: AppsPipeline
class AppsPipeline(object):
    """Export items from the ``apps`` spider to a dated, per-category JSON file."""

    def __init__(self, spider):
        """Open ``<category>-<dd-mm-YYYY>.json`` and hook the spider signals."""
        today = date.today().strftime('%d-%m-%Y')
        filename = '{category}-{today}.json'.format(today=today, category=spider.category)
        self.file = open(filename, 'wb')

        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's spider."""
        return cls(spider=crawler.spider)

    def spider_opened(self, spider):
        """Start a JSON exporter on the already opened output file."""
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` only when it comes from the ``apps`` spider."""
        if spider.name != 'apps':
            return item
        self.exporter.export_item(item)
        return item
示例10: MonitorPipeline
class MonitorPipeline(object):
    """Tag items by the position of their source URL, then export to ``test.json``.

    The position of ``item['surl']`` within the spider's ``start_urls`` is
    split into a group number (``gid``) and a slot within the group
    (``main``), using ``cols`` entries per group.
    """

    def open_spider(self, spider):
        """Copy layout settings from ``spider`` and start the JSON exporter."""
        self.cols = spider.cols
        self.start_urls = spider.start_urls
        self.file = open('test.json', 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Annotate ``item`` with ``main``/``gid`` when possible and export it.

        Tagging is best effort: an item whose ``surl`` is missing or not in
        ``start_urls`` is exported without the extra fields. Returns the
        same ``item``.
        """
        try:
            index = self.start_urls.index(item['surl'])
            # divmod keeps the group number an integer on every Python
            # version (plain ``/`` yields a float on Python 3).
            group_id, slot = divmod(index, self.cols)
            # Only the first three slots of a group are recorded as 'main'.
            if slot in (0, 1, 2):
                item['main'] = slot
            item['gid'] = group_id
        except Exception:
            # Deliberately best effort, but unlike a bare ``except`` this
            # lets KeyboardInterrupt/SystemExit propagate.
            pass
        self.exporter.export_item(item)
        return item
示例11: JsonExportPipeline
class JsonExportPipeline(object):
    """Item pipeline that writes every scraped item to ``ixbt_jokes.json``."""

    def __init__(self):
        # Open output handles, keyed by the spider they were opened for.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the output file for ``spider`` and start exporting to it."""
        output = open('ixbt_jokes.json', 'w+b')
        self.files[spider] = output
        self.exporter = JsonItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it back unchanged."""
        self.exporter.export_item(item)
        return item
示例12: YxreviewPipeline
class YxreviewPipeline(object):
    """Normalise selected text fields of each item and export to ``items.json``."""

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open ``items.json`` and start the JSON exporter."""
        self.file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Clean the text fields of ``item``, export it, and return it."""
        for field in ("title", "summary", "cover_image", "score"):
            self.checkData(item, field)
        self.exporter.export_item(item)
        return item

    def checkData(self, item, field):
        """Collapse ``item[field]`` to its first entry, UTF-8 encoded and stripped.

        A field with no entries becomes the empty string.
        """
        values = item[field]
        if len(values) > 0:
            item[field] = values[0].encode("utf-8").strip()
        else:
            item[field] = ""
示例13: JsonExportPipeline
class JsonExportPipeline(object):
    """Item pipeline that exports items to ``<spider name>.json``, logging each step."""

    def __init__(self):
        log.msg('JsonExportPipeline.init....', level=log.INFO)
        # Open output handles, keyed by the spider they were opened for.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to spider open/close signals."""
        log.msg('JsonExportPipeline.from_crawler....', level=log.INFO)
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's JSON file and start exporting to it."""
        log.msg('JsonExportPipeline.spider_opened....', level=log.INFO)
        output = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = output
        self.exporter = JsonItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file opened for ``spider``."""
        log.msg('JsonExportPipeline.spider_closed....', level=log.INFO)
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Log the call, export ``item``, and hand it back unchanged."""
        log.msg('JsonExportPipeline.process_item....', level=log.INFO)
        self.exporter.export_item(item)
        return item
示例14: SpidercrawlerPipeline
class SpidercrawlerPipeline(object):
    """Item pipeline that exports each spider's items to its own JSON file.

    The ``timesnews`` spider writes to ``TodaysToiScrapedItems.json``; every
    other spider writes to ``TodaysHtScrapedItems.json``.
    """

    def __init__(self):
        """Initialise state and subscribe to the spider opened/closed signals."""
        # Open output handles, keyed by the spider they were opened for.
        self.files = {}
        # The exporter is created in ``spider_opened`` once a real output
        # file exists. Previously an exporter was built here around the
        # name ``file`` although no file had been opened.
        self.exporter = None
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        """Open the output file that matches ``spider`` and start exporting."""
        if spider.name == 'timesnews':
            filename = 'TodaysToiScrapedItems.json'
        else:
            filename = 'TodaysHtScrapedItems.json'
        output = open(filename, 'w+b')
        self.files[spider] = output
        self.exporter = JsonItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        output = self.files.pop(spider)
        output.close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it back unchanged."""
        self.exporter.export_item(item)
        return item
示例15: ExportJSON
class ExportJSON(object):
    """Export scraped items to ``exports/json/<spider name>.json``."""

    def __init__(self):
        # Open output handles, keyed by the spider they were opened for.
        self.files = {}
        # Set once a spider has been opened.
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's JSON output file and start exporting to it."""
        target_path = 'exports/json/%s.json' % spider.name
        handle = open(target_path, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it back unchanged."""
        self.exporter.export_item(item)
        return item