本文整理汇总了Python中scrapy.exporters.JsonItemExporter.start_exporting方法的典型用法代码示例。如果您正苦于以下问题:Python JsonItemExporter.start_exporting方法的具体用法?Python JsonItemExporter.start_exporting怎么用?Python JsonItemExporter.start_exporting使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.exporters.JsonItemExporter的用法示例。
在下文中一共展示了JsonItemExporter.start_exporting方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: JsonExportPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonExportPipeline(object):
    """Item pipeline that writes every item to ``<spider name>_items.json``.

    Originally labelled ``app.pipelines.exporter_json.JsonExportPipeline``.
    """

    def __init__(self):
        # Open output files, keyed by the spider they belong to.
        self.files = {}
        # Created in spider_opened; None until a spider has been opened.
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the output file for *spider* and begin the JSON export."""
        handle = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item
示例2: SaveItemToJson
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class SaveItemToJson(object):
    """Item pipeline that saves all scraped items to ``<spider name>_items.json``."""

    def __init__(self):
        # Open output files, keyed by the spider they belong to.
        self.files = {}
        # Set in spider_opened; defined here so the attribute always exists.
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the output file for *spider* and start the JSON export."""
        # Named ``output`` rather than ``file`` to avoid shadowing the builtin.
        output = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = output
        self.exporter = JsonItemExporter(file=output)
        # Function-call form prints the same text on Python 2 and is valid
        # syntax on Python 3 (the original used a Python 2 print statement).
        print(self.exporter)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        output = self.files.pop(spider)
        output.close()

    def process_item(self, item, spider):
        """Export *item* and return it so later pipelines still receive it."""
        self.exporter.export_item(item)
        return item
示例3: JsonPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonPipeline(object):
    """Item pipeline that exports all items of a crawl to one JSON file."""

    def __init__(self):
        # Maps each open spider to the file object created for it.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Instantiate the pipeline and hook it to the spider lifecycle signals."""
        obj = cls()
        connect = crawler.signals.connect
        connect(obj.spider_opened, signals.spider_opened)
        connect(obj.spider_closed, signals.spider_closed)
        return obj

    def spider_opened(self, spider):
        """Open the result file and start exporting."""
        out = open('/home/gaoliang/Desktop/result.json', 'w+b')
        self.files[spider] = out
        # ensure_ascii=False was added so that Chinese text saved in the
        # JSON file does not come out garbled.
        self.exporter = JsonItemExporter(out, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and hand it back unchanged."""
        self.exporter.export_item(item)
        return item
示例4: JsonPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonPipeline(object):
    """Save pipeline output to JSON.

    The output file has the shape
    ``{"date_scraped": "<timestamp>", "recipes": <exported item list>}``:
    the constructor writes the opening part, the exporter writes the item
    list, and ``close_spider`` writes the closing brace.
    """

    def __init__(self, spider_name):
        """Open ``output/<spider_name>_recipes.json`` and start the export."""
        self.file = open("output/{}_recipes.json".format(spider_name), 'wb')
        header = '{"date_scraped": "%s", "recipes": ' % datetime.datetime.now()
        # The file is opened in binary mode, so the header must be bytes.
        self.file.write(header.encode('utf-8'))
        self.exporter = JsonItemExporter(self.file, encoding='utf-8',
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline using the name of the crawler's spider."""
        return cls(
            spider_name=crawler.spider.name
        )

    def close_spider(self, spider=None):
        """Finish the export, close the surrounding JSON object and the file.

        ``spider`` is accepted because Scrapy passes it to this hook; it is
        optional so existing zero-argument calls keep working.
        """
        self.exporter.finish_exporting()
        self.file.write(b"}")
        self.file.close()

    def process_item(self, item, spider=None):
        """Export *item* and return it.

        ``spider`` is accepted because Scrapy passes it to this hook; it is
        optional so existing one-argument calls keep working.
        """
        self.exporter.export_item(item)
        return item
示例5: from_crawler
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonPipelineExporterMixin:
    """Mixin that gives an item pipeline JSON export behaviour.

    The mixin reads and writes ``self.files`` but never creates it, so the
    class it is mixed into has to provide that dict.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open ``<spider name>_items.json`` and start the export."""
        handle = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Run the pre-processing hook, export the result and return it."""
        prepared = self.pre_process_item(item)
        self.exporter.export_item(prepared)
        return prepared

    def pre_process_item(self, item):
        """Hook for subclasses; the default returns *item* untouched."""
        return item
示例6: JsonExportPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonExportPipeline(object):
    """Item pipeline that exports every item to ``<spider name>_data.json``."""

    def __init__(self):
        # Open output files, keyed by the spider they belong to.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the output file for *spider* and start the JSON export."""
        # JsonItemExporter writes already-encoded bytes, so it needs a plain
        # binary file. The previous codecs.open(..., encoding='utf-8')
        # wrapper expects text and fails when handed bytes.
        output = open('%s_data.json' % spider.name, 'w+b')
        self.files[spider] = output
        self.exporter = JsonItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        output = self.files.pop(spider)
        output.close()

    def process_item(self, item, spider):
        """Export *item* and return it so later pipelines still receive it."""
        self.exporter.export_item(item)
        return item
示例7: JsonExportPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonExportPipeline(object):
    """Item pipeline that exports items to ``<spider name>.json``.

    Every lifecycle step logs a message through ``_log`` before doing its work.
    """

    def __init__(self):
        _log.info('JsonExportPipeline.init....')
        # Open output files, keyed by the spider they belong to.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        _log.info('JsonExportPipeline.from_crawler....')
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the output file for *spider* and start the JSON export."""
        _log.info('JsonExportPipeline.spider_opened....')
        handle = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        _log.info('JsonExportPipeline.spider_closed....')
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        _log.info('JsonExportPipeline.process_item....')
        self.exporter.export_item(item)
        return item
示例8: BaseFilePipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class BaseFilePipeline(object):
    """Base pipeline that exports items to a JSON file under ``saved_path``.

    Subclasses must override ``gettype``. ``open_spider`` reads the ``'name'``
    key of its return value to build the file name
    ``<spider name>_<type name>.json``.
    """

    def __init__(self, saved_path):
        # Open output files, keyed by the spider they belong to.
        self.files = {}
        # Directory in which the JSON files are created.
        self.saved_path = saved_path
        # Created in open_spider.
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's ``SAVED_PATH`` setting."""
        return cls(crawler.settings.get('SAVED_PATH'))

    def open_spider(self, spider):
        """Open the output file for *spider* and start the JSON export."""
        tp = self.gettype()['name']
        filename = os.path.join(self.saved_path,
                                '%s_%s.json' % (spider.name, tp))
        file_ = open(filename, 'w+b')
        self.files[spider] = file_
        self.exporter = JsonItemExporter(file_, ensure_ascii=False,
                                         encoding='utf-8')
        self.exporter.start_exporting()

    def gettype(self):
        """Return the type descriptor of the concrete pipeline.

        The original definition took no ``self``, so ``self.gettype()``
        always raised TypeError. It is now an explicit abstract hook.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            'subclasses of BaseFilePipeline must implement gettype()')

    def close_spider(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        file_ = self.files.pop(spider)
        file_.close()
示例9: JsonExporterPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonExporterPipeline(object):
    """Export items to a JSON file using the JSON exporter provided by Scrapy."""

    def __init__(self):
        """Open ``articleexport.json`` and start the export immediately."""
        out = open('articleexport.json', 'wb')
        self.file = out
        self.exporter = JsonItemExporter(out, encoding="utf-8",
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export, then close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item
示例10: VisionsJsonPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class VisionsJsonPipeline(object):
    """Item pipeline that exports all items to ``<spider name>.json``."""

    def __init__(self):
        # Both are created in open_spider.
        self.exporter = None
        self.file = None

    def open_spider(self, spider):
        """Open the output file for *spider* and start the JSON export."""
        # Keep a reference to the file so close_spider can close it; the
        # original opened it inline and never closed it.
        self.file = open('%s.json' % spider.name, 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Export *item* and return it so later pipelines still receive it."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish the export and close the output file."""
        self.exporter.finish_exporting()
        if self.file is not None:
            self.file.close()
            self.file = None
示例11: process_item
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
def process_item(self, item, spider):
    """Write *item* as a JSON file and as an Excel file, then return it.

    Both outputs use the base path ``<data_dir>/<name>/<name>``, where
    ``<name>`` is the escaped value of ``item['name']``.
    """
    dir_name = skutils.escape_filename(item['name'])
    base_path = os.path.join(GlobalState.data_dir, dir_name, dir_name)

    # write json file
    with open('%s.json' % base_path, 'w+b') as out:
        json_exporter = JsonItemExporter(out)
        json_exporter.start_exporting()
        json_exporter.export_item(item)
        json_exporter.finish_exporting()

    # write excel file
    excelutils.write_designer_excel(item, base_path, dir_name)
    return item
示例12: JsonWriterPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
# Item pipeline, derived from BaseItemExporter, that removes CR/LF characters
# from several text fields of an item and passes the item to a
# JsonItemExporter writing to 'item.json'.
# NOTE(review): the indentation of this listing has been lost, so the nesting
# of the statements below cannot be read from this text -- in particular the
# extent of the `if` in process_item. Confirm against the upstream source
# before restructuring.
class JsonWriterPipeline(BaseItemExporter):
# Configure the exporter options from kwargs and create the per-spider file
# registry plus a JSON encoder.
def __init__(self, **kwargs):
self._configure(kwargs)
# Open output files, keyed by spider.
self.files = {}
# NOTE(review): self.encoder is not used anywhere else in this class as shown.
self.encoder = json.JSONEncoder(ensure_ascii=False, **kwargs)
# Create the pipeline and connect it to the spider open/close signals.
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
# Open 'item.json', register it for the spider and start the JSON export.
def spider_opened(self, spider):
# NOTE(review): codecs.open with an encoding returns a text-encoding wrapper,
# while Scrapy documents that the exporter's file must accept bytes -- confirm
# this combination works with the Scrapy version in use.
file = codecs.open('item.json', 'wb', encoding="utf-8")
self.files[spider] = file
self.exporter = JsonItemExporter(file)
self.exporter.start_exporting()
# Finish the export and close the file registered for the spider.
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
# For items with a truthy 'title', strip "\r" and "\n" from the text fields,
# then export the item and return it.
# NOTE(review): whether export_item runs only inside the `if` or for every
# item cannot be told from this listing -- verify.
def process_item(self, item, spider):
if item['title']: # and item['image_url'] :
item['description'] = re.sub("\r|\n","", item['description'])
item['general_impression'] = re.sub("\r|\n","", item['general_impression'])
item['subject_of_photo'] = re.sub("\r|\n","", item['subject_of_photo'])
item['composition'] = re.sub("\r|\n","", item['composition'])
item['use_of_camera'] = re.sub("\r|\n","", item['use_of_camera'])
item['depth_of_field'] = re.sub("\r|\n","", item['depth_of_field'])
item['color_lighting'] = re.sub("\r|\n","", item['color_lighting'])
item['focus'] = re.sub("\r|\n","", item['focus'])
##line = json.dumps(dict(item)) + '\n'
##self.file.write(line)
self.exporter.export_item(item)
return item
示例13: JsonExportPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class JsonExportPipeline(object):
    """Item pipeline that exports every item to ``<spider name>_items.json``.

    The pipeline registers its own spider open/close handlers with
    ``dispatcher`` when it is constructed.
    """

    def __init__(self):
        for receiver, signal in (
            (self.spider_opened, signals.spider_opened),
            (self.spider_closed, signals.spider_closed),
        ):
            dispatcher.connect(receiver, signal)
        # Open output files, keyed by the spider they belong to.
        self.files = {}

    def spider_opened(self, spider):
        """Open the output file for *spider* and start the JSON export."""
        handle = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item
示例14: WikicrawlerPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class WikicrawlerPipeline(object):
    """Item pipeline that exports every scraped item to ``items.json``."""

    def __init__(self):
        """Open ``items.json`` and create the exporter that writes to it."""
        self.item_file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.item_file)

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Start the JSON export."""
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Export *item* and return it.

        The item must be returned: Scrapy passes the return value on to the
        next pipeline, and the original returned None.
        """
        self.exporter.export_item(item)
        return item

    def spider_closed(self, spider=None):
        """Finish the export and close the output file.

        ``spider`` is optional so the handler can be called with or without
        the argument that the ``spider_closed`` signal carries.
        """
        self.exporter.finish_exporting()
        self.item_file.close()
示例15: SiteMapJsonExportPipeline
# 需要导入模块: from scrapy.exporters import JsonItemExporter [as 别名]
# 或者: from scrapy.exporters.JsonItemExporter import start_exporting [as 别名]
class SiteMapJsonExportPipeline(object):
    """Write the items produced by the SiteMap spider to a JSON file.

    The destination path is read from ``spider.config['map_file']``.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the configured map file and start the JSON export."""
        self.file = open(spider.config['map_file'], 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item