

Python JsonItemExporter.start_exporting Method Code Examples

This article collects typical usage examples of the Python method scrapy.exporters.JsonItemExporter.start_exporting. If you are unsure what JsonItemExporter.start_exporting does or how to call it, the curated code examples below should help. You can also browse further usage examples of scrapy.exporters.JsonItemExporter, the class this method belongs to.


Below are 15 code examples of JsonItemExporter.start_exporting, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
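Before the individual examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the file name and item dicts are invented for illustration) of the contract every example follows: start_exporting() writes the opening bracket of the JSON array, each export_item() call appends one serialized item, and finish_exporting() writes the closing bracket, so all three calls are required to produce valid JSON. Note also that the snippets below omit their module-level imports, typically from scrapy import signals and from scrapy.exporters import JsonItemExporter, plus codecs, os, json, or re where used.

from scrapy.exporters import JsonItemExporter

# Minimal sketch; the file name and the items are hypothetical.
with open('demo_items.json', 'wb') as f:  # JsonItemExporter expects a binary file
    exporter = JsonItemExporter(f, encoding='utf-8', ensure_ascii=False)
    exporter.start_exporting()                      # writes the opening '['
    exporter.export_item({'title': 'first item'})   # serializes one item into the array
    exporter.export_item({'title': 'second item'})
    exporter.finish_exporting()                     # writes the closing ']'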

Example 1: JsonExportPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):
    """
    app.pipelines.exporter_json.JsonExportPipeline
    """
    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file_json = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file_json
        self.exporter = JsonItemExporter(file_json)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_json = self.files.pop(spider)
        file_json.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: zhanghe06, Project: scrapy_project, Lines of code: 31, Source: exporter_json.py

Example 2: SaveItemToJson

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class SaveItemToJson(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file=file)
        print(self.exporter)  # debug output; parenthesized for Python 3
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: eatskolnikov, Project: DEVCA2016, Lines of code: 28, Source: pipelines.py

Example 3: JsonPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('/home/gaoliang/Desktop/result.json', 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file, ensure_ascii=False)  # ensure_ascii=False keeps Chinese text readable in the JSON output
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: AnnaYe, Project: NjuptSpider, Lines of code: 27, Source: pipelines.py

Example 4: JsonPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonPipeline(object):
  """Save Pipeline output to JSON."""
  def __init__(self, spider_name):
    self.file = open("output/{}_recipes.json".format(spider_name), 'wb')
    # The file is opened in binary mode, so the hand-written JSON wrapper
    # must be encoded before writing.
    self.file.write(
        ('{"date_scraped": "%s", "recipes": ' % datetime.datetime.now()).encode('utf-8')
    )
    self.exporter = JsonItemExporter(self.file, encoding='utf-8',
                                     ensure_ascii=False)
    self.exporter.start_exporting()

  @classmethod
  def from_crawler(cls, crawler):
    return cls(
        spider_name=crawler.spider.name
    )

  def close_spider(self, spider):
    self.exporter.finish_exporting()
    self.file.write(b"}")
    self.file.close()

  def process_item(self, item, spider):
    self.exporter.export_item(item)
    return item
Developer: Chouvic, Project: food2vec, Lines of code: 27, Source: pipelines.py

Example 5: from_crawler

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonPipelineExporterMixin:
    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        if not hasattr(self, 'files'):
            # the mixin defines no __init__, so create the file registry lazily
            self.files = {}
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        item = self.pre_process_item(item)
        self.exporter.export_item(item)
        return item

    def pre_process_item(self, item):
        return item
Developer: gilbertoalexsantos, Project: uris, Lines of code: 28, Source: mixins.py

Example 6: JsonExportPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # JsonItemExporter writes bytes; open the file in binary mode and let
        # the exporter handle UTF-8 (codecs.open with an encoding would choke
        # on the bytes the exporter writes)
        file = open('%s_data.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file, encoding='utf-8')
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: bgcolors, Project: ztcrawl, Lines of code: 28, Source: pipelines.py

Example 7: JsonExportPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):
    # assumes a module-level logger, e.g. _log = logging.getLogger(__name__)
    def __init__(self):
        _log.info('JsonExportPipeline.init....')
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        _log.info('JsonExportPipeline.from_crawler....')
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        _log.info('JsonExportPipeline.spider_opened....')
        file = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        _log.info('JsonExportPipeline.spider_closed....')
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        _log.info('JsonExportPipeline.process_item....')
        self.exporter.export_item(item)
        return item
Developer: 65kg, Project: core-scrapy, Lines of code: 32, Source: pipelines.py

Example 8: BaseFilePipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class BaseFilePipeline(object):
    def __init__(self, saved_path):
        self.files = {}
        self.saved_path = saved_path
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls(crawler.settings.get('SAVED_PATH'))
        return pipeline

    def open_spider(self, spider):
        tp = self.gettype()['name']
        filename = '%s_%s.json' % (spider.name, tp)
        filename = os.path.join(self.saved_path, filename)

        file_ = open(filename, 'w+b')
        self.files[spider] = file_
        self.exporter = JsonItemExporter(file_, ensure_ascii=False, encoding='utf-8')
        self.exporter.start_exporting()

    def gettype(self):
        # abstract hook: subclasses return a dict such as {'name': ...}
        raise NotImplementedError

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        file_ = self.files.pop(spider)
        file_.close()
Developer: taichao, Project: newsspider, Lines of code: 31, Source: pipelines.py

Example 9: JsonExporterPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonExporterPipeline(object):
    # use the JsonItemExporter provided by Scrapy to export items to a JSON file
    def __init__(self):
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: 201585052, Project: -, Lines of code: 14, Source: pipelines.py

Example 10: VisionsJsonPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class VisionsJsonPipeline(object):
    def __init__(self):
        self.exporter = None

    def open_spider(self, spider):
        self.exporter = JsonItemExporter(open('%s.json' % spider.name, 'wb'))
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
Developer: anamarce, Project: Visions-crawler, Lines of code: 16, Source: pipelines.py

Example 11: process_item

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
    def process_item(self, item, spider):

        designer_dir_name = skutils.escape_filename(item['name'])
        designer_dir_path = os.path.join(GlobalState.data_dir, designer_dir_name)
        file_path = os.path.join(designer_dir_path, designer_dir_name)

        # write json file
        with open('%s.json' % file_path, 'w+b') as f:
            exporter = JsonItemExporter(f)
            exporter.start_exporting()
            exporter.export_item(item)
            exporter.finish_exporting()

        # write excel file
        excelutils.write_designer_excel(item, file_path, designer_dir_name)

        return item
Developer: sunnykaka, Project: skwander, Lines of code: 19, Source: pipelines.py

Example 12: JsonWriterPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonWriterPipeline(BaseItemExporter):

  def __init__(self, **kwargs):
    self._configure(kwargs)
    self.files = {} 
    self.encoder = json.JSONEncoder(ensure_ascii=False, **kwargs)
 
  @classmethod
  def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline

  def spider_opened(self, spider):
    # JsonItemExporter writes bytes, so open a plain binary file and pass the
    # encoding to the exporter rather than wrapping the file with codecs.open
    file = open('item.json', 'wb')
    self.files[spider] = file
    self.exporter = JsonItemExporter(file, encoding="utf-8")
    self.exporter.start_exporting()

  def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file = self.files.pop(spider)
    file.close()

  def process_item(self, item, spider):
    if item['title']:  # and item['image_url']
      # strip embedded line breaks from every free-text field before exporting
      for field in ('description', 'general_impression', 'subject_of_photo',
                    'composition', 'use_of_camera', 'depth_of_field',
                    'color_lighting', 'focus'):
        item[field] = re.sub(r"\r|\n", "", item[field])
      self.exporter.export_item(item)
    return item
Developer: kunghunglu, Project: lmcrawler, Lines of code: 43, Source: pipelines.py

Example 13: JsonExportPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):

    def __init__(self):
        # legacy signal wiring; assumes: from scrapy.xlib.pydispatch import dispatcher
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: marcmilan, Project: urban-fiesta, Lines of code: 23, Source: pipelines.py

Example 14: WikicrawlerPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class WikicrawlerPipeline(object):

    def __init__(self):
        self.item_file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.item_file)

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.item_file.close()
Developer: jameskelleher, Project: wikicrawler, Lines of code: 24, Source: pipelines.py

Example 15: SiteMapJsonExportPipeline

# Required imports: from scrapy.exporters import JsonItemExporter [as alias]
# Or: from scrapy.exporters.JsonItemExporter import start_exporting [as alias]
class SiteMapJsonExportPipeline(object):
	'''Process the SiteMap spider output Items, and write them as JSON to an output file. The output file is taken from the Spider's config (spider.config)'''

	@classmethod
	def from_crawler(cls, crawler):
		''' Boilerplate '''
		pipeline = cls()
		crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
		crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
		return pipeline

	def spider_opened(self, spider):
		self.file = open(spider.config['map_file'], 'wb')
		self.exporter = JsonItemExporter(self.file)
		self.exporter.start_exporting()

	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		self.file.close()

	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item
Developer: lores, Project: bi_spider, Lines of code: 25, Source: pipelines.py


Note: The scrapy.exporters.JsonItemExporter.start_exporting examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or use of the code should follow the corresponding project's license. Please do not reproduce this compilation without permission.