当前位置: 首页>>代码示例>>Python>>正文


Python exporter.JsonItemExporter类代码示例

本文整理汇总了Python中scrapy.contrib.exporter.JsonItemExporter的典型用法代码示例。如果您正苦于以下问题:Python JsonItemExporter类的具体用法?Python JsonItemExporter怎么用?Python JsonItemExporter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了JsonItemExporter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: SaveNewItems

class SaveNewItems(object):
    """Pipeline that streams newly scraped items to a per-spider JSON file.

    A second JSON file containing the full item set is rewritten when the
    spider closes.
    """

    def __init__(self):
        self.files = []
        # Hook spider lifecycle signals so files open/close in step with
        # the crawl.
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def process_item(self, item, spider):
        """Export each incoming item and pass it on unchanged."""
        self.new_file_exporter.export_item(item)
        # Parenthesized print works under both Python 2 and Python 3
        # (the original print statement was Python-2 only).
        print("Save " + item["title"][0])
        return item

    def spider_opened(self, spider):
        """Open the per-spider 'new items' file and start exporting."""
        self.new_item_file = open("storage\\%s_new_items.json" % spider.name, "w")
        self.new_file_exporter = JsonItemExporter(self.new_item_file)
        self.new_file_exporter.start_exporting()

    def spider_closed(self, spider):
        """Rewrite the full item dump and finalize the 'new items' file."""
        # NOTE(review): `incomingData` is not defined in this module — verify
        # it is supplied as a global elsewhere.
        with open("storage\\%s_items.json" % spider.name, "w") as items_file:
            self.exporter = JsonItemExporter(items_file)
            self.exporter.start_exporting()
            for item in incomingData:
                self.exporter.export_item(item)
            self.exporter.finish_exporting()
        # The with-statement already closed items_file; the explicit
        # items_file.close() that followed was redundant and is removed.
        self.new_file_exporter.finish_exporting()
        self.new_item_file.close()
开发者ID:altus88,项目名称:crawler,代码行数:26,代码来源:pipelines.py

示例2: ExportJSON

class ExportJSON(ExportData):
    """
    Exporting to export/json/spider-name.json file
    """

    def spider_opened(self, spider):
        """Open the per-spider output file and begin the JSON export."""
        out = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = out
        self.exporter = JsonItemExporter(out)
        self.exporter.start_exporting()
开发者ID:Watermeloniscoding,项目名称:arachne,代码行数:9,代码来源:pipelines.py

示例3: __init__

 def __init__(self):
     """Create a UTF-8 JSON exporter writing to books2.json."""
     # Open in plain binary mode: JsonItemExporter encodes and writes bytes
     # itself, so wrapping the handle with codecs.open(..., encoding=...)
     # (as before) layered a text codec over a binary writer.
     file = open('books2.json', 'w+b')
     # ensure_ascii=False makes the underlying JSONEncoder emit real UTF-8
     # characters instead of \uXXXX escape sequences.
     self.exporter = JsonItemExporter(file, ensure_ascii=False)
     self.exporter.encoding = 'utf-8'
     self.exporter.start_exporting()
     self.encoder = json.JSONEncoder(ensure_ascii=False)
开发者ID:Terrile,项目名称:oreilly_spider_2,代码行数:7,代码来源:pipelines.py

示例4: open_spider

	def open_spider(self, spider):
		"""Capture the spider's layout config and open the JSON export file."""
		# Remember the URL grid layout for later grouping in process_item.
		self.start_urls = spider.start_urls
		self.cols = spider.cols
		# Binary mode: JsonItemExporter writes encoded bytes.
		self.file = open('test.json', 'w+b')
		self.exporter = JsonItemExporter(self.file)
		self.exporter.start_exporting()
开发者ID:xxoxx,项目名称:dc,代码行数:7,代码来源:pipelines.py

示例5: spider_opened

	def spider_opened(self, spider):
		"""Open the day's output file for this news spider and start exporting."""
		# Each news source writes to its own fixed file name.
		name = 'TodaysToiScrapedItems.json' if spider.name == 'timesnews' else 'TodaysHtScrapedItems.json'
		out = open(name, 'w+b')
		self.files[spider] = out
		self.exporter = JsonItemExporter(out)
		self.exporter.start_exporting()
开发者ID:yogeshdixit41,项目名称:PyScrapyWebCrawler,代码行数:8,代码来源:pipelines.py

示例6: spider_closed

 def spider_closed(self, spider):
     """Rewrite the full item dump and finalize the secondary exporter."""
     # NOTE(review): `incomingData` is not defined in this module — verify
     # it is supplied as a global elsewhere.
     with open("storage\\%s_items.json" % spider.name, "w") as items_file:
         self.exporter = JsonItemExporter(items_file)
         self.exporter.start_exporting()
         for item in incomingData:
             self.exporter.export_item(item)
         self.exporter.finish_exporting()
     # The with-statement closes items_file; the explicit items_file.close()
     # that followed was redundant and is removed. Only the separately
     # opened file still needs an explicit close.
     self.new_file_exporter.finish_exporting()
     self.new_item_file.close()
开发者ID:altus88,项目名称:crawler,代码行数:10,代码来源:pipelines.py

示例7: engine_started

 def engine_started(self):
     """Open result.json and configure the exporter for the current crawl mode."""
     # The crawl mode is the first command-line parameter; look it up once.
     mode = self.spider._crawler.settings["CommandLineParameter"][0]
     self.json_file = open("result.json", "w")
     self.json_exporter = JsonItemExporter(
         self.json_file,
         fields_to_export=self.fields_to_export[mode],
     )
     self.json_exporter.start_exporting()
     log.msg(message="ManningPipeline, engine_started, mode=%s" % mode)
开发者ID:smart--petea,项目名称:scrapy_manning,代码行数:11,代码来源:pipelines.py

示例8: process_item

 def process_item(self, item, spider):
     """Lazily open the output file on the first item, then export every item."""
     if self.first_item:
         self.first_item = False
         # Keep the handle on self (the old local `file` shadowed the
         # builtin and was dropped, so the file could never be closed).
         self.file = open('%s_items.json' % spider.name, 'wb')
         # ensure_ascii=False makes the JSONEncoder write UTF-8 characters
         # directly instead of \uXXXX escapes, so Chinese text stays
         # readable in the output file (see
         # stackoverflow.com/questions/18337407).
         self.exporter = JsonItemExporter(self.file, ensure_ascii=False)
         self.exporter.start_exporting()
     self.exporter.export_item(item)
     return item
开发者ID:ejoful,项目名称:scrapy_example,代码行数:12,代码来源:pipelines.py

示例9: AppsPipeline

class AppsPipeline(object):
    """Export items from the 'apps' spider into a dated, per-category JSON file."""

    def __init__(self, spider):
        # One output file per run: <category>-<dd-mm-YYYY>.json
        filename = '{category}-{today}.json'.format(
            today=date.today().strftime('%d-%m-%Y'),
            category=spider.category)
        self.file = open(filename, 'wb')
        # Wire the exporter lifecycle to the spider's open/close signals.
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's spider."""
        return cls(spider=crawler.spider)

    def spider_opened(self, spider):
        """Begin the JSON export when the spider opens."""
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finalize the export and close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export items only for the 'apps' spider; always pass the item on."""
        if spider.name == 'apps':
            self.exporter.export_item(item)
        return item
开发者ID:streeck,项目名称:playstore-crawler,代码行数:25,代码来源:pipelines.py

示例10: MonitorPipeline

class MonitorPipeline(object):
    """Tag each item with its group id and column position, then export as JSON."""

    def open_spider(self, spider):
        """Capture the spider's URL grid layout and open the export file."""
        self.cols = spider.cols
        self.start_urls = spider.start_urls
        self.file = open('test.json', 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Annotate the item with 'gid'/'main' derived from its source URL.

        Items whose 'surl' is missing or not among start_urls are exported
        untagged (best-effort, as before, but without a bare `except:` that
        swallowed every exception type).
        """
        try:
            index = self.start_urls.index(item['surl'])
            # Floor division keeps integer group ids under both Py2 and Py3
            # (the old `/` produced a float gid on Py3).
            col = index % self.cols
            if col in (0, 1, 2):
                item['main'] = col
            item['gid'] = index // self.cols
        except (KeyError, ValueError):
            # URL missing or unknown: leave the item untagged.
            index = -1
        self.exporter.export_item(item)
        return item
开发者ID:xxoxx,项目名称:dc,代码行数:32,代码来源:pipelines.py

示例11: JsonExportPipeline

class JsonExportPipeline(object):
    """Export every scraped item to ixbt_jokes.json."""

    def __init__(self):
        # One open output file per spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and wire it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the output file and begin the JSON export."""
        out = open('ixbt_jokes.json', 'w+b')
        self.files[spider] = out
        self.exporter = JsonItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close this spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Write the item to the JSON file and pass it downstream."""
        self.exporter.export_item(item)
        return item
开发者ID:AndrewLvov,项目名称:ixbt_jokes,代码行数:26,代码来源:pipelines.py

示例12: YxreviewPipeline

class YxreviewPipeline(object):
    """Normalize review fields and export each item as JSON."""

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and register the spider lifecycle handlers."""
        obj = cls()
        crawler.signals.connect(obj.spider_opened, signals.spider_opened)
        crawler.signals.connect(obj.spider_closed, signals.spider_closed)
        return obj

    def spider_opened(self, spider):
        """Open items.json and start the export."""
        self.file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and release the file handle."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Clean the known fields, export the item and pass it on."""
        for field in ("title", "summary", "cover_image", "score"):
            self.checkData(item, field)
        self.exporter.export_item(item)
        return item

    def checkData(self, item, field):
        """Replace the field's first value with a stripped UTF-8 encoding,
        or an empty string when the field holds no values."""
        values = item[field]
        if values:
            item[field] = values[0].encode("utf-8").strip()
        else:
            item[field] = ""
开发者ID:KinoAndWorld,项目名称:YouxiaReview,代码行数:34,代码来源:pipelines.py

示例13: JsonExportPipeline

class JsonExportPipeline(object):
    """JSON export pipeline with verbose lifecycle logging."""

    def __init__(self):
        log.msg('JsonExportPipeline.init....', level=log.INFO)
        # Per-spider open output files.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Instantiate the pipeline and hook it up to the spider signals."""
        log.msg('JsonExportPipeline.from_crawler....', level=log.INFO)
        obj = cls()
        crawler.signals.connect(obj.spider_opened, signals.spider_opened)
        crawler.signals.connect(obj.spider_closed, signals.spider_closed)
        return obj

    def spider_opened(self, spider):
        """Open <spider-name>.json and begin exporting."""
        log.msg('JsonExportPipeline.spider_opened....', level=log.INFO)
        out = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = out
        self.exporter = JsonItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finalize the export and close this spider's file."""
        log.msg('JsonExportPipeline.spider_closed....', level=log.INFO)
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export the item and hand it to the next pipeline stage."""
        log.msg('JsonExportPipeline.process_item....', level=log.INFO)
        self.exporter.export_item(item)
        return item
开发者ID:lovoror,项目名称:core-scrapy,代码行数:30,代码来源:pipelines.py

示例14: SpidercrawlerPipeline

class SpidercrawlerPipeline(object):
	"""Export scraped news items to a per-spider JSON file."""

	def __init__(self):
		dispatcher.connect(self.spider_opened, signals.spider_opened)
		dispatcher.connect(self.spider_closed, signals.spider_closed)
		self.files = {}
		# The exporter is created in spider_opened once the output file is
		# known; the old eager `JsonItemExporter(file)` call here passed an
		# undefined name (its initializer was commented out) and was removed.

	def spider_opened(self, spider):
		"""Open the spider-specific output file and start exporting."""
		if spider.name == 'timesnews':
			out = open('TodaysToiScrapedItems.json', 'w+b')
		else:
			out = open('TodaysHtScrapedItems.json', 'w+b')
		self.files[spider] = out
		self.exporter = JsonItemExporter(out)
		self.exporter.start_exporting()

	def spider_closed(self, spider):
		"""Finish the export and close the output file."""
		self.exporter.finish_exporting()
		self.files.pop(spider).close()

	def process_item(self, item, spider):
		"""Export each item and return it unchanged."""
		# Indentation normalized to tabs throughout (the original mixed
		# tabs and spaces on the return line, a TabError under Python 3).
		self.exporter.export_item(item)
		return item
开发者ID:yogeshdixit41,项目名称:PyScrapyWebCrawler,代码行数:25,代码来源:pipelines.py

示例15: ExportJSON

class ExportJSON(object):

    """
    Exporting to export/json/spider-name.json file
    """

    def __init__(self):
        self.files = {}       # one open file handle per spider
        self.exporter = None  # created when the spider opens

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to spider lifecycle signals."""
        obj = cls()
        crawler.signals.connect(obj.spider_opened, signals.spider_opened)
        crawler.signals.connect(obj.spider_closed, signals.spider_closed)
        return obj

    def spider_opened(self, spider):
        """Open the per-spider output file and begin exporting."""
        out = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = out
        self.exporter = JsonItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file opened for this spider."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export the item and pass it along the pipeline."""
        self.exporter.export_item(item)
        return item
开发者ID:AnkurDedania,项目名称:arachne,代码行数:31,代码来源:pipelines.py


注:本文中的scrapy.contrib.exporter.JsonItemExporter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。