

Python JsonItemExporter.start_exporting Method Code Examples

This article collects typical usage examples of the Python method scrapy.contrib.exporter.JsonItemExporter.start_exporting. If you are unsure exactly how to use JsonItemExporter.start_exporting, the curated method examples below may help. You can also explore further usage examples of scrapy.contrib.exporter.JsonItemExporter, the class this method belongs to.


The following presents 15 code examples of the JsonItemExporter.start_exporting method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
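Before the examples, a minimal sketch of the exporter lifecycle that all of the pipelines below follow (the item class and file name are placeholders for illustration): start_exporting() writes the opening of the JSON array, export_item() appends each serialized item, and finish_exporting() writes the closing bracket.

from scrapy.contrib.exporter import JsonItemExporter
from scrapy.item import Item, Field

class DemoItem(Item):          # placeholder item type for illustration
    title = Field()

# open in binary mode: the exporter writes encoded bytes (Python 2-era API)
f = open('items.json', 'w+b')  # placeholder file name
exporter = JsonItemExporter(f)

exporter.start_exporting()                       # writes the opening '['
exporter.export_item(DemoItem(title='example'))  # appends one serialized item
exporter.finish_exporting()                      # writes the closing ']'
f.close()

For brevity, the snippets below show only the focal import. Where referenced, they additionally assume from scrapy import signals, from scrapy.xlib.pydispatch import dispatcher, from scrapy import log, and the standard-library modules they use (json, httplib, codecs, datetime).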

Example 1: ExportJSON

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class ExportJSON(object):

    """
    Export items to an exports/json/spider-name.json file.
    """

    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file_to_save = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_to_save = self.files.pop(spider)
        file_to_save.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Author: AnkurDedania, Project: arachne, Lines: 33, Source: pipelines.py
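For context, a pipeline like the one above is enabled through the ITEM_PIPELINES setting in the project's settings.py. A minimal sketch, where the dotted path is a hypothetical placeholder for your own project layout (the dict-with-priority syntax applies to Scrapy 0.20+; older releases used a plain list):

# settings.py -- hypothetical module path, adjust to your project
ITEM_PIPELINES = {
    'myproject.pipelines.ExportJSON': 300,  # lower numbers run earlier in the chain
}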

Example 2: MonitorPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class MonitorPipeline(object):

	def open_spider(self, spider):
		self.cols = spider.cols
		self.start_urls = spider.start_urls

		self.file = open('test.json', 'w+b')
		self.exporter = JsonItemExporter(self.file)
		self.exporter.start_exporting()

	def close_spider(self, spider):
		self.exporter.finish_exporting()
		self.file.close()

	def process_item(self, item, spider):

		try:
			index = self.start_urls.index(item['surl'])
			groupId = index / self.cols  # integer division under Python 2
			r = index % self.cols
			if r in (0, 1, 2):
				item['main'] = r  # column position within the group
			item['gid'] = groupId
		except ValueError:  # item['surl'] is not in start_urls
			index = -1

		self.exporter.export_item(item)
		return item		
Author: xxoxx, Project: dc, Lines: 34, Source: pipelines.py

Example 3: JsonExportPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):
	def __init__(self):
		self.files = {}

	@classmethod
	def from_crawler(cls, crawler):
		pipeline = cls()
		crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
		crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
		return pipeline

	def spider_opened(self, spider):
		#file = open('%s_ixbt_jokes.json' % spider.name, 'w+b')
		file = open('ixbt_jokes.json', 'w+b')
		self.files[spider] = file
		self.exporter = JsonItemExporter(file)
		self.exporter.start_exporting()

	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()

	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item
Author: AndrewLvov, Project: ixbt_jokes, Lines: 28, Source: pipelines.py

Example 4: AppsPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class AppsPipeline(object):

    def __init__(self, spider):
        self.file = open('{category}-{today}.json'.format(
                today = date.today().strftime('%d-%m-%Y'),
                category = spider.category), 'wb')
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(spider = crawler.spider)

    def spider_opened(self, spider):
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        if spider.name == 'apps':
            self.exporter.export_item(item)
        return item
Author: streeck, Project: playstore-crawler, Lines: 27, Source: pipelines.py

Example 5: SaveNewItems

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class SaveNewItems(object):
    def __init__(self):
        self.files = []
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def process_item(self, item, spider):
        self.new_file_exporter.export_item(item)
        print "Save " + item["title"][0]
        return item

    def spider_opened(self, spider):
        self.new_item_file = open("storage\\%s_new_items.json" % spider.name, "w")
        self.new_file_exporter = JsonItemExporter(self.new_item_file)
        self.new_file_exporter.start_exporting()

    def spider_closed(self, spider):
        with open("storage\\%s_items.json" % spider.name, "w") as items_file:
            self.exporter = JsonItemExporter(items_file)
            self.exporter.start_exporting()
            for item in incomingData:
                self.exporter.export_item(item)
            self.exporter.finish_exporting()
            self.new_file_exporter.finish_exporting()
            items_file.close()
            self.new_item_file.close()
Author: altus88, Project: crawler, Lines: 28, Source: pipelines.py

Example 6: SpidercrawlerPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class SpidercrawlerPipeline(object):
	def __init__(self):
		dispatcher.connect(self.spider_opened, signals.spider_opened)
		dispatcher.connect(self.spider_closed, signals.spider_closed)
		self.files = {}
		# note: the original also created an exporter here around the builtin
		# `file` type (its open() call was commented out); spider_opened below
		# creates the real exporter.
	
	def spider_opened(self, spider):
		if spider.name == 'timesnews':
			file = open('TodaysToiScrapedItems.json', 'w+b')
		else:
			file = open('TodaysHtScrapedItems.json', 'w+b')
		self.files[spider] = file
		self.exporter = JsonItemExporter(file)
		self.exporter.start_exporting()
	
	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()
		
	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item
Author: yogeshdixit41, Project: PyScrapyWebCrawler, Lines: 27, Source: pipelines.py

Example 7: CrawlerPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class CrawlerPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_products.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file, indent=4) # ensure_ascii=False should go here, but it doesn't work ;P
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Author: Hazardius, Project: FlatSearch, Lines: 28, Source: pipelines.py
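The comment in the example above likely describes a Python 2 pitfall: with ensure_ascii=False, the underlying JSONEncoder yields unicode chunks, which raise UnicodeEncodeError when written to a file opened in binary mode. A possible workaround, sketched under that assumption, is to hand the exporter a stream that encodes unicode to UTF-8 on write:

import codecs

file = open('%s_products.json' % spider.name, 'w+b')
writer = codecs.getwriter('utf-8')(file)  # encodes unicode chunks on write
exporter = JsonItemExporter(writer, indent=4, ensure_ascii=False)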

Example 8: JsonExportPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):
    def __init__(self):
        log.msg('JsonExportPipeline.init....', level=log.INFO)
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        log.msg('JsonExportPipeline.from_crawler....', level=log.INFO)
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        log.msg('JsonExportPipeline.spider_opened....', level=log.INFO)
        file = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        log.msg('JsonExportPipeline.spider_closed....', level=log.INFO)
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        log.msg('JsonExportPipeline.process_item....', level=log.INFO)
        self.exporter.export_item(item)
        return item
Author: lovoror, Project: core-scrapy, Lines: 32, Source: pipelines.py

Example 9: YxreviewPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class YxreviewPipeline(object):

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.checkData(item, "title")
        self.checkData(item, "summary")
        self.checkData(item, "cover_image")
        self.checkData(item, "score")

        self.exporter.export_item(item)

        return item

    def checkData(self, item, field):
        if len(item[field]) > 0:
            newText = item[field][0].encode("utf-8")
            item[field] = newText.strip()
        else:
            item[field] = ""
Author: KinoAndWorld, Project: YouxiaReview, Lines: 36, Source: pipelines.py

Example 10: CLPipe

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class CLPipe(object):
    """A pipeline for writing results to json"""
    def __init__(self, **kwargs):
        self.files = {}
        self.AppID = kwargs.get('AppID')
        self.ApiKey = kwargs.get('ApiKey')
        super(CLPipe, self).__init__(**kwargs)

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        #open a static/dynamic file to read and write to
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()
        #reopen new static/dynamic file to parse for sending
        new = open('%s_items.json' % spider.name)
        data = json.load(new)
        #reg = re.compile(r'[\n\r\t]')
        #for i in data:
        #    log.msg(i)
        # Looping here with one API call per item is very bad: in one day this
        # sent almost 500k requests. Instead, send a single batched load and
        # process it on the other end, as done below.
        connection = httplib.HTTPSConnection('api.parse.com', 443)
        connection.connect()
        connection.request('POST', '/1/functions/scrapeSaver', json.dumps({
        #    #"email":data[i]["email"], "referer":data[i]["referer"], "scrapeID":data[i]["id"]
            "data":data
        }), {
            "X-Parse-Application-Id": self.AppID,
            "X-Parse-REST-API-Key": self.ApiKey,
            "Content-Type": "application/json"
        })
        result = json.loads(connection.getresponse().read())
        print "Sending load ", result
        #done with the new file, close it
        new.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Author: MrRyanAlexander, Project: madbot, Lines: 60, Source: pipelines.py

Example 11: ExportJSON

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class ExportJSON(ExportData):
    """
    Export items to an exports/json/spider-name.json file.
    ExportData is the project's base pipeline class, defined elsewhere in arachne.
    """
    def spider_opened(self, spider):
        file_to_save = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()
Author: Watermeloniscoding, Project: arachne, Lines: 11, Source: pipelines.py

Example 12: JSONExportPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JSONExportPipeline(object):
    def __init__(self):
        self.file = open('items.json', 'w')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
Author: flyingsleeves, Project: watson_crawler, Lines: 15, Source: pipelines.py

Example 13: JsonItemPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JsonItemPipeline(object):

	def open_spider(self, spider):
		self.file = open('test.json', 'w+b')
		self.exporter = JsonItemExporter(self.file)
		self.exporter.start_exporting()

	def close_spider(self, spider):
		self.exporter.finish_exporting()
		self.file.close()

	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item	
Author: xxoxx, Project: dc, Lines: 16, Source: pipelines.py

Example 14: DoubanSpiderPipeline

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class DoubanSpiderPipeline(object):
    def __init__(self):
        # codecs.open with a binary mode plus an encoding is contradictory, and
        # pre-encoding items with json.JSONEncoder would double-encode the output.
        # The exporter serializes items itself; wrap the binary file with a UTF-8
        # writer so ensure_ascii=False (which yields unicode) writes safely.
        self.file = open('books2.json', 'w+b')
        writer = codecs.getwriter('utf-8')(self.file)
        self.exporter = JsonItemExporter(writer, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        # note: this runs only if connected to the spider_closed signal
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Author: Terrile, Project: oreilly_spider_2, Lines: 17, Source: pipelines.py

Example 15: DoubanJsonWrite

# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class DoubanJsonWrite(object):
    def __init__(self):
        #dispatcher.connect(self.open_spider, signals.spider_opened)
        #dispatcher.connect(self.close_spider, signals.spider_closed)
        # note: despite the .jl (JSON Lines) extension, JsonItemExporter
        # writes a single JSON array
        self.itemsfile = open('items.jl', 'w')

    def open_spider(self, spider):
        self.exporter = JsonItemExporter(self.itemsfile)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.itemsfile.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Author: szqh97, Project: test, Lines: 18, Source: pipelines.py


Note: The scrapy.contrib.exporter.JsonItemExporter.start_exporting method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by various developers; copyright remains with the original authors, and redistribution or use should follow the corresponding project's License. Do not reproduce without permission.