當前位置: 首頁>>代碼示例>>Python>>正文


Python exporter.JsonLinesItemExporter類代碼示例

本文整理匯總了Python中scrapy.contrib.exporter.JsonLinesItemExporter的典型用法代碼示例。如果您正苦於以下問題:Python JsonLinesItemExporter類的具體用法?Python JsonLinesItemExporter怎麽用?Python JsonLinesItemExporter使用的例子?那麽,這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了JsonLinesItemExporter類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: NordstromPipeline

class NordstromPipeline(object):
    """Deduplicate scraped products by `product_item_num` and export the
    unique ones to `<spider name>_products.jl` as JSON lines."""

    def __init__(self):
        # open file handles, keyed by spider
        self.files = {}
        # product ids already exported in this run
        self.ids_seen = set()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook the spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def process_item(self, item, spider):
        """Export the item once per product id; drop repeats."""
        key = item['product_item_num']
        if key in self.ids_seen:
            raise DropItem("Duplicate item found: %s" % item)
        self.ids_seen.add(key)
        self.exporter.export_item(item)
        return item

    def spider_opened(self, spider):
        # one .jl output file per spider, binary mode for the exporter
        handle = open('%s_products.jl' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()
開發者ID:vazeer,項目名稱:Aisle_Automation,代碼行數:31,代碼來源:pipelines.py

示例2: FeedWriterPipeline

class FeedWriterPipeline(object):
    """Append FeedItem objects, as JSON lines, to the spider's feed output
    file; non-feed spiders and non-feed items pass through untouched."""

    def __init__(self):
        log.msg('FeedWriterPipeline.__init__()')
        self.file = None
        self.item_exporter = None
        # number of feed items written so far
        self.count = 0

    def open_spider(self, spider):
        """Open the output file in append mode for feed-op spiders only."""
        if not FeedSpider.is_feed_op(spider):
            return
        spider.make_sure_path_exists(spider.get_output_dir_path())
        file_name = spider.get_feed_output_file_path()
        self.file = open(file_name, 'a')
        self.item_exporter = JsonLinesItemExporter(self.file)
        log.msg('FeedWriterPipeline, opened file %s to append.' % file_name)

    def process_item(self, item, spider):
        """Write feed items and drop them; pass everything else along."""
        if not (FeedSpider.is_feed_op(spider) and isinstance(item, FeedItem)):
            return item
        self.item_exporter.export_item(item)
        self.count += 1
        spider.check_max_limit(self.count)
        # deliberate: dropping signals that the item was fully consumed here
        raise DropItem('Save item success')

    def close_spider(self, spider):
        """Append a summary line and release the file handle."""
        if not FeedSpider.is_feed_op(spider):
            return
        self.file.write('Parsed %i feed items.%s' % (self.count, os.linesep))
        self.file.close()
        log.msg('closed file, appended %i items.' % self.count)

示例3: PerispiderPipeline

class PerispiderPipeline(object):
    """Export every scraped item to `<spider name>.json` as JSON lines.

    Fix over the original: the output file opened in `open_spider` was never
    closed and the exporter never finished -- `close_spider` now does both.
    """

    def open_spider(self, spider):
        # one output file per spider, named after the spider
        name = "%s.json" % spider.name
        self.file = open(name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Serialize the item and pass it along unchanged."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        # resource-leak fix: flush the exporter and release the file handle
        self.exporter.finish_exporting()
        self.file.close()
開發者ID:lonnys,項目名稱:perispider,代碼行數:10,代碼來源:pipelines.py

示例4: get_exporter

 def get_exporter(self, item):
     """Return the exporter for this item's class, creating it on first use.

     Unused-genotype items get a JSON-lines exporter; everything else is
     exported as CSV. New exporters are cached and started immediately.
     """
     item_cls = item.__class__
     try:
         return self.exporters[item_cls]
     except KeyError:
         pass
     # first item of this class: open its output file and pick a format
     output = open(_class_to_file(item_cls), 'w+b')
     if item_cls == items.unused_genotype_data:
         exporter = JsonLinesItemExporter(output)
     else:
         exporter = CsvItemExporter(output)
     self.exporters[item_cls] = exporter
     exporter.start_exporting()
     return exporter
開發者ID:innovativemedicine,項目名稱:haplorec,代碼行數:12,代碼來源:pipelines.py

示例5: JsonLinesItemPipeline

class JsonLinesItemPipeline(object):
    """Stream every scraped item into test.json, one JSON object per line."""

    def open_spider(self, spider):
        """Open the shared output file and wire up the exporter."""
        # binary mode: the exporter handles encoding itself
        out = open('test.json', 'w+b')
        self.file = out
        self.exporter = JsonLinesItemExporter(out)

    def process_item(self, item, spider):
        """Serialize the item and pass it along unchanged."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Release the output file handle."""
        self.file.close()
開發者ID:xxoxx,項目名稱:dc,代碼行數:12,代碼來源:pipelines.py

示例6: spider_opened

 def spider_opened(self, spider):
     """Create the ./json/ output directory and attach the JSON-lines
     exporter matching the spider type (MSP list vs. per-MSP votes)."""
     if not os.path.exists('./json/'):
         os.makedirs('./json/')
     # choose output path and registry key by spider type
     if isinstance(spider, MSPCrawler):
         out_path = 'json/msps.json'
         key = 'msps'
     elif isinstance(spider, VoteCrawler):
         out_path = 'json/votes-' + spider.mspid + '.json'
         key = 'votes'
     else:
         # unknown spider type: nothing to set up (matches original behavior)
         return
     handle = open(out_path, 'w+b')
     self.files[key] = handle
     exporter = JsonLinesItemExporter(handle)
     exporter.start_exporting()
     if key == 'msps':
         self.MSPExporter = exporter
     else:
         self.VoteExporter = exporter
開發者ID:DCBoland,項目名稱:MSPCrawler,代碼行數:13,代碼來源:pipelines.py

示例7: MoviesPipeline

class MoviesPipeline(object):
    """Truncate the MySQL chart table when the spider opens, then store each
    crawled item in the database and mirror it to movies.json (JSON lines).

    Fixes over the original:
    - the bare ``except:`` referenced an undefined name ``e`` (NameError on
      any DB error) -- now ``except Exception as e``;
    - ``spider_closed`` assigned ``finish_exporting()`` (an undefined global)
      instead of calling the exporter's method.
    """

    def __init__(self):
        self.field_to_export = []
        self.file = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # signals start of export
        print("Spider opened...\nPreparing to crawl...")

        self.json_exporter = JsonLinesItemExporter(open('movies.json', 'wb'))
        self.json_exporter.start_exporting()

        # Since the charts frequently change, we need to deal with differences
        # in the cached data and current data.
        # For now, we'll just truncate the table when the spider opens
        # and dump everything in.
        cursor = connection.cursor()
        sql = 'truncate table %s' % MYSQL_TABLE
        try:
            cursor.execute(sql)
            connection.commit()
            print("*** Truncated %s Table ***" % MYSQL_TABLE)
        except Exception as e:
            # BUG FIX: original bare `except:` used an undefined name `e`
            print("Error %s" % e)
            connection.rollback()

    def process_item(self, item, spider):
        """Store the item in the database and append it to the JSON file."""
        insert_database(item)
        self.json_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        # signal end of export
        # BUG FIX: original was `self.json_exporter = finish_exporting()`
        self.json_exporter.finish_exporting()
開發者ID:athimmig,項目名稱:business_intelligence,代碼行數:50,代碼來源:pipelines.py

示例8: FmlPipeline

class FmlPipeline(object):
    """Export each scraped item to `<spider name>.json` as JSON lines.

    Fixes over the original: the dead, commented-out ``__init__`` kept as a
    pseudo-docstring is removed, and the output file -- previously never
    closed -- is released (with the exporter finished) in ``close_spider``.
    """

    def open_spider(self, spider):
        # one output file per spider, named after the spider
        name = "%s.json" % spider.name
        self.file = open(name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Serialize the item and pass it along unchanged."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        # resource-leak fix: finish the export and release the handle
        self.exporter.finish_exporting()
        self.file.close()
開發者ID:notsobad,項目名稱:joke-spider,代碼行數:16,代碼來源:pipelines.py

示例9: open_spider

 def open_spider(self, spider):
     """For feed-op spiders, open the feed output file in append mode and
     attach a JSON-lines exporter; other spiders are ignored."""
     if not FeedSpider.is_feed_op(spider):
         return
     spider.make_sure_path_exists(spider.get_output_dir_path())
     file_name = spider.get_feed_output_file_path()
     self.file = open(file_name, 'a')
     self.item_exporter = JsonLinesItemExporter(self.file)
     log.msg('FeedWriterPipeline, opened file %s to append.' % file_name)
開發者ID:jarvisji,項目名稱:ScrapyCrawler,代碼行數:7,代碼來源:pipelines.py

示例10: spider_opened

 def spider_opened(self, spider):
     """Open a dated per-spider output file under DATA_DIR and start a
     JSON-lines exporter on it."""
     out_name = '%s/%s/%s.json' % (settings.DATA_DIR,
                                   spider.name,
                                   datetime.date.today().isoformat())
     handle = open(out_name, 'w+b')
     self.files[spider] = handle
     self.exporter = JsonLinesItemExporter(handle)
     self.exporter.start_exporting()
開發者ID:laprice,項目名稱:flask_api_demo,代碼行數:8,代碼來源:pipelines.py

示例11: process_spider_output

 def process_spider_output(self, response, result, spider):
     """Yield spider output unchanged while collecting every Item into a
     CCA record, which is then either yielded or appended to the spider's
     CCA file through a cached per-path JSON-lines exporter."""
     collected = []
     for entry in result:
         if isinstance(entry, Item):
             collected.append(entry)
         yield entry
     cca = response2cca(response, base64=True)
     cca['features'] = {'items': collected}
     cca_item = self.create_item(cca)
     cca_path = self.get_cca_path(spider)
     if cca_path is None:
         # no output file configured: hand the CCA item downstream
         yield cca_item
         return
     exporter = self.exporters_by_path.get(cca_path)
     if exporter is None:
         # first use of this path: open in append mode and cache the exporter
         exporter = JsonLinesItemExporter(open(cca_path, 'a+'))
         self.exporters_by_path[cca_path] = exporter
     exporter.export_item(cca_item)
開發者ID:dracone,項目名稱:memex-scrapy-utils,代碼行數:18,代碼來源:cca.py

示例12: process_item

    def process_item(self, item, spider):
        """Write the item to the per-spider, per-item-class JSON-lines file,
        opening (and caching) a new exporter for each class on first use."""
        settings = spider.crawler.settings
        per_spider = self.xporters[spider.name]
        item_cls = item.__class__
        if item_cls not in per_spider:
            # first item of this class for this spider: open its output file
            out_dir = path.join(settings.get('IO_PATH', 'io'),
                                settings['DATA_SET'])
            _mkdir_p(out_dir)
            handle = open(path.join(out_dir, '%s.json' % item.export_filename),
                          'w')
            exporter = JsonLinesItemExporter(file=handle)
            exporter.start_exporting()
            per_spider[item_cls] = (handle, exporter)
        per_spider[item_cls][1].export_item(item)
        return item
開發者ID:YukiShan,項目名稱:amazon-review-spam,代碼行數:19,代碼來源:pipelines.py

示例13: JsonLinesExportPipeline

class JsonLinesExportPipeline(object):
    """Append items that carry image URLs to `<spider name>_pics.json` as
    JSON lines, keeping non-ASCII text unescaped."""

    def __init__(self):
        # open file handles, keyed by spider
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook the spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        handle = open('%s_pics.json' % spider.name, 'a')
        self.files[spider] = handle
        # ensure_ascii=False keeps non-ASCII characters readable in output
        self.exporter = JsonLinesItemExporter(handle, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        # only export items that actually carry image URLs
        if item.get("image_urls"):
            self.exporter.export_item(item)
        return item
開發者ID:tenggyut,項目名稱:PicScrapy,代碼行數:26,代碼來源:pipelines.py

示例14: TibiaPipeline

class TibiaPipeline(object):
    """Write every scraped item to a timestamped per-spider JSON-lines file."""

    def __init__(self):
        # open file handles, keyed by spider
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook the spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        # output name embeds the start timestamp so each run gets its own file
        out_name = '%s.json' % (spider.name + datetime.datetime.now().isoformat())
        handle = open(out_name, 'a+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
開發者ID:mic-kul,項目名稱:tibia-scrapy,代碼行數:25,代碼來源:pipelines.py

示例15: ValidatorPipeline

class ValidatorPipeline(object):
    """Export items to a temporary JSON-lines file for validation, limited
    to the fields that the validation step actually checks."""

    def __init__(self):
        self.exporter = None
        # open file handles, keyed by spider
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook the spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        handle = open(_get_spider_output_filename(spider), 'wb')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        # restrict output to the fields validation cares about
        self.exporter.fields_to_export = _get_fields_to_check(ProductItem)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
開發者ID:realchief,項目名稱:Python3-Scrapy-for-multiple-Ecommerce-sites,代碼行數:30,代碼來源:validation.py


注:本文中的scrapy.contrib.exporter.JsonLinesItemExporter類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。