當前位置: 首頁>>代碼示例>>Python>>正文


Python JsonLinesItemExporter.finish_exporting方法代碼示例

本文整理匯總了Python中scrapy.contrib.exporter.JsonLinesItemExporter.finish_exporting方法的典型用法代碼示例。如果您正苦於以下問題：Python JsonLinesItemExporter.finish_exporting方法的具體用法？Python JsonLinesItemExporter.finish_exporting怎麼用？Python JsonLinesItemExporter.finish_exporting使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在scrapy.contrib.exporter.JsonLinesItemExporter的用法示例。


在下文中一共展示了JsonLinesItemExporter.finish_exporting方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: JsonLinesExportPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class JsonLinesExportPipeline(object):
    """Pipeline that exports items carrying image URLs via JsonLinesItemExporter.

    One output file per spider, named ``<spider.name>_pics.json``, is opened
    in append mode when the spider opens and closed when it closes.
    """

    def __init__(self):
        # Output handles, keyed by the spider each one was opened for.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and hook it to the spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's output file and start a fresh exporter on it."""
        handle = open('%s_pics.json' % spider.name, 'a')
        self.files[spider] = handle
        # ensure_ascii=False is handed to the exporter unchanged.
        self.exporter = JsonLinesItemExporter(handle, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` only if its ``image_urls`` is truthy; always return it."""
        if not item.get("image_urls"):
            return item
        self.exporter.export_item(item)
        return item
開發者ID:tenggyut,項目名稱:PicScrapy,代碼行數:28,代碼來源:pipelines.py

示例2: NordstromPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class NordstromPipeline(object):
    """Drop items whose ``product_item_num`` was already seen; export the rest.

    Output goes to ``<spider.name>_products.jl`` through a
    JsonLinesItemExporter.
    """

    def __init__(self):
        # Product numbers exported so far, used to detect duplicates.
        self.ids_seen = set()
        # Open output handles keyed by spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Instantiate the pipeline and subscribe to spider open/close signals."""
        pipeline = cls()
        for event in ('spider_opened', 'spider_closed'):
            crawler.signals.connect(getattr(pipeline, event),
                                    getattr(signals, event))
        return pipeline

    def process_item(self, item, spider):
        """Export ``item`` the first time its product number is seen.

        Raises DropItem when the product number has been exported before.
        """
        product_num = item['product_item_num']
        if product_num in self.ids_seen:
            raise DropItem("Duplicate item found: %s" % item)
        self.ids_seen.add(product_num)
        self.exporter.export_item(item)
        return item

    def spider_opened(self, spider):
        """Open the spider's output file and start exporting to it."""
        handle = open('%s_products.jl' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()
開發者ID:vazeer,項目名稱:Aisle_Automation,代碼行數:33,代碼來源:pipelines.py

示例3: TibiaPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class TibiaPipeline(object):
    """Export every item to a timestamped ``.json`` file via JsonLinesItemExporter.

    The file name is the spider name followed by the ISO-formatted time at
    which the spider was opened.
    """

    def __init__(self):
        # Open output handles keyed by spider.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and register its open/close signal handlers."""
        obj = cls()
        hooks = (
            (obj.spider_opened, signals.spider_opened),
            (obj.spider_closed, signals.spider_closed),
        )
        for receiver, signal in hooks:
            crawler.signals.connect(receiver, signal)
        return obj

    def spider_opened(self, spider):
        """Open the timestamped output file and start exporting to it."""
        base_name = spider.name + datetime.datetime.now().isoformat()
        handle = open('%s.json' % base_name, 'a+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
開發者ID:mic-kul,項目名稱:tibia-scrapy,代碼行數:27,代碼來源:pipelines.py

示例4: ValidatorPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class ValidatorPipeline(object):
    """Export items to the file named by ``_get_spider_output_filename``.

    Only the fields returned by ``_get_fields_to_check(ProductItem)`` are
    exported.
    """

    def __init__(self):
        # Open output handles keyed by spider.
        self.files = {}
        # Set in spider_opened; None until a spider has been opened.
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        instance = cls()
        subscribe = crawler.signals.connect
        subscribe(instance.spider_opened, signals.spider_opened)
        subscribe(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's output file and start a field-restricted export."""
        handle = open(_get_spider_output_filename(spider), 'wb')
        self.files[spider] = handle
        exporter = JsonLinesItemExporter(handle)
        self.exporter = exporter
        exporter.fields_to_export = _get_fields_to_check(ProductItem)
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
開發者ID:realchief,項目名稱:Python3-Scrapy-for-multiple-Ecommerce-sites,代碼行數:32,代碼來源:validation.py

示例5: ScrippaPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class ScrippaPipeline(object):
    """Export every item to ``2_reports.json`` through a JsonLinesItemExporter.

    The output handle is tracked per spider in ``self.files`` so that it can
    be closed when the spider closes instead of being leaked.
    """

    def __init__(self):
        # Open output handles keyed by spider; emptied again in spider_closed.
        self.files = {}
        print("DDDDDDDDDDDDDDDDDDDDDDDDDUUUUUUUUUUUUUUUUUUUUUUUUUUUDE")

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open ``2_reports.json`` and start exporting to it."""
        handle = open('2_reports.json', 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        try:
            self.exporter.finish_exporting()
        finally:
            # Close the handle even if finishing the export fails.
            handle = self.files.pop(spider, None)
            if handle is not None:
                handle.close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        print("ScrippaPipeline: exporting item ============================== ")
        self.exporter.export_item(item)
        return item
開發者ID:danryu,項目名稱:bbripper,代碼行數:33,代碼來源:pipelines.py

示例6: PajandanPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class PajandanPipeline(object):
    """Export every item to ``articles.json`` as UTF-8 via JsonLinesItemExporter."""

    def __init__(self):
        # Keyed by spider, since more than one spider may be running.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        pipeline = cls()
        for name in ('spider_opened', 'spider_closed'):
            crawler.signals.connect(getattr(pipeline, name),
                                    getattr(signals, name))
        return pipeline

    def spider_opened(self, spider):
        """Open ``articles.json`` with a UTF-8 codec and start exporting to it."""
        handle = codecs.open('articles.json', 'w+', encoding='utf-8')
        self.files[spider] = handle
        # ensure_ascii=False is handed to the exporter unchanged.
        self.exporter = JsonLinesItemExporter(handle, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
開發者ID:gavinhub,項目名稱:Jandan.EPUB,代碼行數:28,代碼來源:pipelines.py

示例7: PlayerPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class PlayerPipeline(object):
    """Export ``PlayerInfoItem`` instances to ``output/player_info.json``.

    The output path is resolved under ``settings.PROJECT_ROOT``. Items of any
    other type pass through without being exported.
    """

    def __init__(self, *args, **kwargs):
        # Both are populated in spider_opened; constructor arguments are ignored.
        self.player_info_exporter = None
        self.player_info_file = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the player-info output file and start exporting to it."""
        path = "%s/output/player_info.json" % settings.PROJECT_ROOT
        self.player_info_file = open(path, 'wb')
        self.player_info_exporter = JsonLinesItemExporter(self.player_info_file)
        self.player_info_exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the player-info file."""
        self.player_info_exporter.finish_exporting()
        self.player_info_file.close()

    def process_item(self, item, spider):
        """Export ``item`` when it is a ``PlayerInfoItem``; always return it."""
        if not isinstance(item, PlayerInfoItem):
            return item
        self.player_info_exporter.export_item(item)
        return item
開發者ID:ayc92,項目名稱:nba-crawler,代碼行數:27,代碼來源:pipelines.py

示例8: JsonExportPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class JsonExportPipeline(object):
    """Export every item to ``<spider.name>_Joke.txt`` via JsonLinesItemExporter."""

    def __init__(self):
        # Open output handles keyed by spider.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        instance = cls()
        pairs = (
            (instance.spider_opened, signals.spider_opened),
            (instance.spider_closed, signals.spider_closed),
        )
        for receiver, signal in pairs:
            crawler.signals.connect(receiver, signal)
        return instance

    def spider_opened(self, spider):
        """Open the spider's output file and start exporting to it."""
        handle = open('%s_Joke.txt' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
開發者ID:zhilinwang,項目名稱:scrapy-crawler,代碼行數:28,代碼來源:pipelines.py

示例9: JsonExportPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class JsonExportPipeline(object):
    """Export every item to ``data/<spider.name>.json`` via JsonLinesItemExporter."""

    def __init__(self):
        # Open output handles keyed by spider.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's file under ``data`` and start exporting to it."""
        handle = open(os.path.join('data', '%s.json' % spider.name), 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item``, log a debug message, and pass the item on."""
        self.exporter.export_item(item)
        log.msg("process_item", level=log.DEBUG)
        return item
開發者ID:irgmedeiros,項目名稱:folhainvest,代碼行數:29,代碼來源:pipelines.py

示例10: AdbPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class AdbPipeline(object):
    """Export each distinct ``product`` once to a dated per-spider JSON file.

    The output path is ``<settings.DATA_DIR>/<spider.name>/<today>.json``.
    """

    def __init__(self):
        # Products exported so far, used to drop repeats.
        self.seen = set()
        # Open output handles keyed by spider.
        self.files = {}
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        """Open today's output file for ``spider`` and start exporting to it."""
        path = '%s/%s/%s.json'% (settings.DATA_DIR,
                                 spider.name,
                                 datetime.date.today().isoformat())
        handle = open(path, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` unless its product was already seen.

        Raises DropItem for a repeated product.
        """
        if self.seen_before(item):
            raise DropItem
        self.exporter.export_item(item)
        return item

    def seen_before(self, item):
        """Return True if ``item['product']`` was seen; otherwise record it."""
        product = item['product']
        already_seen = product in self.seen
        if not already_seen:
            self.seen.add(product)
        return already_seen
開發者ID:laprice,項目名稱:flask_api_demo,代碼行數:35,代碼來源:pipelines.py

示例11: TffdatapullPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class TffdatapullPipeline(object):
    """Write every item in four formats: CSV, JSON, JSON lines and XML.

    Each format goes to its own file named after the spider
    (``.csv``, ``.json``, ``.linejson``, ``.xml``). The opened handles are
    tracked in ``self.files`` so they are closed when the spider closes.
    """

    def __init__(self):
        # Fields (and their order) handed to every exporter.
        self.fields_to_export = [
            'match_code',
            'match_href',
            'home_team_id',
            'home_team_name',
            'score',
            'guest_team_id',
            'guest_team_name',
            'date_of_match',
            'time_of_match',
            'stadium_name',
            'organization_name',
        ]
        # Handles opened in spider_opened; closed again in spider_closed.
        self.files = []
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def _open_output(self, spider, extension):
        """Open ``<spider.name><extension>`` for writing and remember the handle."""
        handle = open(spider.name + extension, "w")
        self.files.append(handle)
        return handle

    def _exporters(self):
        """Return the four exporters in the order they are driven."""
        return (self.csv_exporter, self.json_exporter,
                self.jsonlines_exporter, self.xml_exporter)

    def spider_opened(self, spider):
        """Create one exporter per output format and start all of them."""
        self.csv_exporter = CsvItemExporter(
            self._open_output(spider, ".csv"),
            fields_to_export=self.fields_to_export,
            quoting=csv.QUOTE_ALL)
        self.json_exporter = TffdatapullJsonItemExporter(
            self._open_output(spider, ".json"),
            fields_to_export=self.fields_to_export,
            sort_keys=True, indent=4)
        self.jsonlines_exporter = JsonLinesItemExporter(
            self._open_output(spider, ".linejson"),
            fields_to_export=self.fields_to_export)
        self.xml_exporter = TffdatapullXmlItemExporter(
            self._open_output(spider, ".xml"),
            fields_to_export=self.fields_to_export,
            root_element="match_code", item_element="match_code")
        for exporter in self._exporters():
            exporter.start_exporting()

    def process_item(self, item, spider):
        """Export ``item`` through every exporter and pass it on unchanged."""
        for exporter in self._exporters():
            exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finish every exporter, then close all output files."""
        try:
            for exporter in self._exporters():
                exporter.finish_exporting()
        finally:
            # Close the handles even if an exporter fails while finishing.
            while self.files:
                self.files.pop().close()
開發者ID:osmant,項目名稱:TFFDataPull,代碼行數:51,代碼來源:pipelines.py

示例12: JsonWriterPipeline2

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class JsonWriterPipeline2(object):
    """Export the ``title`` and ``link`` of every item to ``<spider.name>.linejson``."""

    def __init__(self):
        # Was misspelled ``_init_``, so none of this setup ever ran.
        self.fields_to_export = [
            'title',
            'link',
        ]
        # Output handle; opened in spider_opened and closed in spider_closed.
        self.file = None
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        """Open the spider's output file and start exporting to it."""
        self.file = open(spider.name + ".linejson", "w")
        self.jsonlines_exporter = JsonLinesItemExporter(
            self.file, fields_to_export=self.fields_to_export)
        self.jsonlines_exporter.start_exporting()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.jsonlines_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finish exporting and close the output file."""
        try:
            self.jsonlines_exporter.finish_exporting()
        finally:
            # Close the handle even if finishing the export fails.
            if self.file is not None:
                self.file.close()
                self.file = None
開發者ID:nsprams,項目名稱:webscrap,代碼行數:20,代碼來源:pipelines.py

示例13: JsonExportExternalIdPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class JsonExportExternalIdPipeline(object):
    """Export every item to ``scraped/<spider.external_id>.json``."""

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the spider's output file and start exporting to it."""
        self.file = open('scraped/%s.json' % spider.external_id, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` and return it.

        The item must be returned so that any pipeline stage running after
        this one receives it instead of ``None``.
        """
        self.exporter.export_item(item)
        return item
開發者ID:netconstructor,項目名稱:gatherer,代碼行數:21,代碼來源:pipelines.py

示例14: MspPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class MspPipeline(object):
    """Route MSP and vote items to separate JSON files under ``json/``.

    ``MSPCrawler`` spiders write to ``json/msps.json``; ``VoteCrawler``
    spiders write to ``json/votes-<spider.mspid>.json``.
    """

    def __init__(self):
        self.ids_seen = set()
        # Open output handles, keyed by 'msps' / 'votes'.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Make sure ``./json/`` exists, then open the file for this spider type."""
        if not os.path.exists('./json/'):
            os.makedirs('./json/')
        if isinstance(spider, MSPCrawler):
            msp_handle = open('json/msps.json', 'w+b')
            self.files['msps'] = msp_handle
            self.MSPExporter = JsonLinesItemExporter(msp_handle)
            self.MSPExporter.start_exporting()
            return
        if isinstance(spider, VoteCrawler):
            vote_handle = open('json/votes-' + spider.mspid + '.json', 'w+b')
            self.files['votes'] = vote_handle
            self.VoteExporter = JsonLinesItemExporter(vote_handle)
            self.VoteExporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the exporter for this spider type and close all tracked files."""
        exporter = None
        if isinstance(spider, VoteCrawler):
            exporter = self.VoteExporter
        elif isinstance(spider, MSPCrawler):
            exporter = self.MSPExporter
        if exporter is not None:
            exporter.finish_exporting()
        # Every tracked handle is closed, whichever spider type just closed.
        for handle in self.files.values():
            handle.close()

    def process_item(self, item, spider):
        """Send ``item`` to the exporter matching its type; always return it."""
        if isinstance(item, MSPItem):
            self.MSPExporter.export_item(item)
            return item
        if isinstance(item, VoteItem):
            self.VoteExporter.export_item(item)
        return item
開發者ID:DCBoland,項目名稱:MSPCrawler,代碼行數:42,代碼來源:pipelines.py

示例15: JsonLinesExportPipeline

# 需要導入模塊: from scrapy.contrib.exporter import JsonLinesItemExporter [as 別名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 別名]
class JsonLinesExportPipeline(object):
    """Export every item to ``<spider.name>_items.json`` via JsonLinesItemExporter."""

    def __init__(self):
        self.first_item = True
        # Open output handles keyed by spider.
        self.files = dict()
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        """Open the spider's output file and start exporting to it."""
        handle = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file opened for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
開發者ID:TanvirMahmudEmon,項目名稱:scrapage,代碼行數:24,代碼來源:pipelines.py


注:本文中的scrapy.contrib.exporter.JsonLinesItemExporter.finish_exporting方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。