

Python CsvItemExporter.export_item Method Code Examples

This article collects typical code examples of the Python method scrapy.exporters.CsvItemExporter.export_item, gathered from open-source projects. If you are unsure what CsvItemExporter.export_item does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the containing class, scrapy.exporters.CsvItemExporter.


Fifteen code examples of CsvItemExporter.export_item are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
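Before turning to the project examples, here is a minimal, self-contained sketch of the export_item call flow (the item dict and field names are illustrative): create the exporter over a binary file-like object, call start_exporting() once, export_item() once per item, and finish_exporting() once at the end.

from io import BytesIO

from scrapy.exporters import CsvItemExporter

buffer = BytesIO()                      # any binary file-like object works
exporter = CsvItemExporter(buffer)
exporter.start_exporting()
exporter.export_item({'name': 'John', 'age': '22'})  # header is derived from the first item's keys
exporter.finish_exporting()
print(buffer.getvalue())                # b'name,age\r\nJohn,22\r\n'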

Example 1: JsonExportPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
import os

from scrapy import signals
from scrapy.exporters import CsvItemExporter
class JsonExportPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # Despite the class name, this pipeline exports CSV; swapping in
        # JsonItemExporter (or an XML exporter) here would change the format.
        # Build the output path relative to this module.
        output_dir = os.path.join(os.path.dirname(__file__), 'output')
        file = open(os.path.join(output_dir, '%s_data.csv' % spider.name), 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: uuhako, Project: myScrapy, Lines of code: 32, Source file: pipelines.py
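A pipeline like the one above only runs if it is enabled in the project settings. A minimal sketch, assuming the class lives at the module path myScrapy.pipelines (the module path and priority value are illustrative):

# settings.py -- enable the pipeline; the integer sets its order (lower runs first)
ITEM_PIPELINES = {
    'myScrapy.pipelines.JsonExportPipeline': 300,
}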

Example 2: CsvExportPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CsvExportPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_societies.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = ['name', 'president', 'email', 'url', 'facebook', 'membership', 'about',
                                          'date_established']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: 2khc, Project: Python-Projects, Lines of code: 29, Source file: pipelines.py
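Example 2 relies on fields_to_export to fix both which columns appear and their order; without it, CsvItemExporter derives the header from the first item it sees. A small sketch of the effect (field names are illustrative):

from io import BytesIO

from scrapy.exporters import CsvItemExporter

buf = BytesIO()
exporter = CsvItemExporter(buf)
exporter.fields_to_export = ['name', 'email']  # chosen columns, in this order
exporter.start_exporting()
exporter.export_item({'email': 'a@example.com', 'name': 'Ada', 'url': 'ignored'})
exporter.finish_exporting()
print(buf.getvalue())  # b'name,email\r\nAda,a@example.com\r\n' -- 'url' is not exported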

Example 3: CSVExportPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CSVExportPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open(spider.nameOfFile, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)

        self.exporter.fields_to_export = ['originalString', 'translatedString']

        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: dianjuar, Project: WordPress_Translation_Hack, Lines of code: 31, Source file: pipelines.py

Example 4: WebcrawlerPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class WebcrawlerPipeline(object):
    def __init__(self):
        self.files = {}
    
    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline
    
    def spider_opened(self, spider):
        file = open("%s_urls.txt" % spider.name, "w+b")
        self.files[spider] = file
        self.exporter = CsvItemExporter(file, include_headers_line=False)
        self.exporter.start_exporting()
    
    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()
    
    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: garrettmiller, Project: fingerburner, Lines of code: 31, Source file: pipelines.py

Example 5: CsvExportPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CsvExportPipeline(object):

    def __init__(self):

        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):

        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)

        return pipeline

    def spider_opened(self, spider):

        file = open('vagas.csv', 'wb')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):

        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):

        self.exporter.export_item(item)

        return item
Developer: abevieiramota, Project: learning-scrapy, Lines of code: 35, Source file: pipelines.py

Example 6: CSVWriterPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CSVWriterPipeline(object):
    
    def __init__(self, filename):
        self.filename = filename
        
    @classmethod
    def from_crawler(cls, crawler):
        settings = crawler.settings
        filename = settings.get('OUTPUT_FILE')
        pipeline = cls(filename)
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open(self.filename, 'w+b')
        # Pass encoding to the constructor: CsvItemExporter wraps the file
        # stream when it is created, so setting .encoding afterwards is too late.
        self.exporter = CsvItemExporter(self.file, include_headers_line=True,
                                        encoding='utf-8')
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: mm-manu, Project: scrapy_web_scraper, Lines of code: 29, Source file: pipelines.py

Example 7: CSVPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CSVPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file, delimiter='\t')
        self.exporter.fields_to_export = ['userId', 'bookId', 'name', 'rating', 'relativeRating', 'booklistNum']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: yujiaxinlong, Project: Crawlers, Lines of code: 29, Source file: pipelines.py

Example 8: catalogscraperPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class catalogscraperPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open("%s_items.csv" % spider.name, "w+b")
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = ["title"]
        #'subject', 'description', 'creator', 'source', 'published', 'rights', 'citation', 'url']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: Mesil, Project: Catalog-Record-Scraper, Lines of code: 29, Source file: pipelines.py

Example 9: CsvExportPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CsvExportPipeline(object):
    """
    app.pipelines.exporter_csv.CsvExportPipeline
    """
    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file_csv = open('%s_items.csv' % spider.name, 'w+b')
        self.files[spider] = file_csv
        self.exporter = CsvItemExporter(file_csv)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_csv = self.files.pop(spider)
        file_csv.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: zhanghe06, Project: scrapy_project, Lines of code: 31, Source file: exporter_csv.py

Example 10: DumpToFile

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class DumpToFile(object):
    """
    Dump harvested data into flat file, no other logic is implemented here
    (it's "Dump" :-)
    """
    def __init__(self):
        self.files = {}
        self.counter = 0

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        # TODO: verify if still needed for registration of spider_closed/opened event?
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        filename = spider.get_dump_filepath()
        f = open(filename, 'wb')  # the exporter expects a binary-mode file
        self.files[spider.name] = f
        # by default the csv module uses Windows-style line terminators (\r\n)
        self.exporter = CsvItemExporter(f, include_headers_line=True, delimiter='|', lineterminator='\n')
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        f = self.files.pop(spider.name)
        f.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        # for counter, could set att in spider at closing
        self.counter += 1
        return item
Developer: mart2010, Project: brd, Lines of code: 37, Source file: pipelines.py
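Since this example overrides both the delimiter and the line terminator, reading the dump back requires matching parameters. A short sketch, assuming the dump was written to dump.csv (an illustrative path):

import csv

with open('dump.csv', newline='') as f:
    for row in csv.reader(f, delimiter='|'):
        print(row)  # each row is a list of column values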

Example 11: CSVPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CSVPipeline(object):

  def __init__(self):
    self.files = {}

  @classmethod
  def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline

  def spider_opened(self, spider):
    file = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = file
    self.exporter = CsvItemExporter(file)
    self.exporter.fields_to_export = ["filename", "titel", "publicatie", "dossiernummer", "organisatie", "publicatiedatum", "publicatietype", "file_urls"]
    self.exporter.start_exporting()

  def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file = self.files.pop(spider)
    file.close()

  def process_item(self, item, spider):
    self.exporter.export_item(item)
    return item
Developer: eadebruijn, Project: Webscraping, Lines of code: 29, Source file: pipelines.py

Example 12: assertExportResult

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from io import BytesIO

from scrapy.exporters import CsvItemExporter
    def assertExportResult(self, item, expected, **kwargs):
        fp = BytesIO()
        ie = CsvItemExporter(fp, **kwargs)
        ie.start_exporting()
        ie.export_item(item)
        ie.finish_exporting()
        self.assertCsvEqual(fp.getvalue(), expected)
Developer: Rokicto, Project: scrapy, Lines of code: 9, Source file: test_exporters.py

Example 13: AmazonCsvPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy.exporters import CsvItemExporter
class AmazonCsvPipeline(object):
    def open_spider(self, spider):
        # File object the CSV data is written to (binary mode for the exporter)
        self.f = open("Amazon_goods_crawl.csv", "wb")
        # Create the CSV exporter
        self.csv_exporter = CsvItemExporter(self.f)
        # Begin writing the CSV file
        self.csv_exporter.start_exporting()
        # Track product titles for de-duplication
        self.add_title = set()

    def process_item(self, item, spider):
        if item['title'] in self.add_title:
            print('[ERROR] already saved, skipping duplicate: %s' % item['title'])
        else:
            self.add_title.add(item['title'])
            # Write one item at a time
            # print('[INFO] writing item to CSV: %s' % item['title'])
            self.csv_exporter.export_item(item)
        return item

    def close_spider(self, spider):
        # Finish writing the CSV file
        # print('[INFO] CSV writing complete')
        self.csv_exporter.finish_exporting()
        # Close the file
        self.f.close()
Developer: Joker-Cch, Project: Amazon, Lines of code: 29, Source file: pipelines.py
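Printing and silently keeping duplicates is one option; the more idiomatic Scrapy approach is to raise DropItem so duplicates never reach later pipelines. A minimal sketch of that variant (the field name 'title' follows the example above):

from scrapy.exceptions import DropItem

class DuplicateTitleFilter(object):
    """Drop items whose 'title' has already been seen in this crawl."""

    def __init__(self):
        self.seen_titles = set()

    def process_item(self, item, spider):
        if item['title'] in self.seen_titles:
            raise DropItem('duplicate title: %s' % item['title'])
        self.seen_titles.add(item['title'])
        return item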

Example 14: test_header_export_two_items

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from io import BytesIO

from scrapy.exporters import CsvItemExporter
    def test_header_export_two_items(self):
        for item in [self.i, dict(self.i)]:
            output = BytesIO()
            ie = CsvItemExporter(output)
            ie.start_exporting()
            ie.export_item(item)
            ie.export_item(item)
            ie.finish_exporting()
            self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
Developer: 247DigitalGroup, Project: scrapy, Lines of code: 11, Source file: test_exporters.py

Example 15: FashionnovaPipeline

# Module to import: from scrapy.exporters import CsvItemExporter [as alias]
# Alternatively: from scrapy.exporters.CsvItemExporter import export_item [as alias]
from scrapy.exporters import CsvItemExporter
class FashionnovaPipeline(object):
    def __init__(self):
        self.filename = 'fashionnova.csv'

    def open_spider(self, spider):
        self.csvfile = open(self.filename, 'wb')
        self.exporter = CsvItemExporter(self.csvfile)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.csvfile.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: kellyho15, Project: capstone, Lines of code: 15, Source file: pipelines.py
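For simple cases like this last example, a custom pipeline is not strictly necessary: recent Scrapy versions can export items to CSV through the feed-exports machinery alone, driving CsvItemExporter internally. A minimal sketch using the FEEDS setting (available in Scrapy 2.1+; the file name is illustrative):

# settings.py -- no pipeline code needed; Scrapy handles start/export/finish itself
FEEDS = {
    'fashionnova.csv': {'format': 'csv'},
}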


Note: the scrapy.exporters.CsvItemExporter.export_item examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not republish without permission.