本文整理汇总了 Python 中 scrapy.exporters.CsvItemExporter 类的典型用法代码示例。如果您正苦于以下问题:Python CsvItemExporter 类的具体用法?Python CsvItemExporter 怎么用?Python CsvItemExporter 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 CsvItemExporter 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: spider_opened
def spider_opened(self, spider):
    """Open the two CSV output files and start one exporter on each.

    ``results_3.csv`` and ``results_miss_2.csv`` are opened for binary
    writing and kept on the instance together with their exporters.

    Args:
        spider: The spider that was opened; not used by this method.
    """
    self.results_csv = open('results_3.csv', 'wb')
    self.missing_csv = open('results_miss_2.csv', 'wb')

    self.results_exporter = CsvItemExporter(self.results_csv)
    self.missing_exporter = CsvItemExporter(self.missing_csv)

    # Each exporter must be started before items are handed to it.
    for exporter in (self.results_exporter, self.missing_exporter):
        exporter.start_exporting()
示例2: FacupPipeline
class FacupPipeline(object):
    """Item pipeline that writes scraped items to CSV via CsvItemExporter.

    Two files are opened when the spider starts: ``results_3.csv`` and
    ``results_miss_2.csv``. Each has its own exporter, which is started on
    spider open and finished on spider close.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals.

        Args:
            crawler: Object exposing ``signals.connect``.

        Returns:
            The new pipeline instance.
        """
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Create the output files and exporters, then start exporting.

        ``start_exporting()`` is called on each exporter because the item
        exporter class requires it before items are exported.

        Args:
            spider: The spider that was opened; not used by this method.
        """
        self.results_csv = open('results_3.csv', 'wb')
        self.missing_csv = open('results_miss_2.csv', 'wb')
        self.results_exporter = CsvItemExporter(self.results_csv)
        self.missing_exporter = CsvItemExporter(self.missing_csv)
        self.results_exporter.start_exporting()
        self.missing_exporter.start_exporting()

    def process_item(self, item, spider):
        """Write *item* to the results CSV and pass it on unchanged.

        The exporters created in ``spider_opened`` are reused. Building new
        exporters here (as the code previously did) threw away the started
        ones and never wrote any item.

        Args:
            item: The scraped item to export.
            spider: The spider that produced the item; not used.

        Returns:
            The same *item* object.
        """
        # NOTE(review): nothing visible here decides when an item belongs in
        # the "missing" file, so every item goes to the results exporter --
        # confirm the intended routing for ``missing_exporter``.
        self.results_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finish both exporters and close both files.

        Args:
            spider: The spider that was closed; not used by this method.
        """
        try:
            self.results_exporter.finish_exporting()
            self.missing_exporter.finish_exporting()
        finally:
            # Release the file handles even if an exporter fails to finish.
            self.results_csv.close()
            self.missing_csv.close()
示例3: CsvExportPipeline
class CsvExportPipeline(object):
    """Item pipeline that exports a fixed set of item fields to a CSV file.

    The output file is named ``<spider name>_societies.csv`` and is tracked
    in ``self.files`` under the spider object that opened it.
    """

    def __init__(self):
        # Open output files, keyed by spider.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the open/close signals."""
        instance = cls()
        for handler, signal in (
            (instance.spider_opened, signals.spider_opened),
            (instance.spider_closed, signals.spider_closed),
        ):
            crawler.signals.connect(handler, signal)
        return instance

    def spider_opened(self, spider):
        """Open the spider's CSV file and start an exporter on it."""
        csv_file = open('%s_societies.csv' % spider.name, 'w+b')
        self.files[spider] = csv_file
        self.exporter = exporter = CsvItemExporter(csv_file)
        # Restrict the output to these item fields.
        exporter.fields_to_export = [
            'name', 'president', 'email', 'url',
            'facebook', 'membership', 'about', 'date_established',
        ]
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item
示例4: JsonExportPipeline
class JsonExportPipeline(object):
    """Item pipeline that writes items to ``output/<spider name>_data.csv``.

    Despite the class name, the data is exported as CSV through
    CsvItemExporter. The ``output`` directory sits next to this module.
    """

    def __init__(self):
        # Open output files, keyed by spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def _output_path(self, spider_name):
        """Return the CSV path for *spider_name* in the ``output`` directory."""
        import os
        # abspath keeps the result anchored to this module even when
        # __file__ has no directory part; plain string concatenation would
        # then yield '/output/...' at the filesystem root.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(module_dir, 'output', '%s_data.csv' % spider_name)

    def spider_opened(self, spider):
        """Open the spider's CSV file and start an exporter on it.

        The ``output`` directory is created when it does not exist yet.
        """
        import os
        path = self._output_path(spider.name)
        output_dir = os.path.dirname(path)
        if not os.path.isdir(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError:
                # Another process may have created it in the meantime.
                if not os.path.isdir(output_dir):
                    raise
        output_file = open(path, 'w+b')
        self.files[spider] = output_file
        self.exporter = CsvItemExporter(output_file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for *spider*."""
        try:
            self.exporter.finish_exporting()
        finally:
            # Close the file even if the exporter fails to finish.
            self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item
示例5: spider_opened
def spider_opened(self, spider):
    """Open ``<spider name>_items.csv`` and start a CSV exporter on it.

    The file is stored in ``self.files`` under the spider object and the
    exporter in ``self.exporter``.
    """
    csv_file = open("%s_items.csv" % spider.name, "w+b")
    self.files[spider] = csv_file

    exporter = CsvItemExporter(csv_file)
    self.exporter = exporter
    # Only the title is exported; a longer field list ('subject',
    # 'description', 'creator', 'source', 'published', 'rights',
    # 'citation', 'url') was left disabled in the original code.
    exporter.fields_to_export = ["title"]
    exporter.start_exporting()
示例6: spider_opened
def spider_opened(self, spider):
    """Open the spider's dump file and start a pipe-delimited CSV exporter.

    The path comes from ``spider.get_dump_filepath()``. The file is stored
    in ``self.files`` under the spider's name.
    """
    filename = spider.get_dump_filepath()
    # Binary mode: CsvItemExporter needs a file whose write() accepts bytes,
    # and text mode could also translate the explicit '\n' terminator below.
    f = open(filename, 'wb')
    self.files[spider.name] = f
    # by default csv module uses Windows-style line terminators (\r\n)
    self.exporter = CsvItemExporter(f, include_headers_line=True, delimiter='|', lineterminator='\n')
    self.exporter.start_exporting()
示例7: open_spider
def open_spider(self, spider):
    """Open ``aqi.csv`` and start a CSV exporter that writes to it.

    Args:
        spider: The spider being opened; not used by this method.
    """
    # File object that receives the CSV data. Binary mode is used because
    # CsvItemExporter needs a file whose write() accepts bytes.
    self.f = open('aqi.csv', 'wb')
    # Exporter that writes items to the file.
    self.csv_exporter = CsvItemExporter(self.f)
    # Begin the export.
    self.csv_exporter.start_exporting()
示例8: spider_opened
def spider_opened(self, spider):
    """Open the listing CSV that matches the spider and start exporting.

    The ``realestate`` spider writes to ``current_listing.csv``; every other
    spider writes to ``past_listing.csv``.
    """
    # Compare for equality: ``spider.name in 'realestate'`` is a substring
    # test and also matches names such as '' or 'real'.
    if spider.name == 'realestate':
        filename = 'current_listing.csv'
    else:
        filename = 'past_listing.csv'
    self.file = open(filename, 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
示例9: spider_opened
def spider_opened(self, spider):
    """Open ``<spider name>_societies.csv`` and start a CSV exporter on it.

    The file is stored in ``self.files`` under the spider object. The
    exporter is limited to a fixed list of item fields.
    """
    csv_file = open('%s_societies.csv' % spider.name, 'w+b')
    self.files[spider] = csv_file

    self.exporter = exporter = CsvItemExporter(csv_file)
    # Restrict the output to these item fields.
    exporter.fields_to_export = [
        'name', 'president', 'email', 'url',
        'facebook', 'membership', 'about', 'date_established',
    ]
    exporter.start_exporting()
示例10: spider_opened
def spider_opened(self, spider):
    """Open the file named by ``spider.nameOfFile`` and start a CSV exporter.

    The file is stored in ``self.files`` under the spider object. Only the
    ``originalString`` and ``translatedString`` fields are exported.
    """
    target = open('%s' % spider.nameOfFile, 'w+b')
    self.files[spider] = target

    self.exporter = exporter = CsvItemExporter(target)
    exporter.fields_to_export = ['originalString', 'translatedString']
    exporter.start_exporting()
示例11: assertExportResult
def assertExportResult(self, item, expected, **kwargs):
    """Export *item* to an in-memory buffer and compare it with *expected*.

    Args:
        item: The item passed to ``export_item``.
        expected: The CSV output handed to ``self.assertCsvEqual``.
        **kwargs: Forwarded to the ``CsvItemExporter`` constructor.
    """
    buffer = BytesIO()
    exporter = CsvItemExporter(buffer, **kwargs)

    # Run one complete start / export / finish cycle for the single item.
    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()

    produced = buffer.getvalue()
    self.assertCsvEqual(produced, expected)
示例12: open_spider
def open_spider(self, spider):
    """Open ``Amazon_goods_crawl.csv``, start a CSV exporter and reset the title set.

    Args:
        spider: The spider being opened; not used by this method.
    """
    # File object that receives the CSV data. Binary mode is used because
    # CsvItemExporter needs a file whose write() accepts bytes.
    self.f = open("Amazon_goods_crawl.csv", "wb")
    # Exporter that writes items to the file.
    self.csv_exporter = CsvItemExporter(self.f)
    # Begin the export.
    self.csv_exporter.start_exporting()
    # Set meant for de-duplicating by product title (per the original
    # comment); the code that fills it is outside this block.
    self.add_title = set()
示例13: spider_opened
def spider_opened(self, spider):
    """Open (or continue) the spider's export CSV and start an exporter on it.

    The file is ``<EXPORT_PATH>/<spider_id>_export.csv``. An existing file is
    opened for appending, otherwise a new one is created. The file is stored
    in ``self.files`` under ``spider.spider_id``.
    """
    path = CrawlerPipeline.EXPORT_PATH + "/" + spider.spider_id + '_export.csv'
    already_exists = os.path.isfile(path)
    # When rows are appended to a file that already has content, writing the
    # header line again would leave a stray header row in the middle.
    has_content = already_exists and os.path.getsize(path) > 0
    export_file = open(path, 'ab' if already_exists else 'wb')
    self.files[spider.spider_id] = export_file
    self.exporter = CsvItemExporter(export_file, include_headers_line=not has_content)
    self.exporter.fields_to_export = [
        "item_id", "url", "num_links", "num_images",
        "num_scripts", "num_styles", "headers", "text"
    ]
    self.exporter.start_exporting()
示例14: spider_opened
def spider_opened(self, spider):
    """Open ``output/<spider name>_data.csv`` and start a CSV exporter on it.

    The ``output`` directory sits next to this module and is created when it
    does not exist yet. The file is stored in ``self.files`` under the
    spider object.
    """
    import os
    # abspath keeps the path anchored to this module even when __file__ has
    # no directory part; plain string concatenation would then yield
    # '/output/...' at the filesystem root.
    output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'output')
    if not os.path.isdir(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError:
            # Another process may have created it in the meantime.
            if not os.path.isdir(output_dir):
                raise
    output_file = open(os.path.join(output_dir, '%s_data.csv' % spider.name), 'w+b')
    self.files[spider] = output_file
    self.exporter = CsvItemExporter(output_file)
    self.exporter.start_exporting()
示例15: test_header_export_two_items
def test_header_export_two_items(self):
    """Check the CSV produced when the same item is exported twice.

    Runs once with ``self.i`` and once with ``dict(self.i)``. Each run must
    give one header line followed by two identical data rows.
    """
    for candidate in (self.i, dict(self.i)):
        buffer = BytesIO()
        exporter = CsvItemExporter(buffer)
        exporter.start_exporting()
        # Export the same item twice within one export session.
        for _ in range(2):
            exporter.export_item(candidate)
        exporter.finish_exporting()
        self.assertCsvEqual(buffer.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')