This article collects typical usage examples of the Python method scrapy.exporters.CsvItemExporter.start_exporting. If you are looking for concrete answers to how CsvItemExporter.start_exporting works and how it is used, the curated code examples below should help. You can also explore further usage examples of its containing class, scrapy.exporters.CsvItemExporter.
The following 15 code examples of the CsvItemExporter.start_exporting method are sorted by popularity by default.
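Before the examples, note the lifecycle that all of these pipelines follow: start_exporting() must be called once before the first export_item() call, and finish_exporting() once after the last. Here is a minimal, self-contained sketch of that lifecycle; the filename 'items.csv' and the literal item dict are placeholders, not taken from the examples below:

from scrapy.exporters import CsvItemExporter

f = open('items.csv', 'w+b')  # CsvItemExporter writes bytes, so open in binary mode
exporter = CsvItemExporter(f)
exporter.start_exporting()    # required before the first export_item()
exporter.export_item({'name': 'John', 'age': '22'})  # the first item also triggers the header row
exporter.finish_exporting()   # required after the last export_item()
f.close()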
Example 1: JsonExportPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):
def __init__(self):
self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
# file = open('%s_data.xml' % spider.name, 'w+b')
        import os
        # build the output path relative to this module; assumes the output/ directory exists
        outputDir = os.path.join(os.path.dirname(__file__), 'output')
        file = open(os.path.join(outputDir, '%s_data.csv' % spider.name), 'w+b')
self.files[spider] = file
# self.exporter = JsonItemExporter(file)
self.exporter = CsvItemExporter(file)
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 2: FacupPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class FacupPipeline(object):
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
    # Create the output files, instantiate the exporters, and call
    # start_exporting(), which is required before an item exporter can be used.
def spider_opened(self, spider):
self.results_csv = open('results_3.csv', 'wb')
self.missing_csv = open('results_miss_2.csv', 'wb')
self.results_exporter = CsvItemExporter(self.results_csv)
self.missing_exporter = CsvItemExporter(self.missing_csv)
self.results_exporter.start_exporting()
self.missing_exporter.start_exporting()
    def process_item(self, item, spider):
        # export each item through the results exporter; items destined for the
        # missing-results file would be routed to self.missing_exporter here
        self.results_exporter.export_item(item)
        return item
def spider_closed(self, spider):
self.results_exporter.finish_exporting()
self.missing_exporter.finish_exporting()
self.results_csv.close()
self.missing_csv.close()
Example 3: CsvExportPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class CsvExportPipeline(object):
def __init__(self):
self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open('%s_societies.csv' % spider.name, 'w+b')
self.files[spider] = file
self.exporter = CsvItemExporter(file)
self.exporter.fields_to_export = ['name', 'president', 'email', 'url', 'facebook', 'membership', 'about',
'date_established']
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 4: CSVExportPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class CSVExportPipeline(object):
def __init__(self):
self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open('%s' % spider.nameOfFile, 'w+b')
self.files[spider] = file
self.exporter = CsvItemExporter(file)
self.exporter.fields_to_export = ['originalString', 'translatedString']
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 5: WebcrawlerPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class WebcrawlerPipeline(object):
    def __init__(self):
        self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open("%s_urls.txt" % (spider.name), "w+b")
self.files[spider] = file
self.exporter = CsvItemExporter(file, include_headers_line=False)
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 6: CsvExportPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class CsvExportPipeline(object):
def __init__(self):
self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open('vagas.csv', 'wb')
self.files[spider] = file
self.exporter = CsvItemExporter(file)
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 7: CSVWriterPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class CSVWriterPipeline(object):
    def __init__(self, filename):
self.filename = filename
@classmethod
def from_crawler(cls, crawler):
settings = crawler.settings
filename = settings.get('OUTPUT_FILE')
pipeline = cls(filename)
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
self.file = open(self.filename, 'w+b')
        self.exporter = CsvItemExporter(self.file, include_headers_line=True, encoding='utf-8')
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
self.file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 8: CSVPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class CSVPipeline(object):
def __init__(self):
self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open('%s_items.csv' % spider.name, 'w+b')
self.files[spider] = file
        self.exporter = CsvItemExporter(file, delimiter='\t')
        self.exporter.fields_to_export = ['userId', 'bookId', 'name', 'rating', 'relativeRating', 'booklistNum']
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 9: catalogscraperPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class catalogscraperPipeline(object):
def __init__(self):
self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open("%s_items.csv" % spider.name, "w+b")
self.files[spider] = file
self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = ["title"]
        # other available fields: 'subject', 'description', 'creator', 'source', 'published', 'rights', 'citation', 'url'
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 10: CsvExportPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class CsvExportPipeline(object):
"""
app.pipelines.exporter_csv.CsvExportPipeline
"""
def __init__(self):
self.files = {}
self.exporter = None
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file_csv = open('%s_items.csv' % spider.name, 'w+b')
self.files[spider] = file_csv
self.exporter = CsvItemExporter(file_csv)
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file_csv = self.files.pop(spider)
file_csv.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 11: DumpToFile
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class DumpToFile(object):
"""
    Dump harvested data into a flat file; no other logic is implemented here
    (it's "Dump" :-)
"""
def __init__(self):
self.files = {}
self.counter = 0
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
        # TODO: verify whether this is still needed to register the spider_opened/spider_closed handlers
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
filename = spider.get_dump_filepath()
        f = open(filename, 'wb')  # binary mode so the exporter's lineterminator is written as-is
self.files[spider.name] = f
# by default csv module uses Windows-style line terminators (\r\n)
self.exporter = CsvItemExporter(f, include_headers_line=True, delimiter='|', lineterminator='\n')
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
f = self.files.pop(spider.name)
f.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
        # item counter; could instead be stored as a spider attribute on close
        self.counter += 1
return item
Example 12: CSVPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class CSVPipeline(object):
def __init__(self):
self.files = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open('%s_items.csv' % spider.name, 'w+b')
self.files[spider] = file
self.exporter = CsvItemExporter(file)
self.exporter.fields_to_export = ["filename", "titel", "publicatie", "dossiernummer", "organisatie", "publicatiedatum", "publicatietype", "file_urls"]
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Example 13: assertExportResult
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
def assertExportResult(self, item, expected, **kwargs):
fp = BytesIO()
ie = CsvItemExporter(fp, **kwargs)
ie.start_exporting()
ie.export_item(item)
ie.finish_exporting()
self.assertCsvEqual(fp.getvalue(), expected)
Example 14: AmazonCsvPipeline
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
class AmazonCsvPipeline(object):
def open_spider(self, spider):
        # file object holding the CSV output
        self.f = open("Amazon_goods_crawl.csv", "w")
        # create the CSV exporter
        self.csv_exporter = CsvItemExporter(self.f)
        # begin writing the CSV file
        self.csv_exporter.start_exporting()
        # track seen product titles to deduplicate items
        self.add_title = set()
    def process_item(self, item, spider):
        if item['title'] in self.add_title:
            print u'[ERROR] item already saved, skipping duplicate: %s' % item['title']
        else:
            self.add_title.add(item['title'])
            # write one item at a time
            # print u'[INFO] writing item to CSV: %s' % item['title']
            self.csv_exporter.export_item(item)
        return item
    def close_spider(self, spider):
        # finish writing the CSV file
        # print u'[INFO] finished writing the CSV file'
        self.csv_exporter.finish_exporting()
        # close the file
        self.f.close()
Example 15: test_header_export_two_items
# Required import: from scrapy.exporters import CsvItemExporter [as alias]
# Or: from scrapy.exporters.CsvItemExporter import start_exporting [as alias]
def test_header_export_two_items(self):
for item in [self.i, dict(self.i)]:
output = BytesIO()
ie = CsvItemExporter(output)
ie.start_exporting()
ie.export_item(item)
ie.export_item(item)
ie.finish_exporting()
self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
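Finally, a registration reminder: a pipeline class like the ones above only runs once it is listed in the project's ITEM_PIPELINES setting. A minimal sketch, assuming a hypothetical project package named myproject with one of the CSVPipeline classes defined in myproject/pipelines.py:

# settings.py ('myproject' is a placeholder package name)
ITEM_PIPELINES = {
    'myproject.pipelines.CSVPipeline': 300,  # the value (0-1000) sets ordering among pipelines
}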