This page collects typical usage examples of the Python method scrapy.contrib.exporter.CsvItemExporter.finish_exporting. If you have been wondering what exactly CsvItemExporter.finish_exporting does and how to use it, the curated examples below should help. You can also explore further usage of the containing class, scrapy.contrib.exporter.CsvItemExporter.
Below are 15 code examples of the CsvItemExporter.finish_exporting method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
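Before the examples, here is a minimal, self-contained sketch of the exporter life cycle that every snippet below follows: open a binary-mode file, create the exporter, call start_exporting(), export items, then call finish_exporting() before closing the file. The item class, file name, and field names here are placeholders, and note that scrapy.contrib.exporter is the pre-1.0 import path used throughout this page; Scrapy 1.0+ exposes the same class as scrapy.exporters.CsvItemExporter.

# A minimal sketch of the CsvItemExporter life cycle (placeholder names;
# pre-1.0 import path as used on this page, scrapy.exporters in Scrapy >= 1.0)
from scrapy.contrib.exporter import CsvItemExporter
from scrapy.item import Item, Field

class DemoItem(Item):
    id = Field()
    title = Field()

f = open('items.csv', 'w+b')                 # exporters expect a binary-mode file
exporter = CsvItemExporter(f)
exporter.fields_to_export = ['id', 'title']  # optional: fix the column order
exporter.start_exporting()
exporter.export_item(DemoItem(id=1, title='example'))
exporter.finish_exporting()                  # always call before closing the file
f.close()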
Example 1: TutorialPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals
class TutorialPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = ['id', 'title', 'time', 'director', 'year', 'star', 'cost']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 2: CSVPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals, plus a module-level 'settings' object
# (in old Scrapy versions: from scrapy.conf import settings)
class CSVPipeline(object):

    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        try:
            fo = open(spider.output_file, 'w+b')
        except IOError as e:
            spider.crawler.engine.close_spider(spider, "ERROR: Can't create CSV file: " + str(e))
            return
        self.files[spider] = fo
        self.exporter = CsvItemExporter(fo)
        self.exporter.fields_to_export = settings.getlist("EXPORT_FIELDS")
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        # Only finish and close if the file was opened successfully
        if self.exporter is not None:
            self.exporter.finish_exporting()
            f = self.files.pop(spider)
            f.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 3: YangmaodangPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: import time, plus a send_email() helper defined elsewhere in the project
class YangmaodangPipeline(object):
    '''
    Collect the scraped "freebie" (羊毛) posts from the NewSMTH (水木) forum,
    save them to a CSV file, and send the file by email.
    '''

    def __init__(self):
        self.filename = 'output/newsmth-' + time.strftime('%Y%m%d') + '.csv'
        self.file = open(self.filename, 'wb')
        self.items = []
        # self.file.write('$$'.join(YangmaodangItem.fields))

    def open_spider(self, spider):
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        # Sort the collected posts by reply count, descending
        sortedlist = sorted(self.items, key=lambda x: int(x['reply_num']), reverse=True)
        for item in sortedlist:
            self.exporter.export_item(item)
        self.exporter.finish_exporting()
        self.file.close()
        send_email(self.filename)

    def process_item(self, item, spider):
        self.items.append(item)
        # self.exporter.export_item(item)
        return item
Example 4: GnewsPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals
class GnewsPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s.csv' % spider.name, 'w+b')
        self.files[spider] = file
        # Positional arguments: include_headers_line=True, join_multivalued='\n'
        self.exporter = CsvItemExporter(file, True, '\n')
        self.exporter.fields_to_export = [
            'category', 'topstory', 'snippet', 'link', 'originallink',
            'sublinks', 'sublinktext', 'gpost', 'gpostsnip', 'extras',
            'extraslink', 'related']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 5: OpossumPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals, plus an ExportImageItem class defined in the project
class OpossumPipeline(object):

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open('/home/moorcock/work/mrs_opossum/items.csv', 'w+b')
        self.exporter = CsvItemExporter(self.file)
        self.exporter.fields_to_export = ['id', 'title', 'image', 'keywords']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        # Re-pack the scraped item, keeping only the exported fields
        item_exp = ExportImageItem(
            id=item['id'],
            title=item['title'].strip(' \t\n'),
            # e.g. 'full/abc123.jpg' -> 'abc123'
            image=item['images'][0]['path'].split('/')[-1].split('.')[0],
            keywords=item['keywords']
        )
        self.exporter.export_item(item_exp)
        return item_exp
Example 6: BuildingsPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals, plus a module-level fields_to_export list
class BuildingsPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open("buildings.csv", "w+b")
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = fields_to_export
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 7: GameListingPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals, plus module-level client,
# insert_callback and item_to_dictionary helpers defined elsewhere in the project
class GameListingPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_products.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        keys = ['name', 'address', 'zipCode', 'jobCostMin', 'jobCostMax',
                'contactName', 'contactPhone', 'website', 'licenseNumber',
                'averageRating', 'profileUrl', 'followers', 'following',
                'badgeCount', 'projectCount', 'reviewCount', 'commentCount']
        dictionary = item_to_dictionary(item, keys)
        # print 'document to insert', dictionary
        client.insert('updatedListings', dictionary, callback=insert_callback)
        # client.insert('listings', dictionary, callback=insert_callback)
        self.exporter.export_item(item)
        return item  # pipelines should return the item for later stages
Example 8: CSVPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals
class CSVPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = [
            'name', 'rank', 'overallScore', 'teachingScore', 'internationalOutlook',
            'industryIncome', 'research', 'citations', 'textBelow']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 9: FinancePipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals
class FinancePipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_stock.csv' % spider.code, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(
            file,
            fields_to_export=['date', 'Open', 'High', 'Low', 'Close', 'Volume', 'AdjClose'])
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 10: EaCOpenListBotPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals
class EaCOpenListBotPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_%s_items.csv' % (spider.name, spider.category), 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = ['vendor', 'product', 'default']
        # self.exporter.fields_to_export = ['default']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 11: CSVPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals
class CSVPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = ['team_year', 'track', 'region']
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 12: CsvExportPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: import os, plus a module-level 'settings' object
class CsvExportPipeline(object):

    def process_item(self, item, spider):
        # One CSV file per feed, grouped under the brand category directory
        outputdir = '%s%s/%s' % (settings['ADAPTFM_OUTPUT_PATH'], spider.folder, item['brandCategory'][0])
        name = item['brandFeed'][0].replace('http://', '').replace('/', '_').replace('.xml', '')
        filename = '%s/%s.csv' % (outputdir, name)
        if not os.path.isdir(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))  # create intermediate directories too
        # Note: a fresh exporter writes the CSV header before its first item,
        # so appending one item at a time repeats the header row in the file.
        file = open(filename, 'a+b')
        self.exporter = CsvItemExporter(file)
        self.exporter.start_exporting()
        self.exporter.export_item(item)
        self.exporter.finish_exporting()
        file.close()
        return item
Example 13: CsvExportPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: import time, from scrapy import signals,
# from scrapy.exceptions import DropItem, and the old-style
# from scrapy.xlib.pydispatch import dispatcher for signal wiring
class CsvExportPipeline(object):

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        file = open('%s_%s.csv' % (spider.name, int(time.time())), 'w+b')
        self.files[spider] = file
        # Pick the column layout by spider type
        if 'yopt' in spider.name:
            self.exporter = CsvItemExporter(
                file,
                fields_to_export=['date', 'instrument', 'option_symbol', 'symbol',
                                  'expiration', 'type', 'strike', 'last', 'change',
                                  'bid', 'ask', 'volume', 'open_int'],
                dialect='excel')
        elif 'prices' in spider.name:
            self.exporter = CsvItemExporter(
                file,
                fields_to_export=['date', 'open', 'high', 'low', 'close',
                                  'volume', 'adj_close'],
                dialect='excel')
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if item is None:
            raise DropItem("None")
        self.exporter.export_item(item)
        return item
Example 14: MultiCSVItemPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals, plus ProfRatingItem and
# ProfSummaryItem item classes defined in the project
class MultiCSVItemPipeline(object):

    def __init__(self):
        # Keep the file handles so spider_closed can close them (the original
        # popped self.files, which was never filled, raising a KeyError)
        self.file1 = open("profRating.csv", 'wb')
        self.file2 = open("profSummary.csv", 'wb')
        self.exporter1 = CsvItemExporter(self.file1, fields_to_export=ProfRatingItem.fields.keys())
        self.exporter2 = CsvItemExporter(self.file2, fields_to_export=ProfSummaryItem.fields.keys())

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()

    def spider_closed(self, spider):
        self.exporter1.finish_exporting()
        self.exporter2.finish_exporting()
        self.file1.close()
        self.file2.close()

    def process_item(self, item, spider):
        self.exporter1.export_item(item)
        self.exporter2.export_item(item)
        return item
Example 15: TutorialPipeline
# Required import: from scrapy.contrib.exporter import CsvItemExporter [as alias]
# Or: from scrapy.contrib.exporter.CsvItemExporter import finish_exporting [as alias]
# Also assumes: from scrapy import signals
class TutorialPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open("mediabase.csv", 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        # self.exporter.fields_to_export = ["Name","Address","City","Neighborhood","State","Zip","Phone","Website","Image_url","Hours_Mon","Hours_Tue","Hours_Wed","Hours_Thu","Hours_Fri","Hours_Sat","Hours_Sun","Price","TakesReservation","Delivery","TakeOut","AcceptsCreditCards","GoodFor","Parking","WheelChairAccessible","BikeParking","GoodForKids","GoodForGroups","Attire","Ambience","NoiseLevel","Alcohol","OutDoorSeating","Wifi","HasTV","WaiterService","Caters","Url"]
        self.exporter.fields_to_export = [
            "Type", "Area", "PlaceName", "Web", "Tel", "Address", "Zip", "Town",
            "Hours", "CompanyName", "OrganizationNo", "Turnover", "Employed",
            "LastName", "FirstName", "Telephone", "AllabolagUrl", "EniroUrl"]
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item