本文整理汇总了 Python 中 scrapy.contrib.exporter.JsonLinesItemExporter.finish_exporting 方法的典型用法代码示例。如果您正苦于以下问题:Python JsonLinesItemExporter.finish_exporting 方法的具体用法?Python JsonLinesItemExporter.finish_exporting 怎么用?Python JsonLinesItemExporter.finish_exporting 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 scrapy.contrib.exporter.JsonLinesItemExporter 的用法示例。
在下文中一共展示了JsonLinesItemExporter.finish_exporting方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: JsonLinesExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class JsonLinesExportPipeline(object):
    """Export items that carry image URLs through a JsonLinesItemExporter.

    The output file is ``<spider name>_pics.json``, opened in append mode
    when the spider starts and closed when it finishes.
    """

    def __init__(self):
        # Open output handles, keyed by the spider they belong to.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        handle = open('%s_pics.json' % spider.name, 'a')
        self.files[spider] = handle
        exporter = JsonLinesItemExporter(handle, ensure_ascii=False)
        self.exporter = exporter
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` only when its ``image_urls`` value is truthy.

        The item is returned unchanged in either case.
        """
        has_images = item.get("image_urls")
        if has_images:
            self.exporter.export_item(item)
        return item
示例2: NordstromPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class NordstromPipeline(object):
    """Drop items whose ``product_item_num`` was already seen; export the rest.

    Items are written through a JsonLinesItemExporter to
    ``<spider name>_products.jl``.
    """

    def __init__(self):
        # Open output handles, keyed by spider.
        self.files = dict()
        # Product numbers that have already been exported.
        self.ids_seen = set()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def process_item(self, item, spider):
        """Export ``item`` unless its product number is a duplicate.

        Raises:
            DropItem: when ``item['product_item_num']`` was seen before.
        """
        product_id = item['product_item_num']
        if product_id in self.ids_seen:
            raise DropItem("Duplicate item found: %s" % item)
        self.ids_seen.add(product_id)
        self.exporter.export_item(item)
        return item

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        handle = open('%s_products.jl' % spider.name, 'w+b')
        self.files[spider] = handle
        exporter = JsonLinesItemExporter(handle)
        self.exporter = exporter
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()
示例3: TibiaPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class TibiaPipeline(object):
    """Export every item through a JsonLinesItemExporter.

    The output file name is the spider name followed by the ISO timestamp
    taken when the spider opens, with a ``.json`` suffix.
    """

    def __init__(self):
        # Open output handles, keyed by spider.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open a timestamped output file and start an exporter on it."""
        stem = spider.name + datetime.datetime.now().isoformat()
        handle = open('%s.json' % stem, 'a+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it on unchanged."""
        self.exporter.export_item(item)
        return item
示例4: ValidatorPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class ValidatorPipeline(object):
    """Export items to a temporary JSON file, leaving out unnecessary fields.

    The output path comes from ``_get_spider_output_filename`` and the
    exported fields from ``_get_fields_to_check(ProductItem)``.
    """

    def __init__(self):
        # Open output handles, keyed by spider.
        self.files = dict()
        # Set once a spider has been opened.
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's output file and start a field-restricted exporter."""
        handle = open(_get_spider_output_filename(spider), 'wb')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.fields_to_export = _get_fields_to_check(ProductItem)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it on unchanged."""
        self.exporter.export_item(item)
        return item
示例5: ScrippaPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class ScrippaPipeline(object):
    """Export every item to ``2_reports.json`` through a JsonLinesItemExporter.

    The output file handle is kept on the instance so that it can be closed
    when the spider finishes.
    """

    def __init__(self):
        # Output file handle; opened in spider_opened, closed in spider_closed.
        self.file = None
        # Exporter bound to ``self.file``; created in spider_opened.
        self.exporter = None
        # Parenthesised form behaves the same on Python 2 and Python 3.
        print("DDDDDDDDDDDDDDDDDDDDDDDDDUUUUUUUUUUUUUUUUUUUUUUUUUUUDE")

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the output file and start an exporter on it."""
        self.file = open('2_reports.json', 'w+b')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the output file.

        The file is closed even when ``finish_exporting`` raises.
        """
        try:
            self.exporter.finish_exporting()
        finally:
            if self.file is not None:
                self.file.close()
                self.file = None

    def process_item(self, item, spider):
        """Announce, export and return ``item``."""
        print("ScrippaPipeline: exporting item ============================== ")
        self.exporter.export_item(item)
        return item
示例6: PajandanPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class PajandanPipeline(object):
    """Export every item to ``articles.json`` through a JsonLinesItemExporter.

    The file is opened with UTF-8 encoding and the exporter is created with
    ``ensure_ascii=False``.
    """

    def __init__(self):
        # Keyed by spider because more than one spider may be running.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the UTF-8 output file and start an exporter on it."""
        handle = codecs.open('articles.json', 'w+', encoding='utf-8')
        self.files[spider] = handle
        exporter = JsonLinesItemExporter(handle, ensure_ascii=False)
        self.exporter = exporter
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it on unchanged."""
        self.exporter.export_item(item)
        return item
示例7: PlayerPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class PlayerPipeline(object):
    """Export ``PlayerInfoItem`` instances through a JsonLinesItemExporter.

    Output goes to ``output/player_info.json`` under
    ``settings.PROJECT_ROOT``. Items of other types pass through untouched.
    """

    def __init__(self, *args, **kwargs):
        # Both are populated in spider_opened.
        self.player_info_exporter = None
        self.player_info_file = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the player-info file and start an exporter on it."""
        target = "%s/output/player_info.json" % settings.PROJECT_ROOT
        self.player_info_file = open(target, 'wb')
        self.player_info_exporter = JsonLinesItemExporter(self.player_info_file)
        self.player_info_exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the player-info file."""
        self.player_info_exporter.finish_exporting()
        self.player_info_file.close()

    def process_item(self, item, spider):
        """Export ``item`` when it is a ``PlayerInfoItem``; always return it."""
        if not isinstance(item, PlayerInfoItem):
            return item
        self.player_info_exporter.export_item(item)
        return item
示例8: JsonExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class JsonExportPipeline(object):
    """Export every item to ``<spider name>_Joke.txt`` via JsonLinesItemExporter."""

    def __init__(self):
        # Open output handles, keyed by spider.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        handle = open('%s_Joke.txt' % spider.name, 'w+b')
        self.files[spider] = handle
        exporter = JsonLinesItemExporter(handle)
        self.exporter = exporter
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it on unchanged."""
        self.exporter.export_item(item)
        return item
示例9: JsonExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class JsonExportPipeline(object):
    """Export every item to ``data/<spider name>.json`` via JsonLinesItemExporter.

    A debug message is logged after each exported item.
    """

    def __init__(self):
        # Open output handles, keyed by spider.
        self.files = dict()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the spider's file under ``data`` and start an exporter on it."""
        handle = open(os.path.join('data', '%s.json' % spider.name), 'w+b')
        self.files[spider] = handle
        exporter = JsonLinesItemExporter(handle)
        self.exporter = exporter
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item``, log a debug message, and return the item."""
        self.exporter.export_item(item)
        log.msg("process_item", level=log.DEBUG)
        return item
示例10: AdbPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class AdbPipeline(object):
    """Export each distinct product once through a JsonLinesItemExporter.

    Output goes to ``<DATA_DIR>/<spider name>/<today's ISO date>.json``.
    Items whose ``product`` value was already seen are dropped.
    """

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        # Open output handles, keyed by spider.
        self.files = dict()
        # Product values that have already been exported.
        self.seen = set()

    def spider_opened(self, spider):
        """Open today's output file for the spider and start an exporter."""
        path = '%s/%s/%s.json' % (
            settings.DATA_DIR,
            spider.name,
            datetime.date.today().isoformat(),
        )
        handle = open(path, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` unless its product was seen before.

        Raises:
            DropItem: when the item's product is a duplicate.
        """
        duplicate = self.seen_before(item)
        if duplicate:
            raise DropItem
        self.exporter.export_item(item)
        return item

    def seen_before(self, item):
        """Return True for a known product; otherwise record it and return False."""
        product = item['product']
        if product in self.seen:
            return True
        self.seen.add(product)
        return False
示例11: TffdatapullPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class TffdatapullPipeline(object):
    """Export every item in four formats: CSV, JSON, JSON lines and XML.

    Output files are named after the spider with the suffixes ``.csv``,
    ``.json``, ``.linejson`` and ``.xml``. The file handles are tracked on
    the instance so that they are closed when the spider finishes.
    """

    def __init__(self):
        # Fields written by every exporter, in this order.
        self.fields_to_export = [
            'match_code',
            'match_href',
            'home_team_id',
            'home_team_name',
            'score',
            'guest_team_id',
            'guest_team_name',
            'date_of_match',
            'time_of_match',
            'stadium_name',
            'organization_name',
        ]
        # Handles opened in spider_opened; closed in spider_closed.
        self.files = []
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def _open_output(self, spider, suffix):
        """Open ``<spider name><suffix>`` for writing and remember the handle."""
        handle = open(spider.name + suffix, "w")
        self.files.append(handle)
        return handle

    def spider_opened(self, spider):
        """Create the four exporters on freshly opened files and start them."""
        self.csv_exporter = CsvItemExporter(
            self._open_output(spider, ".csv"),
            fields_to_export=self.fields_to_export,
            quoting=csv.QUOTE_ALL)
        self.json_exporter = TffdatapullJsonItemExporter(
            self._open_output(spider, ".json"),
            fields_to_export=self.fields_to_export,
            sort_keys=True, indent=4)
        self.jsonlines_exporter = JsonLinesItemExporter(
            self._open_output(spider, ".linejson"),
            fields_to_export=self.fields_to_export)
        self.xml_exporter = TffdatapullXmlItemExporter(
            self._open_output(spider, ".xml"),
            fields_to_export=self.fields_to_export,
            root_element="match_code", item_element="match_code")
        self.csv_exporter.start_exporting()
        self.json_exporter.start_exporting()
        self.jsonlines_exporter.start_exporting()
        self.xml_exporter.start_exporting()

    def process_item(self, item, spider):
        """Send ``item`` to all four exporters and return it unchanged."""
        self.csv_exporter.export_item(item)
        self.json_exporter.export_item(item)
        self.jsonlines_exporter.export_item(item)
        self.xml_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finish all four exports, then close every tracked output file.

        The files are closed even when one of the exporters raises.
        """
        try:
            self.csv_exporter.finish_exporting()
            self.json_exporter.finish_exporting()
            self.jsonlines_exporter.finish_exporting()
            self.xml_exporter.finish_exporting()
        finally:
            while self.files:
                self.files.pop().close()
示例12: JsonWriterPipeline2
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class JsonWriterPipeline2(object):
    """Export the ``title`` and ``link`` fields of every item as JSON lines.

    Output goes to ``<spider name>.linejson``. The file handle is kept on
    the instance so that it can be closed when the spider finishes.
    """

    def __init__(self):
        # Fields written by the exporter, in this order.
        self.fields_to_export = [
            'title',
            'link',
        ]
        # Output file handle; opened in spider_opened, closed in spider_closed.
        self.file = None
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    # The constructor was originally misspelled ``_init_`` and therefore never
    # ran on instantiation; the old name is kept so explicit callers still work.
    _init_ = __init__

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        self.file = open(spider.name + ".linejson", "w")
        self.jsonlines_exporter = JsonLinesItemExporter(
            self.file, fields_to_export=self.fields_to_export)
        self.jsonlines_exporter.start_exporting()

    def process_item(self, item, spider):
        """Export ``item`` and hand it on unchanged."""
        self.jsonlines_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finish the export and close the output file.

        The file is closed even when ``finish_exporting`` raises.
        """
        try:
            self.jsonlines_exporter.finish_exporting()
        finally:
            if self.file is not None:
                self.file.close()
                self.file = None
示例13: JsonExportExternalIdPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class JsonExportExternalIdPipeline(object):
    """Export every item to ``scraped/<spider.external_id>.json`` as JSON lines."""

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        self.file = open('scraped/%s.json' % spider.external_id, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the output file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` and return it.

        Returning the item (instead of the implicit ``None``) keeps it
        available to whatever processes the pipeline's result next.
        """
        self.exporter.export_item(item)
        return item
示例14: MspPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class MspPipeline(object):
    """Route MSP and vote items to separate JSON-lines files under ``json/``.

    An ``MSPCrawler`` spider writes to ``json/msps.json``; a ``VoteCrawler``
    spider writes to ``json/votes-<spider.mspid>.json``.
    """

    def __init__(self):
        # Open output handles, keyed by 'msps' / 'votes'.
        self.files = dict()
        self.ids_seen = set()

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to the spider lifecycle signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Ensure ``json/`` exists, then open the file matching the spider type."""
        if not os.path.exists('./json/'):
            os.makedirs('./json/')

        if isinstance(spider, MSPCrawler):
            msp_handle = open('json/msps.json', 'w+b')
            self.files['msps'] = msp_handle
            self.MSPExporter = JsonLinesItemExporter(msp_handle)
            self.MSPExporter.start_exporting()
            return

        if isinstance(spider, VoteCrawler):
            vote_handle = open('json/votes-' + spider.mspid + '.json', 'w+b')
            self.files['votes'] = vote_handle
            self.VoteExporter = JsonLinesItemExporter(vote_handle)
            self.VoteExporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the exporter for the spider's type, then close all open files."""
        if isinstance(spider, VoteCrawler):
            self.VoteExporter.finish_exporting()
        elif isinstance(spider, MSPCrawler):
            self.MSPExporter.finish_exporting()

        for handle in self.files.values():
            handle.close()

    def process_item(self, item, spider):
        """Export ``item`` with the exporter matching its type; always return it."""
        if isinstance(item, MSPItem):
            self.MSPExporter.export_item(item)
        elif isinstance(item, VoteItem):
            self.VoteExporter.export_item(item)
        return item
示例15: JsonLinesExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import finish_exporting [as 别名]
class JsonLinesExportPipeline(object):
    """Export every item to ``<spider name>_items.json`` via JsonLinesItemExporter."""

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        # Open output handles, keyed by spider.
        self.files = dict()
        self.first_item = True

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        handle = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = handle
        exporter = JsonLinesItemExporter(handle)
        self.exporter = exporter
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close and forget the spider's file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it on unchanged."""
        self.exporter.export_item(item)
        return item