本文整理汇总了Python中scrapy.contrib.exporter.JsonLinesItemExporter类的典型用法代码示例。如果您正苦于以下问题:Python JsonLinesItemExporter类的具体用法?Python JsonLinesItemExporter怎么用?Python JsonLinesItemExporter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了JsonLinesItemExporter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: NordstromPipeline
class NordstromPipeline(object):
    """Drops duplicate products and writes the remainder as JSON lines."""

    def __init__(self):
        # spider -> open output file handle
        self.files = {}
        # product_item_num values already exported, for duplicate detection
        self.ids_seen = set()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it to spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def process_item(self, item, spider):
        """Export the item unless its product number was already seen."""
        key = item['product_item_num']
        if key in self.ids_seen:
            raise DropItem("Duplicate item found: %s" % item)
        self.ids_seen.add(key)
        self.exporter.export_item(item)
        return item

    def spider_opened(self, spider):
        """Open the per-spider output file and start the exporter."""
        handle = open('%s_products.jl' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close this spider's output file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()
示例2: FeedWriterPipeline
class FeedWriterPipeline(object):
    """Appends FeedItem objects to the spider's feed output file as JSON lines."""

    def __init__(self):
        log.msg('FeedWriterPipeline.__init__()')
        self.file = None
        self.item_exporter = None
        self.count = 0

    def open_spider(self, spider):
        """For feed-op spiders, open the configured output file in append mode."""
        if not FeedSpider.is_feed_op(spider):
            return
        spider.make_sure_path_exists(spider.get_output_dir_path())
        file_name = spider.get_feed_output_file_path()
        self.file = open(file_name, 'a')
        self.item_exporter = JsonLinesItemExporter(self.file)
        log.msg('FeedWriterPipeline, opened file %s to append.' % file_name)

    def process_item(self, item, spider):
        """Export feed items; pass anything else down the pipeline untouched."""
        if not (FeedSpider.is_feed_op(spider) and isinstance(item, FeedItem)):
            return item
        self.item_exporter.export_item(item)
        self.count += 1
        spider.check_max_limit(self.count)
        # DropItem here deliberately signals "handled": it stops the item
        # from reaching any later pipeline stage.
        raise DropItem('Save item success')

    def close_spider(self, spider):
        """Append a summary line to the output file and close it."""
        if not FeedSpider.is_feed_op(spider):
            return
        self.file.write('Parsed %i feed items.%s' % (self.count, os.linesep))
        self.file.close()
        log.msg('closed file, appended %i items.' % self.count)
示例3: PerispiderPipeline
class PerispiderPipeline(object):
    """Writes every scraped item to '<spider name>.json', one JSON object per line."""

    def open_spider(self, spider):
        """Open the per-spider output file and start the exporter."""
        self.file = open("%s.json" % spider.name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Export the item and pass it down the pipeline unchanged."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish exporting and close the file.

        BUG FIX: the original never closed the file or finished the
        exporter, leaking the handle and risking unflushed output.
        """
        self.exporter.finish_exporting()
        self.file.close()
示例4: get_exporter
def get_exporter(self, item):
    """Return the exporter for this item's class, creating and caching it on first use."""
    item_cls = item.__class__
    try:
        return self.exporters[item_cls]
    except KeyError:
        pass
    output = open(_class_to_file(item_cls), 'w+b')
    # unused_genotype_data is dumped as JSON lines; every other class as CSV
    if item_cls == items.unused_genotype_data:
        exporter = JsonLinesItemExporter(output)
    else:
        exporter = CsvItemExporter(output)
    self.exporters[item_cls] = exporter
    exporter.start_exporting()
    return exporter
示例5: JsonLinesItemPipeline
class JsonLinesItemPipeline(object):
    """Writes every item to 'test.json' as JSON lines."""

    def open_spider(self, spider):
        """Open the output file and start the exporter.

        BUG FIX: start_exporting() was never called, violating the
        exporter contract (start/export/finish).
        """
        self.file = open('test.json', 'w+b')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish exporting before closing the file (finish was missing)."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export the item and pass it along unchanged."""
        self.exporter.export_item(item)
        return item
示例6: spider_opened
def spider_opened(self, spider):
    """Ensure the json/ directory exists and wire up an exporter for this spider type."""
    if not os.path.exists('./json/'):
        os.makedirs('./json/')
    if isinstance(spider, MSPCrawler):
        out_file = open('json/msps.json', 'w+b')
        self.files['msps'] = out_file
        self.MSPExporter = JsonLinesItemExporter(out_file)
        self.MSPExporter.start_exporting()
    elif isinstance(spider, VoteCrawler):
        # one vote file per MSP, keyed by the spider's mspid
        out_file = open('json/votes-' + spider.mspid + '.json', 'w+b')
        self.files['votes'] = out_file
        self.VoteExporter = JsonLinesItemExporter(out_file)
        self.VoteExporter.start_exporting()
示例7: MoviesPipeline
class MoviesPipeline(object):
def __init__(self):
self.field_to_export = []
self.file = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
# signals start of export
print "Spider opened...\nPreparing to crawl..."
self.json_exporter = JsonLinesItemExporter(open('movies.json', 'wb'))
self.json_exporter.start_exporting()
# Since the charts frequently change, we need to deal with differences
# in the cached data and current data.
# For now, we'll just truncate the table when the spider opens
# and dump everything in.
cursor = connection.cursor()
sql = 'truncate table %s' % MYSQL_TABLE
try:
cursor.execute(sql)
connection.commit()
print "*** Truncated %s Table ***" % MYSQL_TABLE
except:
print "Error %d %s" % (e.args[0], e.args[1])
connection.rollback()
def process_item(self, item, spider):
# store the item in the database
insert_database(item)
# Write to JSON file
self.json_exporter.export_item(item)
return item
def spider_closed(self, spider):
# signal end of export
self.json_exporter = finish_exporting()
示例8: FmlPipeline
class FmlPipeline(object):
    """Writes each scraped item to '<spider name>.json' as JSON lines."""

    # NOTE: removed a commented-out legacy __init__ that hard-coded
    # 'data2.json'; open_spider supersedes it.

    def open_spider(self, spider):
        """Open the per-spider output file and start the exporter."""
        self.file = open("%s.json" % spider.name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Export the item and pass it along unchanged."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish exporting and close the file.

        BUG FIX: the original leaked the file handle — it was never
        closed and the exporter was never finished.
        """
        self.exporter.finish_exporting()
        self.file.close()
示例9: open_spider
def open_spider(self, spider):
    """When the spider is a feed op, open its output file for appending."""
    if not FeedSpider.is_feed_op(spider):
        return
    spider.make_sure_path_exists(spider.get_output_dir_path())
    file_name = spider.get_feed_output_file_path()
    self.file = open(file_name, 'a')
    self.item_exporter = JsonLinesItemExporter(self.file)
    log.msg('FeedWriterPipeline, opened file %s to append.' % file_name)
示例10: spider_opened
def spider_opened(self, spider):
    """Open today's dated output file under DATA_DIR/<spider>/ and start exporting."""
    out_path = '%s/%s/%s.json' % (settings.DATA_DIR,
                                  spider.name,
                                  datetime.date.today().isoformat())
    out_file = open(out_path, 'w+b')
    self.files[spider] = out_file
    self.exporter = JsonLinesItemExporter(out_file)
    self.exporter.start_exporting()
示例11: process_spider_output
def process_spider_output(self, response, result, spider):
    """Pass spider output through unchanged while archiving a CCA record per response."""
    collected = []
    for element in result:
        if isinstance(element, Item):
            collected.append(element)
        # every result (item or request) is forwarded as-is
        yield element
    cca = response2cca(response, base64=True)
    cca['features'] = {'items': collected}
    cca_item = self.create_item(cca)
    cca_path = self.get_cca_path(spider)
    if cca_path is None:
        # no archive path configured for this spider: emit the CCA item downstream
        yield cca_item
        return
    exporter = self.exporters_by_path.get(cca_path)
    if exporter is None:
        # lazily open one append-mode exporter per output path
        exporter = JsonLinesItemExporter(open(cca_path, 'a+'))
        self.exporters_by_path[cca_path] = exporter
    exporter.export_item(cca_item)
示例12: process_item
def process_item(self, item, spider):
    """
    Writes the item to output
    """
    settings = spider.crawler.settings
    exporters = self.xporters[spider.name]
    # lazily create one exporter (and backing file) per item class per spider
    if item.__class__ not in exporters:
        out_dir = path.join(settings.get('IO_PATH', 'io'), settings['DATA_SET'])
        _mkdir_p(out_dir)
        out_file = open(path.join(out_dir, '%s.json' % item.export_filename), 'w')
        new_exporter = JsonLinesItemExporter(file=out_file)
        new_exporter.start_exporting()
        exporters[item.__class__] = (out_file, new_exporter)
    exporters[item.__class__][1].export_item(item)
    return item
示例13: JsonLinesExportPipeline
class JsonLinesExportPipeline(object):
    """Appends items that carry image URLs to '<spider name>_pics.json'."""

    def __init__(self):
        # spider -> open output file handle
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it to spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the append-mode output file; keep non-ASCII text unescaped."""
        handle = open('%s_pics.json' % spider.name, 'a')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close this spider's output file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export only items that actually have image URLs; pass all items along."""
        if item.get("image_urls"):
            self.exporter.export_item(item)
        return item
示例14: TibiaPipeline
class TibiaPipeline(object):
    """Exports every item to a timestamped per-spider JSON-lines file."""

    def __init__(self):
        # spider -> open output file handle
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it to spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open '<name><ISO timestamp>.json' for appending and start exporting."""
        handle = open('%s.json' % (spider.name + datetime.datetime.now().isoformat()), 'a+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close this spider's output file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export each item and pass it along unchanged."""
        self.exporter.export_item(item)
        return item
示例15: ValidatorPipeline
class ValidatorPipeline(object):
    """ Exports items in a temporary JSON file.
    Unnecessary fields are excluded. """

    def __init__(self):
        self.exporter = None
        # spider -> open output file handle
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it to spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Open the output file, restrict exported fields, and start exporting."""
        out_file = open(_get_spider_output_filename(spider), 'wb')
        self.files[spider] = out_file
        self.exporter = JsonLinesItemExporter(out_file)
        # only the fields under validation are written out
        self.exporter.fields_to_export = _get_fields_to_check(ProductItem)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close this spider's output file."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export the item and pass it along unchanged."""
        self.exporter.export_item(item)
        return item