本文整理汇总了Python中scrapy.contrib.exporter.JsonLinesItemExporter.export_item方法的典型用法代码示例。如果您正苦于以下问题：Python JsonLinesItemExporter.export_item方法的具体用法？Python JsonLinesItemExporter.export_item怎么用？Python JsonLinesItemExporter.export_item使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.contrib.exporter.JsonLinesItemExporter的用法示例。
在下文中一共展示了JsonLinesItemExporter.export_item方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: JsonExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class JsonExportPipeline(object):
    """Item pipeline that writes scraped items to ``data/<spider name>.json``.

    The file opened for each spider is tracked in ``self.files``; items are
    serialized through a ``JsonLinesItemExporter``.
    """

    def __init__(self):
        # Maps each running spider to the file object opened for it.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        connect = crawler.signals.connect
        connect(pipeline.spider_opened, signals.spider_opened)
        connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        output = open(os.path.join('data', '%s.json' % spider.name), 'w+b')
        self.files[spider] = output
        self.exporter = JsonLinesItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item``, emit a debug log message and hand the item on."""
        self.exporter.export_item(item)
        log.msg("process_item", level=log.DEBUG)
        return item
示例2: AdbPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class AdbPipeline(object):
    """Export items to a dated JSON file, dropping repeated products.

    Output goes to ``<DATA_DIR>/<spider name>/<today>.json``. An item is
    treated as a duplicate when its ``'product'`` value has already been
    seen by this pipeline instance.
    """

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        # Maps each running spider to the file object opened for it.
        self.files = {}
        # 'product' values that have already been exported.
        self.seen = set()

    def spider_opened(self, spider):
        """Open today's output file for ``spider`` and start exporting."""
        path = '%s/%s/%s.json' % (settings.DATA_DIR,
                                  spider.name,
                                  datetime.date.today().isoformat())
        out_file = open(path, 'w+b')
        self.files[spider] = out_file
        self.exporter = JsonLinesItemExporter(out_file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        out_file = self.files.pop(spider)
        out_file.close()

    def process_item(self, item, spider):
        """Export ``item`` unless its product was already seen.

        Raises:
            DropItem: if ``item['product']`` has been exported before.
        """
        if self.seen_before(item):
            # Give the drop a reason so it is identifiable in the crawl log.
            raise DropItem("Duplicate product: %s" % (item['product'],))
        self.exporter.export_item(item)
        return item

    def seen_before(self, item):
        """Return True if ``item['product']`` was seen before.

        An unseen product is recorded as a side effect, so a second call
        with the same product returns True.
        """
        product = item['product']
        if product in self.seen:
            return True
        self.seen.add(product)
        return False
示例3: JsonExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class JsonExportPipeline(object):
    """Item pipeline that writes scraped items to ``<spider name>_Joke.txt``."""

    def __init__(self):
        # Maps each running spider to the file object opened for it.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        subscriptions = (
            (pipeline.spider_opened, signals.spider_opened),
            (pipeline.spider_closed, signals.spider_closed),
        )
        for handler, sig in subscriptions:
            crawler.signals.connect(handler, sig)
        return pipeline

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        self.files[spider] = open('%s_Joke.txt' % spider.name, 'w+b')
        self.exporter = JsonLinesItemExporter(self.files[spider])
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
示例4: JsonLinesExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class JsonLinesExportPipeline(object):
    """Append items that carry image URLs to ``<spider name>_pics.json``.

    The exporter is created with ``ensure_ascii=False``.
    """

    def __init__(self):
        # Maps each running spider to the file object opened for it.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        connect = crawler.signals.connect
        connect(pipeline.spider_opened, signals.spider_opened)
        connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the spider's output file in append mode and start exporting."""
        target = open('%s_pics.json' % spider.name, 'a')
        self.files[spider] = target
        self.exporter = JsonLinesItemExporter(target, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` only when ``image_urls`` is truthy; always return it."""
        has_images = item.get("image_urls")
        if has_images:
            self.exporter.export_item(item)
        return item
示例5: TibiaPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class TibiaPipeline(object):
    """Write scraped items to a JSON file named after the spider and start time.

    The file name is the spider name immediately followed by the ISO-format
    timestamp taken when the spider opens, with a ``.json`` suffix.
    """

    def __init__(self):
        # Maps each running spider to the file object opened for it.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        subscriptions = (
            (pipeline.spider_opened, signals.spider_opened),
            (pipeline.spider_closed, signals.spider_closed),
        )
        for handler, sig in subscriptions:
            crawler.signals.connect(handler, sig)
        return pipeline

    def spider_opened(self, spider):
        """Open a timestamped output file for ``spider`` and start exporting."""
        filename = '%s.json' % (spider.name + datetime.datetime.now().isoformat())
        output = open(filename, 'a+b')
        self.files[spider] = output
        self.exporter = JsonLinesItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
示例6: PlayerPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class PlayerPipeline(object):
    """Export ``PlayerInfoItem`` instances to ``output/player_info.json``.

    The output path is rooted at ``settings.PROJECT_ROOT``. Items of any
    other type are passed through without being exported.
    """

    def __init__(self, *args, **kwargs):
        # Both are populated when the spider opens.
        self.player_info_exporter = None
        self.player_info_file = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        connect = crawler.signals.connect
        connect(pipeline.spider_opened, signals.spider_opened)
        connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the player-info file and start an exporter on it."""
        path = "%s/output/player_info.json" % settings.PROJECT_ROOT
        self.player_info_file = open(path, 'wb')
        self.player_info_exporter = JsonLinesItemExporter(self.player_info_file)
        self.player_info_exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the player-info file."""
        self.player_info_exporter.finish_exporting()
        self.player_info_file.close()

    def process_item(self, item, spider):
        """Export ``item`` if it is a ``PlayerInfoItem``; always return it."""
        is_player_info = isinstance(item, PlayerInfoItem)
        if is_player_info:
            self.player_info_exporter.export_item(item)
        return item
示例7: ValidatorPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class ValidatorPipeline(object):
    """Export items to the spider's output file, restricted to selected fields.

    The output path comes from ``_get_spider_output_filename(spider)`` and
    the exported fields from ``_get_fields_to_check(ProductItem)``.
    """

    def __init__(self):
        # Maps each running spider to the file object opened for it.
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        subscriptions = (
            (pipeline.spider_opened, signals.spider_opened),
            (pipeline.spider_closed, signals.spider_closed),
        )
        for handler, sig in subscriptions:
            crawler.signals.connect(handler, sig)
        return pipeline

    def spider_opened(self, spider):
        """Open the output file, configure the exported fields, start exporting."""
        output = open(_get_spider_output_filename(spider), 'wb')
        self.files[spider] = output
        exporter = JsonLinesItemExporter(output)
        self.exporter = exporter
        exporter.fields_to_export = _get_fields_to_check(ProductItem)
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
示例8: PajandanPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class PajandanPipeline(object):
    """Write scraped items to ``articles.json`` as UTF-8 text.

    The exporter is created with ``ensure_ascii=False`` and the file is
    opened through ``codecs`` with UTF-8 encoding.
    """

    def __init__(self):
        # Keyed by spider because more than one spider may be running.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        connect = crawler.signals.connect
        connect(pipeline.spider_opened, signals.spider_opened)
        connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open ``articles.json`` for UTF-8 writing and start exporting."""
        utf8_file = codecs.open('articles.json', 'w+', encoding='utf-8')
        self.files[spider] = utf8_file
        self.exporter = JsonLinesItemExporter(utf8_file, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
示例9: FeedWriterPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class FeedWriterPipeline(object):
    """Append ``FeedItem`` objects to the spider's feed output file.

    The pipeline only acts when ``FeedSpider.is_feed_op(spider)`` is true.
    A saved feed item is not returned; ``DropItem`` is raised for it instead.
    """

    def __init__(self):
        log.msg('FeedWriterPipeline.__init__()')
        # Number of feed items exported so far.
        self.count = 0
        # Both are populated in open_spider for feed operations.
        self.item_exporter = None
        self.file = None

    def open_spider(self, spider):
        """Open the feed output file in append mode for feed operations."""
        if not FeedSpider.is_feed_op(spider):
            return
        spider.make_sure_path_exists(spider.get_output_dir_path())
        file_name = spider.get_feed_output_file_path()
        self.file = open(file_name, 'a')
        self.item_exporter = JsonLinesItemExporter(self.file)
        log.msg('FeedWriterPipeline, opened file %s to append.' % file_name)

    def process_item(self, item, spider):
        """Export a feed item and drop it; return any other item unchanged.

        Raises:
            DropItem: after a ``FeedItem`` has been exported and counted.
        """
        is_feed_item = FeedSpider.is_feed_op(spider) and isinstance(item, FeedItem)
        if not is_feed_item:
            return item
        self.item_exporter.export_item(item)
        self.count += 1
        spider.check_max_limit(self.count)
        raise DropItem('Save item success')

    def close_spider(self, spider):
        """Write the item-count trailer line and close the file."""
        if not FeedSpider.is_feed_op(spider):
            return
        self.file.write('Parsed %i feed items.%s' % (self.count, os.linesep))
        self.file.close()
        log.msg('closed file, appended %i items.' % self.count)
示例10: ScrippaPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class ScrippaPipeline(object):
    """Export every scraped item to ``2_reports.json``.

    The output file handle is kept on ``self.file`` so it can be closed when
    the spider finishes; the original dropped the handle and never closed it.
    """

    def __init__(self):
        print("DDDDDDDDDDDDDDDDDDDDDDDDDUUUUUUUUUUUUUUUUUUUUUUUUUUUDE")
        # Set in spider_opened, released in spider_closed.
        self.file = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the report file and start an exporter on it."""
        self.file = open('2_reports.json', 'w+b')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the report file."""
        self.exporter.finish_exporting()
        if self.file is not None:
            self.file.close()
            self.file = None

    def process_item(self, item, spider):
        """Print a progress marker, export ``item`` and pass it on."""
        print("ScrippaPipeline: exporting item ============================== ")
        self.exporter.export_item(item)
        return item
示例11: PerispiderPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class PerispiderPipeline(object):
    """Export every scraped item to ``<spider name>.json``."""

    def open_spider(self, spider):
        """Open the spider's output file and start an exporter on it."""
        name = "%s.json" % spider.name
        self.file = open(name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file.

        The original class had no counterpart to ``open_spider``, so the
        export was never finished and the file was never closed.
        """
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item
示例12: JsonLinesItemPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class JsonLinesItemPipeline(object):
    """Export every scraped item to ``test.json``."""

    def open_spider(self, spider):
        """Open ``test.json`` and create an exporter that writes to it."""
        output = open('test.json', 'w+b')
        self.file = output
        self.exporter = JsonLinesItemExporter(output)

    def close_spider(self, spider):
        """Close the output file."""
        self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        exporter = self.exporter
        exporter.export_item(item)
        return item
示例13: MoviesPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class MoviesPipeline(object):
    """Store scraped items in the database and mirror them to ``movies.json``."""

    def __init__(self):
        self.field_to_export = []
        # Maps each spider to the JSON file opened for it, so the file can
        # be closed when that spider finishes.
        self.file = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Start the JSON export and truncate the database table."""
        # signals start of export
        print("Spider opened...\nPreparing to crawl...")
        json_file = open('movies.json', 'wb')
        self.file[spider] = json_file
        self.json_exporter = JsonLinesItemExporter(json_file)
        self.json_exporter.start_exporting()
        # Since the charts frequently change, we need to deal with differences
        # in the cached data and current data.
        # For now, we'll just truncate the table when the spider opens
        # and dump everything in.
        cursor = connection.cursor()
        # NOTE(review): the table name is interpolated into the statement;
        # MYSQL_TABLE is assumed to be trusted configuration -- confirm.
        sql = 'truncate table %s' % MYSQL_TABLE
        try:
            cursor.execute(sql)
            connection.commit()
            print("*** Truncated %s Table ***" % MYSQL_TABLE)
        except Exception as exc:
            # The original bare ``except`` referenced an undefined name ``e``
            # and therefore raised NameError instead of reporting the failure.
            details = exc.args
            if len(details) >= 2:
                print("Error %s %s" % (details[0], details[1]))
            else:
                print("Error %s" % (exc,))
            connection.rollback()

    def process_item(self, item, spider):
        """Insert ``item`` into the database, export it to JSON and return it."""
        # store the item in the database
        insert_database(item)
        # Write to JSON file
        self.json_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finish the JSON export and close the file opened for ``spider``."""
        # signal end of export (the original called a bare, undefined
        # ``finish_exporting()`` and overwrote the exporter with its result)
        self.json_exporter.finish_exporting()
        json_file = self.file.pop(spider, None)
        if json_file is not None:
            json_file.close()
示例14: TffdatapullPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class TffdatapullPipeline(object):
    """Export every item to CSV, JSON, JSON-lines and XML files in parallel.

    Each output file is named after the spider, with the extensions
    ``.csv``, ``.json``, ``.linejson`` and ``.xml``. The opened files are
    tracked so they can be closed when the spider finishes; the original
    never closed them.
    """

    def __init__(self):
        self.fields_to_export = [
            'match_code',
            'match_href',
            'home_team_id',
            'home_team_name',
            'score',
            'guest_team_id',
            'guest_team_name',
            'date_of_match',
            'time_of_match',
            'stadium_name',
            'organization_name'
        ]
        # File objects opened in spider_opened, closed in spider_closed.
        self._open_files = []
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def _open_output(self, path):
        """Open ``path`` for writing and remember the handle for later closing."""
        handle = open(path, "w")
        self._open_files.append(handle)
        return handle

    def spider_opened(self, spider):
        """Create the four exporters for ``spider`` and start each of them."""
        self.csv_exporter = CsvItemExporter(self._open_output(spider.name+".csv"),
                                            fields_to_export=self.fields_to_export,
                                            quoting=csv.QUOTE_ALL)
        self.json_exporter = TffdatapullJsonItemExporter(self._open_output(spider.name+".json"),
                                                         fields_to_export=self.fields_to_export,
                                                         sort_keys=True, indent=4)
        self.jsonlines_exporter = JsonLinesItemExporter(self._open_output(spider.name+".linejson"),
                                                        fields_to_export=self.fields_to_export)
        self.xml_exporter = TffdatapullXmlItemExporter(self._open_output(spider.name+".xml"),
                                                       fields_to_export=self.fields_to_export,
                                                       root_element="match_code", item_element="match_code")
        self.csv_exporter.start_exporting()
        self.json_exporter.start_exporting()
        self.jsonlines_exporter.start_exporting()
        self.xml_exporter.start_exporting()

    def process_item(self, item, spider):
        """Export ``item`` through all four exporters and return it."""
        self.csv_exporter.export_item(item)
        self.json_exporter.export_item(item)
        self.jsonlines_exporter.export_item(item)
        self.xml_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        """Finish all four exports, then close every opened output file."""
        try:
            self.csv_exporter.finish_exporting()
            self.json_exporter.finish_exporting()
            self.jsonlines_exporter.finish_exporting()
            self.xml_exporter.finish_exporting()
        finally:
            # Close the files even if one of the exporters fails to finish.
            while self._open_files:
                self._open_files.pop().close()
示例15: FmlPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import export_item [as 别名]
class FmlPipeline(object):
    """Export every scraped item to ``<spider name>.json``."""

    def open_spider(self, spider):
        """Open the spider's output file and start an exporter on it."""
        name = "%s.json" % spider.name
        self.file = open(name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file.

        The original class had no counterpart to ``open_spider``, so the
        export was never finished and the file was never closed.
        """
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` and pass it on unchanged."""
        self.exporter.export_item(item)
        return item