本文整理汇总了Python中scrapy.contrib.exporter.JsonLinesItemExporter.start_exporting方法的典型用法代码示例。如果您正苦于以下问题:Python JsonLinesItemExporter.start_exporting方法的具体用法?Python JsonLinesItemExporter.start_exporting怎么用?Python JsonLinesItemExporter.start_exporting使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.contrib.exporter.JsonLinesItemExporter
的用法示例。
在下文中一共展示了JsonLinesItemExporter.start_exporting方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: NordstromPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class NordstromPipeline(object):
    """De-duplicate Nordstrom items by product number and write the
    unique ones to a per-spider JSON-lines file."""

    def __init__(self):
        # One open output file per spider, plus the set of product
        # numbers already exported (duplicate detection).
        self.files = {}
        self.ids_seen = set()

    @classmethod
    def from_crawler(cls, crawler):
        # Wire the pipeline into the spider lifecycle signals.
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def process_item(self, item, spider):
        key = item['product_item_num']
        if key in self.ids_seen:
            raise DropItem("Duplicate item found: %s" % item)
        self.ids_seen.add(key)
        self.exporter.export_item(item)
        return item

    def spider_opened(self, spider):
        handle = open('%s_products.jl' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()
示例2: AdbPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class AdbPipeline(object):
    """Export unique items (keyed on item['product']) as JSON lines to
    a dated file under settings.DATA_DIR/<spider name>/."""

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}
        self.seen = set([])

    def spider_opened(self, spider):
        path = '%s/%s/%s.json' % (settings.DATA_DIR,
                                  spider.name,
                                  datetime.date.today().isoformat())
        out = open(path, 'w+b')
        self.files[spider] = out
        self.exporter = JsonLinesItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        # Drop repeats; export everything else.
        if self.seen_before(item):
            raise DropItem
        self.exporter.export_item(item)
        return item

    def seen_before(self, item):
        # Record each item's 'product' field; True means it repeated.
        product = item['product']
        if product in self.seen:
            return True
        self.seen.add(product)
        return False
示例3: PajandanPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class PajandanPipeline(object):
    """Export every item to 'articles.json' as UTF-8 JSON lines."""

    def __init__(self):
        self.files = {}  # may be more than one spider

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # Open a utf-8 text file; ensure_ascii=False keeps non-ASCII
        # characters readable in the output.
        out = codecs.open('articles.json', 'w+', encoding='utf-8')
        self.files[spider] = out
        self.exporter = JsonLinesItemExporter(out, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
示例4: ValidatorPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class ValidatorPipeline(object):
    """ Exports items in a temporary JSON file.
    Unnecessary fields are excluded. """

    def __init__(self):
        self.exporter = None
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        out = open(_get_spider_output_filename(spider), 'wb')
        self.files[spider] = out
        self.exporter = JsonLinesItemExporter(out)
        # Restrict the export to the fields under validation.
        self.exporter.fields_to_export = _get_fields_to_check(ProductItem)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
示例5: TibiaPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class TibiaPipeline(object):
    """Append every item to a timestamped per-spider JSON-lines file."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # File name embeds the start time, so each run appends to its
        # own file.
        out = open('%s.json' % (spider.name + datetime.datetime.now().isoformat()), 'a+b')
        self.files[spider] = out
        self.exporter = JsonLinesItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
示例6: ScrippaPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class ScrippaPipeline(object):
def __init__(self):
#self.files = {}
#self.log("MMMMMMMMMMMMMMMMMMMMMMMMMMAAAAAAAAAAAATE", level=log.WARNING)
print "DDDDDDDDDDDDDDDDDDDDDDDDDUUUUUUUUUUUUUUUUUUUUUUUUUUUDE"
#file = open('1_reports.json', 'w+b')
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open('2_reports.json', 'w+b')
#self.files[spider] = file
self.exporter = JsonLinesItemExporter(file)
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
#file = self.files.pop(spider)
#file.close()
def process_item(self, item, spider):
print "ScrippaPipeline: exporting item ============================== "
self.exporter.export_item(item)
return item
示例7: JsonExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class JsonExportPipeline(object):
    """Write every item to '<spider name>_Joke.txt' as JSON lines."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        out = open('%s_Joke.txt' % spider.name, 'w+b')
        self.files[spider] = out
        self.exporter = JsonLinesItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
示例8: PlayerPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class PlayerPipeline(object):
    """Export only PlayerInfoItem instances to
    <PROJECT_ROOT>/output/player_info.json as JSON lines."""

    def __init__(self, *args, **kwargs):
        self.player_info_file = None
        self.player_info_exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.player_info_file = open("%s/output/player_info.json" % settings.PROJECT_ROOT, 'wb')
        self.player_info_exporter = JsonLinesItemExporter(self.player_info_file)
        self.player_info_exporter.start_exporting()

    def spider_closed(self, spider):
        self.player_info_exporter.finish_exporting()
        self.player_info_file.close()

    def process_item(self, item, spider):
        # Other item types pass through untouched.
        if isinstance(item, PlayerInfoItem):
            self.player_info_exporter.export_item(item)
        return item
示例9: JsonLinesExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class JsonLinesExportPipeline(object):
    """Append items that carry image URLs to '<spider name>_pics.json'
    as JSON lines (non-ASCII preserved)."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        handle = open('%s_pics.json' % spider.name, 'a')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        # Only export items that actually have image URLs.
        if item.get("image_urls"):
            self.exporter.export_item(item)
        return item
示例10: JsonExportPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class JsonExportPipeline(object):
    """Write every item to data/<spider name>.json as JSON lines."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        json_path = os.path.join('data', '%s.json' % spider.name)
        handle = open(json_path, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonLinesItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        log.msg("process_item", level=log.DEBUG)
        return item
示例11: PerispiderPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class PerispiderPipeline(object):
    """Writes every scraped item to '<spider name>.json' as JSON lines.

    Fix: added close_spider so the exporter is finished and the file
    handle released; the original never closed the file, so the last
    buffered lines could be lost and the descriptor leaked.
    """

    def open_spider(self, spider):
        # Scrapy calls this automatically when the spider starts.
        name = "%s.json" % spider.name
        self.file = open(name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        # Scrapy calls this automatically when the spider finishes.
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
示例12: get_exporter
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
def get_exporter(self, item):
    """Return the exporter for this item's class, creating and caching
    one on first use: JSON-lines for unused_genotype_data items, CSV
    for everything else."""
    item_cls = item.__class__
    try:
        return self.exporters[item_cls]
    except KeyError:
        # First item of this class: build, register and start a new
        # exporter writing to the class's own output file.
        if item_cls == items.unused_genotype_data:
            exporter = JsonLinesItemExporter(open(_class_to_file(item_cls), 'w+b'))
        else:
            exporter = CsvItemExporter(open(_class_to_file(item_cls), 'w+b'))
        self.exporters[item_cls] = exporter
        exporter.start_exporting()
        return exporter
示例13: MoviesPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class MoviesPipeline(object):
def __init__(self):
self.field_to_export = []
self.file = {}
@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
# signals start of export
print "Spider opened...\nPreparing to crawl..."
self.json_exporter = JsonLinesItemExporter(open('movies.json', 'wb'))
self.json_exporter.start_exporting()
# Since the charts frequently change, we need to deal with differences
# in the cached data and current data.
# For now, we'll just truncate the table when the spider opens
# and dump everything in.
cursor = connection.cursor()
sql = 'truncate table %s' % MYSQL_TABLE
try:
cursor.execute(sql)
connection.commit()
print "*** Truncated %s Table ***" % MYSQL_TABLE
except:
print "Error %d %s" % (e.args[0], e.args[1])
connection.rollback()
def process_item(self, item, spider):
# store the item in the database
insert_database(item)
# Write to JSON file
self.json_exporter.export_item(item)
return item
def spider_closed(self, spider):
# signal end of export
self.json_exporter = finish_exporting()
示例14: TffdatapullPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class TffdatapullPipeline(object):
    """Exports match items simultaneously to CSV, JSON, JSON-lines and
    XML files named after the spider, with a fixed field order.

    Fix: the four output files were opened inline and never closed;
    the handles are now kept on the pipeline and closed after the
    exporters finish in spider_closed.
    """

    def __init__(self):
        # Column/field order shared by all four exporters.
        self.fields_to_export = [
            'match_code',
            'match_href',
            'home_team_id',
            'home_team_name',
            'score',
            'guest_team_id',
            'guest_team_name',
            'date_of_match',
            'time_of_match',
            'stadium_name',
            'organization_name'
        ]
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        # Keep every handle so spider_closed can close it.
        self.csv_file = open(spider.name + ".csv", "w")
        self.json_file = open(spider.name + ".json", "w")
        self.jsonlines_file = open(spider.name + ".linejson", "w")
        self.xml_file = open(spider.name + ".xml", "w")
        self.csv_exporter = CsvItemExporter(self.csv_file,
                                            fields_to_export=self.fields_to_export,
                                            quoting=csv.QUOTE_ALL)
        self.json_exporter = TffdatapullJsonItemExporter(self.json_file,
                                                         fields_to_export=self.fields_to_export,
                                                         sort_keys=True, indent=4)
        self.jsonlines_exporter = JsonLinesItemExporter(self.jsonlines_file,
                                                        fields_to_export=self.fields_to_export)
        self.xml_exporter = TffdatapullXmlItemExporter(self.xml_file,
                                                       fields_to_export=self.fields_to_export,
                                                       root_element="match_code", item_element="match_code")
        self.csv_exporter.start_exporting()
        self.json_exporter.start_exporting()
        self.jsonlines_exporter.start_exporting()
        self.xml_exporter.start_exporting()

    def process_item(self, item, spider):
        # Fan the item out to all four formats.
        self.csv_exporter.export_item(item)
        self.json_exporter.export_item(item)
        self.jsonlines_exporter.export_item(item)
        self.xml_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        self.csv_exporter.finish_exporting()
        self.json_exporter.finish_exporting()
        self.jsonlines_exporter.finish_exporting()
        self.xml_exporter.finish_exporting()
        # Fix: release the file handles (also flushes buffered output).
        self.csv_file.close()
        self.json_file.close()
        self.jsonlines_file.close()
        self.xml_file.close()
示例15: FmlPipeline
# 需要导入模块: from scrapy.contrib.exporter import JsonLinesItemExporter [as 别名]
# 或者: from scrapy.contrib.exporter.JsonLinesItemExporter import start_exporting [as 别名]
class FmlPipeline(object):
    """Writes every scraped item to '<spider name>.json' as JSON lines.

    Fixes: added close_spider so the exporter is finished and the file
    closed (the original leaked the handle); removed the commented-out
    __init__ dead code.
    """

    def open_spider(self, spider):
        # Scrapy calls this automatically when the spider starts.
        name = "%s.json" % spider.name
        self.file = open(name, 'w')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        # Scrapy calls this automatically when the spider finishes.
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item