This page collects typical usage examples of the Python method scrapy.contrib.exporter.JsonItemExporter.start_exporting. If you have been wondering what JsonItemExporter.start_exporting does, how to call it, or what real uses of it look like, the curated examples below should help. You can also explore further usage examples of the class it belongs to, scrapy.contrib.exporter.JsonItemExporter.
The following presents 15 code examples of the JsonItemExporter.start_exporting method, sorted by popularity by default.
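All 15 examples follow the same exporter lifecycle: open a file in binary mode, call start_exporting() once, call export_item() for each scraped item, and call finish_exporting() before closing the file. As a quick orientation, here is a minimal, self-contained sketch of that lifecycle outside of any pipeline; the DemoItem class and the demo.json filename are illustrative, not taken from the examples below. (In Scrapy 1.0+ the class lives at scrapy.exporters.JsonItemExporter; these examples use the older scrapy.contrib path.)

# Minimal JsonItemExporter lifecycle sketch (illustrative names).
from scrapy.contrib.exporter import JsonItemExporter
from scrapy.item import Item, Field

class DemoItem(Item):
    title = Field()  # hypothetical field, for the demo only

f = open('demo.json', 'w+b')       # binary mode: the exporter writes bytes
exporter = JsonItemExporter(f)
exporter.start_exporting()         # writes the opening '[' of the JSON array
exporter.export_item(DemoItem(title='hello'))
exporter.finish_exporting()        # writes the closing ']'
f.close()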
Example 1: ExportJSON
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class ExportJSON(object):
    """
    Exports items to an exports/json/<spider-name>.json file.
    """
    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file_to_save = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_to_save = self.files.pop(spider)
        file_to_save.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
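For a pipeline like ExportJSON to run at all, it must also be enabled in the project settings. A minimal sketch, assuming the class lives in a hypothetical myproject.pipelines module:

# settings.py (myproject.pipelines is a hypothetical path)
ITEM_PIPELINES = {
    'myproject.pipelines.ExportJSON': 300,  # the value sets the pipeline's order
}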
Example 2: MonitorPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class MonitorPipeline(object):
    def open_spider(self, spider):
        self.cols = spider.cols
        self.start_urls = spider.start_urls
        self.file = open('test.json', 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        try:
            # Map the item's source URL back to its position in start_urls,
            # then derive a group id (row) and a column offset within it.
            index = self.start_urls.index(item['surl'])
            group_id = index // self.cols  # floor division: group number
            r = index % self.cols          # column within the group
            if r in (0, 1, 2):
                item['main'] = r
            item['gid'] = group_id
        except ValueError:  # surl not found in start_urls
            pass
        self.exporter.export_item(item)
        return item
Example 3: JsonExportPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # file = open('%s_ixbt_jokes.json' % spider.name, 'w+b')
        file = open('ixbt_jokes.json', 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 4: AppsPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class AppsPipeline(object):
    def __init__(self, spider):
        self.file = open('{category}-{today}.json'.format(
            today=date.today().strftime('%d-%m-%Y'),
            category=spider.category), 'wb')
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(spider=crawler.spider)

    def spider_opened(self, spider):
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        if spider.name == 'apps':
            self.exporter.export_item(item)
        return item
Example 5: SaveNewItems
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class SaveNewItems(object):
    def __init__(self):
        self.files = []
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def process_item(self, item, spider):
        self.new_file_exporter.export_item(item)
        print "Save " + item["title"][0]
        return item

    def spider_opened(self, spider):
        self.new_item_file = open("storage\\%s_new_items.json" % spider.name, "w")
        self.new_file_exporter = JsonItemExporter(self.new_item_file)
        self.new_file_exporter.start_exporting()

    def spider_closed(self, spider):
        with open("storage\\%s_items.json" % spider.name, "w") as items_file:
            self.exporter = JsonItemExporter(items_file)
            self.exporter.start_exporting()
            # incomingData is assumed to be defined elsewhere in the original
            # project (e.g. previously scraped items to write back out).
            for item in incomingData:
                self.exporter.export_item(item)
            self.exporter.finish_exporting()
        # items_file is closed by the with block above.
        self.new_file_exporter.finish_exporting()
        self.new_item_file.close()
Example 6: SpidercrawlerPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class SpidercrawlerPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}
        # The exporter is created per spider in spider_opened below. The
        # original also built one here from a commented-out file handle,
        # which would have passed the `file` builtin by mistake:
        # file = open('ScrapedItems.json', 'w+b')
        # self.exporter = JsonItemExporter(file)

    def spider_opened(self, spider):
        if spider.name == 'timesnews':
            file = open('TodaysToiScrapedItems.json', 'w+b')
        else:
            file = open('TodaysHtScrapedItems.json', 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 7: CrawlerPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class CrawlerPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_products.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file, indent=4)  # ensure_ascii=False should go here, but it did not work ;P
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 8: JsonExportPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JsonExportPipeline(object):
    def __init__(self):
        log.msg('JsonExportPipeline.init....', level=log.INFO)
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        log.msg('JsonExportPipeline.from_crawler....', level=log.INFO)
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        log.msg('JsonExportPipeline.spider_opened....', level=log.INFO)
        file = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        log.msg('JsonExportPipeline.spider_closed....', level=log.INFO)
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        log.msg('JsonExportPipeline.process_item....', level=log.INFO)
        self.exporter.export_item(item)
        return item
Example 9: YxreviewPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class YxreviewPipeline(object):
    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.checkData(item, "title")
        self.checkData(item, "summary")
        self.checkData(item, "cover_image")
        self.checkData(item, "score")
        self.exporter.export_item(item)
        return item

    def checkData(self, item, field):
        # Flatten a one-element list field to a stripped UTF-8 string.
        if len(item[field]) > 0:
            newText = item[field][0].encode("utf-8")
            item[field] = newText.strip()
        else:
            item[field] = ""
Example 10: CLPipe
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class CLPipe(object):
    """A pipeline that writes results to JSON, then POSTs them to Parse in one batch."""
    def __init__(self, **kwargs):
        self.files = {}
        self.AppID = kwargs.get('AppID')
        self.ApiKey = kwargs.get('ApiKey')
        super(CLPipe, self).__init__()

    @classmethod
    def from_crawler(cls, crawler):
        # Note: the original does not pass AppID/ApiKey here, so both
        # default to None; presumably they were supplied some other way.
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # Open a per-spider file to read from and write to.
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()
        # Reopen the finished file and send its contents in a single batch.
        # Posting one request per item turned out to be very expensive
        # (almost 500k requests in one day), so the whole payload is sent
        # in one call and processed on the other end.
        new = open('%s_items.json' % spider.name)
        data = json.load(new)
        connection = httplib.HTTPSConnection('api.parse.com', 443)
        connection.connect()
        connection.request('POST', '/1/functions/scrapeSaver', json.dumps({
            "data": data
        }), {
            "X-Parse-Application-Id": self.AppID,
            "X-Parse-REST-API-Key": self.ApiKey,
            "Content-Type": "application/json"
        })
        result = json.loads(connection.getresponse().read())
        print "Sending load ", result
        # Done with the reopened file, close it.
        new.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 11: ExportJSON
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class ExportJSON(ExportData):
    """
    Exports items to an exports/json/<spider-name>.json file.
    """
    def spider_opened(self, spider):
        file_to_save = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()
Example 12: JSONExportPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JSONExportPipeline(object):
    def __init__(self):
        self.file = open('items.json', 'w')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
Example 13: JsonItemPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class JsonItemPipeline(object):
    def open_spider(self, spider):
        self.file = open('test.json', 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 14: DoubanSpiderPipeline
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class DoubanSpiderPipeline(object):
    def __init__(self):
        # The exporter writes encoded bytes itself, so open a plain binary
        # file; the original used codecs.open('books2.json', 'w+b',
        # encoding='utf-8'), which mixes a binary mode with a codec wrapper.
        file = open('books2.json', 'w+b')
        self.exporter = JsonItemExporter(file)
        self.exporter.encoding = 'utf-8'
        self.exporter.start_exporting()

    def close_spider(self, spider):
        # Renamed from spider_closed: that method was never connected to a
        # signal, so it never ran; Scrapy calls close_spider automatically.
        self.exporter.finish_exporting()

    def process_item(self, item, spider):
        # export_item expects the item itself; the original passed it a
        # string pre-encoded with json.JSONEncoder(ensure_ascii=False).
        self.exporter.export_item(item)
        return item
Example 15: DoubanJsonWrite
# Required import: from scrapy.contrib.exporter import JsonItemExporter [as alias]
# Or: from scrapy.contrib.exporter.JsonItemExporter import start_exporting [as alias]
class DoubanJsonWrite(object):
    def __init__(self):
        # Signal wiring is unnecessary here: Scrapy calls the open_spider /
        # close_spider methods on pipeline components automatically.
        # dispatcher.connect(self.open_spider, signals.spider_opened)
        # dispatcher.connect(self.close_spider, signals.spider_closed)
        self.itemsfile = open('imtes.jl', 'w')

    def open_spider(self, spider):
        self.exporter = JsonItemExporter(self.itemsfile)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item