本文整理汇总了 Python 中 scrapy.contrib.exporter.XmlItemExporter 类的典型用法代码示例。如果您正苦于以下问题：Python XmlItemExporter 类的具体用法？Python XmlItemExporter 怎么用？Python XmlItemExporter 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 XmlItemExporter 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
def __init__(self):
    """Open the two XML output files and start one exporter on each.

    ``files_si`` / ``exporter_si`` target ``entradas_con_tags.xml`` and
    ``files_no`` / ``exporter_no`` target ``entradas_sin_tags.xml``.
    """
    self.files_si = with_tags = open("entradas_con_tags.xml", "w+b")
    self.files_no = without_tags = open("entradas_sin_tags.xml", "w+b")
    self.exporter_si = XmlItemExporter(with_tags)
    self.exporter_no = XmlItemExporter(without_tags)
    # Both exporters are started here so they are ready to receive items.
    for exporter in (self.exporter_si, self.exporter_no):
        exporter.start_exporting()
示例2: spider_opened
def spider_opened(self, spider):
    """Create two XML files named after the spider and start exporting to both.

    The open file objects are stored as a two-element list in
    ``self.files[spider]``.
    """
    main_file, notags_file = [
        open(pattern % spider.name, 'w+b')
        for pattern in ('%s.xml', '%s_without_tags.xml')
    ]
    self.files[spider] = [main_file, notags_file]
    self.exporter1 = XmlItemExporter(main_file)
    self.exporter2 = XmlItemExporter(notags_file)
    for exporter in (self.exporter1, self.exporter2):
        exporter.start_exporting()
示例3: __init__
def __init__(self):
    """Hook the spider signals and prepare one exporter per output file.

    The open files are kept in ``self.files`` under the keys ``'tags'`` and
    ``'notags'``. The exporters are only created here; this method does not
    call ``start_exporting()`` on them.
    """
    dispatcher.connect(self.spider_opened, signals.spider_opened)
    dispatcher.connect(self.spider_closed, signals.spider_closed)
    self.files = {}
    tagged_out = open('posts_con_tags.xml' , 'w+b')
    untagged_out = open('posts_sin_tags.xml', 'w+b')
    self.files.update(tags=tagged_out, notags=untagged_out)
    self.exporter_tags = XmlItemExporter(tagged_out)
    self.exporter_notags = XmlItemExporter(untagged_out)
示例4: run
def run(self, args, opts):
    """Run the spider defined in the file ``args[0]``, optionally exporting items.

    Returns ``False`` when ``args`` does not hold exactly one element;
    otherwise returns ``None`` after the run. When ``opts.output`` is set,
    every item announced through ``signals.item_passed`` is written as XML
    to that path.
    """
    if len(args) != 1:
        return False

    output = open(opts.output, 'w+b') if opts.output else None
    try:
        # The exporter is referenced by this local for the whole run, so the
        # signal connection stays valid until the method returns.
        exporter = None
        if output is not None:
            exporter = XmlItemExporter(output)
            dispatcher.connect(exporter.export_item, signal=signals.item_passed)
            exporter.start_exporting()

        module = _import_file(args[0])
        scrapymanager.runonce(module.SPIDER)

        if exporter is not None:
            exporter.finish_exporting()
    finally:
        # Close the output even if the import or the crawl raised, so the
        # file handle is never leaked.
        if output is not None:
            output.close()
示例5: get_new_fileexporter
def get_new_fileexporter(self, item, spider):
    """Start a fresh XML exporter for the owner of ``item`` and return it.

    The owner is the first element of ``item['ownerKey']``; when that
    lookup fails the item is filed under ``'other'``. An exporter already
    registered for the same spider and owner is finished, and its file
    closed, before the new one is created.

    Returns a dict with the keys ``'exporter'`` and ``'file'``. The same
    dict is stored in ``self.fileExporters[spider][owner]``.
    """
    # Fall back to 'other' when the key is missing, empty or not indexable.
    try:
        owner = item['ownerKey'][0]
    except (KeyError, IndexError, TypeError):
        owner = 'other'
    log.msg('creating fileExporter for %s' % (owner), level=log.INFO)

    # Close any existing exporter and file for this owner.
    exporters = self.fileExporters[spider]
    if owner in exporters:
        previous = exporters[owner]
        previous_file = previous['file']
        previous['exporter'].finish_exporting()
        previous_file.close()

    owner_dir = '/'.join([spider.folder, owner])

    # Per-owner sequence counter: tracked and logged, but not part of the
    # file name (see below).
    counters = self.seq[spider]
    if owner in counters:
        seq = counters[owner] + 1
        log.msg('owner %s exists, incrementing count %d' % (owner, seq), level=log.DEBUG)
        counters[owner] = seq
    else:
        seq = counters[owner] = 1
        log.msg('owner NOT %s exist, incrementing count %d' % (owner, seq), level=log.DEBUG)

    # Use a timestamp rather than seq so a resumed job does not have to
    # track the sequence.
    filename = '%s%s/%s_%d.xml' % (settings['ADAPTFM_OUTPUT_PATH'], owner_dir, spider.name, time.time())
    target_dir = os.path.dirname(filename)
    if not os.path.isdir(target_dir):
        # makedirs also creates missing parents; the path is nested
        # (output path + spider folder + owner), so mkdir alone could fail.
        os.makedirs(target_dir)

    output = open(filename, 'w+b')
    exporter = XmlItemExporter(output)
    exporter.start_exporting()
    fileExporter = {'exporter': exporter, 'file': output}
    # Register under spider/owner so the next call can close it.
    exporters[owner] = fileExporter
    log.msg('get_new_fileexporter %s' % (filename), level=log.DEBUG)
    return fileExporter
示例6: run
def run(self, args, opts):
    """Schedule the spider defined in the file ``args[0]`` and run the engine.

    Returns ``False`` when ``args`` does not hold exactly one element;
    otherwise returns ``None`` after the run. When ``opts.output`` is set,
    every item announced through ``signals.item_passed`` is written as XML
    to that path.
    """
    if len(args) != 1:
        return False

    output = open(opts.output, 'w+b') if opts.output else None
    try:
        # The exporter is referenced by this local for the whole run, so the
        # signal connection stays valid until the method returns.
        exporter = None
        if output is not None:
            exporter = XmlItemExporter(output)
            dispatcher.connect(exporter.export_item, signal=signals.item_passed)
            exporter.start_exporting()

        module = _import_file(args[0])
        # Schedule the spider and start the engine.
        scrapymanager.queue.append_spider(module.SPIDER)
        scrapymanager.start()

        if exporter is not None:
            exporter.finish_exporting()
    finally:
        # Close the output even if the import or the crawl raised, so the
        # file handle is never leaked.
        if output is not None:
            output.close()
示例7: spider_opened
def spider_opened(self, spider):
    """Open ``datos.xml`` and start an XML exporter that writes to it."""
    # Output file; kept on the instance together with its exporter.
    self.file = output = open('datos.xml', 'w+b')
    self.exporter = exporter = XmlItemExporter(output)
    exporter.start_exporting()
示例8: spider_opened
def spider_opened(self, spider):
    """Open ``entradas_no_etiquetadas.xml`` and start an XML exporter on it."""
    # Output file; kept on the instance together with its exporter.
    self.file = output = open('entradas_no_etiquetadas.xml', 'w+b')
    self.exporter = exporter = XmlItemExporter(output)
    exporter.start_exporting()
示例9: assertExportResult
def assertExportResult(self, item, expected_value):
    """Export ``item`` to an in-memory buffer and compare it with ``expected_value``.

    The comparison is delegated to ``self.assertXmlEquivalent``.
    """
    buffer = BytesIO()
    exporter = XmlItemExporter(buffer)
    # Full export cycle for a single item: start, export, finish.
    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()
    exported = buffer.getvalue()
    self.assertXmlEquivalent(exported, expected_value)
示例10: spider_opened
def spider_opened(self, spider):
    """Create ``items.xml``, register it for ``spider`` and start exporting.

    The open file is stored in ``self.files[spider]`` and the exporter in
    ``self.exporter``.
    """
    # Create the .xml file. It is opened in binary mode because the
    # exporter expects a file whose write() accepts bytes.
    output = open('items.xml', 'w+b')
    # Register the file in the per-spider dictionary.
    self.files[spider] = output
    # Set up the XML exporter.
    self.exporter = XmlItemExporter(output)
    # Begin exporting.
    self.exporter.start_exporting()
示例11: spider_opened
def spider_opened(self, spider):
    """Open a timestamped item-log XML file for ``spider`` and start exporting.

    The file name is built from the ``LOG_DIR`` setting, the current time
    (to the minute) and ``spider.domain_name``. The open file is stored in
    ``self.files[spider]``.
    """
    stamp = datetime.today().strftime("%Y-%m-%d_%H_%M")
    path = "%s/itemlog_%s_%s.xml" % (settings.get("LOG_DIR"), stamp, spider.domain_name)
    item_log = open(path, "w+b")
    self.files[spider] = item_log
    self.exporter = XmlItemExporter(item_log)
    self.exporter.start_exporting()
示例12: test_multivalued_fields
def test_multivalued_fields(self):
    """Check the XML produced for an item whose field holds a list of values.

    The expected document wraps each list element in its own ``<value>`` tag.
    """
    expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
    item = TestItem(name=[u'John\xa3', u'Doe'])
    output = StringIO()
    exporter = XmlItemExporter(output)
    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()
    self.assertEqual(output.getvalue(), expected_value)
示例13: EuropythonXmlExport
class EuropythonXmlExport(object):
    """Item pipeline that exports every item to ``europython_items.xml``."""

    def __init__(self):
        # Open output files, keyed by spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        pipeline = cls()
        connect = crawler.signals.connect
        connect(pipeline.spider_opened, signals.spider_opened)
        connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the output file for ``spider`` and start the exporter."""
        output = open('europython_items.xml', 'w+b')
        self.files[spider] = output
        self.exporter = XmlItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for ``spider``."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and hand it back unchanged."""
        self.exporter.export_item(item)
        return item
示例14: TagPipeline
class TagPipeline(object):
    """
    Exports only the posts that have tags defined.
    """

    def __init__(self):
        # Subscribe to the spider's opened and closed signals.
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        """Create the export file and start the exporter."""
        self.file = output = open('posts_con_tags.xml', 'w+b')
        self.exporter = exporter = XmlItemExporter(output)
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export, then close the file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` when its ``'etiquetas'`` field is truthy; always return it."""
        if not item['etiquetas']:
            # No tags defined: pass the item through without exporting it.
            return item
        self.exporter.export_item(item)
        return item
示例15: RueventsPipeline
class RueventsPipeline(object):
    """Item pipeline that drops repeated ``event_id`` values and exports the rest to XML."""

    def __init__(self):
        # Both dictionaries are keyed by spider.
        self.duplicates = {}
        self.files = {}
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        """Reset the seen-id set for ``spider`` and start exporting to its file."""
        self.duplicates[spider] = set()
        output = open('%s_items.xml' % spider.name, 'w+b')
        self.files[spider] = output
        self.exporter = XmlItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Forget the seen ids, finish the export and close the spider's file."""
        del self.duplicates[spider]
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` unless its ``event_id`` was already seen for ``spider``.

        Raises ``DropItem`` for a repeated id; otherwise records the id,
        exports the item and returns it.
        """
        event_id = item['event_id']
        seen_ids = self.duplicates[spider]
        if event_id in seen_ids:
            raise DropItem("Duplicate item found!")
        seen_ids.add(event_id)
        self.exporter.export_item(item)
        return item