This article collects typical usage examples of the Python method scrapy.contrib.exporter.XmlItemExporter.export_item. If you are unsure how to call XmlItemExporter.export_item, or are looking for concrete examples of it in use, the curated code samples below may help. You can also explore further usage examples of the containing class, scrapy.contrib.exporter.XmlItemExporter.
The following presents 15 code examples of the XmlItemExporter.export_item method, sorted by popularity by default.
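Before the examples, here is a minimal, self-contained sketch of the export cycle that every example below follows: create the exporter around an open binary file, call start_exporting() once, export_item() per item, and finish_exporting() at the end. The PostItem class, field names, and file name are invented for illustration, and the scrapy.contrib.exporter import path assumes an old Scrapy release (newer releases expose the class as scrapy.exporters.XmlItemExporter).

from scrapy.item import Item, Field
from scrapy.contrib.exporter import XmlItemExporter  # scrapy.exporters in newer releases

class PostItem(Item):  # hypothetical item type, for illustration only
    title = Field()
    tags = Field()

f = open('items.xml', 'wb')
exporter = XmlItemExporter(f)
exporter.start_exporting()  # writes the XML declaration and opens the <items> root element
exporter.export_item(PostItem(title='hello', tags=['a', 'b']))  # one <item> element per call
exporter.finish_exporting()  # closes the <items> root element
f.close()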
Example 1: XmlWritePipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlWritePipeline(object):
    """docstring for XmlWritePipeline"""
    def __init__(self):
        pass

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open('bbsData.xml', 'wb')
        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
        # process the crawled data, define and call dataProcess function
        # dataProcess('bbsData.xml', 'text.txt')

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
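A pipeline like the one above only runs once it is enabled in the project settings. A minimal sketch, assuming the class lives in a hypothetical myproject.pipelines module (adjust the dotted path to your own project):

# settings.py -- hypothetical project layout, for illustration only
ITEM_PIPELINES = {
    'myproject.pipelines.XmlWritePipeline': 300,  # lower order values run earlier
}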
Example 2: FicheroXmlPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class FicheroXmlPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}
        file_tags = open('posts_con_tags.xml', 'w+b')
        file_notags = open('posts_sin_tags.xml', 'w+b')
        self.files['tags'] = file_tags
        self.files['notags'] = file_notags
        self.exporter_tags = XmlItemExporter(file_tags)
        self.exporter_notags = XmlItemExporter(file_notags)

    def spider_opened(self, spider):
        self.exporter_tags.start_exporting()
        self.exporter_notags.start_exporting()

    def spider_closed(self, spider):
        self.exporter_tags.finish_exporting()
        self.exporter_notags.finish_exporting()
        file = self.files.pop('tags')
        file.close()
        file = self.files.pop('notags')
        file.close()

    def process_item(self, item, spider):
        if item['tags']:
            self.exporter_tags.export_item(item)
        else:
            self.exporter_notags.export_item(item)
        return item
Example 3: XmlExportPipelineWithoutTags
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportPipelineWithoutTags(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_post_without_tags.xml' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if item['tag'][0] == "":
            self.exporter.export_item(item)
        return item
Example 4: conEtiquetaPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class conEtiquetaPipeline(object):
    """Entries that have a tag are written to entradas_etiquetadas.xml"""
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        # output file
        self.file = open('entradas_etiquetadas.xml', 'w+b')
        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """If the item has at least one tag, export it to the configured file"""
        if item['etiquetas']:
            self.exporter.export_item(item)
        return item
Example 5: RueventsPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class RueventsPipeline(object):
    def __init__(self):
        self.duplicates = {}
        self.files = {}
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        self.duplicates[spider] = set()
        file = open('%s_items.xml' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        del self.duplicates[spider]
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if item['event_id'] in self.duplicates[spider]:
            raise DropItem("Duplicate item found!")
        else:
            self.duplicates[spider].add(item['event_id'])
            self.exporter.export_item(item)
            return item
Example 6: XmlExportPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        f = open('%s_products.xml' % spider.name, 'w+b')
        self.files[spider] = f
        self.exporter = XmlItemExporter(f)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        f = self.files.pop(spider)
        f.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 7: EuropythonXmlExport
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class EuropythonXmlExport(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('europython_items.xml', 'w+b')
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 8: TagPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class TagPipeline(object):
    """
    Only exports posts that have tags defined
    """
    def __init__(self):
        # Connect the spider open/close signals
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        # Create the export file
        self.file = open('posts_con_tags.xml', 'w+b')
        # Initialise the exporter and start exporting
        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        # Finish the export
        self.exporter.finish_exporting()
        # Close the file
        self.file.close()

    def process_item(self, item, spider):
        if item['etiquetas']:
            # At least one tag defined, export the item
            self.exporter.export_item(item)
        return item
Example 9: OfficielebekendmakingenPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class OfficielebekendmakingenPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        logtime = datetime.today()
        file = open(
            "%s/itemlog_%s_%s.xml" % (settings.get("LOG_DIR"), logtime.strftime("%Y-%m-%d_%H_%M"), spider.domain_name),
            "w+b",
        )
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    # Note: the (spider, item) argument order and spider.domain_name follow an
    # older Scrapy API; current releases use process_item(self, item, spider).
    def process_item(self, spider, item):
        for field in item:
            if item[field]:
                item[field] = item[field][0]
        self.exporter.export_item(item)
        return item
Example 10: DamePostPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class DamePostPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file1 = open('%s.xml' % spider.name, 'w+b')
        file2 = open('%s_without_tags.xml' % spider.name, 'w+b')
        self.files[spider] = [file1, file2]
        self.exporter1 = XmlItemExporter(file1)
        self.exporter2 = XmlItemExporter(file2)
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()

    def spider_closed(self, spider):
        self.exporter1.finish_exporting()
        self.exporter2.finish_exporting()
        files = self.files.pop(spider)
        files[0].close()
        files[1].close()

    def process_item(self, item, spider):
        if not item['tag_list']:
            self.exporter2.export_item(item)
        else:
            self.exporter1.export_item(item)
        return item
Example 11: XmlExportWithOutLabels
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportWithOutLabels(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('postUGR_withOutLabel.xml', 'w+b')
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if not item['etiquetas']:
            self.exporter.export_item(item)
        return item
Example 12: guardadoXMLPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class guardadoXMLPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):
        # output file
        self.file = open('datos.xml', 'w+b')
        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Only processes images and discards references to videos"""
        if item['ruta_imagen']:
            self.exporter.export_item(item)
            return item
        else:
            raise DropItem("No image for this day %s" % item)
Example 13: assertExportResult
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
def assertExportResult(self, item, expected_value):
    fp = BytesIO()
    ie = XmlItemExporter(fp)
    ie.start_exporting()
    ie.export_item(item)
    ie.finish_exporting()
    self.assertXmlEquivalent(fp.getvalue(), expected_value)
Example 14: test_multivalued_fields
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
def test_multivalued_fields(self):
    output = StringIO()
    item = TestItem(name=[u'John\xa3', u'Doe'])
    ie = XmlItemExporter(output)
    ie.start_exporting()
    ie.export_item(item)
    ie.finish_exporting()
    expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
    self.assertEqual(output.getvalue(), expected_value)
Example 15: XmlExportPipeline
# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportPipeline(object):
    def __init__(self):
        self.files_si = open("entradas_con_tags.xml", "w+b")
        self.files_no = open("entradas_sin_tags.xml", "w+b")
        self.exporter_si = XmlItemExporter(self.files_si)
        self.exporter_no = XmlItemExporter(self.files_no)
        self.exporter_si.start_exporting()
        self.exporter_no.start_exporting()

    def process_item(self, item, spider):
        if len(item["tags"]) == 0:
            self.exporter_no.export_item(item)
        else:
            self.exporter_si.export_item(item)
        return item