

Python XmlItemExporter.export_item Method Code Examples

This article collects typical usage examples of the Python method scrapy.contrib.exporter.XmlItemExporter.export_item. If you are wondering how to use XmlItemExporter.export_item, or what it looks like in real code, the selected examples below should help. You can also browse further usage examples of the containing class, scrapy.contrib.exporter.XmlItemExporter.


The sections below present 15 code examples of the XmlItemExporter.export_item method, sorted by popularity by default.
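
Before the collected examples, here is a minimal, self-contained sketch of the exporter workflow they all follow: open a file, call start_exporting, call export_item for each item, then finish_exporting. The item class and field below are hypothetical, and note that on Scrapy 1.0+ the import path is scrapy.exporters rather than scrapy.contrib.exporter.

# Minimal sketch; SampleItem is an assumed item class, not taken from the examples below
from scrapy.contrib.exporter import XmlItemExporter  # on Scrapy 1.0+: from scrapy.exporters import XmlItemExporter
from scrapy.item import Item, Field

class SampleItem(Item):
    name = Field()

f = open('items.xml', 'wb')
exporter = XmlItemExporter(f)
exporter.start_exporting()                        # writes the XML declaration and the opening <items> tag
exporter.export_item(SampleItem(name='example'))  # serializes one item as an <item> element
exporter.finish_exporting()                       # writes the closing </items> tag
f.close()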

Example 1: XmlWritePipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlWritePipeline(object):
    """Exports every scraped item to bbsData.xml with XmlItemExporter."""

    def __init__(self):
        pass

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open('bbsData.xml', 'wb')
        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
        # process the crawled data, define and call dataProcess function
        # dataProcess('bbsData.xml', 'text.txt')

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: JaminQiang, Project: ScrapySpider, Lines: 29, Source: pipelines.py
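
To actually run a pipeline class like the one above, it has to be registered in the project's settings module. A minimal sketch, assuming a hypothetical project package named myproject (on very old Scrapy versions ITEM_PIPELINES is a plain list rather than a dict of priorities):

# settings.py (hypothetical project layout)
ITEM_PIPELINES = {
    'myproject.pipelines.XmlWritePipeline': 300,
}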

Example 2: FicheroXmlPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class FicheroXmlPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}
        file_tags = open('posts_con_tags.xml', 'w+b')
        file_notags = open('posts_sin_tags.xml', 'w+b')
        self.files['tags'] = file_tags
        self.files['notags'] = file_notags
        self.exporter_tags = XmlItemExporter(file_tags)
        self.exporter_notags = XmlItemExporter(file_notags)

    def spider_opened(self, spider):        
        self.exporter_tags.start_exporting()
        self.exporter_notags.start_exporting()

    def spider_closed(self, spider):
        self.exporter_tags.finish_exporting()
        self.exporter_notags.finish_exporting()
        
        file = self.files.pop('tags')
        file.close()
        file = self.files.pop('notags')
        file.close()

    def process_item(self, item, spider):
        
        if item['tags']:
            self.exporter_tags.export_item(item)
        else:
            self.exporter_notags.export_item(item)
        return item
        
Developer: JoseVP, Project: Python-Avanzado, Lines: 34, Source: pipelines.py

Example 3: XmlExportPipelineWithoutTags

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportPipelineWithoutTags(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()

        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_post_without_tags.xml' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):

        if item['tag'][0] == "":
            self.exporter.export_item(item)
        return item
Developer: Smi1984, Project: posts_scrapy, Lines: 31, Source: pipelines.py

Example 4: conEtiquetaPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class conEtiquetaPipeline(object):
    """ Las entradas que tienen etiqueta las pasa al archivo entradas_etiquetadas.xml """

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):

        # output file
        self.file = open('entradas_etiquetadas.xml', 'w+b')

        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
    
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
		""" si el item tiene una etiqueta lo exporta al archivo prefijado """
		if item['etiquetas']:
			self.exporter.export_item(item)
		return item
Developer: vencejo, Project: scrapyOSL, Lines: 27, Source: pipelines.py

Example 5: RueventsPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class RueventsPipeline(object):
	def __init__(self):
		self.duplicates = {}
		self.files = {}
		dispatcher.connect(self.spider_opened, signals.spider_opened)
		dispatcher.connect(self.spider_closed, signals.spider_closed)

	def spider_opened(self, spider):
		self.duplicates[spider]=set()
		file = open('%s_items.xml' % spider.name, 'w+b')
		self.files[spider] = file
		self.exporter = XmlItemExporter(file)
		self.exporter.start_exporting()
	
	def spider_closed(self, spider):
		del self.duplicates[spider]
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()

	def process_item(self, item, spider):
		if item['event_id'] in self.duplicates[spider]:
			raise DropItem("Duplicate item found!")
		else:
			self.duplicates[spider].add(item['event_id'])
			self.exporter.export_item(item)
			return item
Developer: dev-null00, Project: RUGoing, Lines: 29, Source: pipelines.py

Example 6: XmlExportPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        f = open('%s_products.xml' % spider.name, 'w+b')
        self.files[spider] = f
        self.exporter = XmlItemExporter(f)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        f = self.files.pop(spider)
        f.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Developer: CyberKronos, Project: Gamehub, Lines: 28, Source: pipelines.py

Example 7: EuropythonXmlExport

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class EuropythonXmlExport(object):
	
	def __init__(self):
		self.files = {}

	@classmethod
	def from_crawler(cls, crawler):
		pipeline = cls()
		crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
		crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
		return pipeline

	def spider_opened(self, spider):
		file = open('europython_items.xml', 'w+b')
		self.files[spider] = file
		self.exporter = XmlItemExporter(file)
		self.exporter.start_exporting()

	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()

	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item
Developer: jmortega, Project: europython-scrapy, Lines: 28, Source: pipelines.py

Example 8: TagPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class TagPipeline(object):
    """
        Sólo exporta los posts con etiquetas (tags) definidas
    """

    def __init__(self):
        # Connect the spider open/close signals
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):

        # Create the export file
        self.file = open('posts_con_tags.xml', 'w+b')

        # Initialize the exporter and start exporting
        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
    
        # Finish exporting
        self.exporter.finish_exporting()

        # Close the file
        self.file.close()

    def process_item(self, item, spider):

        if item['etiquetas']:

            # At least one tag is defined, export the item
            self.exporter.export_item(item)

        return item
Developer: camador, Project: curso_python, Lines: 37, Source: pipelines.py

Example 9: OfficielebekendmakingenPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class OfficielebekendmakingenPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        logtime = datetime.today()
        file = open(
            "%s/itemlog_%s_%s.xml" % (settings.get("LOG_DIR"), logtime.strftime("%Y-%m-%d_%H_%M"), spider.domain_name),
            "w+b",
        )
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, spider, item):  # note: legacy Scrapy argument order; current versions use (self, item, spider)
        for field in item:
            if item[field]:
                item[field] = item[field][0]
        self.exporter.export_item(item)
        return item
Developer: justinvw, Project: officiele-bekendmakingen-scraper, Lines: 29, Source: pipelines.py

Example 10: DamePostPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class DamePostPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file1 = open('%s.xml' % spider.name, 'w+b')
        file2 = open('%s_without_tags.xml' % spider.name, 'w+b')

        self.files[spider] = [file1, file2]
        self.exporter1 = XmlItemExporter(file1)
        self.exporter2 = XmlItemExporter(file2)
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()

    def spider_closed(self, spider):
        self.exporter1.finish_exporting()
        self.exporter2.finish_exporting()
        files = self.files.pop(spider)
        files[0].close()
        files[1].close()

    def process_item(self, item, spider):
        if not item['tag_list']:
            self.exporter2.export_item(item)
        else:
            self.exporter1.export_item(item)
        return item
Developer: asce, Project: python2, Lines: 37, Source: pipelines.py

Example 11: XmlExportWithOutLabels

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportWithOutLabels(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('postUGR_withOutLabel.xml', 'w+b')
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if not item['etiquetas']:
            self.exporter.export_item(item)
        return item
Developer: jmortega, Project: python-scrapy, Lines: 29, Source: pipelines.py

Example 12: guardadoXMLPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class guardadoXMLPipeline(object):

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):

        # output file
        self.file = open('datos.xml', 'w+b')

        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
    
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Solo procesa las imagenes y se deshace de las referencias a los videos """
        if item['ruta_imagen']:
            self.exporter.export_item(item)
            return item
        else:
            raise DropItem("Este dia no hay imagen  %s" % item)
Developer: vencejo, Project: Visor-APOD, Lines: 28, Source: pipelines.py

Example 13: assertExportResult

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
    def assertExportResult(self, item, expected_value):
        fp = BytesIO()
        ie = XmlItemExporter(fp)
        ie.start_exporting()
        ie.export_item(item)
        ie.finish_exporting()
        self.assertXmlEquivalent(fp.getvalue(), expected_value)
Developer: 505555998, Project: scrapy, Lines: 9, Source: test_contrib_exporter.py

Example 14: test_multivalued_fields

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
    def test_multivalued_fields(self):
        output = StringIO()
        item = TestItem(name=[u'John\xa3', u'Doe'])
        ie = XmlItemExporter(output)
        ie.start_exporting()
        ie.export_item(item)
        ie.finish_exporting()
        expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
        self.assertEqual(output.getvalue(), expected_value)
Developer: AJamesPhillips, Project: scrapy, Lines: 11, Source: test_contrib_exporter.py

Example 15: XmlExportPipeline

# Required import: from scrapy.contrib.exporter import XmlItemExporter [as alias]
# Or: from scrapy.contrib.exporter.XmlItemExporter import export_item [as alias]
class XmlExportPipeline(object):
    def __init__(self):
        self.files_si = open("entradas_con_tags.xml", "w+b")
        self.files_no = open("entradas_sin_tags.xml", "w+b")
        self.exporter_si = XmlItemExporter(self.files_si)
        self.exporter_no = XmlItemExporter(self.files_no)
        self.exporter_si.start_exporting()
        self.exporter_no.start_exporting()

    def process_item(self, item, spider):
        if len(item["tags"]) == 0:
            self.exporter_no.export_item(item)
        else:
            self.exporter_si.export_item(item)
        return item
Developer: fvictor, Project: oslscrapy, Lines: 17, Source: pipelines.py


Note: the scrapy.contrib.exporter.XmlItemExporter.export_item examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets were selected from open-source projects contributed by their respective developers. Copyright of the source code remains with the original authors; refer to each project's license before distributing or reusing it. Reproduction without permission is prohibited.