Python exporter.XmlItemExporter Class Code Examples

This article collects typical usage examples of the scrapy.contrib.exporter.XmlItemExporter class in Python. If you are wondering what the XmlItemExporter class is for, how to use it, or simply want to see real usage examples, the selected code samples below should help.


The following shows 15 code examples of the XmlItemExporter class, sorted by popularity by default.
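For orientation before the examples, here is a minimal, self-contained sketch of the exporter's typical lifecycle: open the output file in binary mode, call start_exporting(), call export_item() once per item, and close the document with finish_exporting(). The PostItem class and the posts.xml file name are illustrative only and do not come from any of the projects below; note also that in newer Scrapy releases the class lives at scrapy.exporters.XmlItemExporter rather than scrapy.contrib.exporter.

# Minimal usage sketch (PostItem and posts.xml are made up for illustration).
from scrapy.contrib.exporter import XmlItemExporter  # scrapy.exporters in newer Scrapy
from scrapy.item import Item, Field

class PostItem(Item):
    title = Field()
    tags = Field()

# The exporter writes encoded bytes, so the file is opened in binary mode.
with open('posts.xml', 'w+b') as f:
    exporter = XmlItemExporter(f)
    exporter.start_exporting()      # writes the XML declaration and opens the root <items> element
    exporter.export_item(PostItem(title='Hello', tags=['python', 'scrapy']))
    exporter.export_item(PostItem(title='World', tags=[]))
    exporter.finish_exporting()     # closes the root element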

Example 1: __init__

 def __init__(self):
     self.files_si = open("entradas_con_tags.xml", "w+b")
     self.files_no = open("entradas_sin_tags.xml", "w+b")
     self.exporter_si = XmlItemExporter(self.files_si)
     self.exporter_no = XmlItemExporter(self.files_no)
     self.exporter_si.start_exporting()
     self.exporter_no.start_exporting()
Author: fvictor, Project: oslscrapy, Lines: 7, Source: pipelines.py

Example 2: spider_opened

    def spider_opened(self, spider):
        file1 = open('%s.xml' % spider.name, 'w+b')
        file2 = open('%s_without_tags.xml' % spider.name, 'w+b')

        self.files[spider] = [file1,file2]
        self.exporter1 = XmlItemExporter(file1)
        self.exporter2 = XmlItemExporter(file2)
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()
Author: asce, Project: python2, Lines: 9, Source: pipelines.py

Example 3: __init__

 def __init__(self):
     dispatcher.connect(self.spider_opened, signals.spider_opened)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.files = {}
     file_tags = open('posts_con_tags.xml' , 'w+b')
     file_notags = open('posts_sin_tags.xml', 'w+b')
     self.files['tags'] = file_tags
     self.files['notags'] = file_notags
     self.exporter_tags = XmlItemExporter(file_tags)
     self.exporter_notags = XmlItemExporter(file_notags)
Author: JoseVP, Project: Python-Avanzado, Lines: 10, Source: pipelines.py

Example 4: run

 def run(self, args, opts):
     if len(args) != 1:
         return False
     if opts.output:
         file = open(opts.output, 'w+b')
         exporter = XmlItemExporter(file)
         dispatcher.connect(exporter.export_item, signal=signals.item_passed)
         exporter.start_exporting()
     module = _import_file(args[0])
     scrapymanager.runonce(module.SPIDER)
     if opts.output:
         exporter.finish_exporting()
Author: serkanh, Project: scrapy, Lines: 12, Source: runspider.py

Example 5: get_new_fileexporter

	def get_new_fileexporter (self, item, spider):
		#get the owner
		try:
			owner = item['ownerKey'][0]
		except:
			owner = 'other'
	
		log.msg('creating fileExporter for %s' % (owner), level=log.INFO)
		
		#close any existing exporters and files
		if owner in self.fileExporters[spider]:
			fileExporter = self.fileExporters[spider][owner]
			file = fileExporter['file']
			exporter = fileExporter['exporter']
			exporter.finish_exporting()
			file.close()
		
		dir = '/'.join([spider.folder, owner])	
		
		# one batch per time that the spider has been resumed
		#batch = self.resumeCount[spider] #spider.state['resume_count']
		
		
		if owner in self.seq[spider]:
			seq = self.seq[spider][owner]
			seq += 1
			log.msg('owner %s exists, incrementing count %d' % (owner, seq), level=log.DEBUG)
			self.seq[spider][owner] = seq
		
		else:
			seq = self.seq[spider][owner] = 1
			log.msg('owner %s does not exist yet, starting count at %d' % (owner, seq), level=log.DEBUG)
		
		# ignore seq and use the timestamp so the job can resume without having to track the sequence
		filename =  '%s%s/%s_%d.xml' % (settings['ADAPTFM_OUTPUT_PATH'], dir, spider.name, time.time())

		if not os.path.isdir (os.path.dirname(filename)):
			os.mkdir(os.path.dirname(filename))
			
		# spider.currentFilename = filename	
		file = open(filename, 'w+b')
		
		# start exporting
		exporter = XmlItemExporter(file)
		exporter.start_exporting()
		
		fileExporter = {'exporter': exporter, 'file':file}
		# add to spider/owner
		self.fileExporters[spider][owner] = fileExporter
		
		log.msg('get_new_fileexporter %s' % (filename), level=log.DEBUG)
		
		return fileExporter
Author: solaise73, Project: adaptfm, Lines: 53, Source: pipelinesv2.py

Example 6: run

    def run(self, args, opts):
        if len(args) != 1:
            return False
        if opts.output:
            file = open(opts.output, 'w+b')
            exporter = XmlItemExporter(file)
            dispatcher.connect(exporter.export_item, signal=signals.item_passed)
            exporter.start_exporting()
        module = _import_file(args[0])

        # schedule spider and start engine
        scrapymanager.queue.append_spider(module.SPIDER)
        scrapymanager.start()

        if opts.output:
            exporter.finish_exporting()
Author: kenzouyeh, Project: scrapy, Lines: 16, Source: runspider.py

Example 7: spider_opened

    def spider_opened(self, spider):

        # output file for the export
        self.file = open('datos.xml', 'w+b')

        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()
Author: vencejo, Project: Visor-APOD, Lines: 7, Source: pipelines.py

Example 8: spider_opened

	def spider_opened(self, spider):
		
		# output file for the export
		self.file = open('entradas_no_etiquetadas.xml', 'w+b')
		
		self.exporter = XmlItemExporter(self.file)
		self.exporter.start_exporting()
Author: vencejo, Project: scrapyOSL, Lines: 7, Source: pipelines.py

Example 9: assertExportResult

 def assertExportResult(self, item, expected_value):
     fp = BytesIO()
     ie = XmlItemExporter(fp)
     ie.start_exporting()
     ie.export_item(item)
     ie.finish_exporting()
     self.assertXmlEquivalent(fp.getvalue(), expected_value)
Author: 505555998, Project: scrapy, Lines: 7, Source: test_contrib_exporter.py

Example 10: spider_opened

	def spider_opened(self, spider):
		# Create the .xml file
		file = open('items.xml', 'w')
		# Create the dictionary entry for this spider
		self.files[spider] = file
		# Set up the XML exporter
		self.exporter = XmlItemExporter(file)
		# Start exporting
		self.exporter.start_exporting()
Author: WanManolo, Project: oslPostReader, Lines: 9, Source: pipelines.py

Example 11: spider_opened

 def spider_opened(self, spider):
     logtime = datetime.today()
     file = open(
         "%s/itemlog_%s_%s.xml" % (settings.get("LOG_DIR"), logtime.strftime("%Y-%m-%d_%H_%M"), spider.domain_name),
         "w+b",
     )
     self.files[spider] = file
     self.exporter = XmlItemExporter(file)
     self.exporter.start_exporting()
Author: justinvw, Project: officiele-bekendmakingen-scraper, Lines: 9, Source: pipelines.py

Example 12: test_multivalued_fields

 def test_multivalued_fields(self):
     output = StringIO()
     item = TestItem(name=[u'John\xa3', u'Doe'])
     ie = XmlItemExporter(output)
     ie.start_exporting()
     ie.export_item(item)
     ie.finish_exporting()
     expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
     self.assertEqual(output.getvalue(), expected_value)
Author: AJamesPhillips, Project: scrapy, Lines: 9, Source: test_contrib_exporter.py

Example 13: EuropythonXmlExport

class EuropythonXmlExport(object):
	
	def __init__(self):
		self.files = {}

	@classmethod
	def from_crawler(cls, crawler):
		pipeline = cls()
		crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
		crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
		return pipeline

	def spider_opened(self, spider):
		file = open('europython_items.xml', 'w+b')
		self.files[spider] = file
		self.exporter = XmlItemExporter(file)
		self.exporter.start_exporting()

	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()

	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item
Author: jmortega, Project: europython-scrapy, Lines: 26, Source: pipelines.py

Example 14: TagPipeline

class TagPipeline(object):
    """
        Sólo exporta los posts con etiquetas (tags) definidas
    """

    def __init__(self):
        # Connect the spider open and close signals
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_opened(self, spider):

        # Create the export file
        self.file = open('posts_con_tags.xml', 'w+b')

        # Initialize the exporter and start the export
        self.exporter = XmlItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
    
        # Finish the export
        self.exporter.finish_exporting()

        # Close the file
        self.file.close()

    def process_item(self, item, spider):

        if item['etiquetas']:

            # At least one tag is defined, export the item
            self.exporter.export_item(item)

        return item
Author: camador, Project: curso_python, Lines: 35, Source: pipelines.py

Example 15: RueventsPipeline

class RueventsPipeline(object):
	def __init__(self):
		self.duplicates = {}
		self.files = {}
		dispatcher.connect(self.spider_opened, signals.spider_opened)
		dispatcher.connect(self.spider_closed, signals.spider_closed)

	def spider_opened(self, spider):
		self.duplicates[spider]=set()
		file = open('%s_items.xml' % spider.name, 'w+b')
		self.files[spider] = file
		self.exporter = XmlItemExporter(file)
		self.exporter.start_exporting()
	
	def spider_closed(self, spider):
		del self.duplicates[spider]
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()

	def process_item(self, item, spider):
		if item['event_id'] in self.duplicates[spider]:
			raise DropItem("Duplicate item found!")
		else:
			self.duplicates[spider].add(item['event_id'])
			self.exporter.export_item(item)
			return item
Author: dev-null00, Project: RUGoing, Lines: 27, Source: pipelines.py


Note: The scrapy.contrib.exporter.XmlItemExporter class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Please consult the corresponding project's License before distributing or using the code, and do not reproduce this article without permission.