Python DB.finish_task Method Code Examples

This article collects typical usage examples of the db.DB.finish_task method in Python. If you are wondering how exactly to use DB.finish_task, how it works, or what it looks like in practice, the curated method examples below may help. You can also explore further usage examples of db.DB, the class this method belongs to.


Three code examples of the DB.finish_task method are shown below, sorted by popularity by default.
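All three examples share the same bookkeeping pattern: a task is opened with DB.init_task, the work runs inside a try block, and DB.finish_task is called on success, or with a truthy second argument on failure before the exception is re-raised. Here is a minimal sketch of that pattern; the no-argument DB() constructor and the meaning of the second finish_task argument are assumptions inferred from these examples, not a documented API.

# Minimal sketch of the init_task/finish_task pattern shared by the examples
# below. DB() with no arguments and the error-flag second argument of
# finish_task are assumptions inferred from these calls, not a documented API.
from db import DB

def run_task(id_task, work):
    db = DB()  # hypothetical: the real constructor may need configuration
    try:
        db.init_task(id_task)          # mark the task as started
        work()                         # the crawler/updater's actual job
        db.finish_task(id_task)        # record successful completion
    except Exception:
        db.finish_task(id_task, True)  # truthy flag records the task as failed
        raise                          # propagate after recording the failure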

Example 1: Updater

# Required module: from db import DB [as alias]
# Or: from db.DB import finish_task [as alias]

#......... some code omitted here .........
		
		
		ftps.retrbinary("RETR " + self.config['ftp_filename'], open(self.filename_stock_master, 'wb').write)
		
		ftps.quit()
		
	def load_data_stock(self):
		self.logger.info("[load_data_stock] leyendo...")
		
		self.data_stock = {}
		with open(self.filename_stock_master, 'rb') as f:
			reader = csv.reader(f)
			header = True
			for row in reader:
				if not header:
					data_line = dict(zip(self.config["csv_header"], [r.decode('iso-8859-1').encode('utf8') for r in row]))
					self.data_stock[data_line['id']] = data_line
				header = False
		
		

	def run(self):
		try:
			self.db.init_task(self.id_task)
			
			self.download_stock_master()
			self.load_data_stock()
			
			last_task = self.db.get_last_task_supplier(self.supplier)
			self.logger.info("[run] generando %s" % self.supplier)
			
			ids = []
			
			for data in self.db.get_data_supplier(self.supplier):
				if data['id'] in self.data_stock:
					data_master_stock = self.data_stock[data['id']]
					if data['id'] in ids:
						#url change
						continue
					ids.append(data['id'])
					# stock checks
					print data['id'], last_task, data['last_seen_task']  # leftover debug output
					if last_task > data['last_seen_task'] and int(data_master_stock['stock']) > 9:
						data_master_stock['catalogid'] = "-%s" % data_master_stock['catalogid']
					
					if data_master_stock['stock'] in ['0', '10', '40']:
						if data['stock'] != 40:
							data_master_stock['stock'] = data['stock']
					
					# map the stock code to its customer-facing message (Spanish):
					# 40 = pre-order, 0 = waiting list, 10 = ships in 5-7 days, otherwise in stock
					data_master_stock['instock_message'] = "Pre-Reserva" if data_master_stock['stock'] == "40" \
						else "Añadir a Lista de Espera" if data_master_stock['stock'] == "0" \
						else "Envío 5 a 7 Días" if data_master_stock['stock'] == "10" \
						else "En Stock - 48 Horas"
					
					
					if 'categories' not in data_master_stock:
						data_master_stock['categories'] = data['categories']
						data['control'] = ""
					else:
						data['control'] = "" if data_master_stock['categories'] == data['categories'] else "novedad"
					
					
					
					data_master_stock['distributor'] = self.name_supplier
					
					self.print_line(self.get_metas_orderer(data_master_stock))
				else:
					#~ self.print_line(self.get_metas_orderer(data))
					pass
					
			#from master
			self.logger.info("[run] buscando desaparecidos en origen %s" % self.supplier)
			for data in self.data_stock.values():
				if 'distributor' in data and data['distributor'] == self.name_supplier and data['id'] not in ids:
					
					if data['stock'] == "0":
						data['catalogid'] = "-%s" % data['catalogid']
					
					data['instock_message'] = "Pre-Reserva" if data['stock'] == "40" \
						else "Añadir a Lista de Espera" if data['stock'] == "0" \
						else "Envío 5 a 7 Días" if data['stock'] == "10" \
						else "En Stock - 48 Horas" 
						
					if 'categories' not in data:
						data['categories'] = ""
					self.print_line(self.get_metas_orderer(data))
			
			
			self.logger.info("[run] %s generado" % self.supplier)
				
			self.db.finish_task(self.id_task)
		except Exception as e:
			self.db.finish_task(self.id_task, True)
			
			exc_type, exc_obj, exc_tb = sys.exc_info()
			#~ fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
			#~ print(exc_type, fname, exc_tb.tb_lineno)
			#~ 
			self.logger.error("%s\n %d: %s" %(traceback.format_exc(), exc_tb.tb_lineno, str(e)))
			raise
Developer ID: fcalo, Project: crawler_comics, Lines of code: 104, Source file: updater.py

Example 2: CrawlerComics_3

# Required module: from db import DB [as alias]
# Or: from db.DB import finish_task [as alias]

#......... some code omitted here .........
                        except UnicodeDecodeError:
                            pass
                            #~ print meta
                            #~ print repr(self.metas[meta])
                            #~ raise
                        
            #~ print meta, self.metas[meta]
            
        
        
        
        
        self.db.save_data(url, self.metas, self.id_task)
        self.upload_images()
        self.urls_seen.append(url)
        
        return 1
        
    def get_external(self, name):
        
        html = self.download_url_login(self.config['url_search'] % name)
        
        tree = etree.fromstring(html, self.parser)
        
        find = etree.XPath('//*[@id="aspnetForm"]/div[2]/div/div[3]/div/div/div[3]/ul//li')
        products = find(tree)
        for product in products:
            t = clean_spaces(product.xpath(".//div/div/div[1]/span//text()")[0])
            if t == name:
                #todo: ?
                in_novedades = "novedad" in "".join(product.xpath(".//text()"))
                
                isbn = "".join(product.xpath(".//div/div/div[4]/text()")).split()[-1]
                
                return isbn, in_novedades
                
        
        return None, None
        
        
    def extract_category(self, url, second_level=False):
        """Crawl a category page."""
        
        if url in self.urls_seen:
            return False
        
        html = self.download_url(url)
        tree = etree.fromstring(html, self.parser)
        
        with open("a.html", "w") as f:  # debug dump of the fetched page
            f.write(html)
    
        if second_level:
            find = etree.XPath('//a/@href')
        else:
            find = etree.XPath('//*[contains(@class, "categorias")]//a/@href')
        links = find(tree)
        self.logger.info("[extract_category] recorriendo %s" % url)
        
        self.urls_seen.append(url)
        
        
        for link in links:
            if "/comics/" in link and (not second_level or "?p=" in link):
                self.extract_category(link, True)
            if "/comic/" in link and second_level:
                self.extract_product(link)
                
        
            
    def run(self):
        """start complete crawler"""
        
        self.logger.info("[run] iniciando(Completo=%s)" % self.mode_complete)
        
        try:
            self.db.init_task(self.id_task)
            
            
            html = self.download_url(self.config['discover_url'])
            tree = etree.fromstring(html, self.parser)
        
            find = etree.XPath('//a/@href')
            links = find(tree)
            for link in links:
                if "/comics/" in link :
                    self.logger.info("[run] recorriendo %s" % link)
                    self.extract_category(link)
                    
            self.generate_csv()
            
            self.db.finish_task(self.id_task)
        except Exception as e:
            self.db.finish_task(self.id_task, True)
            
            exc_type, exc_obj, exc_tb = sys.exc_info()

            self.logger.error("%s\n %d: %s" %(traceback.format_exc(), exc_tb.tb_lineno, str(e)))
            raise
Developer ID: fcalo, Project: crawler_comics, Lines of code: 104, Source file: crawl_3.py

Example 3: CrawlerComics_2

# Required module: from db import DB [as alias]
# Or: from db.DB import finish_task [as alias]

#......... some code omitted here .........
        #keywords & metatags
        keys_keywords = ["category", "subcategory", "manufacturer", "title", "extra_field_10", "extra_field_3"]
        self.metas['keywords'] = ", ".join(self.metas[i].strip() \
            for i in keys_keywords if i in self.metas and isinstance(self.metas[i], basestring) \
            and len(self.metas[i])>1)
        
        self.metas['extra_field_7'] = "<div>%s</div>" % self.metas['extra_field_7']
        
        def cut_last_comma(s):
            # strip a trailing "," or ", " left over from the keyword join
            if s.endswith(", "):
                return s[:-2]
            if s.endswith(","):
                return s[:-1]
            return s
        
        self.metas['keywords'] = cut_last_comma(self.metas['keywords'])
        if 'extra_field_10' in self.metas:
            self.metas['extra_field_10'] = cut_last_comma(self.metas['extra_field_10'])
        
        self.metas['metatags'] = '<META NAME="KEYWORDS" CONTENT="%s">' % self.metas['keywords']
        
        if previous_metas:
            # this product has been seen already
            if previous_metas['stock'] == self.metas['stock'] and previous_metas['price'] == self.metas['price']:
                # it has modifications, but not in price or stock; don't update
                return 0
                
        #extra_field_11  
        if 'extra_field_11' in self.metas and self.metas['extra_field_11']:
            self.metas['extra_field_11'] = "<div>%s</div>" % self.metas['extra_field_11']
        
        self.metas['price2'] = self.metas['price2'].replace(",", ".")
        
        self.metas['content'] = normalize_content(self.metas['content'])
        
        
        for meta in self.metas:
            if isinstance(self.metas[meta], float):
                self.metas[meta] = str(round(self.metas[meta], 2))
            elif isinstance(self.metas[meta], basestring):
                try:
                    self.metas[meta] = self.metas[meta].encode("utf-8")
                except UnicodeDecodeError:
                    pass
            #~ print meta, self.metas[meta]
            
        
            
        self.db.save_data(id_product, self.metas, self.id_task)
        self.upload_images()
        
        return 1

    def run(self):
        """start complete crawler"""
        
        
        self.logger.info("[run] iniciando(Completo=%s)" % self.mode_complete)
        
        try:
            self.db.init_task(self.id_task)
            
            
            for url_discover in self.config['discover_urls']:
                page = 1
                n_products = 1
                self.last_first_id = None
                while n_products > 0:
                    if page > 1 and "%d" not in url_discover:
                        #~ print "Saliendo", url_discover
                        break
                    try:
                        url = url_discover % page
                    except TypeError:
                        url = url_discover
                        
                    self.logger.info("[run] recorriendo %s" % url)
                    
                    if "campana" in url:
                        n_products = self.extract_product_campana(url)
                    else:
                        n_products = self.extract_product(url)
                        
                
                    page += 1
                    #~ if page > 2: break;
                    self.logger.info("[run] extraidos %d productos de %s" % (n_products, url))
                    
                    
            self.generate_csv()
            
            self.db.finish_task(self.id_task)
        except Exception as e:
            self.db.finish_task(self.id_task, True)
            
            exc_type, exc_obj, exc_tb = sys.exc_info()

            self.logger.error("%s\n %d: %s" %(traceback.format_exc(), exc_tb.tb_lineno, str(e)))
            raise
Developer ID: fcalo, Project: crawler_comics, Lines of code: 104, Source file: crawl_2.py


Note: The db.DB.finish_task method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; the source code copyright belongs to the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.