當前位置: 首頁>>代碼示例>>Python>>正文


Python DB.start_new_task方法代碼示例

本文整理匯總了Python中db.DB.start_new_task方法的典型用法代碼示例。如果您正苦於以下問題:Python DB.start_new_task方法的具體用法?Python DB.start_new_task怎麼用?Python DB.start_new_task使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在db.DB的用法示例。


在下文中一共展示了DB.start_new_task方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: Updater

# 需要導入模塊: from db import DB [as 別名]
# 或者: from db.DB import start_new_task [as 別名]
class Updater(object):
	def __init__(self, verbose = False, id_task = None, supplier = None):
		"""Set up configuration, logging, the DB handle and the output CSV.

		Args:
			verbose: stored as ``self.verbose``; not otherwise used here.
			id_task: id of an existing task to reuse (converted with
				``int``); when falsy a new task id is requested from the DB
				via ``start_new_task()``.
			supplier: supplier key; stored, passed to
				``DB.get_name_supplier`` and used in the CSV file name.

		Side effects: executes ``updater.conf``, attaches a file handler to
		the ``'UPDATER'`` logger, and (re)creates the output CSV with its
		header row.
		"""
		
		self.verbose = verbose
		self.supplier = supplier
		
		
		
		# Config: run updater.conf (located next to this file) with Python 2's
		# ``execfile`` so that its top-level names become keys of self.config.
		self.config = {}
		config_file = os.path.join(os.path.dirname(__file__), "updater.conf")
		execfile(config_file, self.config)
		
		# Logger: time-rotated file handler ("d" unit, interval 2) whose file
		# name is the configured log_file with the task id inserted before
		# the ".log" extension.
		# NOTE(review): when id_task is None the inserted text is the literal
		# "None" (from "%s.log" % None) -- confirm this is intended.
		self.logger = logging.getLogger('UPDATER')
		hdlr = logging.handlers.TimedRotatingFileHandler(os.path.join(os.path.dirname(__file__), \
		  self.config['log_file'].replace(".log", "%s.log" % id_task)),"d",2)
		# Parsed as ("-%s" % id_task) if id_task else "<strftime pattern>".
		hdlr.suffix = "-%s" % id_task if id_task else "%Y-%m-%d-%H-%M"
		formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
		hdlr.setFormatter(formatter)
		# NOTE(review): a handler is added on every instantiation; creating
		# several Updater objects in one process would add several handlers
		# to the same named logger.
		self.logger.addHandler(hdlr)
		self.logger.setLevel(logging.INFO)
		self.logger.info("[__init__]")
		
		# Initialise the DB wrapper with the same logger and config file path.
		self.db = DB(self.logger, config_file)
		
		# Reuse the caller's task id, or ask the DB for a fresh one.
		if not id_task:
			self.id_task = self.db.start_new_task()
		else:
			self.id_task = int(id_task)
			
		self.name_supplier = self.db.get_name_supplier(self.supplier)
			
		# Initialise CSV paths. The "%" operators apply left to right, so
		# config['csv_filename'] is first prefixed with "csv/" and then
		# formatted with (supplier, id_task) -- it is presumably a template
		# with two placeholders; verify against updater.conf.
		self.filename_csv = os.path.join(os.path.dirname(__file__), "csv/%s" % self.config['csv_filename'] % (self.supplier, self.id_task))
		self.filename_stock_master = os.path.join(os.path.dirname(__file__), "csv/%s" % "STOCK_MASTER_%d.csv" % self.id_task)
		
		# Write the header row; header=True makes print_line truncate the file.
		self.print_line(self.config["csv_header"], True)
		
	def get_metas_orderer(self, data):
		"""select metas required"""
		
		return [data[meta] for meta in self.config['csv_header'] if meta in data and data[meta]]
		  
	def print_line(self, line, header = False):
		"""Write one row to the output CSV at ``self.filename_csv``.

		When ``header`` is true the file is opened in ``'wb'`` mode, which
		truncates it; otherwise the row is appended (``'ab'``). Rows are
		tab-delimited and every non-numeric field is quoted.
		"""
		if header:
			open_mode = 'wb'
		else:
			open_mode = 'ab'

		with open(self.filename_csv, open_mode) as out_file:
			writer = csv.writer(out_file, delimiter='\t', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
			writer.writerow(line)
		
	def download_stock_master(self):
		"""Download the stock master CSV over FTPS.

		Connects to ``config['ftp_host']`` on port 990 with a 60 second
		timeout, logs in with ``config['ftp_user']`` / ``config['ftp_pass']``
		and switches the data channel to protected mode. The connection
		sequence is retried on failure, sleeping 1, 2, ... seconds between
		attempts; the sixth consecutive failure is re-raised to the caller.

		The remote file ``config['ftp_filename']`` is then written to
		``self.filename_stock_master``.

		Raises:
			Exception: whatever the last failed connection attempt raised,
				or any error raised during the transfer itself.
		"""
		tries = 0
		self.logger.info("[download_stock_master] Descargando...")
		while True:
			try:
				ftps = mFTP_TLS()
				ftps.connect(self.config['ftp_host'], port=990, timeout = 60)
				ftps.login(self.config['ftp_user'], self.config['ftp_pass'])
				ftps.prot_p()
				break
			except Exception:
				# ``Exception`` rather than a bare ``except`` so that Ctrl-C
				# and SystemExit abort immediately instead of being retried.
				tries += 1
				if tries > 5:
					raise
				# Linear back-off: wait one second longer after each failure.
				time.sleep(tries)

		# The context manager guarantees the local file is flushed and closed
		# even when the transfer fails part-way through.
		with open(self.filename_stock_master, 'wb') as stock_file:
			ftps.retrbinary("RETR " + self.config['ftp_filename'], stock_file.write)

		ftps.quit()
		
	def load_data_stock(self):
		self.logger.info("[load_data_stock] leyendo...")
		
		self.data_stock = {}
		with open(self.filename_stock_master, 'rb') as f:
			reader = csv.reader(f)
			header = True
			for row in reader:
				if not header:
					data_line = dict(zip(self.config["csv_header"], [r.decode('iso-8859-1').encode('utf8') for r in row]))
					self.data_stock[data_line['id']] = data_line
				header = False
		
		

	def run(self):
		try:
			self.db.init_task(self.id_task)
			
			self.download_stock_master()
			self.load_data_stock()
			
			last_task = self.db.get_last_task_supplier(self.supplier)
#.........這裏部分代碼省略.........
開發者ID:fcalo,項目名稱:crawler_comics,代碼行數:103,代碼來源:updater.py

示例2: CrawlerComics_3

# 需要導入模塊: from db import DB [as 別名]
# 或者: from db.DB import start_new_task [as 別名]
class CrawlerComics_3(CrawlerComics):
    def __init__(self, verbose = False, id_task = None, mode = "0"):
        """Set up configuration, logging, extraction rules, DB and output CSV.

        Args:
            verbose: stored as ``self.verbose``; not otherwise used here.
            id_task: id of an existing task to reuse (converted with
                ``int``); when falsy a new task id is requested from the DB
                via ``start_new_task()``.
            mode: the string ``"0"`` selects a complete crawl; any other
                value leaves ``self.mode_complete`` false (per the inline
                legend, "1" means only updates and deletes).

        Side effects: executes ``crawler_comics_3.conf``, attaches a file
        handler to the ``'CRAWLER'`` logger, calls ``DB.init_model()`` and
        (re)creates the output CSV with its header row.
        """
        self.verbose = verbose
        
        # 0 -> complete
        # 1 -> only updates and deletes
        self.mode_complete = mode == "0"

        self.parser = etree.HTMLParser()
        
        # Config: run the .conf file (located next to this module) with
        # Python 2's ``execfile`` so its top-level names become config keys.
        self.config = {}
        config_file = os.path.join(os.path.dirname(__file__), "crawler_comics_3.conf")
        execfile(config_file, self.config)
        
        # Logger: time-rotated file handler ("d" unit, interval 2); the task
        # id is inserted before the ".log" extension of the configured name.
        # NOTE(review): when id_task is None the inserted text is the literal
        # "None" (from "%s.log" % None) -- confirm this is intended.
        self.logger = logging.getLogger('CRAWLER')
        hdlr = logging.handlers.TimedRotatingFileHandler(os.path.join(os.path.dirname(__file__), \
          self.config['log_file'].replace(".log", "%s.log" % id_task)),"d",2)
        # Parsed as ("-%s" % id_task) if id_task else "<strftime pattern>".
        hdlr.suffix = "-%s" % id_task if id_task else "%Y-%m-%d-%H-%M"
        formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
        hdlr.setFormatter(formatter)
        self.logger.addHandler(hdlr)
        self.logger.setLevel(logging.INFO)
        self.logger.info("[__init__]")
        
        # Extraction rules keyed by output field name. Each value is a list
        # holding an XPath expression and, for some fields, a second string
        # that looks like a regular expression -- presumably applied to the
        # XPath result; the consumer is not visible in this excerpt.
        self.xpaths = {"name" : ['//*[contains(@class, "titprod")]/text()'],
            "title" : ['//*[contains(@class, "titprod")]/text()'],
            
            "labels_date" :['//*[contains(@class, "infoprod")]//text()', "(SALIDA|PUBLICACI[^:]*N):(.*)"],
            "description" :['//*[contains(@class, "txtprod")]//text()'],
            "extended_description" :['//*[contains(@class, "txtprod")]//text()'],
            "price2" :['//*[contains(@class, "precio")]/text()'],
            "thumbnail" :['//*[contains(@class, "imgbox")]/img/@src'],
            "image1" :['//*[contains(@class, "imgbox")]/img/@src', "(.*)_156(\..*)"],
            "extra_field_4" :['//*[contains(@class, "infoprod")]//text()', "FORMATO:(.*)"],
            "extra_field_5" :['//*[contains(@class, "infoprod")]//text()', "FORMATO:(.*)"],
            "extra_field_10_a" :['//*[contains(@class, "infoprod")]//text()', "GUI[^:]*N:(.*)"],
            "extra_field_10_b" :['//*[contains(@class, "infoprod")]//text()', "DIBUJO:(.*)"],
            "labels_categories" :['//*[contains(@class, "titulo")]//text()'],
            "content" :['//*[contains(@class, "sec-prod")]//text()'],
            }
        
        
        # Disabled category alias table, kept for reference; this crawler
        # currently uses empty alias and ban tables (assigned below).
        #~ self.category_alias = {"BABEL" : "COMIC EUROPEO"
            #~ , u"BD - Autores Européos" : "COMIC EUROPEO"
            #~ , u"BD - AUTORES EUROPEOS" : "COMIC EUROPEO"
            #~ , u"Colección Trazado" : "COMIC INDEPENDIENTE"
            #~ , u"Cómics Clásicos" : "HUMOR"
            #~ , u"Cómics Españoles" : u"COMIC ESPAÑOL"
            #~ , u"Cómics Star Wars" : u"COMIC USA"
            #~ , u"Guías Ilustradas Star Wars" : u"COMIC USA"
            #~ , u"Independientes USA" : u"COMIC USA"
            #~ , u"Novelas Star Wars" : u"COMIC USA"
            #~ }
            
        self.category_alias = {}
            
        self.category_ban = {}
        
        # Initialise the DB wrapper with the same logger and config file path.
        self.db = DB(self.logger, config_file)
        self.db.init_model()
        
        # Reuse the caller's task id, or ask the DB for a fresh one.
        if not id_task:
            self.id_task = self.db.start_new_task()
        else:
            self.id_task = int(id_task)
            
        # Initialise the CSV path. The "%" operators apply left to right, so
        # config['csv_filename'] is prefixed with "csv/" and then formatted
        # with id_task -- presumably a template with one placeholder.
        self.filename_csv = os.path.join(os.path.dirname(__file__), "csv/%s" % self.config['csv_filename'] % self.id_task)
        
        # Write the header row; the second argument is print_line's header flag.
        self.print_line(self.config["csv_header"], True)
        self.cj = None
        # Cookie jar created lazily by download_url on first use.
        self.cj_fool = None
        
        # Starts empty; presumably records visited URLs -- usage not visible here.
        self.urls_seen = []
        

    
    def init_metas(self, previous_metas = False):
        """Reset ``self.metas`` to the default field values for a product.

        The configured distributor is used for both ``distributor`` and
        ``manufacturer``. ``extra_field_13`` is ``"Cambio"`` when
        ``previous_metas`` is truthy and ``"Novedad"`` otherwise.
        """
        distributor = self.config['distributor']

        if previous_metas:
            change_kind = "Cambio"
        else:
            change_kind = "Novedad"

        self.metas = {
            "distributor": distributor,
            "manufacturer": distributor,
            "tax_code": "IVL",
            "extra_field_13": change_kind,
        }
        
    def download_url(self, url, level = False):
        
        if self.cj_fool is None:
            self.cj_fool = cookielib.CookieJar()
        
        cj = self.cj_fool
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

        opener.addheaders = [('User-agent', self.config['user_agent'])]

        urllib2.install_opener(opener)
        
        url = quote(url.encode("utf-8"),":/?=")
        for b in re.findall(".*?(%[0-9a-zA-Z]{2}).*?", url):
            url = url.replace(b, "")
        
        req = urllib2.Request(url)
#.........這裏部分代碼省略.........
開發者ID:fcalo,項目名稱:crawler_comics,代碼行數:103,代碼來源:crawl_3.py

示例3: CrawlerComics_2

# 需要導入模塊: from db import DB [as 別名]
# 或者: from db.DB import start_new_task [as 別名]
class CrawlerComics_2(CrawlerComics):
    def __init__(self, verbose = False, id_task = None, mode = "0"):
        """Set up configuration, logging, extraction rules, DB and output files.

        Args:
            verbose: stored as ``self.verbose``; not otherwise used here.
            id_task: id of an existing task to reuse (converted with
                ``int``); when falsy a new task id is requested from the DB
                via ``start_new_task()``.
            mode: the string ``"0"`` selects a complete crawl; any other
                value leaves ``self.mode_complete`` false (per the inline
                legend, "1" means only updates and deletes).

        Side effects: executes ``crawler_comics_2.conf``, attaches a file
        handler to the ``'CRAWLER'`` logger, calls ``DB.init_model()`` and
        (re)creates the output CSV with its header row.
        """
        self.verbose = verbose
        
        # 0 -> complete
        # 1 -> only updates and deletes
        self.mode_complete = mode == "0"
        

        self.parser = etree.HTMLParser()
        
        # Config: run the .conf file (located next to this module) with
        # Python 2's ``execfile`` so its top-level names become config keys.
        self.config = {}
        config_file = os.path.join(os.path.dirname(__file__), "crawler_comics_2.conf")
        execfile(config_file, self.config)
        
        # Logger: time-rotated file handler ("d" unit, interval 2); the task
        # id is inserted before the ".log" extension of the configured name.
        # NOTE(review): when id_task is None the inserted text is the literal
        # "None" (from "%s.log" % None) -- confirm this is intended.
        self.logger = logging.getLogger('CRAWLER')
        hdlr = logging.handlers.TimedRotatingFileHandler(os.path.join(os.path.dirname(__file__), \
          self.config['log_file'].replace(".log", "%s.log" % id_task)),"d",2)
        # Parsed as ("-%s" % id_task) if id_task else "<strftime pattern>".
        hdlr.suffix = "-%s" % id_task if id_task else "%Y-%m-%d-%H-%M"
        formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
        hdlr.setFormatter(formatter)
        self.logger.addHandler(hdlr)
        self.logger.setLevel(logging.INFO)
        self.logger.info("[__init__]")
        
            
        # Extraction rules keyed by output field name; each value is a
        # one-element list holding a relative XPath expression. The paths
        # address fixed table row positions, so they depend on the exact
        # layout of the source page.
        self.xpaths = {"name": ['.//table/tr[1]/td/div[1]/text()'],
                "title": ['.//table/tr[1]/td/div[1]/text()'],
                "id": ['.//table/tr[6]/td[2]/text()'],
                "mfgid": ['.//table/tr[6]/td[2]/text()'],
                "extra_field_7": ['.//table/tr[5]/td[2]/text()'],
                "extra_field_11": ['.//table/tr[5]/td[2]/text()'],
                "subcategory": ['.//table/tr[11]/td[2]/text()'],
                "extra_field_5": ['.//table/tr[7]/td[2]/text()'],
                "extra_field_10": ['.//table/tr[2]/td[2]//text()'],
                "extra_field_1": ['.//table/tr[12]/td[2]/text()'],
                "extra_field_4": ['.//table/tr[8]/td[2]/text()'],
                "price2": ['.//table/tr[3]/td[2]/text()'],
                "description": ['.//table/tr[13]/td//text()'],
                "extended_description": ['.//table/tr[13]/td//text()'],
                "image1": ['.//img/@src'],
                "thumbnail": ['.//img/@src'],
                "content": ['.//text()']
            }
        
        
        
        # Maps category names (several spelling/case variants) to a
        # normalised category name; the consumer is not visible here.
        self.category_alias = {"BABEL" : "COMIC EUROPEO"
            , "Babel" : "COMIC EUROPEO"
            , u"BD - Autores Européos" : "COMIC EUROPEO"
            , u"BD - Autores Europeos" : "COMIC EUROPEO"
            , u"BD - AUTORES EUROPEOS" : "COMIC EUROPEO"
            , u"Colección Trazado" : "COMIC INDEPENDIENTE"
            , u"Cómics Clásicos" : "HUMOR"
            , u"Cómics Españoles" : u"COMIC ESPAÑOL"
            , u"Cómics Star Wars" : u"COMIC USA"
            , u"Guías Ilustradas Star Wars" : u"COMIC USA"
            , u"Independientes USA" : u"COMIC USA"
            , u"Novelas Star Wars" : u"COMIC USA"
            }
        # Keys are category names, values are empty strings; presumably the
        # categories to exclude -- verify against the code that reads it.
        self.category_ban = {"MERCHANDISING LOS MUERTOS VIVIENTES":"", "Merchandising Los Muertos Vivientes" : ""}
        
        # Initialise the DB wrapper with the same logger and config file path.
        self.db = DB(self.logger, config_file)
        self.db.init_model()
        
        # Reuse the caller's task id, or ask the DB for a fresh one.
        if not id_task:
            self.id_task = self.db.start_new_task()
        else:
            self.id_task = int(id_task)
            
        # Initialise output paths. The "%" operators apply left to right, so
        # config['csv_filename'] is prefixed with "csv/" and then formatted
        # with id_task -- presumably a template with one placeholder.
        self.filename_csv = os.path.join(os.path.dirname(__file__), "csv/%s" % self.config['csv_filename'] % self.id_task)
        self.filename_xlsx = os.path.join(os.path.dirname(__file__), "csv/EXTERNAL_%s.xlsx" % self.id_task)
        self.data_external_xml = None
        
        # Write the header row; the second argument is print_line's header flag.
        self.print_line(self.config["csv_header"], True)
        # Cookie jar created lazily by download_url on first use.
        self.cj = None
        

    
    def init_metas(self, previous_metas = False):
        """Reset ``self.metas`` to the default field values for a product.

        The configured distributor is used for both ``distributor`` and
        ``manufacturer``, and the category is fixed to ``"COMICS"``.
        ``extra_field_13`` is ``"Cambio"`` when ``previous_metas`` is truthy
        and ``"Novedad"`` otherwise.
        """
        distributor = self.config['distributor']

        if previous_metas:
            change_kind = "Cambio"
        else:
            change_kind = "Novedad"

        self.metas = {
            "distributor": distributor,
            "category": "COMICS",
            "manufacturer": distributor,
            "tax_code": "IVL",
            "extra_field_13": change_kind,
        }
        
        
    def download_url(self, url, level = False):
        
        if self.cj is None:
            self.cj = cookielib.CookieJar()
        
        cj = self.cj
        
        opener = register_openers()

        # Start the multipart/form-data encoding of the file "DSC0001.jpg"
        # "image1" is the name of the parameter, which is normally set
        # via the "name" parameter of the HTML <input> tag.

#.........這裏部分代碼省略.........
開發者ID:fcalo,項目名稱:crawler_comics,代碼行數:103,代碼來源:crawl_2.py


注:本文中的db.DB.start_new_task方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。