本文整理匯總了Python中db.DB.init_model方法的典型用法代碼示例。如果您正苦於以下問題:Python DB.init_model方法的具體用法?Python DB.init_model怎麼用?Python DB.init_model使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類db.DB的用法示例。
在下文中一共展示了DB.init_model方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: CrawlerComics_3
# 需要導入模塊: from db import DB [as 別名]
# 或者: from db.DB import init_model [as 別名]
class CrawlerComics_3(CrawlerComics):
def __init__(self, verbose = False, id_task = None, mode = "0"):
    """Prepare config, logging, XPath rules, database access and CSV output.

    verbose -- stored unchanged on the instance as ``self.verbose``.
    id_task -- identifier of an existing task; when it is falsy a new task
               id is requested from ``self.db.start_new_task()``.
    mode    -- "0" selects a complete run; any other value (documented
               as "1") means only updates and deletes.
    """
    base_dir = os.path.dirname(__file__)

    self.verbose = verbose
    # mode "0" -> complete, mode "1" -> only updates and deletes
    self.mode_complete = (mode == "0")
    self.parser = etree.HTMLParser()

    # Configuration: the .conf file is executed with self.config as its
    # namespace, so every name it defines becomes a key of self.config.
    self.config = {}
    config_file = os.path.join(base_dir, "crawler_comics_3.conf")
    execfile(config_file, self.config)

    # Logging: rotating file handler attached to the shared 'CRAWLER' logger.
    # Note: when id_task is None the text "None" is inserted before ".log".
    self.logger = logging.getLogger('CRAWLER')
    log_name = self.config['log_file'].replace(".log", "%s.log" % id_task)
    handler = logging.handlers.TimedRotatingFileHandler(
        os.path.join(base_dir, log_name), when="d", interval=2)
    if id_task:
        handler.suffix = "-%s" % id_task
    else:
        handler.suffix = "%Y-%m-%d-%H-%M"
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s: %(message)s'))
    self.logger.addHandler(handler)
    self.logger.setLevel(logging.INFO)
    self.logger.info("[__init__]")

    # XPath expressions shared by several fields.  Each entry of
    # self.xpaths is its own list: [xpath] or [xpath, regex].
    title_text = '//*[contains(@class, "titprod")]/text()'
    info_text = '//*[contains(@class, "infoprod")]//text()'
    body_text = '//*[contains(@class, "txtprod")]//text()'
    image_src = '//*[contains(@class, "imgbox")]/img/@src'
    self.xpaths = {
        "name": [title_text],
        "title": [title_text],
        "labels_date": [info_text, "(SALIDA|PUBLICACI[^:]*N):(.*)"],
        "description": [body_text],
        "extended_description": [body_text],
        "price2": ['//*[contains(@class, "precio")]/text()'],
        "thumbnail": [image_src],
        "image1": [image_src, "(.*)_156(\..*)"],
        "extra_field_4": [info_text, "FORMATO:(.*)"],
        "extra_field_5": [info_text, "FORMATO:(.*)"],
        "extra_field_10_a": [info_text, "GUI[^:]*N:(.*)"],
        "extra_field_10_b": [info_text, "DIBUJO:(.*)"],
        "labels_categories": ['//*[contains(@class, "titulo")]//text()'],
        "content": ['//*[contains(@class, "sec-prod")]//text()'],
    }

    # No category aliases or bans are active for this crawler.  The
    # disabled alias table below is kept exactly as it was found:
    #~ self.category_alias = {"BABEL" : "COMIC EUROPEO"
    #~ , u"BD - Autores Européos" : "COMIC EUROPEO"
    #~ , u"BD - AUTORES EUROPEOS" : "COMIC EUROPEO"
    #~ , u"Colección Trazado" : "COMIC INDEPENDIENTE"
    #~ , u"Cómics Clásicos" : "HUMOR"
    #~ , u"Cómics Españoles" : u"COMIC ESPAÑOL"
    #~ , u"Cómics Star Wars" : u"COMIC USA"
    #~ , u"Guías Ilustradas Star Wars" : u"COMIC USA"
    #~ , u"Independientes USA" : u"COMIC USA"
    #~ , u"Novelas Star Wars" : u"COMIC USA"
    #~ }
    self.category_alias = {}
    self.category_ban = {}

    # Database: reuse the caller's task id or ask the DB for a new one.
    self.db = DB(self.logger, config_file)
    self.db.init_model()
    if id_task:
        self.id_task = int(id_task)
    else:
        self.id_task = self.db.start_new_task()

    # CSV output: the configured file name is itself a format string that
    # receives the task id; the configured header is handed to print_line.
    csv_relative = "csv/%s" % self.config['csv_filename']
    self.filename_csv = os.path.join(base_dir, csv_relative % self.id_task)
    self.print_line(self.config["csv_header"], True)

    # Cookie jars start unset; download state starts empty.
    self.cj = None
    self.cj_fool = None
    self.urls_seen = []
def init_metas(self, previous_metas = False):
    """Reset ``self.metas`` to the default metadata for one product.

    previous_metas -- truthy marks the product as a change ("Cambio"),
                      falsy marks it as new ("Novedad").
    """
    distributor = self.config['distributor']
    if previous_metas:
        status = "Cambio"
    else:
        status = "Novedad"
    # The configured distributor is used for the manufacturer field too.
    self.metas = {
        "distributor": distributor,
        "manufacturer": distributor,
        "tax_code": "IVL",
        "extra_field_13": status,
    }
def download_url(self, url, level = False):
if self.cj_fool is None:
self.cj_fool = cookielib.CookieJar()
cj = self.cj_fool
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', self.config['user_agent'])]
urllib2.install_opener(opener)
url = quote(url.encode("utf-8"),":/?=")
for b in re.findall(".*?(%[0-9a-zA-Z]{2}).*?", url):
url = url.replace(b, "")
req = urllib2.Request(url)
#.........這裏部分代碼省略.........
示例2: CrawlerComics_2
# 需要導入模塊: from db import DB [as 別名]
# 或者: from db.DB import init_model [as 別名]
class CrawlerComics_2(CrawlerComics):
def __init__(self, verbose = False, id_task = None, mode = "0"):
    """Prepare config, logging, XPath rules, database access and output files.

    verbose -- stored unchanged on the instance as ``self.verbose``.
    id_task -- identifier of an existing task; when it is falsy a new task
               id is requested from ``self.db.start_new_task()``.
    mode    -- "0" selects a complete run; any other value (documented
               as "1") means only updates and deletes.
    """
    base_dir = os.path.dirname(__file__)

    self.verbose = verbose
    # mode "0" -> complete, mode "1" -> only updates and deletes
    self.mode_complete = (mode == "0")
    self.parser = etree.HTMLParser()

    # Configuration: the .conf file is executed with self.config as its
    # namespace, so every name it defines becomes a key of self.config.
    self.config = {}
    config_file = os.path.join(base_dir, "crawler_comics_2.conf")
    execfile(config_file, self.config)

    # Logging: rotating file handler attached to the shared 'CRAWLER' logger.
    # Note: when id_task is None the text "None" is inserted before ".log".
    self.logger = logging.getLogger('CRAWLER')
    log_name = self.config['log_file'].replace(".log", "%s.log" % id_task)
    handler = logging.handlers.TimedRotatingFileHandler(
        os.path.join(base_dir, log_name), when="d", interval=2)
    if id_task:
        handler.suffix = "-%s" % id_task
    else:
        handler.suffix = "%Y-%m-%d-%H-%M"
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s: %(message)s'))
    self.logger.addHandler(handler)
    self.logger.setLevel(logging.INFO)
    self.logger.info("[__init__]")

    # XPath expressions shared by two fields each.  Every entry of
    # self.xpaths is its own single-element list.
    heading = './/table/tr[1]/td/div[1]/text()'
    row_5 = './/table/tr[5]/td[2]/text()'
    row_6 = './/table/tr[6]/td[2]/text()'
    row_13 = './/table/tr[13]/td//text()'
    any_img = './/img/@src'
    self.xpaths = {
        "name": [heading],
        "title": [heading],
        "id": [row_6],
        "mfgid": [row_6],
        "extra_field_7": [row_5],
        "extra_field_11": [row_5],
        "subcategory": ['.//table/tr[11]/td[2]/text()'],
        "extra_field_5": ['.//table/tr[7]/td[2]/text()'],
        "extra_field_10": ['.//table/tr[2]/td[2]//text()'],
        "extra_field_1": ['.//table/tr[12]/td[2]/text()'],
        "extra_field_4": ['.//table/tr[8]/td[2]/text()'],
        "price2": ['.//table/tr[3]/td[2]/text()'],
        "description": [row_13],
        "extended_description": [row_13],
        "image1": [any_img],
        "thumbnail": [any_img],
        "content": ['.//text()'],
    }

    # Source category label -> category name to use in its place.
    self.category_alias = {
        "BABEL": "COMIC EUROPEO",
        "Babel": "COMIC EUROPEO",
        u"BD - Autores Européos": "COMIC EUROPEO",
        u"BD - Autores Europeos": "COMIC EUROPEO",
        u"BD - AUTORES EUROPEOS": "COMIC EUROPEO",
        u"Colección Trazado": "COMIC INDEPENDIENTE",
        u"Cómics Clásicos": "HUMOR",
        u"Cómics Españoles": u"COMIC ESPAÑOL",
        u"Cómics Star Wars": u"COMIC USA",
        u"Guías Ilustradas Star Wars": u"COMIC USA",
        u"Independientes USA": u"COMIC USA",
        u"Novelas Star Wars": u"COMIC USA",
    }
    # Banned category labels; only the keys carry information here.
    self.category_ban = {
        "MERCHANDISING LOS MUERTOS VIVIENTES": "",
        "Merchandising Los Muertos Vivientes": "",
    }

    # Database: reuse the caller's task id or ask the DB for a new one.
    self.db = DB(self.logger, config_file)
    self.db.init_model()
    if id_task:
        self.id_task = int(id_task)
    else:
        self.id_task = self.db.start_new_task()

    # Output files: the configured CSV name is itself a format string that
    # receives the task id; the XLSX name is derived from the task id.
    csv_relative = "csv/%s" % self.config['csv_filename']
    self.filename_csv = os.path.join(base_dir, csv_relative % self.id_task)
    self.filename_xlsx = os.path.join(
        base_dir, "csv/EXTERNAL_%s.xlsx" % self.id_task)
    self.data_external_xml = None
    self.print_line(self.config["csv_header"], True)

    # Cookie jar starts unset.
    self.cj = None
def init_metas(self, previous_metas = False):
    """Reset ``self.metas`` to the default metadata for one product.

    previous_metas -- truthy marks the product as a change ("Cambio"),
                      falsy marks it as new ("Novedad").
    """
    distributor = self.config['distributor']
    if previous_metas:
        status = "Cambio"
    else:
        status = "Novedad"
    # The configured distributor is used for the manufacturer field too;
    # the category is fixed to "COMICS".
    self.metas = {
        "distributor": distributor,
        "category": "COMICS",
        "manufacturer": distributor,
        "tax_code": "IVL",
        "extra_field_13": status,
    }
def download_url(self, url, level = False):
if self.cj is None:
self.cj = cookielib.CookieJar()
cj = self.cj
opener = register_openers()
# Start the multipart/form-data encoding of the file "DSC0001.jpg"
# "image1" is the name of the parameter, which is normally set
# via the "name" parameter of the HTML <input> tag.
#.........這裏部分代碼省略.........