本文整理匯總了Python中db.DB.refresh_seen方法的典型用法代碼示例。如果您正苦於以下問題:Python DB.refresh_seen方法的具體用法?Python DB.refresh_seen怎麼用?Python DB.refresh_seen使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類db.DB的用法示例。
在下文中一共展示了DB.refresh_seen方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: CrawlerComics_3
# 需要導入模塊: from db import DB [as 別名]
# 或者: from db.DB import refresh_seen [as 別名]
#.........這裏部分代碼省略.........
self.logger.info("[extract_product] %s" % url)
n_products = 0
data_url = self.download_url(url)
self.tree = etree.fromstring(data_url, self.parser)
proximamente = "novedades.php" in url
self.metas = self.db.load_data(url)
previous_metas = {}
if self.metas:
#date in pass?
now = datetime.now()
date_created = time.strptime(self.metas['extra_field_1'].strip(), "%d/%m/%Y")
d_created = datetime(date_created.tm_year, date_created.tm_mon, date_created.tm_mday)
if d_created < now and not proximamente and "PROXIMAMENTE" in self.metas['categories']:
#not modified but publish date exceeded
#to detect change
previous_metas['stock'] = self.metas['stock'] + "0"
else:
#has been seen before
content = normalize_content("".join(self.extracts(self.xpaths['content'][0])))
if crc32(content.strip().encode("utf-8")) == self.metas['crc_content']:
#no modifications
self.db.refresh_seen(url, self.id_task)
#ensure images
if self.config['check_images_without_changes']:
self.upload_images()
return 0
previous_metas['stock'] = self.metas['stock']
previous_metas['price'] = self.metas['price']
self.init_metas(previous_metas)
for meta, _xpath in self.xpaths.items():
xpath = _xpath[0]
extract = "".join([e for e in self.extracts(xpath) if isinstance(e, basestring)])
if not extract:
if self.verbose:
print "\t", meta, _xpath
continue
if self.verbose:
print meta, extract, _xpath
try:
if len(_xpath) > 1:
if meta == "image1":
self.metas[meta] = "".join(re.findall(_xpath[1],extract)[0])
print
else:
try:
示例2: CrawlerComics_2
# 需要導入模塊: from db import DB [as 別名]
# 或者: from db.DB import refresh_seen [as 別名]
#.........這裏部分代碼省略.........
self.init_metas()
try:
self.metas = dict(self.metas.items() + self.get_data_from_external_xml(_id).items())
except AttributeError:
#not finded
continue
id_product = self.metas['id']
previous_metas = self.db.load_data(id_product)
if previous_metas:
self.metas['extra_field_13'] = "Cambio"
#date in pass?
now = datetime.now()
date_created = time.strptime(self.metas['extra_field_1'].strip(), "%d/%m/%Y")
d_created = datetime(date_created.tm_year, date_created.tm_mon, date_created.tm_mday)
if d_created < now and "PROXIMAMENTE" in previous_metas['categories']:
#not modified but publish date exceeded
#to detect change
previous_metas['stock'] = self.metas['stock'] + "0"
else:
#has been seen before
content = normalize_content(self.metas['content'])
try:
if crc32(content.encode("utf-8")) == previous_metas['crc_content']:
#no modifications
self.db.refresh_seen(id_product, self.id_task)
#ensure images
if self.config['check_images_without_changes']:
self.upload_images()
n_products += 1
continue
except UnicodeDecodeError:
pass
n_products += self.process_metas(id_product, previous_metas = previous_metas)
return n_products
def extract_product(self, url):
"""extract metadata from product page"""
self.logger.info("[extract_product] %s" % url)
n_products = 0
data_url = self.download_url(url)
self.logger.info("[extract_product] descargada %s" % url)
f = open("a.html", "w")
f.write(data_url)
f.close()
self.tree = etree.fromstring(data_url, self.parser)
proximamente = "novedades.php" in url
products = self.extracts('//table[@id="llistat_comics"]/tr')