本文整理汇总了Python中models.Product.hit_time方法的典型用法代码示例。如果您正苦于以下问题：Python Product.hit_time方法的具体用法？Python Product.hit_time怎么用？Python Product.hit_time使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类models.Product的用法示例。
在下文中一共展示了Product.hit_time方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: crawl_listing
# 需要导入模块: from models import Product [as 别名]
# 或者: from models.Product import hit_time [as 别名]
def crawl_listing(self, url, ctx='', **kwargs):
res = requests.get(url)
res.raise_for_status()
tree = lxml.html.fromstring(res.content)
category = Category.objects(key=kwargs.get('key')).first()
if not category:
common_failed.send(sender=ctx, url=url, reason='category %s not found in db' % kwargs.get('key'))
return
product_nodes = tree.cssselect('div#searchResults a')
for product_node in product_nodes:
price = None; listprice = None
price = product_node.cssselect('.price-6pm')[0].text
listprice_node = product_node.cssselect('.discount')
listprice = ''.join(listprice_node[0].xpath('text()')) if listprice_node else None
# Skip products that have no discount (i.e. no price or no list price).
if price is None or listprice is None:
# common_failed.send(sender=ctx, url=url, \
# reason='listing product %s.%s cannot crawl price info -> %s / %s' % (key, title, price, listprice))
continue
key = product_node.get('data-product-id')
if not key:
common_failed.send(sender=ctx, url=url, reason='listing product has no key')
continue
combine_url = product_node.get('href')
key = '%s_%s' % (key, combine_url.split('/')[-1])
match = re.search(r'https?://.+', combine_url)
if not match:
combine_url = '%s%s' % (HOST, combine_url)
brand = product_node.cssselect('.brandName')[0].text.strip()
title = product_node.cssselect('.productName')[0].text.strip()
is_new = False; is_updated = False
product = Product.objects(key=key).first()
if not product:
is_new = True
product = Product(key=key)
product.updated = False
product.event_type = False
if title and title != product.title:
product.title = title
is_updated = True
if brand and brand != product.brand:
product.brand = brand
is_updated = True
if combine_url and combine_url != product.combine_url:
product.combine_url = combine_url
is_updated = True
if price and price != product.price:
product.price = price
is_updated = True
if listprice and listprice != product.listprice:
product.listprice = listprice
is_updated = True
if category.cats and set(category.cats).difference(product.dept):
product.dept = list(set(category.cats) | set(product.dept or []))
is_updated = True
if category.key not in product.category_key:
product.category_key.append(category.key)
is_updated = True
if is_updated:
product.list_update_time = datetime.utcnow()
# To pick the product which fit our needs, such as a certain discount, brand, dept etc.
selected = Picker(site='6pm').pick(product)
if not selected:
continue
product.hit_time = datetime.utcnow()
product.save()
common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url, \
is_new=is_new, is_updated=((not is_new) and is_updated) )
print product.key; print product.brand; print product.title; \
print product.price, ' / ', product.listprice; print product.combine_url; \
print product.dept; print
# Go to the next page to keep on crawling.
next_page = None
page_node = tree.cssselect('div.pagination')
if not page_node:
return
last_node =page_node[0].cssselect('.last')
if last_node:
#.........这里部分代码省略.........