本文整理汇总了 Python 中 models.Product.dept 方法的典型用法代码示例。如果您正苦于以下问题：Python Product.dept 方法的具体用法？Python Product.dept 怎么用？Python Product.dept 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 models.Product 的用法示例。
在下文中一共展示了 Product.dept 方法的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: crawl_listing
# 需要导入模块: from models import Product [as 别名]
# 或者: from models.Product import dept [as 别名]
# Crawl one listing page and create/update a Product for every item node found on it.
#
# NOTE: the original indentation of this snippet was lost and the end of the
# function is omitted, so the comments below describe only the visible part.
#
# Parameters:
#   url    -- listing page URL; fetched with the query parameter sort=sale.
#   ctx    -- passed as `sender` to common_failed and forwarded to
#             crawl_listing_of_no_leaf.
#   kwargs -- kwargs['key'] is used to look up the Category; all kwargs are
#             forwarded to crawl_listing_of_no_leaf.
# Returns:
#   None on every visible exit path (all `return` statements are bare).
# Raises:
#   res.raise_for_status() is called outside the try block, so any exception
#   it raises propagates to the caller.
def crawl_listing(self, url, ctx='', **kwargs):
# Blog URLs are skipped outright.
if url.startswith('http://blogs.nordstrom.com'):
return
# Only ConnectionError is swallowed here; the page is silently skipped.
try:
res = requests.get(url, params={'sort': 'sale'})
except requests.exceptions.ConnectionError:
return
res.raise_for_status()
tree = lxml.html.fromstring(res.content)
# Without a 'div.fashion-results' container the page is either a brands
# index (nothing is done) or is handed to crawl_listing_of_no_leaf.
listing_node = tree.cssselect('div.fashion-results')
if listing_node:
listing_node = listing_node[0]
else:
if tree.cssselect('div#brandsIndex'):
return
self.crawl_listing_of_no_leaf(tree, ctx=ctx, **kwargs)
return
# A results container with no item nodes is also delegated.
product_nodes = listing_node.cssselect('div.row div.fashion-item')
if not product_nodes:
self.crawl_listing_of_no_leaf(tree, ctx=ctx, **kwargs)
return
# NOTE(review): presumably .first() yields None when no Category matches the
# key; category.cats / category.key below would then fail -- confirm.
category = Category.objects(key=kwargs.get('key')).first()
no_discount_num = 0 # sometimes no discount product occurs between the discount ones ordered by sale.
for product_node in product_nodes:
# The node's id attribute is used as the product key.
key = product_node.get('id')
if not key:
common_failed.send(sender=ctx, url=url, reason='listing product has no id')
continue
try:
info_node = product_node.cssselect('div.info')[0]
a_node = info_node.cssselect('a')[0]
# NOTE(review): if a_node.text can be None this raises AttributeError,
# which the `except IndexError` below does not catch -- confirm.
title = a_node.text.strip()
price = None; listprice = None
# The class attribute decides which price is which:
# 'regular' -> listprice, 'sale' -> price.
price_nodes = info_node.cssselect(".price")
for price_node in price_nodes:
if 'regular' in price_node.get('class'):
listprice = price_node.text
elif 'sale' in price_node.get('class'):
price = price_node.text
# A product missing either price counts as "no discount". The first two such
# products are skipped; on the third the whole crawl of this page stops.
# The counter is never reset in the visible code, so it is cumulative
# rather than consecutive.
if price is None or listprice is None:
no_discount_num += 1
if no_discount_num < 3:
continue
# common_failed.send(sender=ctx, url=url, \
# reason='listing product %s.%s cannot crawl price info -> %s / %s' % (key, title, price, listprice))
return
combine_url = a_node.get('href')
if not combine_url:
common_failed.send(sender=ctx, url=url, reason='listing product %s.%s cannot crawl combine_url' % (key, title))
continue
# An href that does not contain an http(s) URL is treated as relative and
# prefixed with the shop host.
match = re.search(r'https?://.+', combine_url)
if not match:
combine_url = 'http://shop.nordstrom.com%s' % (combine_url)
# Raised by the [0] lookups above when an expected child node is missing.
except IndexError:
print traceback.format_exc()
common_failed.send(sender=ctx, url=url, reason='listing product %s -> %s' % (key, traceback.format_exc()))
continue
# is_new / is_updated are set here; where they are read is not visible in
# this excerpt.
is_new = False; is_updated = False
product = Product.objects(key=key).first()
if not product:
is_new = True
product = Product(key=key)
product.updated = False
product.event_type = False
# Each field is written only when the crawled value is truthy and differs
# from the stored one.
if combine_url and combine_url != product.combine_url:
product.combine_url = combine_url
is_updated = True
if title and title != product.title:
product.title = title
is_updated = True
if price and price != product.price:
product.price = price
is_updated = True
if listprice and listprice != product.listprice:
product.listprice = listprice
is_updated = True
# Merge the category's cats into product.dept.
# NOTE(review): difference(product.dept) lacks the `or []` guard used on the
# next line; if product.dept can be None this raises TypeError -- confirm.
if category.cats and set(category.cats).difference(product.dept):
product.dept = list(set(category.cats) | set(product.dept or []))
is_updated = True
if category.key not in product.category_key:
product.category_key.append(category.key)
#.........part of the code is omitted here.........