当前位置: 首页>>代码示例>>Python>>正文


Python Product.hit_time方法代码示例

本文整理汇总了Python中models.Product.hit_time方法的典型用法代码示例。如果您正苦于以下问题:Python Product.hit_time方法的具体用法?Python Product.hit_time怎么用?Python Product.hit_time使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在models.Product的用法示例。


在下文中一共展示了Product.hit_time方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: crawl_listing

# 需要导入模块: from models import Product [as 别名]
# 或者: from models.Product import hit_time [as 别名]
    def crawl_listing(self, url, ctx='', **kwargs):
        res = requests.get(url)
        res.raise_for_status()
        tree = lxml.html.fromstring(res.content)

        category = Category.objects(key=kwargs.get('key')).first()
        if not category:
            common_failed.send(sender=ctx, url=url, reason='category %s not found in db' % kwargs.get('key'))
            return

        product_nodes = tree.cssselect('div#searchResults a')
        for product_node in product_nodes:
            price = None; listprice = None
            price = product_node.cssselect('.price-6pm')[0].text
            listprice_node = product_node.cssselect('.discount')
            listprice = ''.join(listprice_node[0].xpath('text()')) if listprice_node else None

            # eliminate products of no discountIndexError:
            if price is None or listprice is None:
                # common_failed.send(sender=ctx, url=url, \
                #     reason='listing product %s.%s cannot crawl price info -> %s / %s' % (key, title, price, listprice))
                continue

            key = product_node.get('data-product-id')
            if not key:
                common_failed.send(sender=ctx, url=url, reason='listing product has no key')
                continue

            combine_url = product_node.get('href')
            key = '%s_%s' % (key, combine_url.split('/')[-1])
            match = re.search(r'https?://.+', combine_url)
            if not match:
                combine_url = '%s%s' % (HOST, combine_url)

            brand = product_node.cssselect('.brandName')[0].text.strip()
            title = product_node.cssselect('.productName')[0].text.strip()

            is_new = False; is_updated = False
            product = Product.objects(key=key).first()
            if not product:
                is_new = True
                product = Product(key=key)
                product.updated = False
                product.event_type = False

            if title and title != product.title:
                product.title = title
                is_updated = True

            if brand and brand != product.brand:
                product.brand = brand
                is_updated = True

            if combine_url and combine_url != product.combine_url:
                product.combine_url = combine_url
                is_updated = True

            if price and price != product.price:
                product.price = price
                is_updated = True

            if listprice and listprice != product.listprice:
                product.listprice = listprice
                is_updated = True

            if category.cats and set(category.cats).difference(product.dept):
                product.dept = list(set(category.cats) | set(product.dept or []))
                is_updated = True

            if category.key not in product.category_key:
                product.category_key.append(category.key)
                is_updated = True

            if is_updated:
                product.list_update_time = datetime.utcnow()
            
            # To pick the product which fit our needs, such as a certain discount, brand, dept etc.
            selected = Picker(site='6pm').pick(product)
            if not selected:
                continue

            product.hit_time = datetime.utcnow()
            product.save()

            common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url, \
                is_new=is_new, is_updated=((not is_new) and is_updated) )


            print product.key; print product.brand; print product.title; \
            print product.price, ' / ', product.listprice; print product.combine_url; \
            print product.dept; print

        # Go to the next page to keep on crawling.
        next_page = None
        page_node = tree.cssselect('div.pagination')
        if not page_node:
            return

        last_node =page_node[0].cssselect('.last')
        if last_node:
#.........这里部分代码省略.........
开发者ID:mobishift2011,项目名称:amzn,代码行数:103,代码来源:server.py


注:本文中的models.Product.hit_time方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。