当前位置: 首页>>代码示例>>Python>>正文


Python items.ProductLoader类代码示例

本文整理汇总了 Python 中 product_spiders.items.ProductLoader 的典型用法代码示例。如果您正苦于以下问题：Python ProductLoader 类的具体用法？Python ProductLoader 怎么用？Python ProductLoader 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了 ProductLoader 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: parse_product

    def parse_product(self, response):
        """Yield one product item per priced variant found on a product page.

        A page either carries a single price (optionally overridden by a
        "special" price) or a ``<select>`` whose option texts each hold a
        variant name followed by a pound-sign price. Non-HTML responses,
        pages without an ``<h1>`` name and option texts that do not match the
        expected "name £price" layout are skipped instead of raising.
        """
        if not isinstance(response, HtmlResponse):
            return

        hxs = HtmlXPathSelector(response)

        names = hxs.select('//h1/text()').extract()
        if not names:
            # No heading means there is no product name to report.
            return
        name = names[0]

        multiple_prices = hxs.select('//select[@class="smalltextblk"]/option/text()').extract()
        single_special_price = hxs.select('//span/text()').re('\xa3(.*[0-9]+)')
        single_price = hxs.select('//td[@class="ProductPrice"]/text()').re('\xa3(.*[0-9])')

        products_data = []

        if single_price and not multiple_prices:
            # A special price, when present, takes precedence over the regular one.
            price = single_special_price[0] if single_special_price else single_price[0]
            products_data.append((name, price))
        else:
            # The first option is dropped before parsing
            # (presumably a placeholder entry - TODO confirm).
            for option_text in multiple_prices[1:]:
                match = re.match('(.*)\xa3(.*\.[0-9]+)', option_text)
                if match is None:
                    # Option text without a recognisable price: skip it rather
                    # than abort the whole page with an AttributeError.
                    continue
                variant, price = match.groups()
                products_data.append((name + ' ' + variant, price))

        for product_name, product_price in products_data:
            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('url', response.url)
            loader.add_value('name', product_name)
            loader.add_value('price', product_price)
            loader.add_value('sku', '')
            yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:34,代码来源:hyperdrugcouk.py

示例2: parse_product

    def parse_product(self, response):
        """Yield product items for a page with one price or several options.

        When no single price is found, every ``<option>`` text is parsed as
        "name ... £price" and yields its own product; option texts that do
        not match that layout are skipped. Otherwise the page ``<h1>`` and
        the single price form one product. Non-HTML responses are ignored.
        """
        if not isinstance(response, HtmlResponse):
            return

        hxs = HtmlXPathSelector(response)

        names = hxs.select('//h1/text()').extract()
        option_texts = hxs.select('//option/text()').extract()
        single_price = hxs.select('//span/b/text()').re('\xa3(.*)')

        products_data = []
        if not single_price:
            for option_text in option_texts:
                # Strip layout whitespace before matching.
                option_text = re.sub('[\t\r\n]', '', option_text).strip()
                match = re.match('(.*[0-9,a-z,A-Z\)]).*\xa3(.*[0-9])', option_text)
                if match is None:
                    # Not a "name ... £price" option; skip it instead of
                    # failing on ``None.groups()``.
                    continue
                products_data.append(match.groups())
        elif names:
            # The page heading is only needed for the single-price layout, so
            # a missing heading no longer breaks the multi-option layout.
            products_data.append((names[0], single_price[0]))

        for product_name, product_price in products_data:
            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('url', response.url)
            loader.add_value('name', product_name)
            loader.add_value('price', product_price)
            loader.add_value('sku', '')
            yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:34,代码来源:wormerscouk.py

示例3: parse_item

    def parse_item(self, response):
        url = response.url

        hxs = HtmlXPathSelector(response)
        name = hxs.select("//div[@class='product-shop']/div[@class='product-name']/h2/text()").extract()
        if not name:
            logging.error("NO NAME! %s" % url)
            return
        name = name[0]

        # adding product
        price = hxs.select("//div[@class='product-shop']/div[@class='price-box']//span[@class='price']/text()").extract()
        if not price:
            logging.error("NO PRICE! %s" % url)
            return
        price = price[0].replace(".", "").replace(",", ".")
#        price_delivery = hxs.select("//div[@class='product-shop']//table[@id='product-attribute-specs-table']/tr/td[(preceding::th[text()='Spese Spedizione'])]/text()").extract()
#        if not price_delivery:
#            logging.error("NO PRICE DELIVERY! %s" % url)
#            return
#        price_delivery = price_delivery[0]
#        price = Decimal(price) + Decimal(price_delivery)

        l = ProductLoader(item=Product(), response=response)
        l.add_value('identifier', str(name))
        l.add_value('name', name)
        l.add_value('url', url)
        l.add_value('price', price)
        yield l.load_item()
开发者ID:0--key,项目名称:lib,代码行数:29,代码来源:caldaiemurali_it.py

示例4: parse_table_options_type2_single_product_page

    def parse_table_options_type2_single_product_page(self, response):
        """Yield one product per row of the options table on a product page.

        The page title is combined with the first cell of each row to form
        the product name; the third cell supplies the price. Rows whose price
        matches ``prices_range_regex`` trigger a new request for the same URL
        handled by ``parse_product_list`` instead of producing an item. Rows
        lacking a name or a price are logged and skipped.
        """
        selector = HtmlXPathSelector(response)
        titles = selector.select("//div[@id='mainContent']/center/table/tr[1]/td[1]/p[2][not(@class)][*[local-name()='strong']]/strong[1]//text()").extract()
        if not titles:
            logging.error("ERROR!! NO NAME!! %s" % (response.url, ))
            return
        base_name = titles[0]

        rows = selector.select("//div[@id='mainContent']/center/table//table[@class='product_body']/tr[position()>1]")
        for row in rows:
            option_names = row.select("td[1]//text()").extract()
            if not option_names:
                logging.error("ERROR!! NO NAME!! %s" % (response.url, ))
                continue
            option_name = option_names[0]

            page_url = response.url

            price_cells = row.select('td[3]//text()').extract()
            if not price_cells:
                logging.error("ERROR!! NO PRICE!! %s %s" % (base_name, response.url))
                continue
            price_text = price_cells[0]

            if re.search(prices_range_regex, price_text):
                # A price range cannot be used as a price; hand the page over
                # to the product-list parser instead.
                yield Request(page_url, callback=self.parse_product_list)
                continue

            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('url', page_url)
            loader.add_value('name', "%s %s" % (base_name, option_name))
            loader.add_value('price', price_text)
            loader.add_value('sku', '')
            yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:34,代码来源:1stlinecom.py

示例5: parse_product

    def parse_product(self, response):
        """Yield a single product item read from microdata on a product page.

        The name comes from ``h1[itemprop="name"]`` and the price from the
        last ``span[itemprop="price"]``. The SKU is the second-to-last
        ``/``-separated segment of the response URL. When the name or price
        is missing, a message is printed and nothing is yielded.
        """
        hxs = HtmlXPathSelector(response)

        url = response.url

        name = hxs.select('//h1[@itemprop="name"]/text()').extract()
        if not name:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and is also valid Python 3 syntax.
            print("ERROR!! NO NAME!! %s" % url)
            return
        name = name[0]

        price = hxs.select('//span[@itemprop="price"]/text()').extract()
        if not price:
            print("ERROR!! NO PRICE!! %s" % url)
            return
        # Several price spans may be present; the last one is used.
        price = price[-1]

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('url', url)
        loader.add_value('name', name)
        loader.add_value('price', price)
        loader.add_value('sku', url.split('/')[-2])

        yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:28,代码来源:fireplaceworldcouk.py

示例6: parse_product

    def parse_product(self, response):
        """Yield one product item for a product detail page.

        The name is taken from the product-summary heading, falling back to
        any ``<h1>`` on the page. The price is the concatenation of all text
        fragments of the current-price element. Missing name or price is
        logged and ends processing without yielding.
        """
        selector = HtmlXPathSelector(response)
        page_url = response.url

        # Preferred location first; generic heading only if that is empty.
        names = (
            selector.select("//div[@class='primary-content']//div[@id='product-summary']/h1/text()").extract()
            or selector.select('//h1/text()').extract()
        )
        if not names:
            logging.error("ERROR! NO NAME! %s" % page_url)
            return
        product_name = names[0]

        price_parts = selector.select("//div[@class='secondary-content']//ul[@class='pricing']/li[@class='current-price']/span/text()").extract()
        if not price_parts:
            logging.error("ERROR! NO PRICE! %s %s" % (page_url, product_name))
            return
        product_price = "".join(price_parts)

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('identifier', product_name)
        loader.add_value('name', product_name)
        loader.add_value('url', page_url)
        loader.add_value('price', product_price)
        yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:26,代码来源:tescocom_sagemcom.py

示例7: parse

    def parse(self, response):
        """Follow "next page" links and yield a product per listing anchor.

        Every anchor inside the ``mainProducts`` container is inspected; an
        item is yielded only for anchors that contain a product name. The
        price is read from the "orange" span, falling back to the "gray" one.
        """
        base_url = get_base_url(response)
        hxs = HtmlXPathSelector(response)

        next_pages = hxs.select("//div[@class='navArea']/div[@class='navAreaPagging fr']/span[@class='paggingBtnNext']/a/@href").extract()
        for href in next_pages:
            yield Request(urljoin_rfc(base_url, href), callback=self.parse)

        content = hxs.select("//div[@class='mainProducts']")
        products = content.select(".//a")

        for product_ in products:
            name = product_.select(".//ul/li/span[@class='productName']/text()").extract()
            # NOTE(review): unlike the pagination links, these hrefs are not
            # joined with base_url - confirm that the site emits absolute URLs.
            url = product_.select(".//@href").extract()
            price = product_.select(".//ul//li/ul/li[1]/span[@class='orange']/text()").re(r'\xa3(.*)')
            if not price:
                price = product_.select(".//ul/li/ul/li[1]/span[@class='gray']/text()").re(r'\xa3(.*)')
            if name:
                l = ProductLoader(item=Product(), response=response)
                l.add_value('name', name)
                l.add_value('url', url)
                l.add_value('price', price)
                # The item is built once; the extra discarded load_item()
                # call that preceded this yield was redundant.
                yield l.load_item()

            
            
        """content = hxs.select("//div[@class='mainProducts']")
开发者ID:0--key,项目名称:lib,代码行数:29,代码来源:smartbuyglasses.py

示例8: parse_product

    def parse_product(self, response):
        """Yield one product item for a product detail page.

        The name is assembled from all text fragments of the page title with
        runs of whitespace collapsed to single spaces; the price is the first
        "currentPrice" text inside the product detail section. Missing name
        or price is logged and nothing is yielded.
        """
        selector = HtmlXPathSelector(response)
        page_url = response.url

        title_parts = selector.select("//h1[@class='pageTitle']/span/text()").extract()
        if not title_parts:
            logging.error("ERROR! NO NAME! %s" % page_url)
            return
        # Join the fragments, then squeeze any whitespace runs.
        product_name = re.sub("[\s]+", " ", " ".join(title_parts))

        prices = selector.select("//div[contains(@class, 'productDetail')]//span[contains(@class, 'currentPrice')]/text()").extract()
        if not prices:
            logging.error("ERROR! NO PRICE! %s %s" % (page_url, product_name))
            return
        product_price = prices[0]

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('identifier', product_name)
        loader.add_value('name', product_name)
        loader.add_value('url', page_url)
        loader.add_value('price', product_price)
        yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:25,代码来源:curryscouk_sagemcom.py

示例9: parse_search

    def parse_search(self, response):
        """Follow pagination links and yield a product per search result.

        Pagination hrefs equal to ``'#'`` are ignored. Results without a
        title are skipped silently; results without a URL or a price are
        logged and skipped.
        """
        selector = HtmlXPathSelector(response)

        # Pagination: re-enter this callback for every real link.
        for href in selector.select("//ul[@class='pagination']//a/@href").extract():
            if href == '#':
                continue
            yield Request(href, callback=self.parse_search)

        # Products listed on the current page.
        results = selector.select("//article[contains(@class, 'product')]/div[contains(@class, 'desc')]")
        for result in results:
            titles = result.select(".//div/header[@class='productTitle']/a/text()").extract()
            if not titles:
                continue
            product_name = re.sub("[\s]+", " ", titles[0].strip())

            links = result.select(".//div/header[@class='productTitle']/a/@href").extract()
            if not links:
                logging.error("ERROR! NO URL! URL: %s. NAME: %s" % (response.url, product_name))
                continue
            product_url = links[0]

            prices = result.select(".//div//span[@class='currentPrice']/ins/text()").extract()
            if not prices:
                logging.error("ERROR! NO PRICE! URL: %s. NAME: %s" % (response.url, product_name))
                continue
            product_price = prices[0].strip()

            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('identifier', product_name)
            loader.add_value('name', product_name)
            loader.add_value('url', product_url)
            loader.add_value('price', product_price)
            yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:35,代码来源:curryscouk_sagemcom.py

示例10: parse_options

    def parse_options(self, response):
        """Yield a product item for a detail page, resolving its SKU from a CSV.

        Name and price are each read from a primary XPath with a fallback to
        a second one. The text of the "Mfg Part #:" entry in the specs
        section, when present, is compared with index 3 of each row of
        ``skus.csv`` (located in ``HERE``); the first matching row supplies
        the SKU from index 0. Without a match the SKU is an empty string.
        Nothing is yielded unless both a name and a price were found.
        """
        hxs = HtmlXPathSelector(response)

        name = hxs.select('//div[@id="skuinfo"]/h1[@itemprop="name"]/text()').extract()
        if not name:
            name = hxs.select('//div[@class="details"]/h1/text()').extract()

        price = "".join(hxs.select('//div[@class="club"]/span[@itemprop="Price"]/text()').re(r'([0-9\,\. ]+)')).strip()
        if not price:
            price = "".join(hxs.select('//div[@class="details"]/div[@class="special"]/text()').re(r'([0-9\,\. ]+)')).strip()

        model_no = None
        for spec in hxs.select('//div[@id="specs"]/div/p[@class="specs"]'):
            labels = spec.select('./span/text()').extract()
            # Entries without a label span are skipped explicitly rather than
            # by swallowing every exception with a bare ``except``.
            if labels and labels[0] == 'Mfg Part #:':
                model_no = "".join(spec.select("./text()").extract()).strip()

        if not (name and price):
            return

        sku_ = ''
        if model_no:
            # ``with`` guarantees the CSV handle is closed, including on the
            # early ``break``.
            with open(os.path.join(HERE, 'skus.csv')) as skus_file:
                for row in UnicodeReader(skus_file):
                    if row[3] == model_no:
                        sku_ = row[0]
                        break

        product_loader = ProductLoader(item=Product(), response=response)
        product_loader.add_value('name', name[0])
        product_loader.add_value('sku', sku_)
        product_loader.add_value('price', price)
        product_loader.add_value('url', response.url)
        yield product_loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:34,代码来源:campaignworld_do_spider.py

示例11: parse_item

    def parse_item(self, response):
        """Yield a product item, or follow product links from a result grid.

        When the page has a product-name row it is treated as a product
        detail page: the custom price is preferred over the regular price and
        one item is yielded, using the name as identifier. Otherwise the page
        is treated as a search result and each found product link is
        requested with this same callback.
        """
        base_url = get_base_url(response)
        hxs = HtmlXPathSelector(response)

        name = hxs.select("//tr[@id='ProductDetail11_trProductName']/td/text()").extract()
        if name:
            name = name[0].strip()
            url = response.url
            price = hxs.select("//tr[@id='ProductDetail11_trCustomPrice']/td/font/b/text()").extract()
            if not price:
                price = hxs.select("//tr[@id='ProductDetail11_trPrice']/td/text()").extract()

            l = ProductLoader(item=Product(), response=response)
            # Pass the extracted text as-is: str() on a unicode name raises
            # UnicodeEncodeError for non-ASCII characters on Python 2.
            l.add_value('identifier', name)
            l.add_value('name', name)
            l.add_value('url', url)
            l.add_value('price', price)
            yield l.load_item()
        else:
            # may be several products
            products = hxs.select("//table[@id='SearchTemplate13_DataGrid1']// \
                                     table[@id='SearchTemplate13_DataGrid1__ctl3_ProductInfoTable']")
            for product in products:
                # NOTE(review): this XPath starts with "//", so it is not
                # relative to ``product`` - confirm that is intended.
                url = product.select("//tr[@id='SearchTemplate13_DataGrid1__ctl3_ProductNameRow']/td/a/@href").extract()
                if url:
                    yield Request(urljoin_rfc(base_url, url[0]), callback=self.parse_item)
开发者ID:0--key,项目名称:lib,代码行数:26,代码来源:bestmaterials.py

示例12: parse_product

    def parse_product(self, response):
        """Yield product items, or follow-up requests, for each listed item.

        For every item box on the page: when no price block is present, a
        request for the item's "more info" URL is yielded (no explicit
        callback is given); otherwise a product item is built from the URL,
        name and price block. Items lacking a "more info" link or a name are
        skipped. Non-HTML responses are ignored.
        """
        if not isinstance(response, HtmlResponse):
            return

        hxs = HtmlXPathSelector(response)

        products = hxs.select('//div[@class="content-box"]/div[contains(@class,"item")]')
        for item in products:
            links = item.select('.//div[@class="moreinfo"]/a/@href').extract()
            if not links:
                # Without a link there is nothing to follow or report; skip
                # this item instead of aborting the page with an IndexError.
                continue
            url = urljoin_rfc(self.URLBASE, links[0])

            price = item.select('.//div[@class="item-price"]').extract()
            if not price:
                yield Request(url)
                continue

            names = item.select('.//div[@class="item-name"]/a/text()').extract()
            if not names:
                continue

            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('url', url)
            loader.add_value('name', names[0])
            # The extracted price-block markup is passed to the loader as-is.
            loader.add_value('price', price)
            loader.add_value('sku', '')
            yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:28,代码来源:hadrianequinecouk.py

示例13: parse_search

    def parse_search(self, response):
        """Follow pagination links and yield a product per search result.

        Pagination hrefs are resolved against the page base URL. Results
        without a name are skipped silently; results without a URL or a price
        are logged and skipped.
        """
        selector = HtmlXPathSelector(response)
        base_url = get_base_url(response)

        # Pagination: re-enter this callback for every page link.
        for href in selector.select("//div[@class='pagination top']//a/@href").extract():
            yield Request(urljoin_rfc(base_url, href), callback=self.parse_search)

        # Products listed on the current page.
        for result in selector.select("//div[@class='search-result']/form/ul/li"):
            names = result.select("div[@class='prd-infos']/a/p[@class='prd-name']/strong/text()").extract()
            if not names:
                continue
            product_name = names[0]

            links = result.select("div[@class='prd-infos']/a/@href").extract()
            if not links:
                logging.error("ERROR! NO URL! URL: %s. NAME: %s" % (response.url, product_name))
                continue
            product_url = links[0]

            prices = result.select("div[@class='prd-actions']/p[@class='prd-amount']/strong/text()").extract()
            if not prices:
                logging.error("ERROR! NO PRICE! URL: %s. NAME: %s" % (response.url, product_name))
                continue
            product_price = prices[0]

            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('identifier', product_name)
            loader.add_value('name', product_name)
            loader.add_value('url', product_url)
            loader.add_value('price', product_price)
            yield loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:33,代码来源:dixonscouk.py

示例14: parse_products

    def parse_products(self, response):
        """Yield a product item for every complete cell of the product table.

        Each cell must provide a model link (text and href) and a price
        following a pound sign. A cell missing any of these is logged and
        skipped, and the remaining cells are still processed.
        """
        hxs = HtmlXPathSelector(response)
        base_url = get_base_url(response)

        items = hxs.select("//table[@id='ProductDataList']/tr/td[div[contains(@id, 'ModelLinkCell')]]")
        for item in items:
            name = item.select(".//a[contains(@id, 'ModelLink')]//text()").extract()
            if not name:
                logging.error("ERROR! NO NAME! %s" % response.url)
                # Skip only this cell; a ``return`` here would silently drop
                # every product that follows it on the page.
                continue
            name = "".join(name)

            url = item.select(".//a[contains(@id, 'ModelLink')]/@href").extract()
            if not url:
                logging.error("ERROR! NO URL! %s %s" % (name, response.url))
                continue
            url = urljoin_rfc(base_url, url[0])

            price = item.select("div[contains(@id, 'ModelPrice')]//td[@class='Label11']/text()").re(u'\xa3(.*)')
            if not price:
                logging.error("ERROR! NO PRICE! %s %s" % (url, name))
                continue
            price = price[0]

            l = ProductLoader(item=Product(), response=response)
            l.add_value('identifier', name)
            l.add_value('name', name)
            l.add_value('url', url)
            l.add_value('price', price)
            yield l.load_item()
开发者ID:0--key,项目名称:lib,代码行数:30,代码来源:campingworldcouk.py

示例15: parse_product

    def parse_product(self, response):
        """Yield products whose on-page manufacturer id matches the request's.

        Product containers are the grandparents of every ``ProductDetail``
        link. For each container the name is the second ``<font>`` element's
        text, and the price is the ``<nobr>`` text containing ``$``. The item
        is yielded only when the text of the first ``<nobr>`` equals
        ``response.meta['mfrgid']`` (both stripped and lower-cased).
        Containers lacking the expected elements are skipped. Non-HTML
        responses are ignored.
        """
        if not isinstance(response, HtmlResponse):
            return

        soup = BeautifulSoup(response.body)

        links = soup.findAll('a', href=re.compile('ProductDetail'))
        # A set removes containers reached through more than one link.
        products = {link.parent.parent for link in links}

        for product in products:
            fonts = product.findAll('font')
            first_nobr = product.find('nobr')
            if len(fonts) < 2 or first_nobr is None:
                # Not the expected product layout; skip it instead of failing
                # with IndexError / AttributeError.
                continue

            name = fonts[1].text
            price = product.find('nobr', text=re.compile('\$'))
            url = product.find('a', href=re.compile('ProductDetail'))
            if url:
                url = urljoin_rfc(get_base_url(response), url['href'])
            else:
                url = response.url

            product_loader = ProductLoader(item=Product(), response=response)
            product_loader.add_value('name', name)
            product_loader.add_value('price', price)
            # The URL is added once; it was previously added twice.
            product_loader.add_value('url', url)
            product_loader.add_value('sku', response.meta['sku'])
            #product_loader.add_value('identifier', response.meta['sku'])

            site_mfrgid = first_nobr.text
            if site_mfrgid:
                site_mfrgid = site_mfrgid.strip().lower()
                mfrgid = response.meta['mfrgid'].strip().lower()
                if site_mfrgid == mfrgid:
                    yield product_loader.load_item()
开发者ID:0--key,项目名称:lib,代码行数:31,代码来源:rvpartscenter.py


注：本文中的 product_spiders.items.ProductLoader 类示例由纯净天空整理自 Github/MSDocs 等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。