本文整理汇总了Python中product_spiders.items.ProductLoader类的典型用法代码示例。如果您正苦于以下问题：Python ProductLoader类的具体用法？Python ProductLoader怎么用？Python ProductLoader使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ProductLoader类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_product
def parse_product(self, response):
    """Yield one loaded ``Product`` per priced variant found on the page.

    Non-HTML responses and pages without any ``<h1>`` text yield nothing.

    * If a single price is present and the ``smalltextblk`` select has no
      options, one product is emitted; the "special" price matched in a
      ``<span>`` takes precedence over the regular one.
    * Otherwise every option after the first is parsed as
      ``<variant name>£<price>`` and emitted as ``"<h1> <variant name>"``.
      Options that do not match that shape are skipped instead of raising.
    """
    if not isinstance(response, HtmlResponse):
        return

    hxs = HtmlXPathSelector(response)
    titles = hxs.select('//h1/text()').extract()
    if not titles:
        # No title means there is nothing to name the product with.
        return
    name = titles[0]

    multiple_prices = hxs.select('//select[@class="smalltextblk"]/option/text()').extract()
    single_special_price = hxs.select('//span/text()').re(r'\xa3(.*[0-9]+)')
    single_price = hxs.select('//td[@class="ProductPrice"]/text()').re(r'\xa3(.*[0-9])')

    products_data = []
    if single_price and not multiple_prices:
        price = single_special_price[0] if single_special_price else single_price[0]
        products_data.append((name, price))
    else:
        option_re = re.compile(r'(.*)\xa3(.*\.[0-9]+)')
        # The first <option> is skipped (presumably a placeholder entry).
        for option_text in multiple_prices[1:]:
            match = option_re.match(option_text)
            if match is None:
                # Option without a "£<price>" part: ignore it rather than
                # failing on ``None.groups()``.
                continue
            variant_name, variant_price = match.groups()
            products_data.append((name + ' ' + variant_name, variant_price))

    for product_name, product_price in products_data:
        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('url', response.url)
        loader.add_value('name', product_name)
        loader.add_value('price', product_price)
        loader.add_value('sku', '')
        yield loader.load_item()
示例2: parse_product
def parse_product(self, response):
    """Yield loaded ``Product`` items for a single- or multi-price page.

    Non-HTML responses yield nothing. When a ``£`` price is found in
    ``//span/b``, one product named after the first ``<h1>`` text is emitted
    (nothing is emitted if the page has no ``<h1>`` text). Otherwise every
    ``<option>`` text is stripped of tabs/newlines and parsed into a
    ``(name, price)`` pair; options that do not match are skipped instead of
    raising ``AttributeError``.
    """
    if not isinstance(response, HtmlResponse):
        return

    hxs = HtmlXPathSelector(response)
    single_price = hxs.select('//span/b/text()').re(r'\xa3(.*)')

    products_data = []
    if single_price:
        titles = hxs.select('//h1/text()').extract()
        if not titles:
            return
        products_data.append((titles[0], single_price[0]))
    else:
        option_re = re.compile(r'(.*[0-9,a-z,A-Z\)]).*\xa3(.*[0-9])')
        for option_text in hxs.select('//option/text()').extract():
            option_text = re.sub('[\t\r\n]', '', option_text).strip()
            match = option_re.match(option_text)
            if match is None:
                # Option that carries no price (e.g. a prompt entry).
                continue
            products_data.append(match.groups())

    for product_name, product_price in products_data:
        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('url', response.url)
        loader.add_value('name', product_name)
        loader.add_value('price', product_price)
        loader.add_value('sku', '')
        yield loader.load_item()
示例3: parse_item
def parse_item(self, response):
    """Yield a single loaded ``Product`` scraped from a product page.

    Logs an error and yields nothing when the name or the price cannot be
    found. The product name doubles as the item identifier.
    """
    url = response.url
    hxs = HtmlXPathSelector(response)

    names = hxs.select("//div[@class='product-shop']/div[@class='product-name']/h2/text()").extract()
    if not names:
        logging.error("NO NAME! %s" % url)
        return
    name = names[0]

    prices = hxs.select("//div[@class='product-shop']/div[@class='price-box']//span[@class='price']/text()").extract()
    if not prices:
        logging.error("NO PRICE! %s" % url)
        return
    # Drop "." and turn "," into "." (presumably converting a "1.234,56"
    # style price into "1234.56"). Delivery cost is not added.
    price = prices[0].replace(".", "").replace(",", ".")

    l = ProductLoader(item=Product(), response=response)
    # Pass the extracted text as-is: wrapping it in str() raises
    # UnicodeEncodeError on Python 2 for names with non-ASCII characters.
    l.add_value('identifier', name)
    l.add_value('name', name)
    l.add_value('url', url)
    l.add_value('price', price)
    yield l.load_item()
示例4: parse_table_options_type2_single_product_page
def parse_table_options_type2_single_product_page(self, response):
    """Yield one product per option row of a "type 2" options table.

    The page title is combined with the first text of each row's first cell
    to form the product name; the third cell supplies the price. Rows with
    no name or price are logged and skipped. A price matching
    ``prices_range_regex`` triggers a request for the same URL handled by
    ``parse_product_list`` instead of producing an item.
    """
    page_url = response.url
    selector = HtmlXPathSelector(response)

    titles = selector.select("//div[@id='mainContent']/center/table/tr[1]/td[1]/p[2][not(@class)][*[local-name()='strong']]/strong[1]//text()").extract()
    if not titles:
        logging.error("ERROR!! NO NAME!! %s" % (response.url, ))
        return
    base_name = titles[0]

    rows = selector.select("//div[@id='mainContent']/center/table//table[@class='product_body']/tr[position()>1]")
    for row in rows:
        variant_texts = row.select("td[1]//text()").extract()
        if not variant_texts:
            logging.error("ERROR!! NO NAME!! %s" % (response.url, ))
            continue

        price_texts = row.select('td[3]//text()').extract()
        if not price_texts:
            logging.error("ERROR!! NO PRICE!! %s %s" % (base_name, response.url))
            continue

        price = price_texts[0]
        if re.search(prices_range_regex, price):
            # A price range is handed over to the list parser.
            yield Request(page_url, callback=self.parse_product_list)
            continue

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('url', page_url)
        loader.add_value('name', "%s %s" % (base_name, variant_texts[0]))
        loader.add_value('price', price)
        loader.add_value('sku', '')
        yield loader.load_item()
示例5: parse_product
def parse_product(self, response):
    """Yield a single loaded ``Product`` read from microdata markup.

    The name comes from ``<h1 itemprop="name">`` and the price from the last
    ``<span itemprop="price">`` on the page. The SKU is the second-to-last
    ``/``-separated segment of the response URL. Prints an error and yields
    nothing when the name or price is missing.
    """
    hxs = HtmlXPathSelector(response)
    url = response.url

    names = hxs.select('//h1[@itemprop="name"]/text()').extract()
    if not names:
        # Parenthesised form prints the same text on Python 2 and is also
        # valid syntax on Python 3.
        print("ERROR!! NO NAME!! %s" % url)
        return
    name = names[0]

    prices = hxs.select('//span[@itemprop="price"]/text()').extract()
    if not prices:
        print("ERROR!! NO PRICE!! %s" % url)
        return
    price = prices[-1]

    loader = ProductLoader(item=Product(), response=response)
    loader.add_value('url', url)
    loader.add_value('name', name)
    loader.add_value('price', price)
    loader.add_value('sku', response.url.split('/')[-2])
    yield loader.load_item()
示例6: parse_product
def parse_product(self, response):
    """Yield a single loaded ``Product`` for a product detail page.

    The name is taken from the product summary heading, falling back to any
    ``<h1>`` text. The price is the concatenation of every text node of the
    current-price span. Missing name or price is logged and yields nothing.
    """
    selector = HtmlXPathSelector(response)
    page_url = response.url

    titles = (
        selector.select("//div[@class='primary-content']//div[@id='product-summary']/h1/text()").extract()
        or selector.select('//h1/text()').extract()
    )
    if not titles:
        logging.error("ERROR! NO NAME! %s" % page_url)
        return
    title = titles[0]

    price_parts = selector.select("//div[@class='secondary-content']//ul[@class='pricing']/li[@class='current-price']/span/text()").extract()
    if not price_parts:
        logging.error("ERROR! NO PRICE! %s %s" % (page_url, title))
        return

    loader = ProductLoader(item=Product(), response=response)
    loader.add_value('identifier', title)
    loader.add_value('name', title)
    loader.add_value('url', page_url)
    loader.add_value('price', "".join(price_parts))
    yield loader.load_item()
示例7: parse
def parse(self, response):
    """Follow "next page" links and yield a product per listing anchor.

    Each ``<a>`` inside ``div.mainProducts`` that has a ``productName`` span
    becomes one item. The price is read from the ``orange`` span, falling
    back to the ``gray`` span, keeping only the text after the ``£`` sign.
    """
    base_url = get_base_url(response)
    hxs = HtmlXPathSelector(response)

    next_pages = hxs.select("//div[@class='navArea']/div[@class='navAreaPagging fr']/span[@class='paggingBtnNext']/a/@href").extract()
    for href in next_pages:
        yield Request(urljoin_rfc(base_url, href), callback=self.parse)

    content = hxs.select("//div[@class='mainProducts']")
    for product_ in content.select(".//a"):
        name = product_.select(".//ul/li/span[@class='productName']/text()").extract()
        if not name:
            continue
        # NOTE(review): hrefs are passed to the loader as extracted, without
        # being joined to base_url - confirm the loader resolves them.
        url = product_.select(".//@href").extract()
        price = product_.select(".//ul//li/ul/li[1]/span[@class='orange']/text()").re(r'\xa3(.*)')
        if not price:
            price = product_.select(".//ul/li/ul/li[1]/span[@class='gray']/text()").re(r'\xa3(.*)')

        l = ProductLoader(item=Product(), response=response)
        l.add_value('name', name)
        l.add_value('url', url)
        l.add_value('price', price)
        # load_item() is called once; the extra discarded call was redundant.
        yield l.load_item()
示例8: parse_product
def parse_product(self, response):
    """Yield a single loaded ``Product`` for a product detail page.

    The name is every text node of the page-title span joined with spaces,
    with runs of whitespace collapsed to one space. The price is the first
    text node of the ``currentPrice`` span. Missing data is logged and
    yields nothing.
    """
    selector = HtmlXPathSelector(response)
    page_url = response.url

    title_parts = selector.select("//h1[@class='pageTitle']/span/text()").extract()
    if not title_parts:
        logging.error("ERROR! NO NAME! %s" % page_url)
        return
    title = re.sub("[\s]+", " ", " ".join(title_parts))

    price_texts = selector.select("//div[contains(@class, 'productDetail')]//span[contains(@class, 'currentPrice')]/text()").extract()
    if not price_texts:
        logging.error("ERROR! NO PRICE! %s %s" % (page_url, title))
        return

    loader = ProductLoader(item=Product(), response=response)
    loader.add_value('identifier', title)
    loader.add_value('name', title)
    loader.add_value('url', page_url)
    loader.add_value('price', price_texts[0])
    yield loader.load_item()
示例9: parse_search
def parse_search(self, response):
    """Follow pagination links and yield one product per search result.

    Pagination hrefs equal to ``'#'`` are ignored. Results without a title
    are skipped silently; results without a URL or price are logged and
    skipped.
    """
    selector = HtmlXPathSelector(response)

    # Pagination.
    for href in selector.select("//ul[@class='pagination']//a/@href").extract():
        if href == '#':
            continue
        yield Request(href, callback=self.parse_search)

    # Products.
    results = selector.select("//article[contains(@class, 'product')]/div[contains(@class, 'desc')]")
    for result in results:
        titles = result.select(".//div/header[@class='productTitle']/a/text()").extract()
        if not titles:
            continue
        title = re.sub("[\s]+", " ", titles[0].strip())

        links = result.select(".//div/header[@class='productTitle']/a/@href").extract()
        if not links:
            logging.error("ERROR! NO URL! URL: %s. NAME: %s" % (response.url, title))
            continue

        price_texts = result.select(".//div//span[@class='currentPrice']/ins/text()").extract()
        if not price_texts:
            logging.error("ERROR! NO PRICE! URL: %s. NAME: %s" % (response.url, title))
            continue

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('identifier', title)
        loader.add_value('name', title)
        loader.add_value('url', links[0])
        loader.add_value('price', price_texts[0].strip())
        yield loader.load_item()
示例10: parse_options
def parse_options(self, response):
    """Yield a loaded ``Product`` whose SKU is looked up in ``skus.csv``.

    Name and price each have a primary and a fallback XPath. The
    manufacturer part number is read from the spec paragraph labelled
    ``'Mfg Part #:'``; if present, the first CSV row whose fourth column
    equals it supplies the SKU from its first column. Nothing is yielded
    when the name or the price is missing.
    """
    hxs = HtmlXPathSelector(response)

    name = hxs.select('//div[@id="skuinfo"]/h1[@itemprop="name"]/text()').extract()
    if not name:
        name = hxs.select('//div[@class="details"]/h1/text()').extract()

    price = "".join(hxs.select('//div[@class="club"]/span[@itemprop="Price"]/text()').re(r'([0-9\,\. ]+)')).strip()
    if not price:
        price = "".join(hxs.select('//div[@class="details"]/div[@class="special"]/text()').re(r'([0-9\,\. ]+)')).strip()

    model_no = None
    for spec in hxs.select('//div[@id="specs"]/div/p[@class="specs"]'):
        labels = spec.select('./span/text()').extract()
        # Paragraphs without a label span are ignored; this replaces a bare
        # ``except:`` that hid every error raised in the loop body.
        if labels and labels[0] == 'Mfg Part #:':
            model_no = "".join(spec.select("./text()").extract()).strip()

    if not (name and price):
        return

    sku_ = ''
    if model_no:
        # ``with`` closes the CSV file even when the lookup breaks out early.
        with open(os.path.join(HERE, 'skus.csv')) as csv_handle:
            for row in UnicodeReader(csv_handle):
                if row[3] == model_no:
                    sku_ = row[0]
                    break

    product_loader = ProductLoader(item=Product(), response=response)
    product_loader.add_value('name', name[0])
    product_loader.add_value('sku', sku_)
    product_loader.add_value('price', price)
    product_loader.add_value('url', response.url)
    yield product_loader.load_item()
示例11: parse_item
def parse_item(self, response):
    """Yield a product for a detail page, or follow links on a result page.

    When the product-name row is present the page is treated as a detail
    page: the custom price is preferred over the regular price and one item
    is yielded. Otherwise the page is treated as a search result and the
    product link found is requested with this same callback.
    """
    base_url = get_base_url(response)
    hxs = HtmlXPathSelector(response)

    name = hxs.select("//tr[@id='ProductDetail11_trProductName']/td/text()").extract()
    if name:
        name = name[0].strip()
        price = hxs.select("//tr[@id='ProductDetail11_trCustomPrice']/td/font/b/text()").extract()
        if not price:
            price = hxs.select("//tr[@id='ProductDetail11_trPrice']/td/text()").extract()

        l = ProductLoader(item=Product(), response=response)
        # Pass the extracted text as-is: wrapping it in str() raises
        # UnicodeEncodeError on Python 2 for names with non-ASCII characters.
        l.add_value('identifier', name)
        l.add_value('name', name)
        l.add_value('url', response.url)
        l.add_value('price', price)
        yield l.load_item()
    else:
        # May be several products. The XPath is assembled from adjacent
        # literals so that source indentation does not leak into the query.
        products = hxs.select(
            "//table[@id='SearchTemplate13_DataGrid1']//"
            "table[@id='SearchTemplate13_DataGrid1__ctl3_ProductInfoTable']"
        )
        for product in products:
            # NOTE(review): this XPath is absolute ("//"), so it searches the
            # whole document rather than only ``product`` - confirm intent.
            url = product.select("//tr[@id='SearchTemplate13_DataGrid1__ctl3_ProductNameRow']/td/a/@href").extract()
            if url:
                yield Request(urljoin_rfc(base_url, url[0]), callback=self.parse_item)
示例12: parse_product
def parse_product(self, response):
    """Yield products (or follow-up requests) for each listed item.

    For every ``item`` div in the content box: if the item has no
    ``item-price`` div, its "more info" link is requested; otherwise a
    product is loaded from the link, name and price markup. Items without a
    "more info" link or without a name are skipped. Non-HTML responses
    yield nothing.
    """
    if not isinstance(response, HtmlResponse):
        return

    hxs = HtmlXPathSelector(response)
    for item in hxs.select('//div[@class="content-box"]/div[contains(@class,"item")]'):
        links = item.select('.//div[@class="moreinfo"]/a/@href').extract()
        if not links:
            # Previously an unguarded [0] raised IndexError here, outside
            # the try block meant to tolerate missing fields.
            continue
        url = urljoin_rfc(self.URLBASE, links[0])

        # The extracted price <div> markup is passed to the loader as-is.
        price = item.select('.//div[@class="item-price"]').extract()
        if not price:
            yield Request(url)
            continue

        names = item.select('.//div[@class="item-name"]/a/text()').extract()
        if not names:
            continue

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('url', url)
        loader.add_value('name', names[0])
        loader.add_value('price', price)
        loader.add_value('sku', '')
        yield loader.load_item()
示例13: parse_search
def parse_search(self, response):
    """Follow pagination links and yield one product per search result.

    Pagination hrefs are resolved against the page's base URL. Results
    without a name are skipped silently; results without a URL or price are
    logged and skipped.
    """
    selector = HtmlXPathSelector(response)
    base_url = get_base_url(response)

    # Pagination.
    for href in selector.select("//div[@class='pagination top']//a/@href").extract():
        yield Request(urljoin_rfc(base_url, href), callback=self.parse_search)

    # Products.
    for result in selector.select("//div[@class='search-result']/form/ul/li"):
        titles = result.select("div[@class='prd-infos']/a/p[@class='prd-name']/strong/text()").extract()
        if not titles:
            continue
        title = titles[0]

        links = result.select("div[@class='prd-infos']/a/@href").extract()
        if not links:
            logging.error("ERROR! NO URL! URL: %s. NAME: %s" % (response.url, title))
            continue

        amounts = result.select("div[@class='prd-actions']/p[@class='prd-amount']/strong/text()").extract()
        if not amounts:
            logging.error("ERROR! NO PRICE! URL: %s. NAME: %s" % (response.url, title))
            continue

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('identifier', title)
        loader.add_value('name', title)
        loader.add_value('url', links[0])
        loader.add_value('price', amounts[0])
        yield loader.load_item()
示例14: parse_products
def parse_products(self, response):
    """Yield one loaded ``Product`` per model cell of the product table.

    For each cell the name is the concatenated text of the model link, the
    URL is the link's href resolved against the page base URL, and the price
    is the text following ``£`` in the price label. A cell missing any of
    these is logged and skipped; the remaining cells are still processed.
    """
    hxs = HtmlXPathSelector(response)
    base_url = get_base_url(response)

    items = hxs.select("//table[@id='ProductDataList']/tr/td[div[contains(@id, 'ModelLinkCell')]]")
    for item in items:
        name_parts = item.select(".//a[contains(@id, 'ModelLink')]//text()").extract()
        if not name_parts:
            logging.error("ERROR! NO NAME! %s" % response.url)
            # ``continue`` rather than ``return``: one incomplete cell must
            # not discard every product that follows it on the page.
            continue
        name = "".join(name_parts)

        hrefs = item.select(".//a[contains(@id, 'ModelLink')]/@href").extract()
        if not hrefs:
            logging.error("ERROR! NO URL! %s %s" % (name, response.url))
            continue
        url = urljoin_rfc(base_url, hrefs[0])

        prices = item.select("div[contains(@id, 'ModelPrice')]//td[@class='Label11']/text()").re(u'\xa3(.*)')
        if not prices:
            logging.error("ERROR! NO PRICE! %s %s" % (url, name))
            continue

        l = ProductLoader(item=Product(), response=response)
        l.add_value('identifier', name)
        l.add_value('name', name)
        l.add_value('url', url)
        l.add_value('price', prices[0])
        yield l.load_item()
示例15: parse_product
def parse_product(self, response):
    """Yield products whose on-page manufacturer id matches the request's.

    The body is parsed with BeautifulSoup. Every anchor whose href matches
    ``ProductDetail`` contributes its grandparent element as a product
    container (de-duplicated through a set). For each container the name is
    the text of the second ``<font>`` and the manufacturer id is the text of
    the first ``<nobr>``. An item is yielded only when that id equals
    ``response.meta['mfrgid']`` after stripping and lower-casing both sides.
    Containers lacking those elements are skipped. Non-HTML responses yield
    nothing.
    """
    if not isinstance(response, HtmlResponse):
        return

    soup = BeautifulSoup(response.body)
    detail_links = soup.findAll('a', href=re.compile('ProductDetail'))
    products = {link.parent.parent for link in detail_links}

    for product in products:
        fonts = product.findAll('font')
        first_nobr = product.find('nobr')
        if len(fonts) < 2 or first_nobr is None:
            # Container without the expected name / manufacturer-id markup.
            continue

        link = product.find('a', href=re.compile('ProductDetail'))
        if link:
            url = urljoin_rfc(get_base_url(response), link['href'])
        else:
            url = response.url

        product_loader = ProductLoader(item=Product(), response=response)
        product_loader.add_value('name', fonts[1].text)
        product_loader.add_value('price', product.find('nobr', text=re.compile(r'\$')))
        # The URL is added once; it used to be added twice.
        product_loader.add_value('url', url)
        product_loader.add_value('sku', response.meta['sku'])

        site_mfrgid = first_nobr.text
        if not site_mfrgid:
            continue
        mfrgid = response.meta['mfrgid'].strip().lower()
        if site_mfrgid.strip().lower() == mfrgid:
            yield product_loader.load_item()