本文整理汇总了Python中scrapy.http.request.Request.meta["item"]方法的典型用法代码示例。如果您正苦于以下问题:Python Request.meta["item"]方法的具体用法?Python Request.meta["item"]怎么用?Python Request.meta["item"]使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.http.request.Request
的用法示例。
在下文中一共展示了Request.meta["item"]方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parseJsonProduct
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 或者: from scrapy.http.request.Request import meta["item"] [as 别名]
def parseJsonProduct(self, response):
    """Parse the product-variant JSONP response and build the variant index.

    Builds a nested lookup ``sku -> color -> width -> size -> variant-sku``
    plus a ``size -> [widths]`` map, stores both on the item, then chains
    into the first pending image-set request. When no image-set URLs remain,
    writes the item to CSV and returns it.
    """
    item = response.meta["item"]
    # Strip everything after the "$+$" marker and normalise single quotes so
    # the payload becomes valid JSON.
    prodResponse = response.body.split("$+$")[0].strip().replace("'", '"')
    prodDict = {}
    sizeWidthDict = {}
    jsonresponse = json.loads(prodResponse)
    # .values() instead of the Python-2-only .iteritems(): the JSON key was
    # never used, only the variant record.
    for value in jsonresponse.values():
        # setdefault collapses the original "if key not in dict: dict[key] = {}"
        # chains into single lookups.
        colorDict = prodDict.setdefault(item["sku"], {})
        widthDict = colorDict.setdefault(value["c"], {})
        sizeDict = widthDict.setdefault(value["w"], {})
        sizeDict[value["s"]] = value["sku"]
        # Record every width seen for this size (deduplicated).
        widths = sizeWidthDict.setdefault(value["s"], [])
        if value["w"] not in widths:
            widths.append(value["w"])
    item["variant"] = prodDict
    item["size_width_list"] = sizeWidthDict
    # Request the first image set; any remaining URLs for the same color are
    # re-queued so they get consumed one request at a time.
    if item["imageSetUrls"]:
        color, href = item["imageSetUrls"].popitem()
        if len(href) > 1:
            item["imageSetUrls"][color] = href[1:]
        request = Request(href[0], callback=self.parseJsonImageSet)
        request.meta["item"] = item
        return request
    self.to_csv(item)
    return item
示例2: parse
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 或者: from scrapy.http.request.Request import meta["item"] [as 别名]
def parse(self, response):
    """Parse a Dick's Sporting Goods listing page.

    Records each product's global sort position, attaches the LYS category
    metadata matching this listing URL, and yields one detail-page request
    per product (Fitbit products are skipped).
    """
    sel = Selector(response)
    item = DicksItem()
    # Listing pages past the first carry "&page=N"; each page holds 48
    # products, so the sort offset for this page starts at (N-1)*48.
    if "&page=" in response.url:
        pagenumber = float(response.url.split("&page=")[-1])
    else:
        pagenumber = 1
    t = 0 + ((pagenumber - 1) * 48)
    item["Sort_Order"] = {}
    producturls = sel.xpath("//div[@class='prod-details']/h2/a/@href").extract()
    productnames = sel.xpath("//div[@class='prod-details']/h2/a/@title").extract()
    for url, name in zip(producturls, productnames):
        item["Sort_Order"]["http://www.dickssportinggoods.com" + url] = t
        t = t + 1
    # Compare the category URL and assign the LYS categorization; zip over
    # the parallel tables instead of index-based range(len(...)) iteration.
    # Stop at the first matching URL.
    for cat_url, cat, pid in zip(urllist, lyscat, priceid):
        if cat_url == response.url:
            item['Category'] = cat
            item['id1'] = pid
            break
    for url, name in zip(producturls, productnames):
        if "Fitbit" not in name:
            request = Request("http://www.dickssportinggoods.com" + url, self.product_page)
            request.meta["item"] = item
            yield request
示例3: getItem
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 或者: from scrapy.http.request.Request import meta["item"] [as 别名]
def getItem(self, school):
    """Build a SchoolItem from one school node and return a follow-up
    request for the school's introduction page."""
    item = SchoolItem()

    def first_or_empty(values):
        # xpath().extract() returns a list; take the head or fall back to "".
        return values[0] if values else ""

    item["logo"] = first_or_empty(
        school.xpath('div/div[contains(@class,"school_m_img fl")]/a/img/@src').extract())
    # name / province / city / area live under school_m_main
    school_main = school.xpath('div/div[contains(@class,"school_m_main fl")]')
    item["name"] = first_or_empty(school_main.xpath("li/h3/a/text()").extract())
    item["province"] = ""
    item["city"] = ""
    item["area"] = ""
    tempLocation = school_main.xpath("li[2]/b/text()").extract()
    if tempLocation:
        parts = tempLocation[0].split()
        # Whitespace-separated location string: province [city [area]].
        for key, idx in (("province", 0), ("city", 1), ("area", 2)):
            if len(parts) > idx:
                item[key] = parts[idx]
    item["catagery"] = first_or_empty(school_main.xpath("li[3]/b/text()").extract())
    item["schoolType"] = first_or_empty(school_main.xpath("li[4]/ol[1]/b/text()").extract())
    item["level"] = first_or_empty(school_main.xpath("li[4]/ol[2]/b/text()").extract())
    # address and phone live under school_m_lx; phone keeps the raw list.
    addressAndPhone = school.xpath('ul[contains(@class,"school_m_lx")]')
    item["address"] = first_or_empty(addressAndPhone.xpath("li[1]/b/text()").extract())
    item["phone"] = addressAndPhone.xpath("li[2]/b/text()").extract()
    schoollUrl = school_main.xpath("li/h3/a/@href").extract()[0]
    request = Request(schoollUrl, callback=self.parse_schoolIntroUrl)
    request.meta["item"] = item
    return request
示例4: parseJsonImageSet
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 或者: from scrapy.http.request.Request import meta["item"] [as 别名]
def parseJsonImageSet(self, response):
    """Parse a Scene7 JSONP image-set response for one color.

    Collects the image URLs for the color encoded in the response URL, then
    chains into the next pending image-set request; when none remain, writes
    the item to CSV and returns it.
    """
    item = response.meta["item"]
    imageSetResponse = response.body
    # make a valid json file out of it, if only one image available it was a list => make a dict
    imageSetResponse = imageSetResponse.replace("/*jsonp*/s7jsonResponse(", "")
    # Drop the trailing JSONP argument after the last comma (the request id).
    imageSetResponse = ",".join(imageSetResponse.split(",")[:-1])
    # Normalise "item" to always be a JSON array: strip an existing opening
    # bracket (if any), then insert one unconditionally.
    imageSetResponse = imageSetResponse.replace('"item":[', '"item":')
    imageSetResponse = imageSetResponse.replace('"item":', '"item":[')
    # Remove a pre-existing closing bracket at the tail, then re-add it at the
    # correct position by replacing on the REVERSED string, so only the
    # end-most "}}}" run is rewritten rather than earlier occurrences.
    imageSetResponse = imageSetResponse.replace("}]}}", "}}}")
    imageSetResponse = imageSetResponse[::-1].replace("}}}", "}}]}")[::-1]
    # URL pattern is presumably "<sku>-<color>[-<set>]?..." — the second
    # dash-separated token is the color code (TODO confirm against requests
    # built in parseJsonProduct).
    color = response.url.split("-")[1].split("?")[0]
    isImageSet = False
    if len(response.url.split("-")) > 2:
        # A third dash-separated token marks a real image SET (scalable).
        isImageSet = True
    item["Product_Image_File1"][color] = []
    jsonresponse = json.loads(imageSetResponse)
    for index, imageItem in enumerate(jsonresponse["set"]["item"]):
        # check if there is a image set or only one image
        if "isDefault" not in imageItem["i"]:
            imageUrl = (
                "http://roadrunnersports.scene7.com/is/image/" + imageItem["i"]["n"] + "?iv=" + imageItem["iv"]
            )
            # response url is image set => image can be scaled
            if isImageSet:
                imageUrl += "&scl=1"
            item["Product_Image_File1"][color].append(imageUrl)
        else:
            # there is no image set append request for default image
            if item["color"][color] not in item["imageSetUrls"]:
                item["imageSetUrls"][item["color"][color]] = []
            if item["color"][color] not in item["imageSetUrls2"]:
                item["imageSetUrls2"][item["color"][color]] = []
            item["imageSetUrls"][item["color"][color]].append(
                "http://roadrunnersports.scene7.com/is/image/roadrunnersports/"
                + item["sku"]
                + "-"
                + color
                + "?req=set,json&scl=1"
            )
            item["imageSetUrls2"][item["color"][color]].append(
                "http://roadrunnersports.scene7.com/is/image/roadrunnersports/"
                + item["sku"]
                + "-"
                + color
                + "?req=set,json&scl=1"
            )
    # Chain into the next image set, same pattern as parseJsonProduct: pop
    # one color, re-queue its remaining URLs, request the first.
    if item["imageSetUrls"]:
        color, href = item["imageSetUrls"].popitem()
        if len(href) > 1:
            item["imageSetUrls"][color] = href[1:]
        request = Request(href[0], callback=self.parseJsonImageSet)
        request.meta["item"] = item
        return request
    self.to_csv(item)
    return item
示例5: parse_schoolIntroUrl
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 或者: from scrapy.http.request.Request import meta["item"] [as 别名]
def parse_schoolIntroUrl(self, response):
    """Follow the school's introduction link with parse_items.

    If the page has no intro link, emits the item as-is. The original code
    preset ``link = self.start_urls[0]`` as a default, but that value was
    never used on any path (the request is only built when a link exists)
    and would raise IndexError on an empty start_urls — removed.
    """
    sel = Selector(response)
    item = response.meta["item"]
    schoolIntroUrl = sel.xpath('//div[@class="school_kz fr"]/a/@href').extract()
    if schoolIntroUrl:
        request = Request(schoolIntroUrl[0], callback=self.parse_items)
        request.meta["item"] = item
        return request
    else:
        return item
示例6: parse
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 或者: from scrapy.http.request.Request import meta["item"] [as 别名]
def parse(self, resp):
    """Parse a manga-chapter page: collect the per-page links and fan out
    one image-URL request per page, then yield the shared item."""
    base_url = "http://www.mangareader.net"
    hxs = Selector(resp)
    page_links = hxs.xpath("//select[@id='pageMenu']/option/@value").extract()

    item = MangaImagesItem()
    item["chapter_url"] = resp.url
    item["chapter_name"] = hxs.xpath("//div[@id='mangainfo']//h1/text()").extract()[0]
    item["total_images"] = len(page_links)
    item["image_urls"] = []

    # fetch the images from all the pages
    for idx, link in enumerate(page_links):
        req = Request(urljoin_rfc(base_url, link), callback=self.parse_img_url)
        req.meta["index"] = idx  # image position, for reordering later
        req.meta["item"] = item  # callbacks fill image_urls on this item
        yield req
    yield item