

Python Request.meta["item"] Code Examples

This article collects typical usage examples of Request.meta["item"] on scrapy.http.request.Request in Python. If you are wondering how to use Request.meta["item"] to pass data between Scrapy callbacks, the curated examples below should help. You can also explore the broader usage of scrapy.http.request.Request.


Six code examples of Request.meta["item"] are shown below, sorted by popularity. They come from Python 2-era Scrapy projects (note dict.iteritems() in Example 1 and urljoin_rfc in Example 6), so small adjustments may be needed on Python 3; inline comments mark the relevant spots. A minimal sketch of the pattern they all share follows.
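The sketch below shows the shared pattern: one callback fills part of an item, attaches it to the follow-up request through meta["item"], and the next callback finishes it. The spider name, URLs, and field names are illustrative placeholders, not taken from any of the projects referenced later.

import scrapy
from scrapy.http.request import Request


class ProductSpider(scrapy.Spider):
    # hypothetical spider: name, start URL, and fields are placeholders
    name = "meta_item_example"
    start_urls = ["http://example.com/products"]

    def parse(self, response):
        for href in response.xpath("//a[@class='product']/@href").extract():
            item = {"url": response.urljoin(href)}  # partially filled item
            request = Request(item["url"], callback=self.parse_detail)
            request.meta["item"] = item  # attach the item to the request
            yield request

    def parse_detail(self, response):
        item = response.meta["item"]  # recover the item in the follow-up callback
        item["title"] = response.xpath("//h1/text()").extract_first()
        yield item  # the item is now complete; hand it to the pipelines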

Example 1: parseJsonProduct

# Required import: from scrapy.http.request import Request
# Usage pattern: request.meta["item"] = item
    def parseJsonProduct(self, response):
        item = response.meta["item"]
        # make valid json out of it and remove unneeded data
        # (Python 2 code: response.body is a str; on Python 3 use response.text)
        prodResponse = response.body.split("$+$")[0].strip().replace("'", '"')
        prodDict = {}
        sizeWidthDict = {}
        jsonresponse = json.loads(prodResponse)
        for product, value in jsonresponse.iteritems():  # Python 2; use .items() on Python 3
            if item["sku"] not in prodDict:
                prodDict[item["sku"]] = {}
            if value["c"] not in prodDict[item["sku"]]:
                prodDict[item["sku"]][value["c"]] = {}
            if value["w"] not in prodDict[item["sku"]][value["c"]]:
                prodDict[item["sku"]][value["c"]][value["w"]] = {}
            if value["s"] not in sizeWidthDict:
                sizeWidthDict[value["s"]] = []
            if value["w"] not in sizeWidthDict[value["s"]]:
                sizeWidthDict[value["s"]].append(value["w"])
            prodDict[item["sku"]][value["c"]][value["w"]][value["s"]] = value["sku"]
        item["variant"] = prodDict
        item["size_width_list"] = sizeWidthDict
        # request the first image set: pop one color; if it maps to more than
        # one href, push the remainder back so later callbacks can request them
        if item["imageSetUrls"]:
            color, href = item["imageSetUrls"].popitem()
            if len(href) > 1:
                item["imageSetUrls"][color] = href[1:]
            request = Request(href[0], callback=self.parseJsonImageSet)
            request.meta["item"] = item
            return request

        self.to_csv(item)
        return item
Author: alfonsjose · Project: scrapers · Source: RoadRunnerSports.py

Example 2: parse

# Required import: from scrapy.http.request import Request
# Usage pattern: request.meta["item"] = item
	def parse(self, response):
		sel = Selector(response)
		item = DicksItem()		
		if "&page=" in response.url: # Extracting the Page Number and then using that to assign sort.
			pagenumber = float(response.url.split("&page=")[-1]) 
		else:
			pagenumber = 1		
		t = 0 + ((pagenumber-1)*48)
		item["Sort_Order"] = {}
		
		producturls= sel.xpath("//div[@class='prod-details']/h2/a/@href").extract()
		productnames = sel.xpath("//div[@class='prod-details']/h2/a/@title").extract()		
		
		for url, name in zip(producturls, productnames):
			item["Sort_Order"]["http://www.dickssportinggoods.com" + url] = t
			t = t + 1
			
		# urllist, lyscat and priceid are parallel lists defined elsewhere in
		# the spider; compare the category URL and assign the LYS categorization
		for i in range(len(urllist)):
			if urllist[i] == response.url:
				item['Category'] = lyscat[i]
				item['id1'] = priceid[i]
				break
		
		for url, name in zip(producturls, productnames):
			if "Fitbit" not in name:
				# note: every yielded request shares this same item instance via meta
				request = Request("http://www.dickssportinggoods.com" + url, self.product_page)
				request.meta["item"] = item
				yield request
Author: Diwahars · Project: scrapers · Source: Dicks.py

Example 3: getItem

# Required import: from scrapy.http.request import Request
# Usage pattern: request.meta["item"] = item
    def getItem(self, school):
        item = SchoolItem()
        logo = school.xpath('div/div[contains(@class,"school_m_img fl")]/a/img/@src').extract()
        item["logo"] = logo[0] if logo else ""

        # name province city area under school_m_main
        school_main = school.xpath('div/div[contains(@class,"school_m_main fl")]')
        name = school_main.xpath("li/h3/a/text()").extract()
        item["name"] = name[0] if name else ""
        item["province"] = ""
        item["city"] = ""
        item["area"] = ""
        tempLocation = school_main.xpath("li[2]/b/text()").extract()
        if tempLocation:
            location = tempLocation[0].split()
            item["province"] = location[0] if len(location) > 0 else ""
            item["city"] = location[1] if len(location) > 1 else ""
            item["area"] = location[2] if len(location) > 2 else ""

        catagery = school_main.xpath("li[3]/b/text()").extract()  # "catagery" follows the spelling of the SchoolItem field
        schoolType = school_main.xpath("li[4]/ol[1]/b/text()").extract()
        level = school_main.xpath("li[4]/ol[2]/b/text()").extract()
        item["level"] = level[0] if level else ""
        item["catagery"] = catagery[0] if catagery else ""
        item["schoolType"] = schoolType[0] if schoolType else ""

        # address and phone under school_m_lx
        addressAndPhone = school.xpath('ul[contains(@class,"school_m_lx")]')
        address = addressAndPhone.xpath("li[1]/b/text()").extract()
        item["address"] = address[0] if address else ""
        phone = addressAndPhone.xpath("li[2]/b/text()").extract()
        item["phone"] = phone[0] if phone else ""
        schoolUrl = school_main.xpath("li/h3/a/@href").extract()[0]
        request = Request(schoolUrl, callback=self.parse_schoolIntroUrl)
        request.meta["item"] = item
        return request
Author: lindanXmu · Project: GreatSchool · Source: xuexiao.py

Example 4: parseJsonImageSet

# Required import: from scrapy.http.request import Request
# Usage pattern: request.meta["item"] = item
    def parseJsonImageSet(self, response):
        item = response.meta["item"]
        imageSetResponse = response.body
        # make valid json out of it: strip the jsonp wrapper and the trailing
        # request-id argument, then normalise "item" so it is always a list
        # (when only one image is available the payload holds a bare dict)
        imageSetResponse = imageSetResponse.replace("/*jsonp*/s7jsonResponse(", "")
        imageSetResponse = ",".join(imageSetResponse.split(",")[:-1])
        imageSetResponse = imageSetResponse.replace('"item":[', '"item":')
        imageSetResponse = imageSetResponse.replace('"item":', '"item":[')
        imageSetResponse = imageSetResponse.replace("}]}}", "}}}")
        imageSetResponse = imageSetResponse[::-1].replace("}}}", "}}]}")[::-1]

        color = response.url.split("-")[1].split("?")[0]
        isImageSet = len(response.url.split("-")) > 2
        item["Product_Image_File1"][color] = []

        jsonresponse = json.loads(imageSetResponse)
        for imageItem in jsonresponse["set"]["item"]:
            # check whether there is an image set or only a single default image
            if "isDefault" not in imageItem["i"]:
                imageUrl = (
                    "http://roadrunnersports.scene7.com/is/image/" + imageItem["i"]["n"] + "?iv=" + imageItem["iv"]
                )
                # response url is image set => image can be scaled
                if isImageSet:
                    imageUrl += "&scl=1"
                item["Product_Image_File1"][color].append(imageUrl)
            else:
                # no image set: queue a request for the default image instead
                colorName = item["color"][color]
                setUrl = (
                    "http://roadrunnersports.scene7.com/is/image/roadrunnersports/"
                    + item["sku"] + "-" + color + "?req=set,json&scl=1"
                )
                item["imageSetUrls"].setdefault(colorName, []).append(setUrl)
                item["imageSetUrls2"].setdefault(colorName, []).append(setUrl)

        if item["imageSetUrls"]:
            color, href = item["imageSetUrls"].popitem()
            if len(href) > 1:
                item["imageSetUrls"][color] = href[1:]
            request = Request(href[0], callback=self.parseJsonImageSet)
            request.meta["item"] = item
            return request

        self.to_csv(item)
        return item
Author: alfonsjose · Project: scrapers · Source: RoadRunnerSports.py
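A side note on Example 4 (and Example 1): the string surgery above is brittle if Scene7 ever reorders the payload. As a rough alternative, assuming the response really has the shape /*jsonp*/s7jsonResponse({...},"request-id");, the payload can be cut out with a regular expression and normalised after parsing. parse_s7_jsonp is a hypothetical helper, not part of the original spider.

import json
import re


def parse_s7_jsonp(body):
    # hypothetical helper: extract the JSON payload from a Scene7 jsonp
    # response shaped like  /*jsonp*/s7jsonResponse({...},"request-id");
    match = re.search(r's7jsonResponse\((.*),\s*"[^"]*"\)\s*;?\s*$', body, re.S)
    if match is None:
        raise ValueError("not an s7jsonResponse jsonp payload")
    data = json.loads(match.group(1))
    items = data["set"]["item"]
    # a single image arrives as a bare dict; normalise to a list so the
    # caller can always iterate
    if isinstance(items, dict):
        items = [items]
    return items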

Example 5: parse_schoolIntroUrl

# Required import: from scrapy.http.request import Request
# Usage pattern: request.meta["item"] = item
    def parse_schoolIntroUrl(self, response):
        sel = Selector(response)
        item = response.meta["item"]

        schoolIntroUrl = sel.xpath('//div[@class="school_kz fr"]/a/@href').extract()
        if schoolIntroUrl:
            request = Request(schoolIntroUrl[0], callback=self.parse_items)
            request.meta["item"] = item
            return request
        # no introduction page linked: the item is complete as-is
        return item
Author: lindanXmu · Project: GreatSchool · Source: xuexiao.py

Example 6: parse

# Required import: from scrapy.http.request import Request
# Usage pattern: request.meta["item"] = item
    def parse(self, resp):
        hxs = Selector(resp)

        base_url = "http://www.mangareader.net"
        page_links = hxs.xpath("//select[@id='pageMenu']/option/@value").extract()

        item = MangaImagesItem()
        item["chapter_url"] = resp.url
        item["chapter_name"] = hxs.xpath("//div[@id='mangainfo']//h1/text()").extract()[0]
        item["total_images"] = len(page_links)
        item["image_urls"] = []

        # fetch the images from all the pages
        for i, p in enumerate(page_links):
            page = urljoin_rfc(base_url, p)  # from scrapy.utils.url (deprecated; use urlparse.urljoin in newer code)
            request = Request(page, callback=self.parse_img_url)
            # pass the index of the image for reordering later
            request.meta["index"] = i
            request.meta["item"] = item  # pass the item to the callback
            yield request

        # note: the item is yielded while the per-page requests are still in
        # flight, so image_urls may not yet be complete when pipelines see it
        yield item
Author: abusalman · Project: manga-downloader-flask · Source: mangareader.py
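A closing note for newer Scrapy versions: since Scrapy 1.7, cb_kwargs is the recommended channel for passing scraper data such as an item to a callback, keeping meta free for middleware concerns (proxies, retries). A sketch of Example 5 rewritten in that style, assuming Scrapy 1.7+:

# requesting side (compare Example 3's getItem):
request = Request(schoolUrl, callback=self.parse_schoolIntroUrl,
                  cb_kwargs={"item": item})

# receiving side: the item arrives as a keyword argument
def parse_schoolIntroUrl(self, response, item):
    schoolIntroUrl = response.xpath('//div[@class="school_kz fr"]/a/@href').get()
    if schoolIntroUrl:
        return Request(schoolIntroUrl, callback=self.parse_items,
                       cb_kwargs={"item": item})
    return item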


Note: the scrapy.http.request.Request.meta["item"] examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The snippets are drawn from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use are governed by each project's license. Do not republish without permission.