當前位置: 首頁>>代碼示例>>Python>>正文


Python Request.meta["fobj"] 屬性用法代碼示例

本文整理匯總了Python中scrapy.http.request.Request.meta["fobj"]方法的典型用法代碼示例。如果您正苦於以下問題:Python Request.meta["fobj"]方法的具體用法?Python Request.meta["fobj"]怎麽用?Python Request.meta["fobj"]使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在scrapy.http.request.Request的用法示例。


在下文中一共展示了Request.meta["fobj"]方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: parse

# 需要導入模塊: from scrapy.http.request import Request [as 別名]
# 或者: from scrapy.http.request.Request import meta["fobj"] [as 別名]
    def parse(self, response):
        """Parse the Amazon.in site-directory page.

        Walks every tab ("popover-grouping" div) in the site directory,
        opens an append-mode text file named after the tab, and yields one
        Request per category link found under that tab.  The open file
        handle and the tab/category names are forwarded to
        parse_subcatpage through request.meta.
        """
        fobj = None
        tab_selector = response.xpath('//div[@id="siteDirectory"]')
        # Loop over all tabs in the site directory.
        for tab in tab_selector.xpath('.//div[@class="popover-grouping"]'):
            tabNameSel = tab.xpath("h2/text()").extract()
            if not tabNameSel:
                # Fix: the original fell through with tabName/fobj unbound
                # (NameError on the first tab) or stale from a previous tab.
                continue
            tabName = tabNameSel[0]
            # NOTE(review): one file per tab is opened here, but only the
            # last handle is closed below -- earlier handles leak.
            # parse_subcatpage reopens handles it finds closed, so eagerly
            # closing here could change downstream behavior; the lifecycle
            # is deliberately left as the original had it.
            fobj = open(tabName + ".txt", "a+")

            cat_selector = tab.xpath(".//ul")
            # Loop over every category link under this tab.
            for category in cat_selector.xpath("li"):
                catNameSel = category.xpath("a/text()").extract()
                # Fix: the original reused the previous iteration's catName
                # when the selector was empty; default to "" instead.
                catName = catNameSel[0] if catNameSel else ""
                catLinkSel = category.xpath("a/@href").extract()
                if not catLinkSel:
                    # Fix: the original yielded a request built from a stale
                    # catLink of the previous iteration (or NameError on the
                    # very first category with no link).
                    continue
                catLink = "http://www.amazon.in" + catLinkSel[0]

                request = Request(catLink, callback=self.parse_subcatpage)
                request.meta["fobj"] = fobj
                request.meta["tabName"] = tabName
                request.meta["catName"] = catName
                yield request

        # Close the last tab's file once all requests have been yielded
        # (runs when the generator is exhausted, as in the original).
        if fobj is not None:
            fobj.close()
開發者ID:ank-26,項目名稱:Ecomm,代碼行數:36,代碼來源:amazon_spider.py

示例2: parse_subcatpage

# 需要導入模塊: from scrapy.http.request import Request [as 別名]
# 或者: from scrapy.http.request.Request import meta["fobj"] [as 別名]
    def parse_subcatpage(self, response):
        """Parse a category page: write its filter/value names to the tab's
        text file and yield follow-up Requests for "see more" value pages.

        Expects request.meta to carry:
          - "fobj":    open file handle created by parse() (may already be
                       closed by the time this callback runs; reopened below)
          - "tabName": site-directory tab name
          - "catName": category name under that tab
        """
        fobj = response.meta["fobj"]
        tabName = response.meta["tabName"]
        catName = response.meta["catName"]

        # The refinements sidebar holds the sub-category and filter lists.
        subcatSel = response.xpath('//div[@id="refinements"]')
        if subcatSel:  ## in few categories subcategories are not present
            subcatNameSel = subcatSel.xpath("div/ul/li/a/span[1]/text()").extract()
            if subcatNameSel:
                subcatName = subcatNameSel[0]
                print subcatName + "sub category......"

        filter_selector = subcatSel
        index = 1
        # Each <h2> inside the refinements block is a filter heading; its
        # values live in the first following sibling <ul>.
        for filtr in filter_selector.xpath(".//h2"):
            filterNameSel = filtr.xpath("text()").extract()
            if filterNameSel:
                filterName = filterNameSel[0]
                # NOTE(review): if filterNameSel is empty, `result` (and
                # `filterName`) keep the previous iteration's values --
                # confirm that is intended.
                result = str("\n" + tabName + " -> " + catName + " -> " + filterName + " -> ")

            # Previous positional approach kept for reference:
            # valueSelector = filter_selector.xpath('.//ul['+str(index)+']')
            # (filters were misspelled/misordered on the SD site)
            valueSelector = filtr.xpath(".//following-sibling::ul[1]")
            # A "seeMore" anchor means the full value list is on another
            # page; follow it instead of scraping the truncated list here.
            valueLinkSel = valueSelector.xpath('.//a[span[contains(@class, "seeMore")]]/@href').extract()
            if valueLinkSel:
                valueLink = "http://www.amazon.in" + valueLinkSel[0]
                requestValue = Request(valueLink, callback=self.parse_valuepage)
                requestValue.meta["fobj"] = fobj
                requestValue.meta["tabName"] = tabName
                requestValue.meta["catName"] = catName
                requestValue.meta["filterName"] = filterName

                yield requestValue
            # print valueSelector.extract()
            else:
                # No "see more" page: the values are inline, write them now.
                print result.encode("utf-8")
                # parse() may have closed the handle before this callback
                # ran; reopen the same file in append mode if so.
                if fobj.closed:
                    fobj = open(fobj.name, "a+")

                fobj.write(result.encode("utf-8"))
                for value in valueSelector.xpath("li"):
                    valueNameSel = value.xpath(".//span/text()").extract()
                    if valueNameSel:
                        valueName = valueNameSel[0].strip() + " | "
                        print valueName.encode("utf-8")
                        fobj.write(valueName.encode("utf-8"))

            index += 1
        #### brand filter has a different structure here, hence extra code for that
        # NOTE(review): ref_3837712031 looks like a site-specific element id
        # for the Brands refinement list -- verify it is stable.
        brand_sel = response.xpath('//ul[@id="ref_3837712031"]')
        print brand_sel.extract()

        brandLinkSel = brand_sel.xpath('li/a[span[contains(@class, "seeMore")]]/@href').extract()
        print "brand link "
        print brandLinkSel
        if brandLinkSel:
            # Full brand list is on a separate page; follow it.
            brandLink = "http://www.amazon.in" + brandLinkSel[0]
            print brandLink
            requestBrand = Request(brandLink, callback=self.parse_valuepage)
            requestBrand.meta["fobj"] = fobj
            requestBrand.meta["tabName"] = tabName
            requestBrand.meta["catName"] = catName
            requestBrand.meta["filterName"] = "Brands"
            yield requestBrand
        else:

            # Inline brand list: write the brand names directly.
            filterName = "Brands"
            result = str("\n" + tabName + " -> " + catName + " -> " + filterName + " -> ")
            print result.encode("utf-8")
            for value in brand_sel.xpath('ul[@class="groupMultiSel"]/li'):
                valueNameSel = value.xpath(".//a/span[1]/text()").extract()
                if valueNameSel:
                    valueName = valueNameSel[0].strip() + " | "
                    print valueName.encode("utf-8")
                    # NOTE(review): unlike the loop above, this write does
                    # not check fobj.closed first -- a ValueError is possible
                    # if parse() already closed the handle; confirm.
                    fobj.write(valueName.encode("utf-8"))
開發者ID:ank-26,項目名稱:Ecomm,代碼行數:77,代碼來源:amazon_spider.py


注:本文中的scrapy.http.request.Request.meta["fobj"]方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。