当前位置: 首页>>代码示例>>Python>>正文


Python Request.meta["fobj"]方法代码示例

本文整理汇总了Python中scrapy.http.request.Request.meta["fobj"]的典型用法代码示例（meta是Request实例的字典属性，"fobj"是其中的一个键）。如果您正苦于以下问题：Python Request.meta["fobj"]的具体用法是什么？Python Request.meta["fobj"]怎么用？Python Request.meta["fobj"]有哪些使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解其所属类scrapy.http.request.Request的用法示例。


在下文中一共展示了Request.meta["fobj"]方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse

# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 注意: meta["fobj"] 不能单独导入; 它是 Request 实例的 meta 字典中的一个键, 用法为 request.meta["fobj"] = ...
    def parse(self, response):
        """Walk the site directory and schedule one request per category.

        Every tab (``div.popover-grouping`` under ``div#siteDirectory``) that
        has a heading gets a ``<tab name>.txt`` file opened in append mode.
        For each category link inside the tab a ``Request`` is yielded with
        ``parse_subcatpage`` as callback; the open file, the tab name and the
        category name travel along in ``request.meta``.

        Tabs without a heading and categories without both a name and a link
        are skipped, so no request is ever built from values left over from a
        previous iteration.

        :param response: response of the directory page; only ``xpath`` is
            used on it.
        :returns: generator of ``Request`` objects, one per usable category.
        """
        tab_selector = response.xpath('//div[@id="siteDirectory"]')

        ### loop for all tabs
        for tab in tab_selector.xpath('.//div[@class="popover-grouping"]'):
            tabNameSel = tab.xpath("h2/text()").extract()
            if not tabNameSel:
                # No heading means no file name and no label for the output.
                continue
            tabName = tabNameSel[0]

            # The handle is deliberately NOT closed in this method: the
            # callbacks of the requests yielded below run after this generator
            # has been consumed and still write to it.
            # NOTE(review): nothing visible here closes these handles
            # explicitly; consider closing them from the spider's shutdown
            # hook.
            fobj = open(tabName + ".txt", "a+")

            cat_selector = tab.xpath(".//ul")

            ### loop for all categories
            for category in cat_selector.xpath("li"):
                catNameSel = category.xpath("a/text()").extract()
                catLinkSel = category.xpath("a/@href").extract()
                if not (catNameSel and catLinkSel):
                    # Incomplete entry: skip it instead of re-requesting the
                    # previous category's link or mislabelling this one.
                    continue

                catName = catNameSel[0]
                catLink = "http://www.amazon.in" + catLinkSel[0]

                request = Request(catLink, callback=self.parse_subcatpage)
                request.meta["fobj"] = fobj
                request.meta["tabName"] = tabName
                request.meta["catName"] = catName
                yield request
开发者ID:ank-26,项目名称:Ecomm,代码行数:36,代码来源:amazon_spider.py

示例2: parse_subcatpage

# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 注意: meta["fobj"] 不能单独导入; 它是 Request 实例的 meta 字典中的一个键, 用法为 response.meta["fobj"]
    def parse_subcatpage(self, response):
        """Record the filters of a category page, following "see more" links.

        Reads ``fobj``, ``tabName`` and ``catName`` from ``response.meta``.
        For every ``<h2>`` inside ``div#refinements`` the first ``<ul>``
        sibling after it is inspected: if it holds a "seeMore" link, a request
        for that link is yielded with ``parse_valuepage`` as callback;
        otherwise the header ``tab -> category -> filter -> `` and each value
        (suffixed with `` | ``) are printed and appended to ``fobj``.

        The brand list (``ul#ref_3837712031``) has a different structure and
        is handled separately under the fixed filter name ``"Brands"``.

        :param response: category page response carrying the meta keys above.
        :returns: generator of ``Request`` objects for the "see more" pages.
        """
        fobj = response.meta["fobj"]
        tabName = response.meta["tabName"]
        catName = response.meta["catName"]

        # Built without str(): the names come from extract() and may contain
        # non-ASCII characters, which str() cannot encode on Python 2.
        prefix = "\n" + tabName + " -> " + catName + " -> "

        subcatSel = response.xpath('//div[@id="refinements"]')
        if subcatSel:  ## in few categories subcategories are not present
            subcatNameSel = subcatSel.xpath("div/ul/li/a/span[1]/text()").extract()
            if subcatNameSel:
                print(subcatNameSel[0] + "sub category......")

        for filtr in subcatSel.xpath(".//h2"):
            filterNameSel = filtr.xpath("text()").extract()
            if not filterNameSel:
                # A heading without text gives no label; skip it rather than
                # reuse the name of the previous filter.
                continue
            filterName = filterNameSel[0]

            valueSelector = filtr.xpath(".//following-sibling::ul[1]")
            valueLinkSel = valueSelector.xpath('.//a[span[contains(@class, "seeMore")]]/@href').extract()
            if valueLinkSel:
                # The full value list lives on another page; hand over there.
                valueLink = "http://www.amazon.in" + valueLinkSel[0]
                requestValue = Request(valueLink, callback=self.parse_valuepage)
                requestValue.meta["fobj"] = fobj
                requestValue.meta["tabName"] = tabName
                requestValue.meta["catName"] = catName
                requestValue.meta["filterName"] = filterName
                yield requestValue
                continue

            result = prefix + filterName + " -> "
            print(result.encode("utf-8"))

            # The handle received through meta may already have been closed by
            # whoever created it; reopen the same path in append mode.
            # NOTE(review): a handle reopened here is never closed explicitly.
            if fobj.closed:
                fobj = open(fobj.name, "a+")

            fobj.write(result.encode("utf-8"))
            for value in valueSelector.xpath("li"):
                valueNameSel = value.xpath(".//span/text()").extract()
                if valueNameSel:
                    valueName = valueNameSel[0].strip() + " | "
                    print(valueName.encode("utf-8"))
                    fobj.write(valueName.encode("utf-8"))

        #### brand filter has a different structure here, hence extra code for that
        brand_sel = response.xpath('//ul[@id="ref_3837712031"]')
        print(brand_sel.extract())

        brandLinkSel = brand_sel.xpath('li/a[span[contains(@class, "seeMore")]]/@href').extract()
        print("brand link ")
        print(brandLinkSel)
        if brandLinkSel:
            brandLink = "http://www.amazon.in" + brandLinkSel[0]
            print(brandLink)
            requestBrand = Request(brandLink, callback=self.parse_valuepage)
            requestBrand.meta["fobj"] = fobj
            requestBrand.meta["tabName"] = tabName
            requestBrand.meta["catName"] = catName
            requestBrand.meta["filterName"] = "Brands"
            yield requestBrand
        else:
            result = prefix + "Brands" + " -> "
            print(result.encode("utf-8"))

            brandValues = []
            for value in brand_sel.xpath('ul[@class="groupMultiSel"]/li'):
                valueNameSel = value.xpath(".//a/span[1]/text()").extract()
                if valueNameSel:
                    brandValues.append(valueNameSel[0].strip() + " | ")

            if brandValues:
                # Same closed-handle guard as for the regular filters.
                if fobj.closed:
                    fobj = open(fobj.name, "a+")
                # Write the header too, so the brand values are not appended
                # to whatever line was written last.
                fobj.write(result.encode("utf-8"))
                for valueName in brandValues:
                    print(valueName.encode("utf-8"))
                    fobj.write(valueName.encode("utf-8"))
开发者ID:ank-26,项目名称:Ecomm,代码行数:77,代码来源:amazon_spider.py


注:本文中的scrapy.http.request.Request.meta["fobj"]方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。