

Python http.Request Code Examples

This article collects typical usage examples of scrapy.http.Request in Python (the Request class exposed by Scrapy's scrapy.http module). If you are unsure exactly how http.Request is used, how to call it, or what working examples look like, the curated code samples below may help. You can also explore further usage examples from the scrapy.http module.


The following presents 15 code examples of http.Request, ordered by popularity by default.
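
Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: construct a Request with a URL and a callback, then yield it from a spider method. The spider name and URLs below are illustrative placeholders, not taken from any of the projects quoted later.

import scrapy
from scrapy.http import Request

class ExampleSpider(scrapy.Spider):
    name = 'example'

    def start_requests(self):
        # Hand the initial Request to the scheduler
        yield Request(url='https://example.com/', callback=self.parse)

    def parse(self, response):
        # Follow every link on the page with a further Request
        for href in response.css('a::attr(href)').extract():
            yield Request(url=response.urljoin(href), callback=self.parse)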

Example 1: start_requests

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def start_requests(self):
        url = 'https://www.assetstore.unity3d.com/login'
        yield Request(
                url = url,
                headers = {
                    'Accept': 'application/json',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
                    'Connection': 'keep-alive',
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                    'Host': 'www.assetstore.unity3d.com',
                    'Referer': 'https://www.assetstore.unity3d.com/en/',
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 '
                                  'Firefox/50.0',
                    'X-Kharma-Version': '0',
                    'X-Requested-With': 'UnityAssetStore',
                    'X-Unity-Session': '26c4202eb475d02864b40827dfff11a14657aa41',
                },
                meta = {
                },
                dont_filter = True,
                callback = self.get_unity_version,
                errback = self.error_parse,
        ) 
Developer: awolfly9, Project: IPProxyTool, Lines: 26, Source: assetstore.py

Example 2: parse_1

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def parse_1(self, response):
        info('Parse '+response.url)
        #sel = Selector(response)
        #v = sel.css('.gs_ggs a::attr(href)').extract()
        #import pdb; pdb.set_trace()
        x = self.parse_with_rules(response, self.list_css_rules, dict)
        items = []
        if len(x) > 0:
            items = x[0]['.gs_r']
            pp.pprint(items)
        import pdb; pdb.set_trace()  # debugging breakpoint left in the original source
        # return self.parse_with_rules(response, self.css_rules, googlescholarItem)

        for item in items:
            if item['related-url'] == '' or item['related-type'] != '[PDF]':
                continue
            url = item['related-url']
            info('pdf-url: ' + url)
            yield Request(url, callback=self.save_pdf) 
Developer: geekan, Project: google-scholar-crawler, Lines: 21, Source: spider.py

Example 3: request_to_dict

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def request_to_dict(self, request):
        '''
        Convert Request object to a dict.
        modified from scrapy.utils.reqser
        '''
        req_dict = {
            # urls should be safe (safe_string_url)
            'url': to_unicode(request.url),
            'method': request.method,
            'headers': dict(request.headers),
            'body': request.body,
            'cookies': request.cookies,
            'meta': request.meta,
            '_encoding': request._encoding,
            'priority': request.priority,
            'dont_filter': request.dont_filter,
             #  callback/errback are assumed to be a bound instance of the spider
            'callback': None if request.callback is None else request.callback.__name__,
            'errback': None if request.errback is None else request.errback.__name__,
        }
        return req_dict 
Developer: istresearch, Project: scrapy-cluster, Lines: 23, Source: distributed_scheduler.py
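
The example above only covers the serialization direction. A minimal sketch of the reverse step, rebuilding a Request from such a dict, is shown below; the helper name request_from_dict and its signature are illustrative assumptions rather than code from scrapy-cluster, and the callback/errback names are resolved against the spider they were taken from.

from scrapy.http import Request

def request_from_dict(req_dict, spider):
    # Illustrative inverse of request_to_dict: look the callback/errback
    # methods up by name on the spider instance, then rebuild the Request.
    return Request(
        url=req_dict['url'],
        method=req_dict['method'],
        headers=req_dict['headers'],
        body=req_dict['body'],
        cookies=req_dict['cookies'],
        meta=req_dict['meta'],
        encoding=req_dict['_encoding'],
        priority=req_dict['priority'],
        dont_filter=req_dict['dont_filter'],
        callback=getattr(spider, req_dict['callback']) if req_dict['callback'] else None,
        errback=getattr(spider, req_dict['errback']) if req_dict['errback'] else None,
    )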

Example 4: process_spider_output

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def process_spider_output(self, response, result, spider):
        '''
        Ensures the metadata from the response is passed
        through in any Requests generated from the spider
        '''
        self.logger.debug("processing meta passthrough middleware")
        for x in result:
            # only operate on requests
            if isinstance(x, Request):
                self.logger.debug("found request")
                # pass along all known meta fields, only if
                # they were not already set in the spider's new request
                for key in list(response.meta.keys()):
                    if key not in x.meta:
                        x.meta[key] = response.meta[key]
            yield x 
Developer: istresearch, Project: scrapy-cluster, Lines: 18, Source: meta_passthrough_middleware.py
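
A spider middleware like this one only runs once it is registered in the project settings. A minimal sketch, assuming the class is importable as myproject.middlewares.MetaPassthroughMiddleware; the module path and the priority value 543 are illustrative, not taken from scrapy-cluster.

# settings.py -- register the middleware so Scrapy invokes process_spider_output()
SPIDER_MIDDLEWARES = {
    'myproject.middlewares.MetaPassthroughMiddleware': 543,
}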

Example 5: evaluate

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def evaluate(self, meta_object,
                text, expected_raw, expected_requests):
        request = Request(url='http://www.drudgereport.com',
                          meta=meta_object)
        response = HtmlResponse('drudge.url', body=text, request=request,
                                encoding='utf8')

        raw_item_count = 0
        request_count = 0

        for x in self.spider.parse(response):
            if isinstance(x, RawResponseItem):
                raw_item_count = raw_item_count + 1
            elif isinstance(x, Request):
                request_count = request_count + 1

        self.assertEqual(raw_item_count, expected_raw)
        self.assertEqual(request_count, expected_requests) 
Developer: istresearch, Project: scrapy-cluster, Lines: 20, Source: test_link_spider.py

Example 6: get_request

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def get_request(self):
        req = None

        # required
        req = Request('http://ex.com')
        req.meta['crawlid'] = "abc123"
        req.meta['appid'] = "myapp"

        req.meta['url'] = "http://ex.com"
        req.meta['spiderid'] = "link"
        req.meta["attrs"] = None
        req.meta["allowed_domains"] = None
        req.meta["allow_regex"] = None
        req.meta["deny_regex"] = None
        req.meta["deny_extensions"] = None
        req.meta['curdepth'] = 0
        req.meta["maxdepth"] = 0
        req.meta['priority'] = 0
        req.meta['retry_times'] = 0
        req.meta['expires'] = 0
        req.meta['useragent'] = None
        req.meta['cookie'] = None

        return req 
Developer: istresearch, Project: scrapy-cluster, Lines: 26, Source: test_distributed_scheduler.py

Example 7: parse_ph_key

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def parse_ph_key(self,response):
        selector = Selector(response)
        logging.debug('request url:------>' + response.url)
        # logging.info(selector)
        divs = selector.xpath('//div[@class="phimage"]')
        for div in divs:
            viewkey = re.findall('viewkey=(.*?)"',div.extract())
            # logging.debug(viewkey)
            yield Request(url='https://www.pornhub.com/embed/%s' % viewkey[0],callback = self.parse_ph_info)
        url_next = selector.xpath('//a[@class="orangeButton" and text()="Next"]/@href').extract()
        # logging.debug(url_next)
        if url_next:
        # if self.test:
            logging.debug(' next page:---------->' + self.host+url_next[0])
            yield Request(url=self.host+url_next[0],callback=self.parse_ph_key)
            # self.test = False 
Developer: ceres993434, Project: PornHubBot, Lines: 18, Source: pornHubSpider.py

Example 8: init_request

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def init_request(self):
        """This function is called before crawling starts."""

        # Do not start a request on error,
        # simply return nothing and quit scrapy
        if self.abort:
            return

        logging.info('All set, start crawling with depth: ' + str(self.max_depth))

        # Do a login
        if self.config['login']['enabled']:
            # Start with login first
            logging.info('Login required')
            return Request(url=self.login_url, callback=self.login)
        else:
            # Start with the parse function
            logging.info('No login required')
            return Request(url=self.base_url, callback=self.parse)
Developer: cytopia, Project: crawlpy, Lines: 25, Source: crawlpy_spider.py

Example 9: parse

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def parse(self, response):
        for a in response.xpath("//dd/a"):
            url = a.xpath("./@href").extract()[0]
            text = a.xpath("./text()").extract()[0]

            items = text.split(u'升级软件')
            version = items[-1].strip()
            product = items[0].strip().split(u'(')[0].split(' ')[0]

            yield Request(
                url=self.base_url.format(url),
                headers={"Referer": response.url},
                meta={
                    "product":product,
                    "version":version,
                },
                callback=self.parse_product) 
Developer: firmadyne, Project: scraper, Lines: 19, Source: tenda_zh.py

Example 10: parse_product

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def parse_product(self, response):
        # Find the "Software and Firmware" tab link to get to the product-range-download page
        meta = response.meta
        meta['dont_redirect'] = True
        for link in response.css('a.tab-link'):
            href = link.xpath('@href').extract_first()
            if href.endswith(u'software-firmware-tab'):
                logging.debug("Requesting SW+FW page for %s at %s",
                        response.meta['product'], urlparse.urljoin(response.url, href))

                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta=meta,
                    callback=self.parse_product_sw_fw)

                break
        else:
            logging.debug("Did not find a 'Software and Firmware' tab for %s",
                    response.meta['product']) 
Developer: firmadyne, Project: scraper, Lines: 22, Source: se.py

Example 11: parse

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def parse(self, response):
        if not response.xpath(
                "//form[@id='productSearchForm']//input[@name='category']/@value").extract()[0]:
            for category in response.xpath("//form[@id='productSearchForm']/div[1]//ul[@class='select-options']//a/@data-id").extract():
                yield FormRequest.from_response(response,
                                                formname="productSearchForm",
                                                formdata={
                                                    "category": category},
                                                callback=self.parse)
        elif not response.xpath("//form[@id='productSearchForm']//input[@name='subCategory']/@value").extract()[0]:
            for subcategory in response.xpath("//form[@id='productSearchForm']/div[2]//ul[@class='select-options']//a/@data-id").extract():
                yield FormRequest.from_response(response,
                                                formname="productSearchForm",
                                                formdata={
                                                    "subCategory": subcategory},
                                                callback=self.parse)
        else:
            for product in response.xpath("//form[@id='productSearchForm']/div[3]//ul[@class='select-options']//a/@data-id").extract():
                yield Request(
                    url=urlparse.urljoin(
                        response.url, "/us/support-product?pid=%s" % (product)),
                    headers={"Referer": response.url},
                    callback=self.parse_product) 
Developer: firmadyne, Project: scraper, Lines: 25, Source: belkin.py

Example 12: parse_json

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def parse_json(self, response):
        json_response = json.loads(response.body_as_unicode())

        if json_response:
            for entry in json_response:
                yield Request(
                    url=urlparse.urljoin(
                        self.base_path, "/getMenuList.html?action=getsubcatlist&catid=%s&appPath=us" % entry["id"]),
                    meta={"cid": entry["id"]},
                    headers={"Referer": response.url,
                             "X-Requested-With": "XMLHttpRequest"},
                    callback=self.parse_json)
        else:
            yield Request(
                url=urlparse.urljoin(
                    self.base_path, "phppage/down-load-model-list.html?showEndLife=false&catid={}&appPath=us".format(response.meta["cid"])),
                headers={"Referer": response.url,
                         "X-Requested-With": "XMLHttpRequest"},
                callback=self.parse_products) 
Developer: firmadyne, Project: scraper, Lines: 21, Source: tp-link_en.py

Example 13: parse_product_version

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def parse_product_version(self, response):
        # <div class="hardware-version">
        if response.xpath("//div[@class=\"hardware-version\"]").extract():
            for i in [1, 2]:
                yield Request(
                    url = response.url.replace(".html", "-V{}.html".format(i)),
                    meta = {"product": response.meta['product'],
                            "version": "V{}".format(int(i)+1),
                            },
                    callback = self.parse_product)

        else: #only for v1?
            yield Request(
                url = response.url + "?again=true",
                meta = {"product": response.meta['product'],
                        "version": "V1"
                        },
                callback = self.parse_product) 
Developer: firmadyne, Project: scraper, Lines: 20, Source: tp-link_en.py

Example 14: start_requests

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def start_requests(self):
        for url in self.start_urls:
            yield scrapy.Request(
                url,
                callback=self.parse,
                errback=self.parse_error,
                dont_filter=True,
                meta={
                    "current_request_traversal_page_count": 0,
                    "spider_config": self.spider_config,
                    "manifest": self.manifest
                }
            ) 
Developer: invanalabs, Project: invana-bot, Lines: 15, Source: base.py

Example 15: _build_request

# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import Request [as alias]
def _build_request(self, rule, link):
        headers = {}
        user_agent_header = os.environ.get("WCP_REQUEST_HEADERS_USER_AGENT")
        if user_agent_header:
            headers = {"User-Agent": user_agent_header}
        r = Request(url=link.url, headers=headers, callback=self._response_downloaded)
        r.meta.update(rule=rule, link_text=link.text)
        return r 
Developer: invanalabs, Project: invana-bot, Lines: 10, Source: base.py


Note: The scrapy.http.Request examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.