当前位置: 首页>>代码示例>>Python>>正文


Python FormRequest.from_response方法代码示例

本文整理汇总了Python中scrapy.http.FormRequest.from_response方法的典型用法代码示例。如果您正苦于以下问题:Python FormRequest.from_response方法的具体用法?Python FormRequest.from_response怎么用?Python FormRequest.from_response使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scrapy.http.FormRequest的用法示例。


在下文中一共展示了FormRequest.from_response方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: login

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def login(self, response):
        """Generate a login request for the configured login form.

        Reads the form fields, HTTP method and optional CSRF settings from
        ``self.config['login']`` and returns a FormRequest built from the
        login page *response*.
        """

        # Add CSRF data to login.
        # Note: scrapy already does this automatically, if it finds
        # pre-filled input fields. If everything works without having
        # to use this custom csrf feature, it could be removed in the future.
        if self.config['login']['csrf']['enabled']:
            field = self.config['login']['csrf']['field']
            # IndexError here means the configured CSRF field is absent from the page.
            csrf = response.xpath('//input[@name="' + field + '"]/@value')[0].extract()
            self.config['login']['fields'][field] = csrf
            # Fixed: the message previously closed the value's double quote with a
            # single quote; use lazy %-style args so formatting is deferred.
            logging.info('Adding CSRF data to login. Field: "%s" | value: "%s"', field, csrf)

        return FormRequest.from_response(
            response,
            formdata=self.config['login']['fields'],
            method=self.config['login']['method'],
            dont_filter=True,  # login URL may already have been visited
            callback=self.post_login
        )


    #---------------------------------------------------------------------- 
开发者ID:cytopia,项目名称:crawlpy,代码行数:25,代码来源:crawlpy_spider.py

示例2: parse

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def parse(self, response):
        """Drill down the support search form: category -> subCategory -> product.

        Each level that is still unselected triggers one form submission per
        available option; once both levels are selected, product pages are
        requested directly.
        """
        category = response.xpath(
            "//form[@id='productSearchForm']//input[@name='category']/@value").extract()[0]
        if not category:
            # No category chosen yet: submit the form once per category option.
            choices = response.xpath("//form[@id='productSearchForm']/div[1]//ul[@class='select-options']//a/@data-id").extract()
            for choice in choices:
                yield FormRequest.from_response(
                    response,
                    formname="productSearchForm",
                    formdata={"category": choice},
                    callback=self.parse)
            return

        subcategory = response.xpath(
            "//form[@id='productSearchForm']//input[@name='subCategory']/@value").extract()[0]
        if not subcategory:
            # Category fixed, subcategory not: iterate the second select.
            choices = response.xpath("//form[@id='productSearchForm']/div[2]//ul[@class='select-options']//a/@data-id").extract()
            for choice in choices:
                yield FormRequest.from_response(
                    response,
                    formname="productSearchForm",
                    formdata={"subCategory": choice},
                    callback=self.parse)
            return

        # Both levels selected: request every product page in the third select.
        for product in response.xpath("//form[@id='productSearchForm']/div[3]//ul[@class='select-options']//a/@data-id").extract():
            yield Request(
                url=urlparse.urljoin(
                    response.url, "/us/support-product?pid=%s" % (product)),
                headers={"Referer": response.url},
                callback=self.parse_product)
开发者ID:firmadyne,项目名称:scraper,代码行数:25,代码来源:belkin.py

示例3: post_login

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def post_login(self, response):
        """Fill in and submit the Zhihu login form using the page's CSRF token."""
        print 'Preparing login'
        # Extract the _xsrf hidden-field value from the fetched login page;
        # it must be posted back for the form submission to succeed.
        xsrf = Selector(response).xpath('//input[@name="_xsrf"]/@value').extract()[0]
        print xsrf
        # FormRequest.from_response is Scrapy's helper for POSTing a form.
        # On successful login the after_login callback is invoked.
        return [FormRequest.from_response(response,   # form page, e.g. "http://www.zhihu.com/login"
                            meta = {'cookiejar' : response.meta['cookiejar']},
                            headers = self.headers,
                            formdata = {
                            '_xsrf': xsrf,
                            'email': '123456',
                            'password': '123456'
                            },
                            callback = self.after_login,
                            dont_filter = True
                            )] 
开发者ID:Andrew-liu,项目名称:scrapy_example,代码行数:20,代码来源:zhihu_spider.py

示例4: process_response

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def process_response(self, request, response, spider):
        """Intercept CAPTCHA pages and retry them with a proposed solution.

        Returns the original *response* when no CAPTCHA is present, otherwise
        a FormRequest that submits the solved CAPTCHA. Raises IgnoreRequest
        when no solution is available or the retry budget is exhausted.
        """
        captchaUrl = self.findCaptchaUrl(response.text)
        if captchaUrl is None:
            return response  # No CAPTCHA is present
        # Bug fix: the previous check `meta.get(RETRY_KEY, MAX) == MAX` treated a
        # *missing* counter as already-at-maximum, so the very first CAPTCHA was
        # surrendered without an attempt. Default to 0 and compare with >=.
        if request.meta.get(RETRY_KEY, 0) >= self.MAX_CAPTCHA_ATTEMPTS:
            logger.warning("Too many CAPTCHA attempts; surrendering.")
            raise IgnoreRequest
        captchaSolution = solveCaptcha(imgUrl=captchaUrl, brazen=True)
        if captchaSolution is None:
            logger.error("CAPTCHA page detected, but no solution was proposed.")
            raise IgnoreRequest
        # Return a request to submit the captcha
        logger.info("Submitting solution %s for CAPTCHA at %s", captchaSolution, captchaUrl)
        formRequest = FormRequest.from_response(
            response, formnumber=0, formdata={self.findCaptchaField(response.text): captchaSolution})
        # Bug fix: increment under RETRY_KEY itself rather than the literal
        # 'captcha_retries', so the counter read above actually advances.
        formRequest.meta[RETRY_KEY] = request.meta.get(RETRY_KEY, 0) + 1
        return formRequest
开发者ID:owen9825,项目名称:captcha-middleware,代码行数:19,代码来源:middleware.py

示例5: post_login

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def post_login(self, response):
        """Fill in and submit the Zhihu login form using the page's CSRF token."""
        print 'Preparing login'
        # Extract the _xsrf hidden-field value from the fetched login page;
        # it must be posted back for the form submission to succeed.
        xsrf = Selector(response).xpath('//input[@name="_xsrf"]/@value').extract()[0]
        print xsrf
        # FormRequest.from_response is Scrapy's helper for POSTing a form.
        # On successful login the after_login callback is invoked.
        return [FormRequest.from_response(response,  # form page, e.g. "http://www.zhihu.com/login"
                                          meta={'cookiejar': response.meta['cookiejar']},
                                          headers=self.headers,  # note: custom headers are required here
                                          formdata={
                                              '_xsrf': xsrf,
                                              'email': '1009137312@qq.com',
                                              'password': '6yhn6yhn'
                                          },
                                          callback=self.after_login,
                                          dont_filter=True
                                          )] 
开发者ID:openslack,项目名称:openslack-crawler,代码行数:20,代码来源:zhihu_spider.py

示例6: post_login

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def post_login(self, response):
        """Build and submit the Pixiv login form with credentials from settings."""
        # Pull username/password out of settings.py into the class attributes.
        self.set_username_and_password()
        username, password = PixivSpider.username, PixivSpider.password
        if username is None or password is None:
            # Abort the whole crawl when credentials are missing.
            raise CloseSpider('username or password is null!')

        self.logger.debug('Preparing login, username = %s password = %s' % (username, password))
        # The login page embeds a one-time post_key that must accompany the form.
        post_key = response.css('#old-login input[name=post_key]::attr(value)').extract_first()
        self.headers['Referer'] = response.url
        credentials = {
            'pixiv_id': username,
            'password': password,
            'post_key': post_key,
            'mode': 'login'
        }
        # from_response simulates submitting the page's login form.
        return FormRequest.from_response(
            response,
            meta={'cookiejar': response.meta['cookiejar']},
            headers=self.headers,
            formdata=credentials,
            callback=self.after_login,
            dont_filter=True
        )

    # username and password from settings.py 
开发者ID:SylvanasSun,项目名称:scrapy-picture-spider,代码行数:30,代码来源:pixiv_spider.py

示例7: parse

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def parse(self, response):
        '''
        Submit the login form using the provided credentials.
        '''
        credentials = {'email': self.email, 'pass': self.password}
        # Locate the form by its action URL rather than by name/number.
        return FormRequest.from_response(
            response,
            formxpath='//form[contains(@action, "login")]',
            formdata=credentials,
            callback=self.parse_home
        )
开发者ID:rugantio,项目名称:fbcrawl,代码行数:12,代码来源:fbcrawl.py

示例8: _steady_login

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def _steady_login(self, response):
        """Perform the Steady OAuth login flow and store the access token.

        Yield-driven coroutine: each ``yield`` hands a request to the crawler
        and resumes with its response. Sequence: fetch the authorize page,
        submit the credential form, extract the authorization ``code`` from
        the redirect URL, then exchange it for an access token.
        """
        # Step 1: load the OAuth authorize page (cached for one day).
        response = yield scrapy.Request(
            "https://steadyhq.com/oauth/authorize?"
            + "client_id=0c29f006-1a98-48f1-8a63-2c0652c59f28&"
            + "redirect_uri=https://uebermedien.de&scope=read&"
            + "response_type=code&refresh_only=false",
            meta={"cache_expires": timedelta(days=1)},
        )

        # Step 2: submit the login form. OrderedDict keeps the field order
        # stable; 301 is handled so the redirect URL (carrying ?code=) is seen.
        response = yield FormRequest.from_response(
            response,
            formdata=OrderedDict(
                [("user[email]", self._username), ("user[password]", self._password)]
            ),
            dont_filter=True,
            meta={"handle_httpstatus_list": [301], "cache_expires": timedelta(days=1)},
        )

        # Step 3: the authorization code arrives as a query parameter of the
        # redirect target; a missing "code" key means the login was rejected.
        try:
            code = parse_qs(urlparse(response.url).query)["code"][0]
        except KeyError:
            self.logger.error("Login failed: Wrong username and password")
            return

        # Step 4: exchange the code for an access token via the token endpoint.
        body = OrderedDict(
            [
                ("client_id", "0c29f006-1a98-48f1-8a63-2c0652c59f28"),
                ("grant_type", "authorization_code"),
                ("code", code),
                ("redirect_uri", "https://uebermedien.de"),
            ]
        )
        response = yield scrapy.Request(
            "https://steadyhq.com/api/v1/oauth/token",
            method="POST",
            body=json.dumps(body),
            headers={"Accept": "application/json", "Content-Type": "application/json"},
            meta={"cache_expires": timedelta(days=1)},
        )
        self._steady_token = json.loads(response.text)["access_token"] 
开发者ID:PyFeeds,项目名称:PyFeeds,代码行数:42,代码来源:uebermedien_de.py

示例9: parse

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def parse(self, response):
        """Submit the site's form with pre-built formdata and a fixed
        request-verification token carried in the request meta.

        Dead code removed: duplicate ``scrapy.Selector(response)`` calls and
        the unused ``item``, ``href`` and ``hidden`` locals had no effect on
        the returned request.
        """
        return [FormRequest.from_response(
            response,
            formdata=self.formdata,
            headers=self.headers,
            meta={
                '__RequestVerificationToken': 'BSDY33UtJXv0XqMkIvAJXAdMXC-jqACBsiZb6-mx4uW8Hr89aArTh9DfLtQFDh6NwQsqHXiZMTzheuim3ETI78PhOzQf263wliXL8ArkTrA1'},
            callback=self.parse_item)]
开发者ID:openslack,项目名称:openslack-crawler,代码行数:14,代码来源:a51newren.py

示例10: login

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def login(self, response):
        """Submit the Amazon login form and continue with parse_item on success."""
        # Dump the raw login page for offline debugging.
        self._log_page(response, 'amazon_login.html')
        request = FormRequest.from_response(
            response,
            formdata=self.formdata,
            headers=self.headers,
            meta={'cookiejar': response.meta['cookiejar']},
            callback=self.parse_item)  # success login
        return [request]
开发者ID:openslack,项目名称:openslack-crawler,代码行数:9,代码来源:AmazonSpider.py

示例11: parse_home

# 需要导入模块: from scrapy.http import FormRequest [as 别名]
# 或者: from scrapy.http.FormRequest import from_response [as 别名]
def parse_home(self, response):
        '''
        This method has multiple purposes:
        1) Handle failed logins due to facebook 'save-device' redirection
        2) Set language interface, if not already provided
        3) Navigate to given page 
        '''
        # Facebook may interpose a "remember this device?" page; decline it
        # and come back here with the resulting response.
        if response.xpath("//div/a[contains(@href,'save-device')]"):
            self.logger.info('Going through the "save-device" checkpoint')
            return FormRequest.from_response(
                response,
                formdata={'name_action_selected': 'dont_save'},
                callback=self.parse_home
                )

        # Detect the interface language from the search box placeholder,
        # checking the supported languages in a fixed order.
        if self.lang == '_':
            placeholders = [
                ('Search Facebook', 'en'),
                ('Buscar en Facebook', 'es'),
                ('Rechercher sur Facebook', 'fr'),
                ('Cerca su Facebook', 'it'),
                ('Pesquisa no Facebook', 'pt'),
            ]
            for placeholder, code in placeholders:
                if response.xpath("//input[@placeholder='%s']" % placeholder):
                    self.logger.info('Language recognized: lang="%s"' % code)
                    self.lang = code
                    break
            else:
                raise AttributeError('Language not recognized\n'
                                     'Change your interface lang from facebook ' 
                                     'and try again')

        # Finally, move on to the page we were asked to scrape.
        href = response.urljoin(self.page)
        self.logger.info('Scraping facebook page {}'.format(href))
        return scrapy.Request(url=href, callback=self.parse_page, meta={'index': 1})
开发者ID:rugantio,项目名称:fbcrawl,代码行数:44,代码来源:fbcrawl.py


注:本文中的scrapy.http.FormRequest.from_response方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。