

Python scrapy.FormRequest Class Code Examples

This article collects typical usage examples of the Python scrapy.FormRequest class. If you have been struggling with questions like what FormRequest is for, how to use it, or what it looks like in real code, the curated class examples below should help.


The following 15 code examples of the FormRequest class are shown, sorted by popularity by default. You can upvote any example you like or find useful; your ratings help the system recommend better Python code samples.
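
Before the examples, here is a minimal sketch of the two patterns they all build on: constructing a FormRequest directly with formdata, and letting FormRequest.from_response locate a form in a downloaded page and pre-fill it. The URL, field names, and callback names below are placeholders, not taken from any of the examples:

    from scrapy import FormRequest

    # Inside a spider callback, where `self` and `response` are available.

    # Pattern 1: POST known form fields straight to a URL.
    request = FormRequest('http://example.com/search',
                          formdata={'q': 'scrapy'},
                          callback=self.parse_results)

    # Pattern 2: find the form in the page, keep its pre-filled fields,
    # and merge in our overrides (the typical login flow).
    request = FormRequest.from_response(response,
                                        formdata={'username': 'user', 'password': 'pass'},
                                        callback=self.after_login)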

Example 1: _request_number_of_pages

    def _request_number_of_pages(self, date_str):
        url = self.base_url + '/consulta_paginarBusquedaVisitas'

        request = FormRequest(url=url,
                              meta={
                                  'date': date_str,
                              },
                              formdata={
                                  'fechaDesde': date_str,
                                  'fechaHasta': date_str,
                                  'paginaActual': '1',
                                  'visita.visitanteNombres': '',
                                  'visita.personalNombre': '',
                                  'visita.oficinaNombre': '',
                                  'visita.sedeId': '00',
                                  'visita.ano': '',
                                  'visita.mes': '',
                                  'visita.fechaIngreso': '',
                                  'paginaNueva': '0',
                                  'visita.visitanteId': '0',
                                  'visita.personalId': '0',
                                  'visita.oficinaId': '0',
                              },
                              dont_filter=True,
                              callback=self.parse_initial_request)

        return request
Developer: manolo-rocks, Project: manolo_scraper, Lines: 28, Source: justicia.py
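
The meta= dict passed to the FormRequest constructor travels with the request, and Scrapy hands it back on the response, so the callback can read the values out again. A hypothetical sketch of the first lines of parse_initial_request (only the callback name comes from the example; the body is an assumption):

    def parse_initial_request(self, response):
        # Whatever was attached via meta= comes back on response.meta.
        date_str = response.meta['date']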

Example 2: _parse_list

    def _parse_list(self, response):
        report_list = response.xpath("//div[@class='reportlist bluelink']/ul//a/@href")
        for report_url in report_list:
            request = FormRequest(urljoin(self.base_url, report_url.extract()),
                                  callback=self.parse_item, dont_filter=False)
            request.meta["large_category_name"] = response.meta["large_category_name"]
            request.meta["mid_category_name"] = response.meta["mid_category_name"]
            request.meta["report_type"] = response.meta["report_type"]
            yield request
Developer: hanwei2008, Project: crawl, Lines: 8, Source: chinaidrSpider.py

Example 3: parse

    def parse(self, response):
        large_categories = response.xpath(".//*[@class='rptmap']//strong//a")
        for large_category in large_categories:
            large_category_name = clean_text(large_category.xpath("./text()").extract()[0].strip())
            page_url = large_category.xpath("./@href").extract()[0]
            url = urljoin(self.base_url, page_url)
            request = FormRequest(url, callback=self.parse_middle_category, dont_filter=True)
            request.meta["large_category"] = large_category_name
            yield request
Developer: hanwei2008, Project: crawl, Lines: 9, Source: IndustryReportSpiderOcn.py

Example 4: parse_middle_category

    def parse_middle_category(self, response):
        # '报告' means 'report'; select the <li> entries whose text mentions it.
        report_types = response.xpath(u"//li[contains(text(),'报告')]")
        for report_type in report_types:
            mid_category_url = urljoin(self.base_url,
                                       report_type.xpath(u"./preceding-sibling::span[1]/a/@href").extract()[0])
            request = FormRequest(mid_category_url, callback=self.parse_page, dont_filter=True)
            request.meta["large_category_name"] = response.meta["large_category_name"]
            request.meta["mid_category_name"] = response.meta["mid_category_name"]
            request.meta["report_type"] = clean_text(report_type.xpath("./text()").extract()[0].strip())
            request.meta["page_base_url"] = mid_category_url
            yield request
Developer: hanwei2008, Project: crawl, Lines: 10, Source: chinaidrSpider.py

Example 5: parse_middle_category

    def parse_middle_category(self, response):
        mid_categories = response.xpath(".//*[@class='report2']//h2//a")
        for mid_category in mid_categories:
            mid_category_name = clean_text(mid_category.xpath("./text()").extract()[0].strip())
            page_url = mid_category.xpath("./@href").extract()[0]
            url = urljoin(self.base_url, page_url)
            request = FormRequest(url, callback=self._parse_item, dont_filter=True)
            request.meta["large_category"] = response.meta["large_category"]
            request.meta["mid_category"] = mid_category_name
            request.meta["first_url"] = url
            yield request
Developer: hanwei2008, Project: crawl, Lines: 11, Source: IndustryReportSpiderOcn.py

Example 6: parse

    def parse(self, response):
        large_categories = response.xpath("//*[@class='tabContent bluelink']//*[contains(@style, 'padding')]/a")
        for large_category in large_categories:
            large_category_name = clean_text(large_category.xpath(".//text()").extract()[0].strip())
            mid_categories = large_category.xpath("./parent::*/following-sibling::*[1]/a")
            for mid_category in mid_categories:
                mid_category_name = clean_text(mid_category.xpath("./text()").extract()[0])
                mid_category_url = urljoin(self.base_url, mid_category.xpath("./@href").extract()[0])
                request = FormRequest(mid_category_url, callback=self.parse_middle_category, dont_filter=True)
                request.meta["large_category_name"] = large_category_name
                request.meta["mid_category_name"] = mid_category_name
                yield request
Developer: hanwei2008, Project: crawl, Lines: 12, Source: chinaidrSpider.py

Example 7: _parse_page_free

    def _parse_page_free(self, response):
        total_pages = int(clean_text(response.xpath(".//*[@class='pages']//a//text()").extract()[-2].strip()))
        first_url = response.meta["first_url"]
        request = FormRequest(first_url, callback=self._parse_free, dont_filter=True)
        request.meta["large_category"] = response.meta["large_category"]
        yield request
        if total_pages > 1:
            for i in xrange(1, total_pages):  # Python 2 source; use range() on Python 3
                next_page = first_url[:-5] + '-p' + str(i + 1) + '.html'
                request = FormRequest(next_page, callback=self._parse_free, dont_filter=True)
                request.meta["large_category"] = response.meta["large_category"]
                yield request
Developer: hanwei2008, Project: crawl, Lines: 12, Source: IndustryReportSpider51report.py

Example 8: parse

    def parse(self, response):
        large_categories = response.xpath(".//*[@class='shopleft_bt']//a")
        middle_categories = response.xpath(".//*[@class='shopnav2']")
        for i in xrange(len(large_categories)):  # Python 2 source; use range() on Python 3
            large_category_name = clean_text(large_categories[i].xpath("./text()").extract()[0].strip())
            middle_category_list = middle_categories[i].xpath(".//*[@class='shopleft_wt']")
            for middle_category in middle_category_list:
                middle_category_name = clean_text(middle_category.xpath(".//a/text()").extract())
                page_url = middle_category.xpath(".//a//@href").extract()[0]
                url = urljoin(self.base_url, page_url)
                request = FormRequest(url, callback=self._parse_item, dont_filter=True)
                request.meta["large_category"] = large_category_name
                request.meta["mid_category"] = middle_category_name
                yield request
Developer: hanwei2008, Project: crawl, Lines: 14, Source: IndustryReportSpiderOlxoz.py

Example 9: _request_next_page

    def _request_next_page(self, response, date_str, callback):
        current_page = int(response.meta['current_page'])

        total_string = response.css('#LblTotal').xpath('./text()').extract_first(default='')

        total = re.search(r'(\d+)', total_string)

        if total:
            # Deal with the next page.
            total = total.group(1)
            number_of_pages = self._get_number_of_pages(int(total))

            if current_page < number_of_pages:
                current_page += 1

                formdata = {
                    'TxtFecha': date_str,
                    'BtnBuscar': 'Buscar',
                    'LwVisitasCR$DpVisitasCR$ctl02$ctl00.x': '1',
                    'LwVisitasCR$DpVisitasCR$ctl02$ctl00.y': '1'
                }

                request = FormRequest.from_response(response,
                                                    formdata=formdata,
                                                    dont_click=True,
                                                    dont_filter=True,
                                                    callback=callback,
                                                    )

                request.meta['date'] = date_str
                request.meta['current_page'] = current_page

                return request
Developer: andyfires, Project: manolo_scraper, Lines: 33, Source: congreso.py
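
Example 9 relies on a _get_number_of_pages helper that is not shown. A minimal sketch, assuming a fixed number of results per page (the PAGE_SIZE value of 20 is an assumption, not taken from the original spider):

    PAGE_SIZE = 20  # assumed results per page; the real spider may differ

    def _get_number_of_pages(self, total_results):
        # Ceiling division: e.g. 41 results -> 3 pages of 20.
        return (total_results + self.PAGE_SIZE - 1) // self.PAGE_SIZE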

Example 10: parse

    def parse(self, response):
        """
        这是默认的回调方法,得到response后:
        1. 如果需要登录,则先通过FormRequest登录论坛;
        2. 如果不需要登录,通过Request继续请求;
        :param response:
        :return:
        """
        # 需要登录,使用FormRequest.from_response模拟登录
        if 'id="lsform"' in response.body:
            logging.info('in parse, need to login, url: {0}'.format(response.url))
            form_data = {'handlekey': 'ls', 'quickforward': 'yes', 'username': 'daniell123', 'password': 'admin123'}
            request = FormRequest.from_response(response=response,
                                                headers=self.headers,
                                                formxpath='//form[contains(@id, "lsform")]',
                                                formdata=form_data,
                                                callback=self.parse_list
                                                )
        else:
            logging.info('in parse, NOT need to login, url: {0}'.format(response.url))
            request = Request(url=response.url,
                              headers=self.headers,
                              callback=self.parse_list,
                              )

        yield request
Developer: allhu, Project: scrapy_in_practice, Lines: 26, Source: xiaochuncnjp_spider.py

Example 11: parse

    def parse(self, response):
        # A commented-out variant in the source looped over
        # http://ntiaoji.kaoyan.com/tjadm/1.html ... 7.html, setting
        # self.headers['Referer'] to each URL, sleeping 2 seconds, and
        # yielding the same login FormRequest with callback=self.download_page.
        return FormRequest.from_response(response,
                                         headers=self.headers,
                                         formdata={
                                             'username': 'kytj1',
                                             'password': '6ujBJ4XQyLeGmJmB'
                                         },
                                         callback=self.after_login,
                                         dont_filter=True)
Developer: TonyDoen, Project: python_code_review, Lines: 33, Source: dmoz_spider.py

Example 12: parse_page

    def parse_page(self, response):
        request_list = self._parse_list(response)
        for r in request_list:
            yield r
        # "下一页" is the "next page" link in the pager.
        next_page = response.xpath(u"//*[@id='AspNetPager1']/a[text()='下一页']/@href")
        if len(next_page) > 0:
            next_page_url = urljoin(self.base_url, next_page.extract()[0])
            if not next_page_url.startswith(response.meta["page_base_url"]):
                if next_page_url.endswith("html"):
                    next_page_url = response.meta["page_base_url"] + next_page_url[next_page_url.rindex("/") + 1:]
            request = FormRequest(next_page_url, callback=self.parse_page, dont_filter=True)
            request.meta["large_category_name"] = response.meta["large_category_name"]
            request.meta["mid_category_name"] = response.meta["mid_category_name"]
            request.meta["report_type"] = response.meta["report_type"]
            request.meta["page_base_url"] = response.meta["page_base_url"]
            yield request
Developer: hanwei2008, Project: crawl, Lines: 16, Source: chinaidrSpider.py

Example 13: parse

    def parse(self, response):
        form_data = {'username': '[email protected]', 'password': '123456', 'remember_me': '1'}
        return FormRequest.from_response(response,
                                         headers=self.headers,
                                         formxpath='//form[@class="form-login"]',
                                         formdata=form_data,
                                         callback=self.after_login)
Developer: allhu, Project: scrapy_in_practice, Lines: 8, Source: fishsaying_spider.py

Example 14: parse

    def parse(self, response):
        yield FormRequest.from_response(
            response,
            formname='aspnetForm',
            formdata={'Skin$body$FundingSourceChoices$0': '1',
                      'Skin$body$FundingSourceChoices$1': '0'},
            meta={'curr_listing_page': 1, 'flag': False},
            callback=self.after_login)
Developer: jorbecalona, Project: umichemploymentscrape, Lines: 8, Source: job_listing_spider.py

Example 15: parse

    def parse(self, response):
        ## page_count_text {string}
        # @example
        #
        # if(ANP_checkInput('AspNetPager1_input',3270,'页索引超出范围!','页索引不是有效的数值!'))
        # {ANP_goToPage('AspNetPager1_input','page','http://www.bgku.cn/sitemap_1',
        # 'http://www.bgku.cn/sitemap_{page}','',3270,false);};return false;
        ##

        page_count_text = response.xpath('//*[@id="AspNetPager1_btn"]/@onclick').extract()[0]
        match = re.search(r',\d{4,},', page_count_text)
        page_count = int(match.group(0).strip(','))
        for page in range(1, page_count + 1):
            url = 'http://www.bgku.cn/sitemap_' + str(page)
            request = FormRequest(url, callback=self.parse_index_page, dont_filter=True)
            request.meta["page"] = page
            yield request
Developer: hanwei2008, Project: crawl, Lines: 17, Source: bgkuSpider.py
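
One caveat on Example 15: the ,\d{4,}, pattern only matches page counts of four or more digits, so a sitemap with, say, 500 pages would slip through. A more targeted sketch that anchors on the ANP_checkInput(...) call shown in the comment above (assuming that onclick markup stays stable):

    # The page count is the second argument of
    # ANP_checkInput('AspNetPager1_input',3270,...).
    match = re.search(r"ANP_checkInput\('[^']*',\s*(\d+)", page_count_text)
    if match:
        page_count = int(match.group(1))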


Note: The scrapy.FormRequest class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors, and distribution and use should follow each project's License. Do not repost without permission.