當前位置: 首頁>>代碼示例>>Python>>正文


Python scrapy.FormRequest類代碼示例

本文整理匯總了Python中scrapy.FormRequest的典型用法代碼示例。如果您正苦於以下問題:Python FormRequest類的具體用法?Python FormRequest怎麽用?Python FormRequest使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了FormRequest類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: _request_number_of_pages

    def _request_number_of_pages(self, date_str):
        """Request the first search-results page for *date_str* so the
        callback can read the total number of result pages.

        :param date_str: date string used as both ``fechaDesde`` and
            ``fechaHasta`` in the search form.
        :returns: a ``FormRequest`` whose ``meta['date']`` carries the date
            through to ``parse_initial_request``.
        """
        url = self.base_url + '/consulta_paginarBusquedaVisitas'

        # The date travels in meta so the callback can reuse it; the
        # original also re-assigned request.meta['date'] after construction,
        # which was redundant with the meta= argument below.
        return FormRequest(url=url,
                           meta={
                               'date': date_str,
                           },
                           formdata={
                               'fechaDesde': date_str,
                               'fechaHasta': date_str,
                               'paginaActual': '1',
                               'visita.visitanteNombres': '',
                               'visita.personalNombre': '',
                               'visita.oficinaNombre': '',
                               'visita.sedeId': '00',
                               'visita.ano': '',
                               'visita.mes': '',
                               'visita.fechaIngreso': '',
                               'paginaNueva': '0',
                               'visita.visitanteId': '0',
                               'visita.personalId': '0',
                               'visita.oficinaId': '0',
                           },
                           dont_filter=True,
                           callback=self.parse_initial_request)
開發者ID:manolo-rocks,項目名稱:manolo_scraper,代碼行數:28,代碼來源:justicia.py

示例2: _parse_list

 def _parse_list(self, response):
     """Yield a ``parse_item`` request for every report link on the page,
     forwarding the category/report-type metadata from *response*."""
     hrefs = response.xpath("//div[@class=\"reportlist bluelink\"]/ul//a/@href")
     carried_keys = ("large_category_name", "mid_category_name", "report_type")
     for href in hrefs:
         yield FormRequest(
             urljoin(self.base_url, href.extract()),
             callback=self.parse_item,
             dont_filter=False,
             meta={key: response.meta[key] for key in carried_keys},
         )
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:8,代碼來源:chinaidrSpider.py

示例3: parse

 def parse(self, response):
     """Follow every large-category link into ``parse_middle_category``."""
     for anchor in response.xpath(".//*[@class='rptmap']//strong//a"):
         category_name = clean_text(anchor.xpath("./text()").extract()[0].strip())
         target = urljoin(self.base_url, anchor.xpath("./@href").extract()[0])
         yield FormRequest(
             target,
             callback=self.parse_middle_category,
             dont_filter=True,
             meta={"large_category": category_name},
         )
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:9,代碼來源:IndustryReportSpiderOcn.py

示例4: parse_middle_category

 def parse_middle_category(self, response):
     """Request the listing page of every report type on this page,
     carrying the category names and the page base URL in meta."""
     for report_node in response.xpath(u"//li[contains(text(),'報告')]"):
         href = report_node.xpath(u"./preceding-sibling::span[1]/a/@href").extract()[0]
         target = urljoin(self.base_url, href)
         yield FormRequest(
             target,
             callback=self.parse_page,
             dont_filter=True,
             meta={
                 "large_category_name": response.meta["large_category_name"],
                 "mid_category_name": response.meta["mid_category_name"],
                 "report_type": clean_text(report_node.xpath("./text()").extract()[0].strip()),
                 # Pagination later rebuilds relative hrefs against this URL.
                 "page_base_url": target,
             },
         )
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:10,代碼來源:chinaidrSpider.py

示例5: parse_middle_category

 def parse_middle_category(self, response):
     """Follow every middle-category heading link into ``_parse_item``,
     remembering the first page URL for later pagination."""
     for heading in response.xpath(".//*[@class='report2']//h2//a"):
         heading_name = clean_text(heading.xpath("./text()").extract()[0].strip())
         target = urljoin(self.base_url, heading.xpath("./@href").extract()[0])
         yield FormRequest(
             target,
             callback=self._parse_item,
             dont_filter=True,
             meta={
                 "large_category": response.meta["large_category"],
                 "mid_category": heading_name,
                 "first_url": target,
             },
         )
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:11,代碼來源:IndustryReportSpiderOcn.py

示例6: parse

 def parse(self, response):
     """Walk the category tab: each large-category anchor is followed by a
     sibling element holding its middle-category anchors; request each
     middle category via ``parse_middle_category``."""
     big_anchors = response.xpath("//*[@class='tabContent bluelink']//*[contains(@style, 'padding')]/a")
     for big_anchor in big_anchors:
         big_name = clean_text(big_anchor.xpath(".//text()").extract()[0].strip())
         sibling_anchors = big_anchor.xpath("./parent::*/following-sibling::*[1]/a")
         for mid_anchor in sibling_anchors:
             mid_name = clean_text(mid_anchor.xpath("./text()").extract()[0])
             target = urljoin(self.base_url, mid_anchor.xpath("./@href").extract()[0])
             yield FormRequest(
                 target,
                 callback=self.parse_middle_category,
                 dont_filter=True,
                 meta={
                     "large_category_name": big_name,
                     "mid_category_name": mid_name,
                 },
             )
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:12,代碼來源:chinaidrSpider.py

示例7: _parse_page_free

 def _parse_page_free(self, response):
     """Request every page of the current free-report listing.

     Page 1 is the base URL itself; pages 2..N replace the trailing
     '.html' with '-p<N>.html'.
     """
     page_total = int(clean_text(
         response.xpath(".//*[@class='pages']//a//text()").extract()[-2].strip()))
     base_url = response.meta["first_url"]
     category = response.meta["large_category"]
     targets = [base_url]
     targets.extend(base_url[:-5] + '-p' + str(num) + '.html'
                    for num in xrange(2, page_total + 1))
     for target in targets:
         yield FormRequest(target,
                           callback=self._parse_free,
                           dont_filter=True,
                           meta={"large_category": category})
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:12,代碼來源:IndustryReportSpider51report.py

示例8: parse

 def parse(self, response):
     """Pair each large-category title with its middle-category block (the
     two node lists are parallel by index) and request every middle
     category via ``_parse_item``."""
     big_titles = response.xpath(".//*[@class='shopleft_bt']//a")
     mid_blocks = response.xpath(".//*[@class='shopnav2']")
     for idx, big_title in enumerate(big_titles):
         big_name = clean_text(big_title.xpath("./text()").extract()[0].strip())
         # Indexed lookup (not zip) so a malformed page still raises.
         for mid_node in mid_blocks[idx].xpath(".//*[@class='shopleft_wt']"):
             mid_name = clean_text(mid_node.xpath(".//a/text()").extract())
             target = urljoin(self.base_url, mid_node.xpath(".//a//@href").extract()[0])
             yield FormRequest(
                 target,
                 callback=self._parse_item,
                 dont_filter=True,
                 meta={
                     "large_category": big_name,
                     "mid_category": mid_name,
                 },
             )
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:14,代碼來源:IndustryReportSpiderOlxoz.py

示例9: _request_next_page

    def _request_next_page(self, response, date_str, callback):
        """Build the FormRequest for the following results page, if any.

        Reads the total-results label, converts it to a page count, and —
        when the current page is not the last — re-submits the search form
        for the next page. Returns None when the total cannot be parsed or
        no further page exists.
        """
        page_now = int(response.meta['current_page'])

        label = response.css('#LblTotal').xpath('./text()').extract_first(default='')
        match = re.search(r'(\d+)', label)
        if not match:
            return

        page_count = self._get_number_of_pages(int(match.group(1)))
        if page_now >= page_count:
            return

        # The .x/.y fields emulate clicking the pager's image button.
        request = FormRequest.from_response(
            response,
            formdata={
                'TxtFecha': date_str,
                'BtnBuscar': 'Buscar',
                'LwVisitasCR$DpVisitasCR$ctl02$ctl00.x': '1',
                'LwVisitasCR$DpVisitasCR$ctl02$ctl00.y': '1'
            },
            dont_click=True,
            dont_filter=True,
            callback=callback,
        )
        request.meta['date'] = date_str
        request.meta['current_page'] = page_now + 1
        return request
開發者ID:andyfires,項目名稱:manolo_scraper,代碼行數:33,代碼來源:congreso.py

示例10: parse

    def parse(self, response):
        """Default callback.

        If the page body contains the quick-login form (``id="lsform"``) we
        are not logged in yet, so submit credentials with
        ``FormRequest.from_response``; otherwise re-request the same URL
        and continue straight into ``parse_list``.
        """
        if 'id="lsform"' in response.body:
            logging.info('in parse, need to login, url: {0}'.format(response.url))
            credentials = {'handlekey': 'ls', 'quickforward': 'yes', 'username': 'daniell123', 'password': 'admin123'}
            yield FormRequest.from_response(
                response=response,
                headers=self.headers,
                formxpath='//form[contains(@id, "lsform")]',
                formdata=credentials,
                callback=self.parse_list,
            )
        else:
            logging.info('in parse, NOT need to login, url: {0}'.format(response.url))
            yield Request(
                url=response.url,
                headers=self.headers,
                callback=self.parse_list,
            )
開發者ID:allhu,項目名稱:scrapy_in_practice,代碼行數:26,代碼來源:xiaochuncnjp_spider.py

示例11: parse

    def parse(self, response):
        """Log in by submitting the credentials form on the landing page,
        then continue in ``after_login``.

        ``dont_filter=True`` lets scrapy revisit the login URL even though
        it was already requested as the start URL.
        """
        # The original carried a large commented-out block of throwaway test
        # URLs and mixed tab/space indentation (a TabError under Python 3);
        # both removed.
        return FormRequest.from_response(
            response,
            headers=self.headers,
            formdata={
                'username': 'kytj1',
                'password': '6ujBJ4XQyLeGmJmB'
            },
            callback=self.after_login,
            dont_filter=True,
        )
開發者ID:TonyDoen,項目名稱:python_code_review,代碼行數:33,代碼來源:dmoz_spider.py

示例12: parse_page

 def parse_page(self, response):
     """Emit all item requests for this listing page, then follow the
     pager's next-page link (if present) back into ``parse_page``."""
     for item_request in self._parse_list(response):
         yield item_request

     pager_href = response.xpath(u"//*[@id='AspNetPager1']/a[text()=\"下一頁\"]/@href")
     if len(pager_href) > 0:
         base_url = response.meta["page_base_url"]
         target = urljoin(self.base_url, pager_href.extract()[0])
         # Some pager hrefs resolve outside the category; rebuild the final
         # '<name>.html' segment against the category's base URL.
         if not target.startswith(base_url) and target.endswith("html"):
             target = base_url + target[target.rindex("/") + 1:]
         yield FormRequest(
             target,
             callback=self.parse_page,
             dont_filter=True,
             meta={
                 "large_category_name": response.meta["large_category_name"],
                 "mid_category_name": response.meta["mid_category_name"],
                 "report_type": response.meta["report_type"],
                 "page_base_url": base_url,
             },
         )
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:16,代碼來源:chinaidrSpider.py

示例13: parse

 def parse(self, response):
     """Submit the login form and hand control to ``after_login``."""
     credentials = {
         'username': '[email protected]',
         'password': '123456',
         'remember_me': '1',
     }
     return FormRequest.from_response(
         response,
         headers=self.headers,
         formxpath='//form[@class="form-login"]',
         formdata=credentials,
         callback=self.after_login,
     )
開發者ID:allhu,項目名稱:scrapy_in_practice,代碼行數:8,代碼來源:fishsaying_spider.py

示例14: parse

 def parse(self, response):
     """Post the funding-source filter form and start listing from page 1."""
     filter_choices = {
         'Skin$body$FundingSourceChoices$0': '1',
         'Skin$body$FundingSourceChoices$1': '0',
     }
     yield FormRequest.from_response(
         response,
         formname='aspnetForm',
         formdata=filter_choices,
         meta={'curr_listing_page': 1, 'flag': False},
         callback=self.after_login,
     )
開發者ID:jorbecalona,項目名稱:umichemploymentscrape,代碼行數:8,代碼來源:job_listing_spider.py

示例15: parse

    def parse(self, response):
        """Extract the sitemap page count from the pager button's onclick
        handler and request every sitemap page via ``parse_index_page``.

        The onclick text looks like::

            if(ANP_checkInput('AspNetPager1_input',3270,'頁索引超出範圍!','頁索引不是有效的數值!'))
            {ANP_goToPage('AspNetPager1_input','page','http://www.bgku.cn/sitemap_1',
            'http://www.bgku.cn/sitemap_{page}','',3270,false);};return false;

        The first comma-delimited integer in it is the total page count.
        """
        onclick_text = response.xpath('//*[@id="AspNetPager1_btn"]/@onclick').extract()[0]
        # Capture the first ',<digits>,' group. The original pattern
        # (',\d{4,},') required at least four digits, so it failed (match is
        # None -> AttributeError) for sitemaps with fewer than 1000 pages.
        match = re.search(r',(\d+),', onclick_text)
        page_count = int(match.group(1))
        for page in range(1, page_count + 1):
            url = 'http://www.bgku.cn/sitemap_' + str(page)
            request = FormRequest(url, callback=self.parse_index_page, dont_filter=True)
            request.meta["page"] = page
            yield request
開發者ID:hanwei2008,項目名稱:crawl,代碼行數:17,代碼來源:bgkuSpider.py


注:本文中的scrapy.FormRequest類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。