本文整理汇总了Python中scrapy.FormRequest类的典型用法代码示例。如果您正苦于以下问题:Python FormRequest类的具体用法?Python FormRequest怎么用?Python FormRequest使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了FormRequest类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _request_number_of_pages
def _request_number_of_pages(self, date_str):
    """Build the search POST for page 1 of the visits on ``date_str``.

    ``date_str`` is sent as both ends of the date range (``fechaDesde`` and
    ``fechaHasta``) and is also stored under ``meta['date']`` on the
    returned request. The response is routed to
    ``self.parse_initial_request`` and the duplicate filter is bypassed.
    """
    search_form = {
        'fechaDesde': date_str,
        'fechaHasta': date_str,
        'paginaActual': '1',
        'visita.visitanteNombres': '',
        'visita.personalNombre': '',
        'visita.oficinaNombre': '',
        'visita.sedeId': '00',
        'visita.ano': '',
        'visita.mes': '',
        'visita.fechaIngreso': '',
        'paginaNueva': '0',
        'visita.visitanteId': '0',
        'visita.personalId': '0',
        'visita.oficinaId': '0',
    }
    return FormRequest(
        url=self.base_url + '/consulta_paginarBusquedaVisitas',
        meta={'date': date_str},
        formdata=search_form,
        dont_filter=True,
        callback=self.parse_initial_request,
    )
示例2: _parse_list
def _parse_list(self, response):
report_list = response.xpath("//div[@class=\"reportlist bluelink\"]/ul//a/@href")
for report_url in report_list:
request = FormRequest(urljoin(self.base_url, report_url.extract()), callback=self.parse_item, dont_filter=False)
request.meta["large_category_name"] = response.meta["large_category_name"]
request.meta["mid_category_name"] = response.meta["mid_category_name"]
request.meta["report_type"] = response.meta["report_type"]
yield request
示例3: parse
def parse(self, response):
    """Yield a request for every large-category link on the page.

    The cleaned link text is stored under ``meta['large_category']`` and
    the response is handled by ``self.parse_middle_category``.
    """
    for link in response.xpath(".//*[@class='rptmap']//strong//a"):
        category_name = clean_text(link.xpath("./text()").extract()[0].strip())
        href = link.xpath("./@href").extract()[0]
        category_request = FormRequest(
            urljoin(self.base_url, href),
            callback=self.parse_middle_category,
            dont_filter=True,
        )
        category_request.meta["large_category"] = category_name
        yield category_request
示例4: parse_middle_category
def parse_middle_category(self, response):
    """Yield a listing request for each ``<li>`` whose text contains '报告'.

    The target URL is the link inside the ``<span>`` immediately preceding
    the matched ``<li>``. Category names are carried over from the incoming
    meta; the cleaned ``<li>`` text becomes ``report_type`` and the target
    URL is also kept as ``page_base_url``. Responses go to
    ``self.parse_page``.
    """
    for item in response.xpath(u"//li[contains(text(),'报告')]"):
        href = item.xpath(u"./preceding-sibling::span[1]/a/@href").extract()[0]
        target_url = urljoin(self.base_url, href)
        page_request = FormRequest(
            target_url,
            callback=self.parse_page,
            dont_filter=True,
        )
        page_request.meta.update({
            "large_category_name": response.meta["large_category_name"],
            "mid_category_name": response.meta["mid_category_name"],
            "report_type": clean_text(item.xpath("./text()").extract()[0].strip()),
            "page_base_url": target_url,
        })
        yield page_request
示例5: parse_middle_category
def parse_middle_category(self, response):
    """Yield a request for every middle-category link on the page.

    ``large_category`` is carried over from the incoming meta, the cleaned
    link text is stored as ``mid_category``, and the resolved URL is kept
    as ``first_url``. Responses go to ``self._parse_item``.
    """
    for link in response.xpath(".//*[@class='report2']//h2//a"):
        link_text = clean_text(link.xpath("./text()").extract()[0].strip())
        target_url = urljoin(self.base_url, link.xpath("./@href").extract()[0])
        item_request = FormRequest(
            target_url,
            callback=self._parse_item,
            dont_filter=True,
        )
        item_request.meta.update({
            "large_category": response.meta["large_category"],
            "mid_category": link_text,
            "first_url": target_url,
        })
        yield item_request
示例6: parse
def parse(self, response):
    """Yield a request for every middle category under every large category.

    Large-category links are matched first. For each one, the middle
    categories are the links in the element that directly follows the
    large link's parent. Both cleaned names are stored in the request meta
    and the response is handled by ``self.parse_middle_category``.
    """
    top_links = response.xpath(
        "//*[@class='tabContent bluelink']//*[contains(@style, 'padding')]/a")
    for top_link in top_links:
        top_name = clean_text(top_link.xpath(".//text()").extract()[0].strip())
        for sub_link in top_link.xpath("./parent::*/following-sibling::*[1]/a"):
            sub_name = clean_text(sub_link.xpath("./text()").extract()[0])
            sub_href = sub_link.xpath("./@href").extract()[0]
            sub_request = FormRequest(
                urljoin(self.base_url, sub_href),
                callback=self.parse_middle_category,
                dont_filter=True,
            )
            sub_request.meta["large_category_name"] = top_name
            sub_request.meta["mid_category_name"] = sub_name
            yield sub_request
示例7: _parse_page_free
def _parse_page_free(self, response):
    """Yield one request per result page, starting with ``first_url``.

    The page count is read from the second-to-last link text inside the
    ``pages`` element. Page 1 is ``response.meta['first_url']``; page N
    (N >= 2) is that URL with its last five characters replaced by
    ``-pN.html``. Every request carries ``large_category`` from the
    incoming meta and is handled by ``self._parse_free``.

    Raises:
        IndexError: if fewer than two pager link texts are found.
        ValueError: if the pager text is not an integer.
    """
    pager_texts = response.xpath(".//*[@class='pages']//a//text()").extract()
    total_pages = int(clean_text(pager_texts[-2].strip()))
    first_url = response.meta["first_url"]

    def build_request(url):
        request = FormRequest(url, callback=self._parse_free, dont_filter=True)
        request.meta["large_category"] = response.meta["large_category"]
        return request

    yield build_request(first_url)
    # assumes first_url ends in ".html" (5 characters) -- TODO confirm
    url_stem = first_url[:-5]
    # range() instead of xrange() so this also runs on Python 3; an empty
    # range already covers the single-page case, so no extra guard is needed.
    for page_number in range(2, total_pages + 1):
        yield build_request(url_stem + '-p' + str(page_number) + '.html')
示例8: parse
def parse(self, response):
    """Yield a request for every middle category of every large category.

    Large-category links and middle-category containers are matched
    separately and paired by position. Both names are stored in the request
    meta and the response is handled by ``self._parse_item``.

    Raises:
        IndexError: if there are fewer middle-category containers than
            large-category links, or a link has no text/href.
    """
    large_categories = response.xpath(".//*[@class='shopleft_bt']//a")
    middle_categories = response.xpath(".//*[@class='shopnav2']")
    # enumerate() replaces xrange(len(...)), which does not exist on Python 3.
    for index, large_category in enumerate(large_categories):
        large_category_name = clean_text(
            large_category.xpath("./text()").extract()[0].strip())
        middle_category_list = middle_categories[index].xpath(
            ".//*[@class='shopleft_wt']")
        for middle_category in middle_category_list:
            # NOTE(review): unlike the large category, the whole extract()
            # list is handed to clean_text here -- confirm it accepts a list.
            middle_category_name = clean_text(
                middle_category.xpath(".//a/text()").extract())
            page_url = middle_category.xpath(".//a//@href").extract()[0]
            request = FormRequest(
                urljoin(self.base_url, page_url),
                callback=self._parse_item,
                dont_filter=True,
            )
            request.meta["large_category"] = large_category_name
            request.meta["mid_category"] = middle_category_name
            yield request
示例9: _request_next_page
def _request_next_page(self, response, date_str, callback):
current_page = int(response.meta['current_page'])
total_string = response.css('#LblTotal').xpath('./text()').extract_first(default='')
total = re.search(r'(\d+)', total_string)
if total:
# Deal with the next page.
total = total.group(1)
number_of_pages = self._get_number_of_pages(int(total))
if current_page < number_of_pages:
current_page += 1
formdata = {
'TxtFecha': date_str,
'BtnBuscar': 'Buscar',
'LwVisitasCR$DpVisitasCR$ctl02$ctl00.x': '1',
'LwVisitasCR$DpVisitasCR$ctl02$ctl00.y': '1'
}
request = FormRequest.from_response(response,
formdata=formdata,
dont_click=True,
dont_filter=True,
callback=callback,
)
request.meta['date'] = date_str
request.meta['current_page'] = current_page
return request
示例10: parse
def parse(self, response):
    """
    Default callback, run once a response has been received:
    1. If a login is required, log in to the forum through FormRequest;
    2. If no login is required, carry on with a plain Request.

    In both cases the follow-up response is handled by ``self.parse_list``.

    :param response: the response to inspect for the login form.
    :return: generator yielding exactly one request.
    """
    # Login required: simulate it with FormRequest.from_response.
    # response.body is raw bytes, so the marker must be a bytes literal; a
    # text literal raises TypeError on Python 3 (b'' is plain str on Python 2).
    if b'id="lsform"' in response.body:
        logging.info('in parse, need to login, url: %s', response.url)
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from settings or the environment instead.
        form_data = {
            'handlekey': 'ls',
            'quickforward': 'yes',
            'username': 'daniell123',
            'password': 'admin123',
        }
        request = FormRequest.from_response(
            response=response,
            headers=self.headers,
            formxpath='//form[contains(@id, "lsform")]',
            formdata=form_data,
            callback=self.parse_list,
        )
    else:
        logging.info('in parse, NOT need to login, url: %s', response.url)
        request = Request(
            url=response.url,
            headers=self.headers,
            callback=self.parse_list,
        )
    yield request
示例11: parse
def parse(self, response):
    """Submit the login form found in ``response``.

    The form is filled with a fixed username/password, sent with
    ``self.headers``, exempted from duplicate filtering, and its response
    is handled by ``self.after_login``.
    """
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from settings or the environment instead.
    credentials = {
        'username': 'kytj1',
        'password': '6ujBJ4XQyLeGmJmB',
    }
    return FormRequest.from_response(
        response,
        headers=self.headers,
        formdata=credentials,
        callback=self.after_login,
        dont_filter=True,
    )
示例12: parse_page
def parse_page(self, response):
    """Yield the requests for this listing page, then the next page if any.

    Everything produced by ``self._parse_list`` is yielded first. If a
    "下一页" (next page) link exists in the pager, a request for it is
    yielded with the category, report-type and base-URL meta carried over
    and ``self.parse_page`` as its callback. A next-page URL that ends in
    "html" but does not start with ``page_base_url`` is rebuilt as
    ``page_base_url`` plus the URL's last path segment.
    """
    for listed_request in self._parse_list(response):
        yield listed_request

    next_links = response.xpath(u"//*[@id='AspNetPager1']/a[text()=\"下一页\"]/@href")
    if not next_links:
        return

    target_url = urljoin(self.base_url, next_links.extract()[0])
    page_base = response.meta["page_base_url"]
    if not target_url.startswith(page_base) and target_url.endswith("html"):
        target_url = page_base + target_url[target_url.rindex("/") + 1:]

    follow_up = FormRequest(target_url, callback=self.parse_page, dont_filter=True)
    for key in ("large_category_name", "mid_category_name",
                "report_type", "page_base_url"):
        follow_up.meta[key] = response.meta[key]
    yield follow_up
示例13: parse
def parse(self, response):
    """Submit the ``form-login`` form found in ``response``.

    The form is filled with a fixed username/password plus
    ``remember_me``, sent with ``self.headers``, and its response is
    handled by ``self.after_login``.
    """
    # NOTE(review): the username literal looks like an e-mail-obfuscation
    # placeholder rather than a real address -- confirm the intended value.
    # Credentials are also hard-coded in source.
    credentials = {
        'username': '[email protected]',
        'password': '123456',
        'remember_me': '1',
    }
    login_request = FormRequest.from_response(
        response,
        headers=self.headers,
        formxpath='//form[@class="form-login"]',
        formdata=credentials,
        callback=self.after_login,
    )
    return login_request
示例14: parse
def parse(self, response):
    """Submit the ``aspnetForm`` form with the funding-source choices set.

    The request starts with ``curr_listing_page`` = 1 and ``flag`` = False
    in its meta, and its response is handled by ``self.after_login``.
    """
    funding_choices = {
        'Skin$body$FundingSourceChoices$0': '1',
        'Skin$body$FundingSourceChoices$1': '0',
    }
    initial_meta = {'curr_listing_page': 1, 'flag': False}
    yield FormRequest.from_response(
        response,
        formname='aspnetForm',
        formdata=funding_choices,
        meta=initial_meta,
        callback=self.after_login,
    )
示例15: parse
def parse(self, response):
    """Yield one request per sitemap page, numbered 1..page_count.

    The page count is taken from the ``onclick`` attribute of the
    ``AspNetPager1_btn`` element: the first comma-delimited run of four or
    more digits. Each request stores its page number under
    ``meta['page']`` and is handled by ``self.parse_index_page``.

    Example ``onclick`` text (messages translated)::

        if(ANP_checkInput('AspNetPager1_input',3270,'page index out of range!',
            'page index is not a valid number!'))
        {ANP_goToPage('AspNetPager1_input','page','http://www.bgku.cn/sitemap_1',
            'http://www.bgku.cn/sitemap_{page}','',3270,false);};return false;

    Raises:
        IndexError: if the pager button has no ``onclick`` attribute.
        AttributeError: if no such number is found (``match`` is None).
    """
    page_count_text = response.xpath('//*[@id="AspNetPager1_btn"]/@onclick').extract()[0]
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    # NOTE(review): {4,} means a site with fewer than 1000 pages yields no
    # match at all -- confirm whether that lower bound is intentional.
    match = re.search(r',(\d{4,}),', page_count_text)
    page_count = int(match.group(1))
    for page in range(1, page_count + 1):
        url = 'http://www.bgku.cn/sitemap_' + str(page)
        request = FormRequest(url, callback=self.parse_index_page, dont_filter=True)
        request.meta["page"] = page
        yield request