This article collects typical usage examples of the Python scrapy.FormRequest method. If you are wondering how exactly to use scrapy.FormRequest in Python, the curated code examples below may help; you can also explore further usage examples from the scrapy module, where this method lives.
The following presents 15 code examples of scrapy.FormRequest, sorted by popularity by default.
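Before the examples, a minimal sketch of the two common ways to build a FormRequest; the URL, field names, and callback below are placeholders, not values from any example that follows:

import scrapy

class ExampleSpider(scrapy.Spider):
    name = 'example'

    def parse(self, response):
        # Plain constructor: POST the given fields to a known endpoint.
        yield scrapy.FormRequest(
            url='https://example.com/search',
            formdata={'q': 'scrapy'},          # all formdata values must be strings
            callback=self.parse_result,
        )
        # from_response(): pre-fill from an HTML <form> found on the current
        # page (keeping its hidden fields), then override selected values.
        yield scrapy.FormRequest.from_response(
            response,
            formdata={'q': 'scrapy'},
            callback=self.parse_result,
        )

    def parse_result(self, response):
        self.logger.info('Got %s', response.url)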
Example 1: parse
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse(self, response):
    """Extract the names of the available courses."""
    html = response.body_as_unicode()
    codigo_cursos = [line for line in html.splitlines()
                     if 'var listaProcurar' in line][0]
    json_cursos = codigo_cursos.replace('var listaProcurar =', '').strip()[:-1]
    for curso_busca in json.loads(json_cursos):
        curso_busca = curso_busca['id']
        yield FormRequest(
            callback=self.parse_cidades,
            formdata={'opcao': '1', 'tipo': '3', 'valor': curso_busca},
            meta={'curso_busca': curso_busca},
            method='POST',
            url='http://prounialuno.mec.gov.br/consulta/resultado-procurar/',
        )
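Note: response.body_as_unicode() is deprecated in recent Scrapy releases; response.text returns the same decoded body and is the drop-in replacement.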
Example 2: get_news
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def get_news(self, response):
    response.meta['iter_time'] += 1
    news_items = json.loads(response.text)
    if news_items:
        for n in news_items:
            yield {
                'website': '公視',
                'url': ARTICLE_PREFIX + n['news_id'],
                'title': n['subject'],
                'date': n['news_date'],
                'content': n['content'],
                'category': n['program_name']
            }
        # Keep paginating only while the endpoint still returns items.
        yield scrapy.FormRequest(
            url="https://news.pts.org.tw/list/getmore.php",
            callback=self.get_news,
            meta=response.meta,
            formdata={
                'page': str(response.meta['iter_time'])
            })
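Example 2 presumes the first request seeded meta['iter_time']; a hypothetical start_requests that would do so (the seed values are assumptions, not from the source):

def start_requests(self):
    yield scrapy.FormRequest(
        url='https://news.pts.org.tw/list/getmore.php',
        callback=self.get_news,
        meta={'iter_time': 0},     # get_news increments this before each fetch
        formdata={'page': '0'},
    )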
Example 3: parse_person_center
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse_person_center(self, response):
    """
    Parse the personal-center page.
    :param response:
    :return:
    """
    if response.url == self.person_center_url:
        print('Reached the personal-center page')
        ck = response.xpath('//input[@name="ck"]/@value').get()
        print('Extracted ck token: %s' % ck)
        formdata = {
            'ck': ck,
            'signature': '时光如水,岁月如斯'
        }
        # Send a POST request to update the signature.
        yield scrapy.FormRequest(self.edit_signature, formdata=formdata)
    else:
        print('Failed to reach the personal-center page')
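When the form lives on the page itself, FormRequest.from_response can collect hidden inputs such as ck automatically instead of extracting them by hand; a minimal sketch (the callback name is a placeholder):

yield scrapy.FormRequest.from_response(
    response,
    formdata={'signature': '时光如水,岁月如斯'},
    callback=self.parse_after_edit,  # hypothetical callback
)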
Example 4: request_inventory_data
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def request_inventory_data(self):
    today = pd.Timestamp.today()
    requests = []
    for date in pd.date_range(start=today.date() - pd.Timedelta(weeks=520), end=today):
        the_dir = get_exchange_cache_path(security_type='future', exchange='dce',
                                          the_date=to_timestamp(date),
                                          data_type="day_inventory") + '.zip'
        if date.dayofweek < 5 and not os.path.exists(the_dir):
            requests.append(FormRequest(
                url="http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html",
                formdata={
                    'batchExportFlag': 'batch',
                    'contract.contract_id': 'all',
                    'contract.variety_id': 'a',
                    'year': str(date.year),
                    'month': str(date.month - 1),  # the endpoint apparently expects zero-based months
                    'day': str(date.day),
                    'memberDealPosiQuotes.trade_type': '0',
                    'memberDealPosiQuotes.variety': 'all'
                },
                callback=self.download_dce_kline_data,
                meta={'filename': the_dir}))
    return requests
Example 5: request_currentyear_kdata
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def request_currentyear_kdata(self):
    today = pd.Timestamp.today()
    requests = []
    for date in pd.date_range(start=today.date() - pd.Timedelta(days=today.dayofyear - 1), end=today):
        the_dir = get_exchange_cache_path(security_type='future', exchange='dce',
                                          the_date=to_timestamp(date),
                                          data_type="day_kdata") + '.xls'
        if date.dayofweek < 5 and not os.path.exists(the_dir):
            requests.append(FormRequest(
                url="http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html",
                formdata={
                    'year': str(date.year),
                    'month': str(date.month - 1),  # zero-based month, as in Example 4
                    'day': str(date.day),
                    'dayQuotes.trade_type': '0',
                    'dayQuotes.variety': 'all',
                    'exportType': 'excel'
                },
                callback=self.download_dce_kline_data,
                meta={'filename': the_dir}))
    return requests
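Examples 4 and 5 both hand the target path to the same callback through meta['filename']. A plausible shape for that callback, sketched here as an assumption based on the meta contract rather than taken from the source:

def download_dce_kline_data(self, response):
    # Persist the exported .zip/.xls exactly as the exchange returned it.
    with open(response.meta['filename'], 'wb') as f:
        f.write(response.body)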
Example 6: parse_login
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse_login(self, response):
    self._check_login_params()
    self._login = False
    form_data = {
        self.username_field: self.username,
        self.password_field: self.password
    }
    if hasattr(self, 'form_xpath'):
        return scrapy.FormRequest.from_response(
            response,
            formxpath=self.form_xpath,
            formdata=form_data,
            callback=self.parse_after_login
        )
    elif hasattr(self, 'form_url'):
        return scrapy.FormRequest(
            self.form_url,
            formdata=form_data,
            callback=self.parse_after_login
        )
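A hypothetical subclass showing the attributes parse_login() probes with hasattr(); the base-class name, field names, and XPath are placeholders:

class ExampleLoginSpider(BaseLoginSpider):
    username_field = 'user'             # name of the username <input>
    password_field = 'pass'             # name of the password <input>
    form_xpath = '//form[@id="login"]'
    # Alternatively, set form_url = 'https://example.com/login' to POST
    # directly without locating a form in the page.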
Example 7: parse
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse(self, response):
    item_loader = ItemLoader(item=MyItem(), response=response)
    item_loader.default_input_processor = MapCompose(remove_tags)
    # item_loader.add_css("", "")
    # item_loader.add_css("", "")
    # item_loader.add_css("", "")
    yield FormRequest("POST_URL", formdata={'parameter': 'p'},
                      meta={'item': item_loader.load_item()},
                      callback=self.populate_field)
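A minimal sketch of the populate_field callback the example points at; the field name is a placeholder and assumes MyItem defines it:

def populate_field(self, response):
    item = response.meta['item']
    # Fill the remaining field from the POST response (hypothetical field).
    item['details'] = response.text
    yield item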
Example 8: gen_detail
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def gen_detail(self, **kwargs):
    """
    Build the request that fetches a patent's detail page.
    :param kwargs: patent_id, sipo, data_item, nrdAn, nrdPn
    :return:
    """
    patent_id = str(kwargs.pop('patent_id'))
    formdata = url_detail.get('form_data')
    formdata['nrdAn'] = patent_id.split('.')[0]
    formdata['cid'] = patent_id
    formdata['sid'] = patent_id
    return FormRequest(
        url=url_detail.get('url'),
        formdata=formdata,
        headers=url_detail.get('headers'),
        callback=self.parse_patent_detail,
        meta={'sipo': kwargs.pop('sipo'), 'data_item': kwargs.pop('data_item'), 'patent_id': patent_id,
              'law_info': {'nrdAn': kwargs.pop('nrdAn'), 'nrdPn': kwargs.pop('nrdPn')}}
    )
Example 9: gen_related_info
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def gen_related_info(self, **kwargs):
    """
    Build the request for related information (legal status and patent family).
    :param sipo:
    :param data_item:
    :param nrdAn:
    :param nrdPn:
    :return:
    """
    form_data = url_related_info.get('form_data')
    form_data['literaInfo.nrdAn'] = kwargs.pop('nrdAn')
    form_data['literaInfo.nrdPn'] = kwargs.pop('nrdPn')
    return FormRequest(
        url=url_related_info.get('url'),
        method='POST',
        dont_filter=True,  # duplicates are possible here but still wanted, so bypass the dupe filter
        formdata=form_data,
        callback=self.parse_related_info,
        meta={'sipo': kwargs.pop('sipo'), 'data_item': kwargs.pop('data_item'),
              'patent_id': kwargs.pop('patent_id')}
    )
Example 10: gen_full_text
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def gen_full_text(self, **kwargs):
    """
    Build the request for the patent's full text.
    :param patent_id:
    :param sipo:
    :param data_item:
    :return:
    """
    patent_id = str(kwargs.pop('patent_id'))
    form_data = url_full_text.get('form_data')
    form_data['nrdAn'] = patent_id.split('.')[0]
    form_data['cid'] = patent_id
    form_data['sid'] = patent_id
    return FormRequest(
        url=url_full_text.get('url'),
        method='POST',
        dont_filter=True,  # duplicates are possible here but still wanted, so bypass the dupe filter
        formdata=form_data,
        callback=self.parse_full_text,
        meta={'sipo': kwargs.pop('sipo'), 'data_item': kwargs.pop('data_item')}
    )
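Examples 8-10 mutate the dict returned by url_*.get('form_data') in place; if that configuration dict is shared across requests, later calls will see earlier values. Copying it per request avoids the leakage; a sketch of the safer pattern:

form_data = dict(url_full_text.get('form_data'))  # per-request copy
form_data['nrdAn'] = patent_id.split('.')[0]
form_data['cid'] = patent_id
form_data['sid'] = patent_id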
Example 11: start_requests
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def start_requests(self):
    """
    Issue the initial search requests.
    :return:
    """
    for sipo in self.query_list:
        headers = url_search.get('headers')
        search_exp_cn = sipo.search_exp_cn
        logger.info('Search expression --- %s' % search_exp_cn)
        form_data = url_search.get('form_data')
        form_data['searchCondition.searchExp'] = search_exp_cn
        yield FormRequest(
            url=url_search.get('url'),
            callback=self.parse,
            method="POST",
            headers=headers,
            formdata=form_data,
            meta={'sipo': sipo}
        )
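Note that FormRequest already defaults to method='POST', so the explicit method arguments in Examples 1, 9, 10, and 11 are redundant, though harmless.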
Example 12: _login
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def _login(self, response):
    response = yield scrapy.Request(
        "https://www.{}/login/".format(self.name),
        meta={"cache_expires": timedelta(days=14)},
    )
    response = yield scrapy.FormRequest(
        "https://www.{}/login/".format(self.name),
        formdata=OrderedDict(
            [
                ("user[control][login]", "true"),
                ("permanent", "checked"),
                ("username", self._username),
                ("password", self._password),
            ]
        ),
        meta={"cache_expires": timedelta(days=14)},
    )
    if response and response.css(".notloggedin"):
        # We tried to log in but failed.
        self.logger.error("Login failed: Username or password wrong")
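Example 12 only works when something turns response = yield scrapy.Request(...) into a request/response exchange; plain Scrapy callbacks never receive responses from yield. A sketch, assuming the third-party scrapy-inline-requests package supplies that behavior:

import scrapy
from inline_requests import inline_requests

class ExampleSpider(scrapy.Spider):
    name = 'example'

    @inline_requests
    def parse(self, response):
        # The decorator suspends the generator until the response arrives.
        login_page = yield scrapy.Request('https://example.com/login/')
        self.logger.info('login page status: %s', login_page.status)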
Example 13: parse_item
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse_item(self, response):
    # print('parse_item] url:', response.url)
    # print('parse_item] text:', response.text)
    # for quote in response.xpath('//div[contains(@style,"overflow-x:auto")]'):
    #     for row in quote.xpath('./table[contains(@class,"table-striped")]/tbody/tr'):
    #         link = row.xpath('td[1]/a/@href').extract_first()
    #         yield scrapy.Request(link, callback=self.parse_product)
    for row in response.xpath('//table[@name="MVCGridTable_advancesearchawardedprojectsp"]/tbody/tr'):
        link = row.xpath('.//a/@href').get()
        # title = row.xpath('.//a/text()').get()
        yield scrapy.Request(link, callback=self.parse_product)
    # Build the request for the next page, if the grid exposes one.
    onclick = response.xpath('//a[@aria-label="Next page"]/@onclick').get()
    if onclick:
        self.args['page'] += 1
        args = urllib.parse.urlencode(self.args)
        url = 'https://researchgrant.gov.sg/eservices/mvcgrid?' + args
        yield scrapy.FormRequest(url, callback=self.parse_item, method='POST',
                                 formdata=self.params,
                                 headers={'X-Requested-With': 'XMLHttpRequest'})
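Example 13 relies on self.args and self.params being set up before the first call; a hypothetical start_requests that seeds them (the grid name is inferred from the table id in the example, and the filter fields are placeholders):

def start_requests(self):
    self.args = {'Name': 'advancesearchawardedprojectsp', 'page': 1}
    self.params = {}  # grid filter fields would go here (placeholder)
    url = ('https://researchgrant.gov.sg/eservices/mvcgrid?'
           + urllib.parse.urlencode(self.args))
    yield scrapy.FormRequest(url, callback=self.parse_item, method='POST',
                             formdata=self.params,
                             headers={'X-Requested-With': 'XMLHttpRequest'})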
Example 14: extract_more_news
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def extract_more_news(self, response):
    # Pull the pagination token out of the page's inline JavaScript.
    pattern = re.compile(r'var min_news_id\s+=\s+"(.*?)"')
    script = response.css('script[type*="text/javascript"]').extract()[-1]
    try:
        news_id = pattern.search(script).group(1)
    except AttributeError:
        news_id = response.meta['min_news_id']  # fall back to a token passed via meta (assumed)
    # Ask the AJAX endpoint for the next batch and parse it in a callback.
    yield scrapy.FormRequest('https://www.inshorts.com/en/ajax/more_news',
                             formdata={'news_offset': news_id},
                             callback=self.parse_more_news)

def parse_more_news(self, response):
    # The endpoint returns JSON whose 'html' field is an HTML fragment;
    # wrap it in a Selector so the usual CSS queries work.
    contents = json.loads(response.text)
    selector = scrapy.Selector(text=contents['html'])
    for news in selector.css('div.news-card'):
        yield {
            'title': news.css('a.clickable>span::text').extract_first(),
            'author': news.css('span.author::text').extract_first(),
            'time': news.css('span.time::text').extract_first(),
            'date': news.css('span[class*=date]::text').extract_first(),
            'content': news.css('div[itemprop*=articleBody]::text').extract_first(),
            'link': news.css('div.read-more>a::attr(href)').extract_first(),
        }
Example 15: parse
# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse(self, response):
    for news in response.css('div.news-card'):
        item = {
            'headline': news.css('a.clickable>span::text').extract_first(),
            'author': news.css('span.author::text').extract_first(),
            'time': news.css('span.time::text').extract_first(),
            'date': news.css('span[class*=date]::text').extract_first(),
            'body': news.css('div[itemprop*=articleBody]::text').extract_first(),
        }
        yield item
    if self.pages > 1:
        self.pages -= 1
        pattern = re.compile(r'var min_news_id\s+=\s+"(.*?)"')
        script = response.css('script[type*="text/javascript"]').extract()[-1]
        news_id = pattern.search(script).group(1)
        # Schedule the next batch; note the endpoint returns JSON, so see
        # Example 14 for unwrapping its 'html' field before re-parsing.
        yield scrapy.FormRequest('https://www.inshorts.com/en/ajax/more_news',
                                 callback=self.parse,
                                 formdata={'news_offset': news_id})