This article collects typical usage examples of the scrapy.http.FormRequest method in Python. If you have been wondering what http.FormRequest does and how to use it in practice, the curated code examples below may help. You can also read further about the scrapy.http module that the method belongs to.
Shown below are 15 code examples of http.FormRequest, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
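Before the examples, here is a minimal, self-contained sketch of the basic pattern (the URL, field names, and credentials are placeholders, not taken from any example below). FormRequest URL-encodes its formdata dict into the request body and defaults to the POST method.

import scrapy
from scrapy.http import FormRequest

class LoginSpider(scrapy.Spider):
    name = 'formrequest_demo'
    start_urls = ['https://example.com/login']  # placeholder URL

    def parse(self, response):
        # formdata is URL-encoded into the request body; method defaults to POST
        yield FormRequest(
            'https://example.com/login',
            formdata={'email': 'user@example.com', 'password': 'secret'},  # placeholders
            callback=self.after_login,
        )

    def after_login(self, response):
        self.logger.info('logged in, landed on %s', response.url)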
Example 1: parse
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
    # small images 200x200
    # urls = response.xpath('//div[@id="thumbsContainer"]//img/@data-original').extract()
    # urls = response.xpath('//img[@class="res-photo-thumbnail thumb-load lazy-photo-inner"]/@data-original').extract()
    # yield {'image_urls': urls}
    # big images 800x600
    # urls = [url.replace('200%3A200', '800%3A600') for url in urls]
    # yield {'image_urls': urls}
    # big images 1900x1200
    # urls = [url.replace('200%3A200', '1900%3A1200') for url in urls]
    # yield {'image_urls': urls}
    data = {
        'res_id': '16761868',  # place ID (e.g. '16780723')
        'offset': '30',  # change it
        'category': 'all',  # or 'food'
        'action': 'fetch_photos',
        'index': '30',
        'limit': '10',  # change it
    }
    url = 'https://www.zomato.com/php/load_more_res_pics.php'
    yield FormRequest(url, callback=self.parse_post, formdata=data)
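Because the endpoint in Example 1 pages through photos with offset/limit (note the "change it" comments), the request is typically generated once per page. A hedged sketch of that loop, reusing the example's endpoint and parse_post callback (the page range is an assumption, not from the example):

def parse(self, response):
    url = 'https://www.zomato.com/php/load_more_res_pics.php'
    for offset in range(0, 100, 10):  # assumed total; tune per restaurant
        data = {
            'res_id': '16761868',  # place ID
            'offset': str(offset),
            'category': 'all',
            'action': 'fetch_photos',
            'index': str(offset),
            'limit': '10',
        }
        yield FormRequest(url, callback=self.parse_post, formdata=data)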
Example 2: parse
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
    for songid in response.xpath('//a/@href').re(r'/song/(\d+)'):
        print('songIds:', songid)
        data = {'songIds': songid}  # e.g. 257524668
        yield FormRequest(url=self.songlink_url, formdata=data, callback=self.parse_song)
        # break
Example 3: start_requests
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
    print('Preparing login')
    return [FormRequest(
        "https://accounts.coursera.org/api/v1/login",
        formdata={
            "email": "1095511864@qq.com",
            "password": "HUAZANG.55789260",
            "webrequest": "true"
        },
        callback=self.parse_page
    )]
Example 4: start_requests
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
    return [FormRequest(
        "http://www.zhihu.com/login",
        formdata={'email': 'xxx@gmail.com',
                  'password': '123456'},
        callback=self.after_login
    )]
Example 5: gen_topic_form
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def gen_topic_form(self, response):
    # yield the beginning topics
    sel = Selector(response)
    for topic_sel in sel.xpath('//div[@id="zh-profile-topic-list"]/div[contains(@class, "zm-profile-section-item")]'):
        # new user-topic relationship
        yield self.get_UT_item(topic_sel, response.url)
    # get the number of topics of one user
    num_topic = sel.xpath('//div[contains(@class, "zm-profile-section-wrap")]/div[contains(@class, "zm-profile-section-head")]//span[contains(@class, "zm-profile-section-name")]/text()')
    number_str = num_topic.extract()[0]
    # print(number_str)
    p = re.compile(r'\d+')
    m = p.findall(number_str)
    num_topic = int(m[0]) if m else 0
    # crawl the remaining topics of a user
    base_line = 20
    if num_topic > 20:
        while num_topic > 0:
            yield FormRequest(
                url=response.url,
                formdata={
                    'start': '0',
                    'offset': str(base_line),
                    '_xsrf': self.xsrf
                },
                callback=self.parse
            )
            num_topic = num_topic - 20
            base_line += 20
Example 6: start_requests
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
    return [FormRequest(
        "http://www.zhihu.com/login",
        formdata={'email': 'example.com',
                  'password': '123456'},
        callback=self.after_login
    )]
Example 7: parse
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
    url = "https://downloadcenter.intel.com/SearchResult.aspx?lang=eng"
    search_form = {
        "search_downloads": ".BIO",
        "ctl00$body$submit_search_downloads": "Search downloads",
        "ctl00$body$searchKeyword": "BIO"
    }
    return [FormRequest(url=url, method="POST",
                        formdata=search_form, callback=self.parse_form)]
Example 8: parse_again
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse_again(self, response):
    sel = Selector(response)
    hidden_fields = {}
    inputs = sel.xpath("//input")
    for ele in inputs:
        input_type = ele.xpath(".//@type").extract()[0]
        value = ele.xpath(".//@value").extract()[0]
        name = ele.xpath(".//@name").extract()[0]
        if input_type not in ["hidden"]:
            continue
        hidden_fields[name] = value
    for product_type in self.product_types:
        ### Create a POST form and apply a generated ScriptManager
        form_data = _select_form(1, product_type)
        for field in hidden_fields:
            ### Replace static fields with page-generated inputs.
            form_data[field] = hidden_fields[field]
        # print(form_data)
        yield FormRequest(formdata=form_data, method="POST",
                          headers={
                              "Content-Type": "application/x-www-form-urlencoded",
                              # "X-MicrosoftAjax": "Delta=true",
                              "X-Requested-With": "XMLHttpRequest",
                              "User-Agent": self._get_uas()
                          },
                          url=self.select_urls[0],
                          # meta={"cookiejar": "GLOBAL"},
                          callback=self.parse_series)
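Example 8 collects the page's hidden <input> fields by hand before posting. Scrapy also ships FormRequest.from_response, which pre-populates the form with the <input> values found in the response and merges in the given formdata; a minimal sketch of the same idea (the field name below is illustrative, not from the example):

def parse_again(self, response):
    # from_response picks up hidden fields (e.g. ASP.NET __VIEWSTATE)
    # automatically and overrides them with the formdata entries
    yield FormRequest.from_response(
        response,
        formdata={'product_type': 'motherboard'},  # illustrative field
        callback=self.parse_series,
    )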
Example 9: parse
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
    ### Generate a search for AMD and Intel chips
    intel_search = self._get_vars(170, 1)
    amd_search = self._get_vars(171, 1)
    yield FormRequest(url=self.start_urls[0], method="POST", headers=json_headers,
                      formdata=intel_search, callback=self.parse_search)
    yield FormRequest(url=self.start_urls[0], method="POST", headers=json_headers,
                      formdata=amd_search, callback=self.parse_search)
Example 10: parse_search
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse_search(self, response):
    sel = Selector(response)
    ### Parse each sub-product type.
    searches = []
    product_selector = sel.css(".mr20").xpath("@no")
    if product_selector:
        pno = product_selector.extract()[0]
        products = sel.css(".ProdSel-item")
        for product in products:
            no = product.xpath("@no").extract()[0]
            searches.append((no, pno))
    # print(searches)
    ### Parse the actual products/boards.
    boards = []
    items = sel.css(".Prod-item")
    for item in items:
        title = item.xpath("@title").extract()[0]
        no = item.xpath("@no").extract()[0]
        boards.append((title, no))
    # print(boards)
    for sub_search in searches:
        search_vars = self._get_vars(sub_search[0], sub_search[1])
        yield FormRequest(url=self.start_urls[0], method="POST", headers=json_headers,
                          formdata=search_vars, callback=self.parse_search)
    for board in boards:
        url = "http://us.msi.com/product/mb/%s.html" % board[0]
        item = MsiUpdateLinkItem()
        item["id"] = board[1]
        item["title"] = board[0]
        item["url"] = url
        yield Request(url="%s#/?div=BIOS" % url, callback=self.parse_board,
                      meta={"attrs": item})
Example 11: parse
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
    hidden = lambda id: response.xpath(
        '/html/body/input[@id="{}"]/@data-value'.format(id)).extract_first()
    total_pages = int(hidden('quantidadeTotalPaginas').replace('.', ''))
    hashfragment = OrderedDict([
        ('pagina', None),
        ('semente', self.seed or hidden('semente')),
    ])
    formdata = OrderedDict([
        ('tipoOferta', '1'),
        ('paginaAtual', None),
        ('pathName', parse_url(response.url).path),
        ('hashFragment', ''),
    ])
    headers = {'X-Requested-With': 'XMLHttpRequest'}
    url = 'https://www.zapimoveis.com.br/Busca/RetornarBuscaAssincrona/'
    from_page = self.start
    if self.count:
        to_page = min(self.start + self.count - 1, total_pages)
    else:
        to_page = total_pages
    self.crawler.stats.set_value('total_pages', total_pages)
    self.crawler.stats.set_value('selected_pages',
                                 max(0, to_page - from_page + 1))
    for page in range(from_page, to_page + 1):
        hashfragment['pagina'] = formdata['paginaAtual'] = str(page)
        formdata['hashFragment'] = json.dumps(hashfragment,
                                              separators=(',', ':'))
        yield FormRequest(
            url,
            headers=headers,
            formdata=formdata,
            callback=self.parse_busca)
Example 12: start_requests
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
    return [FormRequest(
        "http://www.zhihu.com/login",
        formdata={'email': 'june.chan@foxmail.com',
                  'password': 'czj0617_zhihu'},
        callback=self.after_login
    )]
Example 13: start_requests
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
    for i, url in enumerate(self.start_urls):
        yield FormRequest(url, meta={'cookiejar': i},
                          headers=self.headers,
                          cookies=self.cookies,
                          callback=self.parse_item)  # jump to login page
Example 14: parse
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
    try:
        for news in response.css('div.news-card'):
            self.urls_parsed += 1
            try:
                item = ScrapenewsItem()
                item['image'] = news.css('div.news-card-image::attr(style)').extract_first()[23:-3]
                item['title'] = news.css('a.clickable>span::text').extract_first()
                item['content'] = news.css('div[itemprop*=articleBody]::text').extract_first()
                item['newsDate'] = news.css('span.time::attr(content)').extract_first()[:-5]
                item['link'] = news.css('div.read-more>a::attr(href)').extract_first()
                item['source'] = 105
                yield item
                self.urls_scraped += 1
            except Exception as e:
                logger.error(__name__ + " [UNHANDLED] Unable to Extract Data : " + str(e))
                self.urls_dropped += 1
        # news_id extraction
        pattern = re.compile(r'var min_news_id\s+=\s+"(.*?)"')
        js = response.xpath('//script[@type="text/javascript"]/text()').extract()[-1]
        self.news_id = pattern.search(js).group(1)
        while self.pages > 1 and not self.infinite:
            yield FormRequest('https://www.inshorts.com/en/ajax/more_news',
                              formdata={'news-offset': self.news_id},
                              callback=self.parse_more_news,
                              errback=self.errorRequestHandler,
                              dont_filter=True)
            self.pages -= 1
        while self.infinite:
            yield FormRequest('https://www.inshorts.com/en/ajax/more_news',
                              formdata={'news-offset': self.news_id},
                              callback=self.parse_more_news,
                              errback=self.errorRequestHandler,
                              dont_filter=True)
    except Exception as e:
        logger.error(__name__ + " [UNHANDLED] " + str(e) + " for response url " + response.url)
Example 15: start_requests
# Required import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
    count = self.sql.get_proxy_count(self.name)
    count_httpbin = self.sql.get_proxy_count(config.httpbin_table)
    ids = self.sql.get_proxy_ids(self.name)
    ids_httpbin = self.sql.get_proxy_ids(config.httpbin_table)
    for i in range(0, count + count_httpbin):
        table = self.name if (i < count) else config.httpbin_table
        id = ids[i] if i < count else ids_httpbin[i - len(ids)]
        proxy = self.sql.get_proxy_with_id(table, id)
        if proxy is None:
            continue
        for url in self.urls:
            cur_time = time.time()
            yield FormRequest(
                url=url,
                headers=self.headers,
                method='POST',
                meta={
                    'cur_time': cur_time,
                    'download_timeout': self.timeout,
                    'proxy_info': proxy,
                    'table': table,
                    'id': proxy.id,
                    'proxy': 'http://%s:%s' % (proxy.ip, proxy.port),
                    'vali_count': proxy.vali_count,
                },
                cookies={
                    'Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488937030',
                    '_ga': 'GA1.2.40497390.1488937014',
                    'TG-TRACK-CODE': 'search_code',
                    'index_location_city': '%E5%8C%97%E4%BA%AC',
                    'LGRID': '20170308093710-bf6755eb-039f-11e7-8025-525400f775ce',
                    'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488881288,1488936799,1488936947,1488937014',
                    'JSESSIONID': 'BDCBB6167F960CE43AF54B75A651F586',
                    'LGSID': '20170308093653-b59316f0-039f-11e7-9229-5254005c3644',
                    'LGUID': '20170308093653-b593185f-039f-11e7-9229-5254005c3644',
                    'user_trace_token': '20170308093654-723efcfac8fb4c28a670d073d5113e02',
                    'SEARCH_ID': '4db4dc3dea1c46b49018ae5421b53ffa'
                },
                formdata={
                    'first': 'true',
                    'kd': 'ios',
                    'pn': '1',
                },
                dont_filter=True,
                callback=self.success_parse,
                errback=self.error_parse,
            )