本文整理匯總了 Python 中 spider.Spider.get 方法的典型用法代碼示例。如果您正苦於以下問題:Python Spider.get 方法的具體用法?Python Spider.get 怎麼用?Python Spider.get 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類 spider.Spider 的用法示例。
在下文中一共展示了 Spider.get 方法的 2 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: get_ip_info
# 需要導入模塊: from spider import Spider [as 別名]
# 或者: from spider.Spider import get [as 別名]
from spider import Spider, Content
#from model.models import Ip
def get_ip_info(html_response):
    """Extract proxy IP/port pairs from a fetched proxy-list page.

    The markup is searched for the element with ``id='ip_list'``; every
    ``<tr>`` inside it that has at least three ``<td>`` cells contributes
    one entry, taking the IP from the second cell and the port from the
    third.

    Requires ``BeautifulSoup`` (bs4) to be importable in this module.

    Args:
        html_response: Response object whose ``body`` attribute holds the
            HTML markup to parse.

    Returns:
        A list of ``{'ip': ..., 'port': ...}`` dicts with plain ``str``
        values, in document order.  The list is empty when the page has
        no ``ip_list`` element.
    """
    soup = BeautifulSoup(html_response.body, "html.parser")
    ip_list_table = soup.find(id='ip_list')
    if ip_list_table is None:
        # find() returns None when nothing matches; report "no proxies"
        # rather than failing with an AttributeError on the next call.
        return []

    ips_list = []
    for row in ip_list_table.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) < 3:
            # Rows without data cells, or without a port cell, carry no
            # usable entry.
            continue
        # str() is deliberate: without it the values would be bs4 objects
        # rather than plain strings.
        ips_list.append({
            'ip': str(cells[1].string),
            'port': str(cells[2].string),
        })
    return ips_list
# Fetch the proxy-list page and extract the IP/port pairs from it.
s = Spider('http://www.xicidaili.com/nn/')
response = s.get()
ips = get_ip_info(response)
# Per the original author's note: by default this saves into the directory
# the script is run from, in a file named data.txt.
Content().save_to_file(ips)
# Alternative (disabled): store each entry in the database instead.
#t = Content(Ip)
# for ip_data in ips:
#     t.save(ip_data)
示例2: SinaWeibo
# 需要導入模塊: from spider import Spider [as 別名]
# 或者: from spider.Spider import get [as 別名]
class SinaWeibo(object):
    """Log in to Sina Weibo through its SSO endpoints and post statuses.

    All HTTP traffic goes through the session object obtained from
    ``Spider().session``.  The class runs on both Python 2 and Python 3:
    text/bytes conversions are explicit wherever an API needs one or the
    other, and on Python 2 (where ``bytes`` is ``str``) they are no-ops.
    """

    def __init__(self):
        # Client identifier sent to the SSO endpoints as ``client=``.
        self.WBCLIENT = 'ssologin.js(v1.4.5)'
        self.session = Spider().session

    @staticmethod
    def _to_bytes(value):
        """Return *value* as bytes, UTF-8 encoding it when it is text."""
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    @staticmethod
    def _to_text(value):
        """Return *value* as text, decoding it only when it is pure bytes.

        On Python 2 ``bytes`` is ``str``, so values pass through untouched.
        Undecodable bytes are replaced instead of raising, because only
        ASCII fragments are extracted from the decoded text afterwards.
        """
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode('utf-8', 'replace')
        return value

    def _b64(self, value):
        """Return the base64 encoding of *value* as a native string."""
        return self._to_text(base64.b64encode(self._to_bytes(value)))

    def encrypt_passwd(self, passwd, pubkey, servertime, nonce):
        """RSA-encrypt the password together with the prelogin challenge.

        The plaintext is ``"<servertime>\\t<nonce>\\n<passwd>"``.

        Args:
            passwd: The account password.
            pubkey: RSA modulus as a hexadecimal string.
            servertime: ``servertime`` value from the prelogin response.
            nonce: ``nonce`` value from the prelogin response.

        Returns:
            The ciphertext hex-encoded by ``binascii.b2a_hex``.
        """
        # Public exponent is the hexadecimal constant 0x10001.
        key = rsa.PublicKey(int(pubkey, 16), int('10001', 16))
        message = str(servertime) + '\t' + str(nonce) + '\n' + str(passwd)
        # rsa.encrypt() operates on bytes; on Python 3 the message is text.
        cipher = rsa.encrypt(self._to_bytes(message), key)
        return binascii.b2a_hex(cipher)

    def post_weibo(self, text):
        """Post *text* as a new status using the logged-in session.

        Args:
            text: Body of the status to publish.

        Returns:
            The response object returned by ``session.post``.

        Raises:
            Exception: If the final response URL contains ``'sorry'``.
        """
        data = {
            'text': text,
            'location': 'home',
            'pic_id': '',
            'rank': '0',
            'rankid': '',
            '_surl': '',
            'hottopicid': '',
            'module': 'stissue',
            '_t': '0',
        }
        # Millisecond timestamp for the ``__rnd`` query parameter; int()
        # works on Python 2 and 3, whereas long() exists only on Python 2.
        ts = int(time.time() * 1000)
        # The conventional AJAX marker header is "X-Requested-With".
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
        resp = self.session.post(
            'http://weibo.com/aj/mblog/add?_wv=5&__rnd=%s' % ts,
            data=data
        )
        if 'sorry' in resp.url:
            raise Exception('POST BLOG FA, %s' % (resp.url))
        return resp

    def wblogin(self, username, password):
        """Run the SSO login flow and prepare the session for weibo.com.

        Steps: request the prelogin challenge, post the login form with the
        RSA-encrypted password, follow the URL found in the ``replace(...)``
        call of the reply, store the parsed JSON result in
        ``self.loginResult``, then load ``http://weibo.com/`` and set the
        ``Referer``, ``Origin`` and ``Host`` session headers.

        Args:
            username: Account name used to log in.
            password: Account password.

        Raises:
            AttributeError: If a response does not contain the pattern that
                is searched for (the regex match is ``None``).
        """
        try:
            from urllib.parse import quote  # Python 3
        except ImportError:
            from urllib import quote  # Python 2

        # NOTE(review): the prelogin request encodes the raw username while
        # the login form below encodes the URL-quoted username -- confirm
        # this difference is intended.
        resp = self.session.get(
            'http://login.sina.com.cn/sso/prelogin.php?'
            'entry=sso&callback=sinaSSOController.preloginCallBack&'
            'su=%s&rsakt=mod&client=%s' %
            (self._b64(username), self.WBCLIENT)
        )
        # The reply wraps a JSON object in a callback call; capture the
        # "{...}" part.
        pre_login_str = re.match(
            r'[^{]+({.+?})', self._to_text(resp.content)).group(1)
        pre_login = json.loads(pre_login_str)

        data = {
            'entry': 'weibo',
            'gateway': 1,
            'from': '',
            'savestate': 7,
            'userticket': 1,
            'ssosimplelogin': 1,
            'su': self._b64(quote(username)),
            'service': 'miniblog',
            'servertime': pre_login['servertime'],
            'nonce': pre_login['nonce'],
            'vsnf': 1,
            'vsnval': '',
            'pwencode': 'rsa2',
            'sp': self.encrypt_passwd(password, pre_login['pubkey'],
                                      pre_login['servertime'],
                                      pre_login['nonce']),
            'rsakv': pre_login['rsakv'],
            'encoding': 'UTF-8',
            'prelt': '115',
            'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.si'
                   'naSSOController.feedBackUrlCallBack',
            'returntype': 'META'
        }
        resp = self.session.post(
            'http://login.sina.com.cn/sso/login.php?client=%s' % self.WBCLIENT,
            data=data
        )
        # The reply redirects via a script-level replace('<url>') call.
        login_url = re.search(r'replace\([\"\']([^\'\"]+)[\"\']',
                              self._to_text(resp.content)).group(1)
        resp = self.session.get(login_url)

        # Pattern extracting the JSON result from inside the parentheses.
        # Alternative previously tried: r'[^{]+({.+?}})'
        pattern = r'\((.+?)\)'
        login_str = re.search(pattern, self._to_text(resp.content)).group(1)
        self.loginResult = json.loads(login_str)
        # Disabled: self.session.get(self.loginResult['redirect'])

        resp = self.session.get('http://weibo.com/')
        self.session.headers['Referer'] = resp.url
        self.session.headers['Origin'] = 'http://weibo.com'
        self.session.headers['Host'] = 'weibo.com'