This article collects typical usage examples of the build_opener function from Python's urllib.request module. If you are wondering what build_opener does, how to call it, or what real code that uses it looks like, the curated examples below should help.
The following presents 15 code examples of build_opener, ordered by popularity.
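Before the collected snippets, here is a minimal, self-contained sketch of the pattern most of them share: compose one or more handlers (proxy, cookies, authentication) into an opener with build_opener, then either call the opener directly or install it globally. The proxy address, User-Agent string, and target URL below are placeholders chosen for illustration, not values taken from the examples that follow.

from http.cookiejar import CookieJar
from urllib.request import (
    HTTPCookieProcessor, ProxyHandler, Request, build_opener, install_opener
)

# Chain handlers: route HTTP traffic through a (placeholder) proxy and keep cookies.
opener = build_opener(
    ProxyHandler({'http': 'http://proxy.example.com:8080'}),
    HTTPCookieProcessor(CookieJar()),
)
opener.addheaders = [('User-Agent', 'example-client/1.0')]

# Use the opener directly for a single request...
response = opener.open(Request('http://example.com/'), timeout=15)
body = response.read()

# ...or install it so that plain urllib.request.urlopen() goes through it as well.
install_opener(opener)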
Example 1: _http_get
def _http_get(uri, silent=False):
    # Build an opener that honours the system proxy settings
    # (urllib2 is presumably aliased to urllib.request when PYTHON3 is true).
    if PYTHON3:
        opener = urllib2.build_opener(urllib2.ProxyHandler(urllib.request.getproxies()))
    else:
        opener = urllib2.build_opener(urllib2.ProxyHandler(urllib.getproxies()))
    for repo in repos:
        if 'storage.jcloud.com' in repo:
            _uri = uri
            for p in ('/', 'dev', 'master', 'update', 'plugins'):
                _uri = _uri.lstrip(p).lstrip('/')
            url = repo + '/' + _uri
        else:
            url = repo + '/raw/' + uri
        try:
            resp = opener.open(urllib2.Request(url, headers=headers), timeout=15)
            body = resp.read()
            try:
                # The body may be gzip-compressed; fall back to the raw bytes if not.
                f = StringIO(body)
                gz = gzip.GzipFile(fileobj=f)
                body = gz.read()
            except:
                pass
        except urllib2.HTTPError as e:
            if not silent:
                print('HTTP Error %s when fetching %s' % (e.code, url))
        except urllib2.URLError as e:
            pass
        else:
            return body
Example 2: urlopen
def urlopen(url, headers=None, data=None, timeout=None):
    """
    A URL opener with the User-agent set to gPodder (with version)
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        url = url_strip_authentication(url)
        password_mgr = HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = HTTPBasicAuthHandler(password_mgr)
        opener = build_opener(handler)
    else:
        opener = build_opener()

    if headers is None:
        headers = {}
    else:
        headers = dict(headers)
    headers.update({'User-agent': USER_AGENT})

    request = Request(url, data=data, headers=headers)
    if timeout is None:
        return opener.open(request)
    else:
        return opener.open(request, timeout=timeout)
Example 3: run
def run(self):
    self.preprocess()
    if self.config["isProxy"]:
        proxy_handler = urllib.request.ProxyHandler({'http': 'http://proxy.statestreet.com:80'})
        #proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
        #proxy_auth_handler.add_password('realm', '123.123.2123.123', 'user', 'password')
        # Note: the opener built here is neither assigned nor installed, so the
        # proxy handler has no effect on the urlopen() call below.
        urllib2.build_opener(urllib.request.HTTPHandler, proxy_handler)
    self.request = urllib2.Request(self.requestUrl, self.data, self.headers)
    try:
        self.response = urllib2.urlopen(self.request)
        self.responseBody = self.response.read()
        self.responseHeaders = self.response.headers
        #if self.responseHeaders["Content-Type"] == "csv":
        if True:
            #csv file
            #fileName = self.responseHeaders["Content-disposition"]
            #fileName = fileName.split("\"").reverse()[1]
            #print("fileName ", fileName)
            f = open("a.file", "wb")
            f.write(self.responseBody)
            f.close()
    except urllib.error.HTTPError as e:
        print(e)
        self.responseStatus = e.code
Example 4: POST
def POST(url, args={}, cred=None):
    """do http post
    url is the URL you want
    args is a dict of cgi args
    cred is (host, realm, username, password)
    """
    auth_handler = None
    arg_string = ''
    if cred is not None:
        (host, realm, username, password) = cred
        auth_handler = HTTPBasicAuthHandler()
        auth_handler.add_password(realm, host, username, password)
    if auth_handler:
        opener = build_opener(cookie_processor, auth_handler)
    else:
        opener = build_opener(cookie_processor)
    install_opener(opener)
    print("URL %s" % url)
    data = urlencode(args)
    req = Request(url, data)
    f = urlopen(req)
    return f
Example 5: get_page
def get_page(self):
    """
    Strip A Given Page For Links, Returning Them In A List - Takes 1 Argument
    page_number - Page Number To Parse
    """
    if self.config['proxy'] is not None:
        proxy = ProxyHandler({'http': self.config['proxy']})
        opener = build_opener(proxy)
    else:
        opener = build_opener()
    # Dirty User Agent Override
    opener.addheaders[0] = ('User-Agent', choice(self.config['Agents']))
    try:
        rep = opener.open(self.config['url'].format(self.config['query'], self.config['page']))
    except URLError:
        self.die('\t[-] Unable To Retrieve URL')
    html = rep.read()
    links = self.strip_links(Soup(html))
    return links
Example 6: opener_for_url_prefix
def opener_for_url_prefix(
    url_prefix, username=None, password=None, cache_dict=None
):
    if cache_dict is not None:
        cache_key = (url_prefix, username, password)
        try:
            return cache_dict[cache_key]
        except KeyError:
            pass
    if username or password:
        auth_handler = HTTPBasicAuthHandler()
        auth_handler.add_password(
            realm="Open Amiga Game Database",
            uri="{0}".format(url_prefix),
            user=username,
            passwd=password,
        )
        auth_handler.add_password(
            realm="OpenRetro",
            uri="{0}".format(url_prefix),
            user=username,
            passwd=password,
        )
        opener = build_opener(auth_handler)
    else:
        opener = build_opener()
    if cache_dict is not None:
        cache_key = (url_prefix, username, password)
        cache_dict[cache_key] = opener
    return opener
Example 7: _get_json5_from_google
def _get_json5_from_google(self, text):
    escaped_source = quote(text, '')
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    if self.proxyok == 'yes':
        if self.proxytp == 'socks5':
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS5, self.proxyho, int(self.proxypo)))
        else:
            if self.proxytp == 'socks4':
                opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS4, self.proxyho, int(self.proxypo)))
            else:
                opener = build_opener(SocksiPyHandler(PROXY_TYPE_HTTP, self.proxyho, int(self.proxypo)))
        req = Request(self.api_urls['translate'] + "&sl=%s&tl=%s&text=%s" % (self.source, self.target, escaped_source), headers=headers)
        result = opener.open(req, timeout=2).read()
        json = result
    else:
        try:
            req = Request(self.api_urls['translate'] + "&sl=%s&tl=%s&text=%s" % (self.source, self.target, escaped_source), headers=headers)
            result = urlopen(req, timeout=2).read()
            json = result
        except IOError:
            raise GoogleTranslateException(self.error_codes[501])
        except ValueError:
            raise GoogleTranslateException(result)
    return json
Example 8: __open
def __open(self, url, headers={}, data=None, baseurl=""):
"""Raw urlopen command"""
if not baseurl:
baseurl = self.baseurl
req = Request("%s%s" % (baseurl, url), headers=headers)
try:
req.data = urlencode(data).encode('utf-8') # Python 3
except:
try:
req.add_data(urlencode(data)) # Python 2
except:
pass
# Proxy support
if self.proxy_url is not None:
if self.proxy_user is None:
handler = ProxyHandler({'https': self.proxy_url})
opener = build_opener(handler)
else:
proxy = ProxyHandler({'https': 'https://%s:%[email protected]%s' % (self.proxy_user,
self.proxy_password, self.proxy_url)})
auth = HTTPBasicAuthHandler()
opener = build_opener(proxy, auth, HTTPHandler)
resp = opener.open(req)
else:
resp = urlopen(req)
charset = resp.info().get('charset', 'utf-8')
return json.loads(resp.read().decode(charset))
Example 9: getFile
def getFile(cls, getfile, unpack=True):
    if cls.getProxy():
        proxy = req.ProxyHandler({'http': cls.getProxy(), 'https': cls.getProxy()})
        auth = req.HTTPBasicAuthHandler()
        opener = req.build_opener(proxy, auth, req.HTTPHandler)
        req.install_opener(opener)
    if cls.ignoreCerts():
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        opener = req.build_opener(urllib.request.HTTPSHandler(context=ctx))
        req.install_opener(opener)

    response = req.urlopen(getfile)
    data = response
    # TODO: if data == text/plain; charset=utf-8, read and decode
    if unpack:
        if 'gzip' in response.info().get('Content-Type'):
            buf = BytesIO(response.read())
            data = gzip.GzipFile(fileobj=buf)
        elif 'bzip2' in response.info().get('Content-Type'):
            data = BytesIO(bz2.decompress(response.read()))
        elif 'zip' in response.info().get('Content-Type'):
            fzip = zipfile.ZipFile(BytesIO(response.read()), 'r')
            if len(fzip.namelist()) > 0:
                data = BytesIO(fzip.read(fzip.namelist()[0]))
    return (data, response)
Example 10: _get_json5_from_google
def _get_json5_from_google(self, text):
    escaped_source = quote(text, '')
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'}
    if self.proxyok == 'yes':
        if self.proxytp == 'socks5':
            opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS5, self.proxyho, int(self.proxypo)))
        else:
            if self.proxytp == 'socks4':
                opener = build_opener(SocksiPyHandler(PROXY_TYPE_SOCKS4, self.proxyho, int(self.proxypo)))
            else:
                opener = build_opener(SocksiPyHandler(PROXY_TYPE_HTTP, self.proxyho, int(self.proxypo)))
        req = Request(self.api_urls['translate'] + "?key=%s&source=%s&target=%s&q=%s" % (self.apikey, self.source, self.target, escaped_source), headers=headers)
        result = opener.open(req, timeout=2).read()
        json = result
    else:
        try:
            req = Request(self.api_urls['translate'] + "?key=%s&source=%s&target=%s&q=%s" % (self.apikey, self.source, self.target, escaped_source), headers=headers)
            result = urlopen(req, timeout=2).read()
            json = result
        except IOError:
            raise GoogleTranslateException(self.error_codes[501])
        except ValueError:
            raise GoogleTranslateException(result)
    return json
Example 11: resetProxies
def resetProxies(self, httpProxyTuple):
    # for ntlm user and password are required
    self.hasNTLM = False
    if isinstance(httpProxyTuple, (tuple, list)) and len(httpProxyTuple) == 5:
        useOsProxy, _urlAddr, _urlPort, user, password = httpProxyTuple
        _proxyDirFmt = proxyDirFmt(httpProxyTuple)
        # only try ntlm if user and password are provided because passman is needed
        if user and not useOsProxy:
            for pluginXbrlMethod in pluginClassMethods("Proxy.HTTPNtlmAuthHandler"):
                HTTPNtlmAuthHandler = pluginXbrlMethod()
                if HTTPNtlmAuthHandler is not None:
                    self.hasNTLM = True
            if not self.hasNTLM:  # try for python site-packages ntlm
                try:
                    from ntlm import HTTPNtlmAuthHandler
                    self.hasNTLM = True
                except ImportError:
                    pass
        if self.hasNTLM:
            pwrdmgr = proxyhandlers.HTTPPasswordMgrWithDefaultRealm()
            pwrdmgr.add_password(None, _proxyDirFmt["http"], user, password)
            self.proxy_handler = proxyhandlers.ProxyHandler({})
            self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler(pwrdmgr)
            self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler(pwrdmgr)
            self.ntlm_auth_handler = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(pwrdmgr)
            self.opener = proxyhandlers.build_opener(self.proxy_handler, self.ntlm_auth_handler, self.proxy_auth_handler, self.http_auth_handler)
    if not self.hasNTLM:
        self.proxy_handler = proxyhandlers.ProxyHandler(proxyDirFmt(httpProxyTuple))
        self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler()
        self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler()
        self.opener = proxyhandlers.build_opener(self.proxy_handler, self.proxy_auth_handler, self.http_auth_handler)
Example 12: urlrequest
def urlrequest(stream, url, headers, write_lock, debug=0):
    """URL request function"""
    if debug:
        print("Input for urlrequest", url, headers, debug)
    req = UrlRequest('GET', url=url, headers=headers)
    if debug:
        hdlr = urllib2.HTTPHandler(debuglevel=1)
        opener = urllib2.build_opener(hdlr)
    else:
        opener = urllib2.build_opener()
    time0 = time.time()
    fdesc = opener.open(req)
    data = fdesc.read()
    ctime = time.time() - time0
    fdesc.close()
    # just use elapsed time if we use html format
    if headers['Accept'] == 'text/html':
        response = {'ctime': str(ctime)}
    else:
        decoder = JSONDecoder()
        response = decoder.decode(data)
    if isinstance(response, dict):
        write_lock.acquire()
        stream.write(str(response) + '\n')
        stream.flush()
        write_lock.release()
Example 13: main
def main():
    pagestart = int(sys.argv[1])
    pageend = int(sys.argv[2])
    dirout = sys.argv[3]
    url = r'http://jobsearch.naukri.com/jobs-in-india-'
    url2 = '?ql=india&qs=f'
    outputfile = dirout + "\\tables_" + str(pagestart) + "_" + str(pageend) + ".json"
    file = open(outputfile, 'w+')
    mylist = list()
    j = 0
    for i in range(pagestart, pageend):
        temp = url + str(i) + url2
        opener = build_opener()
        opener.addheaders = [('User-agent', 'Try/' + str(i) + ".0")]
        response = opener.open(temp)
        soup = BeautifulSoup(response)
        for content in soup.find("form").findAll('a', attrs={"target": "_blank"}):
            listingurl = content.get('href')
            openerurl = build_opener()
            responseurl = openerurl.open(listingurl)
            soupurl = None
            try:
                soupurl = BeautifulSoup(responseurl)
                DataMatrix = setjdRows(soupurl.findAll('div', attrs={"class": "jdRow"}))
                DataMatrix['jobTitle'] = soupurl.find('h1', attrs={"class": "jobTitle"}).getText()
                DataMatrix['date'] = str(soupurl.find('span', attrs={"class": "fr"})).split('span')[3][1:][:-2].split()
                DataMatrix['url'] = listingurl
                DataMatrix['company'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[2][:-2][2:]
                if len(str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')) >= 7:
                    DataMatrix['alias'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[4][:-2][2:]
                    DataMatrix['location'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[6][:-6][2:].split()
                elif len(str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')) >= 4:
                    DataMatrix['location'] = str(soupurl.find('div', attrs={"class": "jobDet"})).split('span')[4][:-6][2:].split()
                if len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')) >= 4 & len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')[3].split('to')) >= 2:
                    DataMatrix['experienceMin'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[3].split('to')[0][2:]
                    DataMatrix['experienceMax'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[3].split('to')[1][:-10]
                elif len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')) >= 11:
                    DataMatrix['openings'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[11][:-2][1:]
                    DataMatrix['salaryMin'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[1]
                    DataMatrix['salaryMax'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[1]
                    DataMatrix['currency'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[0]
                    DataMatrix['salaryRate'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[2]
                elif len(str(soupurl.find('span', attrs={"class": "fl"})).split('span')) >= 7:
                    if 'Opening' in str(soupurl.find('span', attrs={"class": "fl"})):
                        DataMatrix['opening'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:]
                    else:
                        DataMatrix['salaryMin'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[1]
                        DataMatrix['salaryMax'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[1]
                        DataMatrix['currency'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[0].split(' ')[0]
                        DataMatrix['salaryRate'] = str(soupurl.find('span', attrs={"class": "fl"})).split('span')[7][:-2][1:].strip().split('-')[1].split(' ')[2]
                t = postprocess(DataMatrix)
                mylist.append(t)
            except Exception as e:
                j = j + 1
                print(j)
    json.dump(mylist, file)
    file.close()
Example 14: write_cookie_file
def write_cookie_file(className, username, password):
    """
    Automatically generate a cookie file for the Coursera site.
    """
    try:
        global csrftoken
        global session
        hn, fn = tempfile.mkstemp()
        cookies = cjlib.LWPCookieJar()
        handlers = [
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(cookies)
        ]
        opener = urllib2.build_opener(*handlers)
        req = urllib2.Request(get_syllabus_url(className))
        res = opener.open(req)
        for cookie in cookies:
            if cookie.name == 'csrf_token':
                csrftoken = cookie.value
                break
        opener.close()

        # Now make a call to the authenticator url:
        cj = cjlib.MozillaCookieJar(fn)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj),
                                      urllib2.HTTPHandler(),
                                      urllib2.HTTPSHandler())

        # Preparation of headers and of data that we will send in a POST
        # request.
        std_headers = {
            'Cookie': ('csrftoken=%s' % csrftoken),
            'Referer': 'https://www.coursera.org',
            'X-CSRFToken': csrftoken,
        }
        auth_data = {
            'email_address': username,
            'password': password
        }
        formatted_data = urlparse.urlencode(auth_data).encode('ascii')
        req = urllib2.Request(AUTH_URL, formatted_data, std_headers)
        opener.open(req)
    except HTTPError as e:
        if e.code == 404:
            raise LookupError(className)
        else:
            raise
    cj.save()
    opener.close()
    os.close(hn)
    return fn
Example 15: get_cookie
def get_cookie(domain):
    ssl._create_default_https_context = ssl._create_unverified_context
    cookie_filename = "cookie.txt"
    account_filemane = "account.json"
    header_dict = {'Content-Type': 'application/json'}
    session_str = domain + "rest/auth/1/session"
    req = request.Request(url=session_str, headers=header_dict)
    cookie = load_cookie_from_file(cookie_filename)
    if cookie == None:
        cookie = update_cookie_to_file(cookie_filename, account_filemane, session_str, header_dict)
        if cookie == None:
            print('Login error:%s' % "cookie==None")
            return False, None
    opener = request.build_opener(request.HTTPCookieProcessor(cookie))
    cookie_expired_error = False
    try:
        r = opener.open(req)
    except URLError as e:
        if hasattr(e, 'code'):
            print('Error code: ', e.code)
            if e.code == 401:
                cookie_expired_error = True
        if hasattr(e, 'reason'):
            print('Reason: ', e.reason)
        if cookie_expired_error == False:
            print('Login error:%s' % "URLError")
            return False, None
    if cookie_expired_error == True:
        cookie_expired_error = False
        cookie = update_cookie_to_file(cookie_filename, account_filemane, session_str, header_dict)
        if cookie == None:
            print('Login error:%s' % "cookie==None 2")
            return False, None
        opener = request.build_opener(request.HTTPCookieProcessor(cookie))
        req = request.Request(url=session_str, headers=header_dict)
        try:
            r = opener.open(req)
        except URLError as e:
            if hasattr(e, 'code'):
                print('Error code: ', e.code)
            if hasattr(e, 'reason'):
                print('Reason: ', e.reason)
            print('Login error:%s' % "URLError 2")
            return False, None
    res_str = r.read().decode('utf-8')
    res = json.loads(res_str)
    if 'errorMessages' in res:
        print('Login error:%s' % res.get('errorMessages'))
        return False, None
    else:
        ## print('Login succeed!\nres=\n%s' % res)
        print('Login succeed!')
        return True, cookie