This article collects typical usage examples of Python's urllib.proxy_bypass function. If you have been wondering what proxy_bypass is for, how to call it, or what it looks like in real code, the curated examples below should help.
The following 15 code examples of the proxy_bypass function are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
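Before diving into the examples, here is a minimal sketch of the question proxy_bypass answers: should a given host skip the configured proxy? The snippet assumes Python 3 (where the function lives in urllib.request; on Python 2 it is urllib.proxy_bypass) and uses a made-up NO_PROXY value purely for illustration.
import os
from urllib.request import proxy_bypass

# Hypothetical bypass list, set only for this demonstration.
os.environ['NO_PROXY'] = 'localhost,127.0.0.1,.internal.example.com'

print(bool(proxy_bypass('localhost')))                   # True: listed explicitly
print(bool(proxy_bypass('build.internal.example.com')))  # True: matches the .internal.example.com suffix
print(bool(proxy_bypass('www.python.org')))              # False: this host would go through the proxy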
Example 1: using_http_proxy
def using_http_proxy(url):
    """
    Return True if the url will use an HTTP proxy, False otherwise.
    """
    up = urlparse(url)
    return up.scheme.lower() in getproxies() and not proxy_bypass(up.netloc)
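A possible call pattern for this helper; the proxy settings and URLs below are hypothetical and only illustrate the two halves of the return expression (a proxy must be configured for the scheme, and the host must not be bypassed):
# Assuming, for example, http_proxy=http://proxy.example.com:3128 and
# no_proxy=.corp.example.com are exported in the environment:
using_http_proxy('http://www.python.org/')         # True: http has a proxy and the host is not bypassed
using_http_proxy('http://wiki.corp.example.com/')  # False: host matches the no_proxy suffix
using_http_proxy('ftp://ftp.example.org/')         # False: no proxy configured for the ftp scheme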
Example 2: proxy_open
def proxy_open(self, req, proxy, type):
    orig_type = req.get_type()
    proxy_type, user, password, hostport = _parse_proxy(proxy)
    if proxy_type is None:
        proxy_type = orig_type

    if req.host and proxy_bypass(req.host):
        return None

    if user and password:
        user_pass = '%s:%s' % (unquote(user), unquote(password))
        creds = base64.b64encode(user_pass).strip()
        req.add_header('Proxy-authorization', 'Basic ' + creds)
    hostport = unquote(hostport)
    req.set_proxy(hostport, proxy_type)

    if orig_type == proxy_type or orig_type == 'https':
        # let other handlers take care of it
        return None
    else:
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
Example 3: process_request
def process_request(self, request, spider):
    for p in self.no_proxy_patterns:
        if p.search(request.url):
            return

    retries = request.meta.get('retry_times', None)
    # do not set a proxy for requests whose proxy was specified manually
    if 'proxy' in request.meta:
        if retries is None:
            return
    # when the maximum retry count is reached, access the site directly from the
    # local machine, so a failing request always gets one final local attempt
    if retries == self.max_retry_times:
        now = time.time()
        should_sleep = self.local_interval - (now - self.local_last_use_time)
        if should_sleep > 0:
            log.msg('ProxyMiddleware: use proxy fail, local sleep %s' % should_sleep, log.DEBUG)
            time.sleep(should_sleep)
        return

    parsed = urlparse_cached(request)
    scheme = parsed.scheme
    # 'no_proxy' is only supported by http schemes
    if scheme in ('http', 'https') and proxy_bypass(parsed.hostname):
        return
    self._set_proxy(request, scheme)
Example 4: get_environ_proxies
def get_environ_proxies(netloc):
    """Return a dict of environment proxies."""

    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy = get_proxy('no_proxy')

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the netloc, both with and without the port.
        no_proxy = no_proxy.replace(' ', '').split(',')

        for host in no_proxy:
            if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                # The URL does match something in no_proxy, so we don't want
                # to apply the proxies on this URL.
                return {}

    # If the system proxy settings indicate that this URL should be bypassed,
    # don't proxy.
    if proxy_bypass(netloc):
        return {}

    # If we get here, we either didn't have no_proxy set or we're not going
    # anywhere that no_proxy applies to, and the system settings don't require
    # bypassing the proxy for the current URL.
    return getproxies()
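Note that this helper takes a bare netloc (host or host:port), not a full URL. A hedged illustration of its behaviour with a hypothetical environment (https_proxy=http://proxy.example.com:8080 and no_proxy=localhost,.internal.example.com; the hosts below are made up):
get_environ_proxies('api.internal.example.com')  # {}  (suffix match in no_proxy)
get_environ_proxies('localhost:8000')            # {}  (host part matches no_proxy)
get_environ_proxies('www.python.org')            # typically the full getproxies() dict, including the https proxy above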
Example 5: open_http
def open_http(url, data=None):
    """Use HTTP protocol."""
    import httplib
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        host, selector = urllib.splithost(url)
        if host:
            user_passwd, host = urllib.splituser(host)
            host = urllib.unquote(host)
        realhost = host
    else:
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = urllib.splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = urllib.splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = urllib.splithost(rest)
            if realhost:
                user_passwd, realhost = urllib.splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if urllib.proxy_bypass(realhost):
                host = realhost
        #print "proxy via http:", host, selector
    if not host: raise IOError, ('http error', 'no host given')
    if proxy_passwd:
        import base64
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None
    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    c = FakeHTTPConnection(host)
    if data is not None:
        c.putrequest('POST', selector)
        c.putheader('Content-Type', 'application/x-www-form-urlencoded')
        c.putheader('Content-Length', '%d' % len(data))
    else:
        c.putrequest('GET', selector)
    if proxy_auth: c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth: c.putheader('Authorization', 'Basic %s' % auth)
    if realhost: c.putheader('Host', realhost)
    for args in urllib.URLopener().addheaders: c.putheader(*args)
    c.endheaders()
    return c
Example 6: ignore_proxy_host
def ignore_proxy_host(self):
    """Check if self.host is in the $no_proxy ignore list."""
    if urllib.proxy_bypass(self.host):
        return True
    no_proxy = os.environ.get("no_proxy")
    if no_proxy:
        entries = [parse_host_port(x) for x in no_proxy.split(",")]
        for host, port in entries:
            if host.lower() == self.host and port == self.port:
                return True
    return False
Example 7: process_request
def process_request(self, request, spider):
    # ignore if a proxy is already set
    if 'proxy' in request.meta:
        return

    parsed = urlparse_cached(request)
    scheme = parsed.scheme
    # 'no_proxy' is only supported by http schemes
    if scheme in ('http', 'https') and proxy_bypass(parsed.hostname):
        return
    if scheme in self.proxies:
        self._set_proxy(request, scheme)
Example 8: proxy_open
def proxy_open(self, req, proxy, type):
    orig_type = req.get_type()
    proxy_type, user, password, hostport = _parse_proxy(proxy)
    if proxy_type is None:
        proxy_type = orig_type

    if req.host and proxy_bypass(req.host):
        return

    if user and password:
        user_pass = '%s:%s' % (unquote(user), unquote(password))
        creds = base64.b64encode(user_pass).strip()
        req.add_header('Proxy-authorization', 'Basic ' + creds)
    hostport = unquote(hostport)
    req.set_proxy(hostport, proxy_type)

    if orig_type == proxy_type or orig_type == 'https':
        return
    else:
        return self.parent.open(req, timeout=req.timeout)
Example 9: find_proxy
def find_proxy(url):
    scheme, netloc, path, pars, query, fragment = urlparse.urlparse(url)
    proxies = urllib.getproxies()
    proxyhost = None
    if scheme in proxies:
        if '@' in netloc:
            sidx = netloc.find('@') + 1
        else:
            sidx = 0
        eidx = netloc.find(':')
        if eidx == -1:
            eidx = len(netloc)
        host = netloc[sidx:eidx]
        if not (host == '127.0.0.1' or urllib.proxy_bypass(host)):
            proxyurl = proxies[scheme]
            proxyelems = urlparse.urlparse(proxyurl)
            proxyhost = proxyelems[1]
    if DEBUG:
        print >> sys.stderr, 'find_proxy: Got proxies', proxies, 'selected', proxyhost, 'URL was', url
    return proxyhost
Example 10: process_request
def process_request(self, request, spider):
    # ignore if a proxy is already set
    if 'proxy' in request.meta:
        # parse out user/password and, if present, set up proxy authentication
        creds, proxy = self._get_proxy(request.meta['proxy'], 'http')
        if creds:
            request.meta['proxy'] = proxy
            request.headers['Proxy-Authorization'] = 'Basic ' + creds
        return

    parsed = urlparse_cached(request)
    scheme = parsed.scheme
    # 'no_proxy' is only supported by http schemes
    if scheme in ('http', 'https') and proxy_bypass(parsed.hostname):
        return
    if scheme in self.proxies:
        self._set_proxy(request, scheme)
Example 11: find_proxy
def find_proxy(url):
    """ Returns proxy host as "host:port" string """
    (scheme, netloc, path, pars, query, fragment) = urlparse.urlparse(url)
    proxies = urllib.getproxies()
    proxyhost = None
    if scheme in proxies:
        if '@' in netloc:
            sidx = netloc.find('@') + 1
        else:
            sidx = 0
        # IPVSIX TODO: what if host is IPv6 address
        eidx = netloc.find(':')
        if eidx == -1:
            eidx = len(netloc)
        host = netloc[sidx:eidx]
        if not (host == "127.0.0.1" or urllib.proxy_bypass(host)):
            proxyurl = proxies[scheme]
            proxyelems = urlparse.urlparse(proxyurl)
            proxyhost = proxyelems[1]
    if DEBUG:
        print >> sys.stderr, "find_proxy: Got proxies", proxies, "selected", proxyhost, "URL was", url
    return proxyhost
Example 12: _setup_connection
def _setup_connection(self, protocol, netloc):
    """Take care of managing proxies, if any are configured.

    This is a first attempt at managing proxies; authentication is not yet
    taken into account, and none of this has been tested yet.

    Parameters
    ----------
    protocol : str
        http or https
    netloc : str
        url to connect to

    Returns
    -------
    HTTP(S)Session
        properly set up in case of proxies
    """
    proxies = urllib.getproxies()
    # We go through the proxy only if one is defined for this protocol and
    # the netloc we connect to is not in the bypass list.
    if protocol in proxies and urllib.proxy_bypass(netloc) == 0:
        proxy = proxies[protocol]
        urltype, proxyhost = urllib.splittype(proxy)
        host, selector = urllib.splithost(proxyhost)
        host, port = urllib.splitport(host)
        if protocol == 'https':
            self.connections[protocol + netloc] = client.HTTPSConnection(host, port)
            self.connections[protocol + netloc].set_tunnel(netloc, 443)
        else:
            self.connections[protocol + netloc] = client.HTTPConnection(host, port)
            self.connections[protocol + netloc].set_tunnel(netloc, 80)
    else:
        if protocol == 'https':
            self.connections[protocol + netloc] = client.HTTPSConnection(netloc)
        else:
            self.connections[protocol + netloc] = client.HTTPConnection(netloc)
Example 13: _build_opener
def _build_opener(url):
    from osc.core import __version__
    global config

    apiurl = urljoin(*parse_apisrv_url(None, url))
    if "last_opener" not in _build_opener.__dict__:
        _build_opener.last_opener = (None, None)
    if apiurl == _build_opener.last_opener[0]:
        return _build_opener.last_opener[1]

    # respect no_proxy env variable
    if urllib.proxy_bypass(apiurl):
        # initialize with empty dict
        proxyhandler = urllib2.ProxyHandler({})
    else:
        # read proxies from env
        proxyhandler = urllib2.ProxyHandler()

    # workaround for http://bugs.python.org/issue9639
    authhandler_class = urllib2.HTTPBasicAuthHandler
    if (
        sys.version_info >= (2, 6, 6)
        and sys.version_info < (2, 7, 1)
        and "reset_retry_count" not in dir(urllib2.HTTPBasicAuthHandler)
    ):
        print >> sys.stderr, "warning: your urllib2 version seems to be broken. " "Using a workaround for http://bugs.python.org/issue9639"

        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def http_error_401(self, *args):
                response = urllib2.HTTPBasicAuthHandler.http_error_401(self, *args)
                self.retried = 0
                return response

            def http_error_404(self, *args):
                self.retried = 0
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 6) and sys.version_info < (2, 7, 1):

        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def http_error_404(self, *args):
                self.reset_retry_count()
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 5) and sys.version_info < (2, 6, 6):
        # workaround for broken urllib2 in python 2.6.5: wrong credentials
        # lead to an infinite recursion
        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def retry_http_basic_auth(self, host, req, realm):
                # don't retry if auth failed
                if req.get_header(self.auth_header, None) is not None:
                    return None
                return urllib2.HTTPBasicAuthHandler.retry_http_basic_auth(self, host, req, realm)

        authhandler_class = OscHTTPBasicAuthHandler

    options = config["api_host_options"][apiurl]
    # with None as first argument, it will always use this username/password
    # combination for urls for which arg2 (apisrv) is a super-url
    authhandler = authhandler_class(urllib2.HTTPPasswordMgrWithDefaultRealm())
    authhandler.add_password(None, apiurl, options["user"], options["pass"])

    if options["sslcertck"]:
        try:
            import oscssl
            from M2Crypto import m2urllib2
        except ImportError, e:
            print e
            raise NoSecureSSLError(
                "M2Crypto is needed to access %s in a secure way.\nPlease install python-m2crypto." % apiurl
            )

        cafile = options.get("cafile", None)
        capath = options.get("capath", None)
        if not cafile and not capath:
            for i in ["/etc/pki/tls/cert.pem", "/etc/ssl/certs"]:
                if os.path.isfile(i):
                    cafile = i
                    break
                elif os.path.isdir(i):
                    capath = i
                    break
        ctx = oscssl.mySSLContext()
        if ctx.load_verify_locations(capath=capath, cafile=cafile) != 1:
            raise Exception("No CA certificates found")
        opener = m2urllib2.build_opener(
            ctx,
            oscssl.myHTTPSHandler(ssl_context=ctx, appname="osc"),
            urllib2.HTTPCookieProcessor(cookiejar),
            authhandler,
            proxyhandler,
        )
Example 14: __init__
def __init__(
    self,
    url,
    method="GET",
    data=None,
    headers=None,
    headers_only=False,
    user_agent=None,
    follow_location=False,
    force_quiet=True,
):
    GObjectWrapper.__init__(self)
    self.result = StringIO.StringIO()
    self.result_headers = StringIO.StringIO()
    if isinstance(url, unicode):
        self.url = url.encode("utf-8")
    else:
        self.url = url
    self.method = method
    self.data = data
    self.headers = headers
    self.status = None

    # the actual curl request object
    self.curl = pycurl.Curl()
    if logging.root.level == logging.DEBUG and not force_quiet:
        self.curl.setopt(pycurl.VERBOSE, 1)
    self.curl.setopt(pycurl.WRITEFUNCTION, self.result.write)
    self.curl.setopt(pycurl.HEADERFUNCTION, self.result_headers.write)
    # We want to use gzip and deflate if possible:
    self.curl.setopt(pycurl.ENCODING, "")  # use all available encodings
    self.curl.setopt(pycurl.URL, self.url)

    # let's set the HTTP request method
    if method == "GET":
        self.curl.setopt(pycurl.HTTPGET, 1)
    elif method == "POST":
        self.curl.setopt(pycurl.POST, 1)
    elif method == "PUT":
        self.curl.setopt(pycurl.UPLOAD, 1)
    else:
        self.curl.setopt(pycurl.CUSTOMREQUEST, method)

    if data:
        if method == "PUT":
            self.data = StringIO.StringIO(data)
            self.curl.setopt(pycurl.READFUNCTION, self.data.read)
            self.curl.setopt(pycurl.INFILESIZE, len(self.data.getvalue()))
        else:
            self.curl.setopt(pycurl.POSTFIELDS, self.data)
            self.curl.setopt(pycurl.POSTFIELDSIZE, len(self.data))
    if headers:
        self.curl.setopt(pycurl.HTTPHEADER, headers)
    if headers_only:
        self.curl.setopt(pycurl.HEADER, 1)
        self.curl.setopt(pycurl.NOBODY, 1)
    if user_agent:
        self.curl.setopt(pycurl.USERAGENT, user_agent)
    if follow_location:
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)

    if libproxy:
        for proxy in proxy_factory.getProxies(self.url):
            # only use the first one
            self.curl.setopt(pycurl.PROXY, proxy)
            break
    else:
        # Proxy: let's be careful to isolate the protocol to ensure that we
        # support the case where http and https might use different proxies
        split_url = self.url.split("://", 1)
        if len(split_url) > 1:
            # We were able to get a protocol
            protocol, address = split_url
            host, _path = urllib.splithost("//" + address)
            proxies = urllib.getproxies()
            if protocol in proxies and not urllib.proxy_bypass(host):
                self.curl.setopt(pycurl.PROXY, proxies[protocol])

    # self reference required, because CurlMulti will only return
    # Curl handles
    self.curl.request = self
Example 15: loadPage
def loadPage(self, url, uri=None, method="GET", params="", additionalParams=""):
    if not url:
        logging.error("Request URL undefined")
        tools.exitErr()

    if not url.startswith("http"):
        url = "https://" + url
    urlData = urlparse(url)
    if not uri:
        url = "%s://%s" % (urlData.scheme, urlData.netloc)
        uri = urlData.path + '?' + urlData.query

    # prepare params, append to uri
    if params:
        params = urlencode(params) + additionalParams
        if method == "GET":
            uri += ('?' if uri.find('?') == -1 else '&') + params
            params = ""

    # insert local cookies in request
    headers = {
        "Cookie": '; '.join([key + '=' + self.cookies[key] for key in self.cookies.keys()])
    }
    if method == "POST":
        headers["Content-type"] = "application/x-www-form-urlencoded"

    if self._proxy is None or proxy_bypass(urlData.hostname):
        host = urlData.hostname
        port = urlData.port
        real_host = real_port = None
    else:
        host = self._proxy.hostname
        port = self._proxy.port
        real_host = urlData.hostname
        real_port = urlData.port

    logging.debug("Request URL: %s:/%s > %s # %s", url,
                  uri, unquote(params), headers["Cookie"])

    conn = httplib.HTTPSConnection(host, port)
    if real_host is not None:
        conn.set_tunnel(real_host, real_port, headers=self._proxy_auth)
    if config.DEBUG:
        conn.set_debuglevel(1)

    conn.request(method, url + uri, params, headers)
    response = conn.getresponse()
    data = response.read()
    conn.close()

    logging.debug("Response : %s > %s",
                  response.status,
                  response.getheaders())

    result = tools.Struct(status=response.status,
                          location=response.getheader('location', None),
                          data=data)

    # update local cookies
    sk = Cookie.SimpleCookie(response.getheader("Set-Cookie", ""))
    for key in sk:
        self.cookies[key] = sk[key].value
    # delete cookies whose content is "deleteme"
    for key in self.cookies.keys():
        if self.cookies[key] == "deleteme":
            del self.cookies[key]

    return result