本文整理汇总了Python中urllib._urlopener方法的典型用法代码示例。如果您正苦于以下问题:Python urllib._urlopener方法的具体用法?Python urllib._urlopener怎么用?Python urllib._urlopener使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类urllib的用法示例。
在下文中一共展示了urllib._urlopener方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_google
# 需要导入模块: import urllib [as 别名]
# 或者: from urllib import _urlopener [as 别名]
def scrape_google(dom):
    """
    Enumerate sub-domains and hosts for *dom* by scraping Google search results.

    Runs five paginated Google queries (result offsets 100-500), accumulates
    the returned HTML, extracts host names from HREF entries, and returns a
    unique list of hosts belonging to the target domain.

    Parameters:
        dom (str): target domain, e.g. "example.com".

    Returns:
        list: unique host names (strings) found for the domain.
    """
    results = []
    filtered = []
    # Google result-page offsets; combined with num=100 this walks the paging.
    searches = ["100", "200", "300", "400", "500"]
    data = ""
    # Install a custom opener so Python 2's urllib.urlopen sends our User-Agent.
    # NOTE(review): AppURLopener is defined elsewhere in this project.
    urllib._urlopener = AppURLopener()
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    headers = {'User-Agent': user_agent, }
    #opener.addheaders = [('User-Agent','Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')]
    for n in searches:
        url = "http://google.com/search?hl=en&lr=&ie=UTF-8&q=%2B" + dom + "&start=" + n + "&sa=N&filter=0&num=100"
        try:
            # Python 2 path: urllib.urlopen honours the opener installed above.
            sock = urllib.urlopen(url)
            data += sock.read()
            sock.close()
        except AttributeError:
            # Python 3 path: urllib.urlopen no longer exists; fall back to
            # urllib.request with an explicit User-Agent header.
            request = urllib.request.Request(url, None, headers)
            response = urllib.request.urlopen(request)
            data += str(response.read())
        # Fixed character class: was "[a-b0-9]", a typo for "[a-z0-9]".
        results.extend(unique(re.findall("href=\"htt\w{1,2}:\/\/([^:?]*[a-z0-9]*[^:?]*\." + dom + ")\/", data)))
        # Make sure we are only getting the host
        for f in results:
            filtered.extend(re.findall("^([a-z.0-9^]*" + dom + ")", f))
        # Be polite between queries to avoid Google rate-limiting/captchas.
        time.sleep(2)
    return unique(filtered)
示例2: wsopen
# 需要导入模块: import urllib [as 别名]
# 或者: from urllib import _urlopener [as 别名]
def wsopen(self, url, post, **params):
    """
    Build, sign, and send a web-service request; return the urllib response.

    url: request URL (for GET calls the encoded params are appended as the
        query string).
    post: truthy -> send params as the POST body; falsy -> GET with query string.
    params: request parameters. Special key 'noparam' (popped before use)
        suppresses ALL parameters, including authentication.

    Raises WSError on transport failure (IOError) and on any non-200 status.

    NOTE(review): Python 2 urllib API (urlencode/urlopen/FancyURLopener);
    hmac.new() without an explicit digestmod relies on the Py2 MD5 default —
    confirm before porting to Python 3.
    """
    noparam = params.pop('noparam',False)
    if noparam:
        params = {}
    else:
        if self.user is not None:
            params['user'] = self.user
        if self.password is not None:
            # Sign the request: HMAC over "k=v" pairs in sorted key order.
            # Any caller-supplied 'hmac' value is discarded first so it
            # cannot influence the signature.
            params.pop('hmac', None)
            HMAC=hmac.new(self.password)
            for k,v in sorted(params.items()):
                HMAC.update("%s=%s" % (k,v))
            params.update({'hmac':HMAC.hexdigest()})
    query = urllib.urlencode(params)
    if post:
        body = query
    elif query:
        url = "{}?{}".format(url, query)
    if self.debug:
        if post:
            print("POST:\n{}\n{!r}\n".format(url, body), file=sys.stderr)
        else:
            print("GET:\n{}\n".format(url), file=sys.stderr)
    # Opener that converts HTTP error statuses into ordinary responses so the
    # status code can be inspected below instead of surfacing as IOError.
    class URLopener(urllib.FancyURLopener):
        def http_error_default(self, url, fp, errcode, errmsg, headers):
            return urllib.addinfourl(fp, headers, "http:" + url, errcode)
    try:
        # Install the permissive opener globally (Py2 urllib pattern), then issue
        # the request; only transport-level failures raise IOError here.
        urllib._urlopener = URLopener()
        if post:
            resp = urllib.urlopen(url, body)
        else:
            resp = urllib.urlopen(url)
    except IOError as e:
        raise WSError(url, msg=e)
    if self.debug:
        print("RESPONSE:\n{}\n{}".format(resp.getcode(), resp.info()), file=sys.stderr)
    # Anything but 200 is treated as a service error, with the body as detail.
    if resp.getcode() != 200:
        raise WSError(url, resp.getcode(), resp.read())
    return resp
示例3: install_patches
# 需要导入模块: import urllib [as 别名]
# 或者: from urllib import _urlopener [as 别名]
def install_patches():
    """Monkey-patch Python 2's urllib so each request is wrapped in a tracing span.

    On Python 3 the legacy urllib opener API does not exist, so patching is
    delegated to the urllib2 patcher instead.
    """
    if six.PY3:
        # The old urllib does not exist in Py3, so delegate to urllib2 patcher
        from . import urllib2
        urllib2.install_patches()
        return

    import urllib
    import urlparse

    log.info('Instrumenting urllib methods for tracing')

    class TracedURLOpener(urllib.FancyURLopener):
        """FancyURLopener subclass whose open() reports a client-side span."""

        def open(self, fullurl, data=None):
            parsed = urlparse.urlparse(fullurl)
            span = utils.start_child_span(
                operation_name='urllib', parent=current_span_func())
            span.set_tag(ext_tags.SPAN_KIND, ext_tags.SPAN_KIND_RPC_CLIENT)

            # use span as context manager so that its finish() method is called
            with span:
                span.set_tag(ext_tags.HTTP_URL, fullurl)
                if parsed.hostname:
                    span.set_tag(ext_tags.PEER_HOST_IPV4, parsed.hostname)
                if parsed.port:
                    span.set_tag(ext_tags.PEER_PORT, parsed.port)
                # TODO add callee service name
                # TODO add headers to propagate trace

                # cannot use super here, this is an old style class
                fileobj = urllib.FancyURLopener.open(self, fullurl, data)
                status = fileobj.getcode()
                if status is not None:
                    span.set_tag(ext_tags.HTTP_STATUS_CODE, status)
            return fileobj

        def retrieve(self, url, filename=None, reporthook=None, data=None):
            raise NotImplementedError

    urllib._urlopener = TracedURLOpener()