本文整理汇总了Python中urlparse.urlsplit函数的典型用法代码示例。如果您正苦于以下问题:Python urlsplit函数的具体用法?Python urlsplit怎么用?Python urlsplit使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了urlsplit函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_suggestions
def get_suggestions(self, keywords, keyword_confidence):
stackoverflow_query = keywords + " error stackoverflow"
askubuntu_query = keywords + " error askubuntu"
suggestions = []
question_ids = []
for url in search(stackoverflow_query, tld='es', lang='en', stop=5):
hostname = urlparse.urlparse(url).hostname
if(hostname == "stackoverflow.com"):
path = urlparse.urlsplit(url).path
pathx = str(path).split('/')
question_ids.append(pathx[2])
if len(question_ids)!=0:
print "#DRAK : Fetched Stackoverflow Questions\n#DRAK : Fetching answers"
suggestions.extend(self.so.get_suggestions(question_ids))
print "#DRAK : Answers fetched successfully"
question_ids = []
for url in search(askubuntu_query, tld='es', lang='en', stop=5):
hostname = urlparse.urlparse(url).hostname
if(hostname == "askubuntu.com"):
path = urlparse.urlsplit(url).path
pathx = str(path).split('/')
question_ids.append(pathx[2])
if len(question_ids)!=0:
print "#DRAK : Fetched AskUbuntu Questions\n#DRAK : Fetching answers"
suggestions.extend(self.au.get_suggestions(question_ids))
print "#DRAK : Answers fetched successfully"
for suggestion in suggestions:
suggestion.keyword_confidence = keyword_confidence
return suggestions
示例2: __init__
def __init__(self, enc_password=""):
if enc_password == "":
print "MtGoxHMAC: Enter your API key file encryption password."
enc_password = getpass.getpass() # raw_input()
try:
f = open("./config/salt.txt", "r")
salt = f.read()
f.close()
hash_pass = hashlib.sha256(enc_password + salt).digest()
f = open("./config/api_key.txt")
ciphertext = f.read()
f.close()
decryptor = AES.new(hash_pass, AES.MODE_CBC, ciphertext[: AES.block_size])
plaintext = decryptor.decrypt(ciphertext[AES.block_size :])
d = json.loads(plaintext)
self.key = d["key"]
self.secret = d["secret"]
except:
print "\n\n\nError: you may have entered an invalid password or the encrypted api key file doesn't exist"
print "If you haven't yet generated the encrypted key file, run the encrypt_api_key.py script."
while 1:
pass
self.buff = ""
self.timeout = 15
self.__url_parts = urlparse.urlsplit("https://mtgox.com/api/0/")
self.__url_parts_1 = urlparse.urlsplit("https://mtgox.com/api/1/")
self.clock_window = time.time()
self.clock = time.time()
self.query_count = 0
self.query_limit_per_time_slice = 5
self.query_time_slice = 10
示例3: test_password_reset
def test_password_reset(self):
    """
    Tests the forgotten/reset password workflow.
    """
    client = Client()
    response = client.get(reverse('password_reset'))
    self.assertTrue(response.status_code, 200)
    response = client.post(reverse('password_reset'), data={'email': '[email protected]'})
    self.assertEqual(response.status_code, 302)
    self.assertEqual(len(mail.outbox), 1)
    # Grab the token and uidb64 so that we can hit the reset url
    confirm_kwargs = {'token': response.context[0]['token'],
                      'uidb64': response.context[0]['uid']}
    response = client.get(reverse('password_reset_confirm', kwargs=confirm_kwargs))
    self.assertEqual(response.status_code, 200)
    self.assertTrue(response.template_name.endswith('password_reset_confirm.html'))
    response = client.post(reverse('password_reset_confirm', kwargs=confirm_kwargs),
                           {'new_password1': 'mynewpassword', 'new_password2': 'mynewpassword'})
    self.assertEqual(response.status_code, 302)
    self.assertEqual(resolve(urlsplit(response.url).path).url_name, 'password_reset_complete')
    # User is returned to the login page on error vs redirected by default,
    # so a redirect away from 'login' proves the new password works.
    response = client.post(reverse('login'), {'username': 'tester_mcgee', 'password': 'mynewpassword'})
    self.assertEqual(response.status_code, 302)
    self.assertNotEqual(resolve(urlsplit(response.url).path).url_name, 'login')
示例4: create_yaml_profile
def create_yaml_profile(json_data):
    """Write a <name>.yaml profile describing the built images.

    For every entry in json_data['image_data'] the uncompressed size and
    md5 of the gzipped image file (located under json_data['output']) are
    computed via `gunzip`, and collected together with container/format
    metadata and json_data['repos'].  The profile file name is derived
    from the root image uri ('/').

    Raises Exception when 'image_data' is missing or when size/md5 could
    not be determined for an image.
    """
    data = []
    filename = None
    if 'image_data' in json_data:
        for k, v in json_data['image_data'].items():
            filename = os.path.basename(urlparse.urlsplit(v['uri'])[2])
            abs_path = os.path.join(json_data['output'], filename)
            # `gunzip -l` lists "compressed uncompressed ratio name";
            # field 1 is the uncompressed size.
            stdout = execute('gunzip', '-ql', abs_path)[0]
            try:
                size = int(stdout.split()[1])
            except (ValueError, IndexError):
                # was (ValueError, KeyError) — list indexing raises
                # IndexError, so a short/empty output was never caught
                size = None
            stdout = execute('gunzip', '-qc', abs_path, '|', 'md5sum')[0]
            try:
                md5 = stdout.split()[0]
            except IndexError:
                # empty md5sum output (ValueError cannot occur here)
                md5 = None
            if not md5 or not size:
                raise Exception("Either md5 or size of %s couldn't be "
                                "calculated" % abs_path)
            data.append({k: {
                'md5': md5,
                'size': size,
                'filename': filename,
                'container': v['container'],
                'format': v['format']}})
        data.append({'repos': json_data['repos']})
    else:
        raise Exception("Couldn't find any information about images")
    # Profile is named after the root ('/') image file, extension stripped.
    filename = os.path.basename(
        urlparse.urlsplit(json_data['image_data']
                          ['/']['uri'])[2]).split('.')[0]
    with open(os.path.join(json_data['output'], filename + '.yaml'), 'w') as f:
        f.write(yaml.dump(data))
示例5: logout_client
def logout_client():
    """
    Client-initiated logout
    """
    client = Client.query.filter_by(key=request.args['client_id']).first()
    if client is None:
        # No such client. Possible CSRF. Don't logout and don't send them back
        flash(logout_errormsg, 'error')
        return redirect(url_for('index'))
    if not client.trusted:
        # We know this client, but it's not trusted. Send back without logout.
        return redirect(get_next_url(external=True))
    # Trusted client: both the referrer and any 'next' destination must be
    # on the client's own domain, otherwise assume CSRF and bail out.
    clienthost = urlparse.urlsplit(client.redirect_uri).hostname
    if request.referrer and urlparse.urlsplit(request.referrer).hostname != clienthost:
        # Referring domain doesn't match. Don't logout and don't send back.
        flash(logout_errormsg, 'error')
        return redirect(url_for('index'))
    # No referrer? Either stripped out by browser or a proxy, or this is a
    # direct link. We can't do anything about that, so assume it's legit.
    if 'next' in request.args and \
            urlparse.urlsplit(request.args['next']).hostname != clienthost:
        # 'next' leaves the client's domain. Redirect to index without logout.
        flash(logout_errormsg, 'error')
        return redirect(url_for('index'))
    # All good. Log them out and send them back.
    logout_internal()
    return redirect(get_next_url(external=True))
示例6: get_download_links
def get_download_links(self):
self.get_links()
# for state, link in self.links.iteritems():
# if state in self.Meta.states:
# self.download_links[state] = link
for state in self.states:
self.download_links[state] = {}
try:
self.download_links[state]['link'] = self.links[state]
except:
print "failed to set link for state %s" % (state)
self.download_links[state]['link'] = ['NA']
try:
self.download_links[state]['file_name'] = os.path.basename(
urlsplit(self.links[state]).path)
except:
print "failed to set file_name for %s " % (state)
self.download_links[state]['file_name'] = []
try:
self.download_links[state]['file_type'] = os.path.splitext(
os.path.basename(urlsplit(self.links[state]).path)
)[1]
except:
print "couldnt find a type for file"
return self.download_links
示例7: __init__
def __init__(self, uri=None, path=None):
    """Build self.uri from *uri*, defaulting to the current request's site.

    When *path* is given, its path component replaces the path component
    of *uri*; scheme, netloc, query and fragment of *uri* are kept.
    """
    base = get_request().site if uri is None else uri
    components = list(urlparse.urlsplit(base))
    if path is not None:
        # index 2 of a split result is the path component
        components[2] = urlparse.urlsplit(path).path
    self.uri = urlparse.urlunsplit(components)
示例8: getsession
def getsession(url, user, password, domain="default"):
    """Log in to an XML-RPC endpoint and return an authenticated Session.

    Normalises *url* (adds http:// and a trailing /xmlrpc.php as needed),
    calls system.login, and builds a new URL embedding the returned
    sessionid/kp3 as userinfo credentials.

    Raises Exception on invalid credentials.
    """
    scheme, location, path, query, fragment = urlparse.urlsplit(url)
    # urlsplit returns empty strings for missing parts, never None, so the
    # original `scheme is None and ...` test could never fire.
    if not scheme and not location and not query:
        url = "http://" + url
    if url[-1] != "/":
        url += "/"
    url += "xmlrpc.php"
    sp = xmlrpclib.ServerProxy(url)
    res = sp.system.login({"username": user, "password": password, "domain": domain})
    if "sessionid" not in res or "kp3" not in res:
        raise Exception("Invalid username or password")
    scheme, location, path, query, fragment = urlparse.urlsplit(url)
    # Strip any credentials already present in the netloc.
    if location.find("@") >= 0:
        location = location[location.find("@") + 1:]
    # Re-assemble with "sessionid:kp3@host" userinfo.  (The source had the
    # scraping-mangled, invalid format string "%s:%[email protected]%s".)
    newurl = urlparse.urlunsplit(
        (scheme, "%s:%s@%s" % (res["sessionid"], res["kp3"], location),
         path, query, fragment))
    return Session(xmlrpclib.ServerProxy(newurl), res)
示例9: get_correctedFiles
def get_correctedFiles(path, save, url, img):
if not os.path.exists(save):
os.makedirs(save)
for f in os.listdir(path):
print "correcting file %s" % f
infile = open(os.path.join(path, f)).read()
soup = BeautifulSoup(infile, "html5lib")
for tag in soup.find_all(lambda t: 'href' in t.attrs or 'src' in t.attrs):
if 'href' in tag.attrs:
url_parts = urlparse.urlsplit(tag.attrs["href"])
full_path = tag.attrs["href"]
hrefpath = url_parts.path
if full_path[0:4] != "http" or full_path[0:5] != " http":
# for wiki conversion (moin moin wikis)
# hrefpath = hrefpath.replace("/", "|")
if hrefpath[0:6] == "|wiki|":
hrefpath = hrefpath[6:]
tag.attrs["href"] = urlparse.urljoin(url, hrefpath)
else:
url_parts = urlparse.urlsplit(tag.attrs["src"])
srcpath = url_parts.path
srcparts = srcpath.split("/")
srcpath = srcparts[len(srcparts) -1]
tag.attrs["src"] = urlparse.urljoin(img, srcpath)
outfile = open(os.path.join(save, f), "w")
outfile.write(soup.encode("ascii", "xmlcharrefreplace"))
outfile.close()
示例10: get_from_form
def get_from_form(str_page, response, opener=None):
    """Extract the submit parameters and action URL of the form on a page.

    Parses *str_page*, collects every named input's value (utf-8 encoded),
    resolves the form's action URL (relative addresses are resolved
    against response.geturl()), and handles a vk.com captcha challenge by
    downloading the image and prompting the operator.

    Returns (key_value dict, action_url).
    """
    page = lxml.html.document_fromstring(str_page)
    # Yandex's rights-confirmation page carries an extra leading form.
    if len(page.forms) == 1:
        form = page.forms[0]
    else:
        form = page.forms[1]
    # Collect the form parameters.
    key_value = {}
    for inpt in form.inputs:
        value = inpt.value
        name = inpt.name
        # buttons usually have no name; unnamed/valueless inputs are skipped
        if None not in [name, value]:
            key_value[name] = value.encode('utf-8')
    # Resolve the form submission address.
    action_url = form.action
    if action_url is None:
        action_url = response.geturl()
    parts = urlparse.urlsplit(action_url)
    # relative address...
    if parts.scheme == '' and parts.netloc == '':
        if action_url[0] == '/':
            # server-relative
            netloc = urlparse.urlsplit(response.geturl()).netloc
            action_url = 'https://' + netloc + action_url
        else:
            # relative to the current page's address
            action_url = response.geturl() + '/' + action_url
    # Check for a captcha (for vk.com only).
    if 'captcha_key' in key_value:
        img = form.cssselect('img.captcha_img')[0]
        captcha_url = img.attrib['src']
        captcha_img = opener.open(captcha_url).read()
        dataMngt.write('oauth/logs/captcha.jpg', captcha_img, 'wb')
        captcha_key = raw_input('Input the captcha number:')
        key_value['captcha_key'] = captcha_key
    return key_value, action_url
示例11: clean
def clean(self):
    """Validate the endpoint/product fields of an AddEndpointForm.

    Splits the whitespace-separated 'endpoint' field and tries, in order,
    to interpret each item as (1) a full URL, (2) a bare IPv4/IPv6 address
    with optional port, (3) a hostname (optionally with path/query/
    fragment).  Parsed 5-tuples are appended to self.endpoints_to_process.

    Raises forms.ValidationError when fields are missing or an item
    matches none of the three forms.
    """
    from django.core.validators import URLValidator, validate_ipv46_address
    port_re = "(:[0-9]{1,5}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])"
    cleaned_data = super(AddEndpointForm, self).clean()
    if 'endpoint' in cleaned_data and 'product' in cleaned_data:
        endpoint = cleaned_data['endpoint']
        product = cleaned_data['product']
        if isinstance(product, Product):
            self.product = product
        else:
            self.product = Product.objects.get(id=int(product))
    else:
        raise forms.ValidationError('Please enter a valid URL or IP address.',
                                    code='invalid')
    endpoints = endpoint.split()
    for endpoint in endpoints:
        try:
            # 1) a complete URL
            url_validator = URLValidator()
            url_validator(endpoint)
            protocol, host, path, query, fragment = urlsplit(endpoint)
            self.endpoints_to_process.append([protocol, host, path, query, fragment])
        except forms.ValidationError:
            try:
                # 2) a bare IP address, possibly carrying a port number
                host = endpoint
                regex = re.compile(port_re)
                if regex.findall(endpoint):
                    # re.sub already strips every occurrence in one pass
                    host = re.sub(port_re, '', host)
                validate_ipv46_address(host)
                protocol, host, path, query, fragment = ("", endpoint, "", "", "")
                self.endpoints_to_process.append([protocol, host, path, query, fragment])
            except forms.ValidationError:
                try:
                    # 3) a hostname, optionally with port and path/query
                    regex = re.compile(
                        r'^(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|' # domain...
                        r'localhost|' # localhost...
                        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|' # ...or ipv4
                        r'\[?[A-F0-9]*:[A-F0-9:]+\]?)' # ...or ipv6
                        r'(?::\d+)?' # optional port
                        r'(?:/?|[/?]\S+)$', re.IGNORECASE)
                    validate_hostname = RegexValidator(regex=regex)
                    validate_hostname(host)
                    protocol, host, path, query, fragment = (None, host, None, None, None)
                    if "/" in host or "?" in host or "#" in host:
                        # add a fake protocol just to join, wont use in update to database
                        host_with_protocol = "http://" + host
                        p, host, path, query, fragment = urlsplit(host_with_protocol)
                    self.endpoints_to_process.append([protocol, host, path, query, fragment])
                except forms.ValidationError:
                    raise forms.ValidationError(
                        'Please check items entered, one or more do not appear to be a valid URL or IP address.',
                        code='invalid')
    return cleaned_data
示例12: _resolveLocation
def _resolveLocation(self, requestURI, location):
    """Resolve a redirect Location header against *requestURI*.

    Applies the configured redirect policy (self._onRedirect):
      - "sticky": keep the original request's host in the new location;
      - "stickyport": keep the original port (or the configured
        self._port when no proxy is used), normalising implicit
        scheme-default ports (http->80, https->443) first.

    NOTE(review): the rewriting is done with plain string .replace() on
    the host/port substrings — presumably safe for the URLs this client
    sees, but it would misfire if the same substring appears elsewhere in
    the URL; confirm before reusing.
    """
    from twisted.web.client import _urljoin
    from urlparse import urlparse, urlsplit
    # netloc split into [host] or [host, port]
    old_url = urlsplit(requestURI)[1].split(":")
    go_to = urlsplit(location)[1].split(":")
    if self._onRedirect == "sticky":
        # keep the original host, accept the redirect's port/path
        location = location.replace(go_to[0], old_url[0])
    elif self._onRedirect == "stickyport":
        def _preparePort(url):
            # Make the port explicit: append :80/:443 based on the scheme
            # (defaulting to http when the scheme is missing).
            urlsplited = urlsplit(url)[1].split(":")
            scheme = urlsplit(url).scheme \
                if urlsplit(url).scheme else "http"
            if scheme == "http":
                url = url.replace(urlsplited[0], urlsplited[0]+":80")
            elif scheme == "https":
                url = url.replace(urlsplited[0], urlsplited[0]+":443")
            return url
        # Ensure both URLs carry an explicit port before comparing.
        if len(old_url) != 2:
            requestURI = _preparePort(requestURI)
            old_url = urlsplit(requestURI)[1].split(":")
        if len(go_to) != 2:
            location = _preparePort(location)
            go_to = urlsplit(location)[1].split(":")
        if not self._proxy:
            # direct connection: force the locally configured port
            location = location.replace(go_to[1], str(self._port))
        else:
            # via proxy: stick to the original request's port
            location = location.replace(go_to[1], old_url[1])
    # resolve relative redirects against the request URI
    location = _urljoin(requestURI, location)
    log.debug("Locating to URL: %s" % location)
    return location
示例13: spider
def spider(client, url, domain_whitelist=None, pool=None, threadpool=None, tested=None):
    """Recursively crawl *url*, reporting each fetch to *client*.

    Fetches the page, sends timing/status info via client.send_result,
    then spawns a greenlet per not-yet-visited link whose host is in
    *domain_whitelist* (defaults to the start URL's host).  *tested*
    accumulates visited URLs across the recursion; *pool* bounds
    concurrent HTTP requests, *threadpool* offloads lxml parsing.

    Returns the greenlet pool.
    """
    client.send_status('Spidering {url}...'.format(url=url))
    domain_whitelist = domain_whitelist or (urlsplit(url).netloc,)
    threadpool = threadpool or ThreadPool(4)  # for lxml - 4 workers
    pool = pool or Pool()  # maximum number of concurrent HTTP requests
    tested = tested or set([url])
    with timer() as timed:
        response = requests.get(url)
    result = dict(
        status_code = response.status_code,
        length = len(response.text),
        headers = response.headers,
        url = url,
        duration = timed.result(),
    )
    client.send_result(result)
    html = threadpool.apply(fromstring, [response.text])
    for link in html.cssselect('a'):
        # attrib.get('href') is None for anchors without href; default to ''
        # so .split/.strip don't raise AttributeError.
        href = link.attrib.get('href', '').split('#')[0].strip()
        if not href:
            continue
        url = urljoin(response.url, href)
        parts = urlsplit(url)
        if parts.netloc not in domain_whitelist:
            continue
        if url in tested:
            continue
        tested.add(url)
        pool.spawn(spider, client, url, domain_whitelist, pool, threadpool, tested)
    return pool
示例14: filename
def filename(self, pdf_url0):
    """Derive file names and folder paths for a PDF download URL.

    Stores on self: filename (last path segment, with special handling
    for IEEE-style '*.jsp?arnumber=NNN' viewer URLs), pdf_Folder_filename
    and W_pdf_Folder_filename (paths inside the configured PDF folders),
    chdir (this script's directory), and — for slash-containing URLs —
    suffix and the url-decoded file_name_decode.  Returns self.
    """
    pdf_url = str(pdf_url0)
    script_dir = os.path.dirname(os.path.realpath(__file__)).replace('\\', '/')
    if '/' in pdf_url:
        self.suffix = os.path.splitext(pdf_url)[1]
        self.file_name_decode = urllib2.unquote(pdf_url).decode('utf8').split('/')[-1]
        self.filename = urlparse.urlsplit(pdf_url).path.split('/')[-1]
        if self.filename.endswith('.jsp'):
            # IEEE viewer page: the article number after "arnumber=" in the
            # extension/query part names the actual PDF.
            self.filename = self.suffix.split('arnumber=')[1] + '.pdf'
    else:
        # No forward slash: treat the URL as a backslash-separated path.
        self.filename = urlparse.urlsplit(pdf_url).path.split('\\')[-1]
    self.pdf_Folder_filename = self.PDF_Files_Dir + "/" + self.filename
    self.W_pdf_Folder_filename = self.Watermarked_PDF_Dir + "/" + self.filename
    self.chdir = script_dir
    return self
示例15: url
def url(self, name, force=False):
    """
    Returns the real URL in DEBUG mode.

    Otherwise returns the cache-hashed URL for *name*, preserving any
    fragment and the '?#' @font-face IE hack suffix.
    """
    if settings.DEBUG and not force:
        hashed_name, fragment = name, ''
    else:
        clean_name, fragment = urldefrag(name)
        if urlsplit(clean_name).path.endswith('/'):
            # directory-style paths are never hashed
            hashed_name = name
        else:
            cache_key = self.cache_key(name)
            cached = self.cache.get(cache_key)
            if cached is not None:
                hashed_name = cached
            else:
                hashed_name = self.hashed_name(clean_name).replace('\\', '/')
                # repopulate the cache on a miss
                # (e.g. if cache server goes down)
                self.cache.set(cache_key, hashed_name)
    final_url = super(CachedFilesMixin, self).url(hashed_name)
    # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
    # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
    has_query_fragment = '?#' in name  # [sic!]
    if fragment or has_query_fragment:
        parts = list(urlsplit(final_url))
        if fragment and not parts[4]:  # 4: fragment
            parts[4] = fragment
        if has_query_fragment and not parts[3]:  # 3: query
            parts[2] += '?'  # 2: path
        final_url = urlunsplit(parts)
    return unquote(final_url)