本文整理汇总了Python中urllib2.urlparse.urljoin函数的典型用法代码示例。如果您正苦于以下问题:Python urljoin函数的具体用法?Python urljoin怎么用?Python urljoin使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了urljoin函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_products
def parse_products(self, response):
    """Scrape one product listing page.

    Yields, in page order:
      * a ``Request`` to the ``/prices`` page of every multi-store product
        (handled by ``self.parse_multiple_store_product``),
      * a populated ``ShoppingdotcomItem`` for every other product,
      * finally a ``Request`` for the next listing page when an anchor
        named ``PLN`` is present.
    """
    print("parse_products %s" % response.url)
    page = Selector(response)

    # Category trail: every itemprop title except the first, followed by
    # the text of the last <span> inside the breadcrumb.
    crumbs = page.xpath('//div[contains(@class,"breadCrumb")]')
    categories = crumbs.xpath(".//span[@itemprop='title']/text()").extract()[1:]
    categories.append(crumbs.xpath(".//span/text()").extract()[-1])
    print(categories)

    for tile in page.xpath('//div[contains(@id,"quickLookItem")]'):
        if tile.xpath('.//span[contains(@id, "numStoresQA")]'):
            # Multi-store product: swap the last path segment for "prices"
            # and let the dedicated callback parse the offers.
            href = tile.xpath(".//a/@href").extract()[0]
            print(href)
            prices_path = "/".join(href.split("/")[:-1]) + "/prices"
            yield Request(urlparse.urljoin(response.url, prices_path),
                          callback=self.parse_multiple_store_product)
            continue

        # Single-store product: everything needed is on the listing tile.
        item = ShoppingdotcomItem()
        item["categories"] = categories
        item["product_name"] = tile.xpath(
            ".//span[contains(@id, 'nameQA')]/@title").extract()[0]
        # When a placeholder span exists, its text is used as the image URL list.
        if tile.xpath(".//span[@class='placeholderImg']").extract():
            image_query = ".//span[@class='placeholderImg']/text()"
        else:
            image_query = ".//div[@class='gridItemTop']//img/@src"
        item["image_urls"] = tile.xpath(image_query).extract()
        item["product_urls"] = [
            urlparse.urljoin(response.url, tile.xpath(".//a/@href").extract()[0])
        ]
        item["stores"] = tile.xpath(".//a[@class='newMerchantName']/text()").extract()
        raw_prices = tile.xpath(".//span[@class='productPrice']/a/text()").extract()
        item["prices"] = [text.replace("\n", "") for text in raw_prices]
        yield item

    # Follow pagination when a "next page" anchor is present.
    if page.xpath("//a[@name='PLN']").extract():
        next_href = page.xpath("//a[@name='PLN']/@href").extract()[0]
        yield Request(urlparse.urljoin(response.url, next_href), self.parse_products)
示例2: parse_start_url
def parse_start_url(self, response):
    """Yield product-listing requests for the anchors found on the start page.

    Two kinds of links are followed, both with ``self.parse_products`` as
    callback:
      * hrefs whose last path segment is ``products``;
      * hrefs containing ``xFA-``, rewritten by removing ``xFA-``, cutting
        at the first ``~`` and appending ``/products``.
    An href matching both rules produces two requests.
    """
    print(response.url)
    for anchor in Selector(response).xpath("//a"):
        hrefs = anchor.xpath("@href").extract()
        # Skip anchors without an href or with an empty one.
        if not hrefs or not hrefs[0]:
            continue
        href = hrefs[0]

        if href.split("/")[-1] == "products":
            yield Request(urlparse.urljoin(response.url, href),
                          callback=self.parse_products)

        if "xFA-" in href:
            listing_path = href.replace("xFA-", "").split("~")[0] + "/products"
            yield Request(urlparse.urljoin(response.url, listing_path),
                          callback=self.parse_products)
示例3: main
def main(argv=sys.argv):
    """Program entry point.

    Starting from the blog category page, fetch each page, print every
    link returned by ``buscar_links`` as an absolute URL, and continue
    with the URI it returns until that URI is empty.
    """
    url = ("http://www.vientonomade.com.ar/index.php?option=com_content&view=category&"
           "layout=blog&id=8&Itemid=10")
    # Bind the HTTP client once so each fetch only needs the URL.
    fetch_page = partial(obtener_pagina, httplib2.Http())

    while url:
        uri, links = buscar_links(fetch_page(url))
        for link in links:
            try:
                print(urlparse.urljoin(url, link))
            except UnicodeEncodeError:
                # Best effort: links that cannot be encoded for output are skipped.
                pass
        # presumably `uri` is the next-page link — verify against buscar_links
        url = urlparse.urljoin(url, uri) if uri else None
示例4: check_config
def check_config():
    """
    Check crucial configuration details for existence and workability.

    Runs checks to see whether bugtracker's URL is reachable, whether
    backend is available at the right filename, and whether the script has
    the key arguments it needs to run: URL, backend, and database details.

    The filename for the backend in the backends/ directory needs to be the
    same as the configuration argument specifying that backend. For
    instance, invoking the Launchpad backend uses 'lp', and so the filename
    is 'lp.py'.

    Raises InvalidConfig when the backend file is not among the available
    backends, or (for every backend except 'github') when the tracker's
    base URL cannot be opened.
    """
    Config.check_params(['url', 'backend'])

    if Config.backend + ".py" not in Backend.get_all_backends():
        raise InvalidConfig('Backend "' + Config.backend + '" does not exist')

    # Only scheme://host is probed, not the full configured URL.
    url = urlparse.urlparse(Config.url)
    check_url = urlparse.urljoin(url.scheme + '://' + url.netloc, '')
    print("Checking URL: " + check_url)
    req = Request(check_url)

    # The reachability probe is skipped for the 'github' backend.
    if Config.backend != 'github':
        try:
            response = urlopen(req)
        except HTTPError as e:
            # HTTPError must be handled before URLError (it is a subclass).
            raise InvalidConfig('The server could not fulfill the request '
                                + str(e.msg) + '(' + str(e.code) + ')')
        except URLError as e:
            raise InvalidConfig('We failed to reach a server. ' + str(e.reason))
        else:
            # Only reachability matters; release the connection right away.
            response.close()
示例5: transform
def transform(row, table):
    """Return ``row`` as a dict with an absolute "link" and a split "name".

    "link" is resolved against https://pt.wikipedia.org; the first match of
    ``regexp_city_state`` on "name" supplies the new "name" and "state"
    values. ``table`` is accepted but not used here.
    """
    fields = row._asdict()
    fields['link'] = urlparse.urljoin('https://pt.wikipedia.org', fields['link'])
    matches = regexp_city_state.findall(fields['name'])
    # The first match is unpacked into exactly two values.
    fields['name'], fields['state'] = matches[0]
    return fields
示例6: bot_send_video
def bot_send_video(gesture, video_url, video_preview_img, to_mid="u2ef38a8c1f3f1c2c63bdf9c0a629023c"):
    """POST a video message event to the LINE trial-bot API.

    The video URL sent is built from ``gesture.video.url`` joined onto the
    Heroku base URL. Returns the ``requests`` response object.

    NOTE(review): the ``video_url`` argument is never read; it is replaced
    by the URL derived from ``gesture``. Confirm this is intended.
    NOTE(review): ``verify=False`` disables TLS certificate verification
    while the channel secret is sent in the headers.
    """
    api = 'https://trialbot-api.line.me/v1/events'
    headers = {
        'Content-type': 'application/json; charset=UTF-8',
        'X-Line-ChannelID': settings.CHANNEL_ID,
        'X-Line-ChannelSecret': settings.CHANNEL_SECRET,
        'X-Line-Trusted-User-With-ACL': settings.CHANNEL_MID,
    }

    site_root = 'https://eldertranslator.herokuapp.com/'
    absolute_video_url = urlparse.urljoin(site_root, gesture.video.url)

    body = {
        'to': [to_mid],
        'toChannel': 1383378250,
        'eventType': "138311608800106203",
        'content': {
            "contentType": 3,
            "toType": 1,
            "originalContentUrl": absolute_video_url,
            "previewImageUrl": video_preview_img
        },
    }
    return requests.post(api, data=json.dumps(body), headers=headers, verify=False)
示例7: parse
def parse(self, response):
    """Download the delinquent-parcel archive and yield one request per CSV row.

    The archive linked from the page is saved as ``delinquent.zip`` and
    unpacked into ``delinquent/``. The first CSV found there is read: the
    header row locates the needed columns by name prefix, then each data
    row becomes a ``ReapItem`` attached (via ``request.meta['item']``) to a
    request handled by ``self.get_tax_eligibility``.
    """
    archive_links = Selector(response).xpath(
        '//*[@id="box1"]/td[1]/li/font/i/a/@href').extract()
    archive_url = urlparse.urljoin(response.url, archive_links[0])
    urllib.urlretrieve(archive_url, 'delinquent.zip')
    unzip('delinquent.zip', 'delinquent')

    csv_path = glob.glob('delinquent/*.csv')[0]
    with open(csv_path, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')

        # Locate the needed columns from the header row. The prefixes are
        # mutually exclusive, so at most one branch matches per heading.
        for position, heading in enumerate(next(reader)):
            heading = heading.replace('"', '').strip()
            if heading.startswith("PARCELID"):
                parcel_id_col = position
            elif heading.startswith("OWNERNAME1"):
                owner_name_col = position
            elif heading.startswith("PARCELLOCATION"):
                location_col = position
            elif heading.startswith("CLS"):
                class_col = position
            elif heading.startswith("ASMTBLDG"):
                building_value_col = position

        for row in reader:
            item = ReapItem()
            item['parcel_id'] = row[parcel_id_col].replace('"', '').strip()
            item['parcel_location'] = row[location_col].strip()
            item['parcel_class'] = row[class_col].strip()
            item['building_value'] = row[building_value_col].strip()

            lookup_url = "http://mctreas.org/master.cfm?parid={0}&taxyr={1}&own1={2}".format(
                item['parcel_id'], str(YEAR), row[owner_name_col])
            request = scrapy.Request(lookup_url, callback=self.get_tax_eligibility)
            # Carry the partially filled item along to the callback.
            request.meta['item'] = item
            yield request
示例8: parse
def parse(url, body, **kwargs):
    """Render a minimal HTML page from the ``var docData`` JSON found in ``body``.

    ``body`` is decoded as GBK (undecodable bytes ignored). The first line
    starting with ``var docData`` supplies a JSON object; title, URL, date
    and text content are taken from it and rendered into a small HTML
    document, returned GBK-encoded. When no such line exists the function
    returns ``'<html/>'``. ``url`` and ``kwargs`` are not used here.

    NOTE(review): values are interpolated into the template as-is; whether
    they get HTML-escaped depends on how ``Template`` is configured.
    """
    for raw_line in body.decode('gbk', errors='ignore').splitlines():
        if not raw_line.lstrip().startswith('var docData'):
            continue

        # The JSON payload spans from the first '{' to the last '}' of the line.
        start = raw_line.find('{')
        end = raw_line.rfind('}')
        result = json.loads(raw_line[start:end + 1])['result']

        doc = result['docinfo'][0]['foolrinfo']
        doc['title'] = result['sDocTitle']
        doc['url'] = urlparse.urljoin('http://www.xici.net', result['strPageUrl'])
        # LongDate lacks the century, so "20" is prepended.
        doc['date'] = '20' + doc['LongDate']
        doc['content'] = html.fromstring(doc['floorcontent']).text_content()

        tpl = Template('''
<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="content-type">
<title>{{doc['title']}}</title>
</head>
<body>
<a id="title" href="{{doc['url']}}">{{doc['title']}}</a>
<p id="date">{{doc['date']}}</p>
<div id="content">{{doc['content']}}</div>
</body>
</html>''')
        return tpl.render(doc=doc).encode('gbk', errors='ignore')

    # No docData line was found.
    return '<html/>'
示例9: submit
def submit(self, opener, res):
    """Submit the login form to the COSign IdP.

    The form description is read from ``self.data`` and the credentials
    from ``self.cm``.

    :param opener: the urllib2 opener used to send the request
    :param res: the response object that carried the form; its URL is the
        base against which the form action is resolved
    :returns: a ``(request, response)`` tuple
    """
    credentials = self.cm
    form = self.data

    action_url = urlparse.urljoin(res.url, form["form"]["action"])
    log.info("Form Authentication from: %s" % action_url)

    idp_data = {}
    idp_data[self.username_field] = credentials.get_username()
    idp_data[self.password_field] = credentials.get_password()
    # These two fields are copied from the parsed form unchanged.
    idp_data["service"] = form["service"]["value"]
    idp_data["ref"] = form["ref"]["value"]

    request = Request(action_url, data=urllib.urlencode(idp_data))
    log.info("Submitting login form")
    log.debug("POST: %s" % request.get_full_url())
    return request, opener.open(request)
示例10: submit
def submit(self, opener, res):
    """Submit the WAYF form, selecting the configured IdP.

    The form description is read from ``self.data`` and the IdP from
    ``self.idp``.

    :param opener: the urllib2 opener used to send the request
    :param res: the response object that carried the form; its URL is the
        base against which the form action is resolved
    :returns: a ``(request, response)`` tuple
    :raises WAYFException: when the IdP is not among the form's origins
    """
    log.info('Submitting form to wayf')
    form = self.data
    idp_name = self.idp.get_idp()

    # The chosen IdP must be one of the origins offered by the WAYF page.
    if idp_name not in form['origin']:
        raise WAYFException(
            "Can't find IdP '{0}' in WAYF's IdP list".format(idp_name))

    wayf_data = {}
    wayf_data['origin'] = form['origin'][idp_name]
    for field in ('shire', 'providerId', 'target', 'time'):
        wayf_data[field] = form[field]['value']
    wayf_data['cache'] = 'false'
    wayf_data['action'] = 'selection'

    action_url = urlparse.urljoin(res.url, form['form']['action'])
    # NOTE(review): the fields travel in the query string and no request
    # body is set, although the debug message below says "POST".
    request = Request(action_url + '?' + urllib.urlencode(wayf_data))
    log.debug("POST: %s" % request.get_full_url())
    return request, opener.open(request)
示例11: install_artifacts
def install_artifacts(artifacts, dirstruct, installdir, basestaticurl):
    """
    Save every artifact named in ``dirstruct`` below ``installdir``.

    ``dirstruct`` maps a directory (relative to ``installdir``) to the
    artifact names to place there. A name present in ``artifacts`` uses
    that object; any other name is wrapped in a new ``Artifact`` pointing
    at ``basestaticurl`` + name. Missing directories are created.

    Returns the list of absolute destination paths, in install order.
    ``basestaticurl`` must end with "/" (checked with an assertion).
    """
    assert basestaticurl.endswith("/"), "Basestaticurl should end with /"

    installed = []
    for reldir, names in dirstruct.items():
        destdir = os.path.join(installdir, reldir)
        if os.path.exists(destdir):
            assert os.path.isdir(destdir)
        else:
            log.warn(msg="Making install directory %s" % destdir)
            os.makedirs(destdir)

        for name in names:
            target = os.path.abspath(os.path.join(destdir, name))
            if name in artifacts:
                # The artifact must be loaded from jenkins
                artifact = artifacts[name]
            else:
                # Probably a static file: fetch it from the static collection.
                artifact = Artifact(name, urlparse.urljoin(basestaticurl, name))
            artifact.save(target)
            installed.append(target)
    return installed
示例12: notify
def notify(cls, alert, *args, **kwargs):
    """Post a HipChat room notification for ``alert``.

    Options read from ``kwargs``: ``token``, ``repeat``, ``notify``,
    ``message_format``, ``room``, ``color``, ``message``, ``link``,
    ``zmon_host`` and ``link_text``. URL, token and ZMON host fall back to
    ``cls._config`` values.

    Errors raised while sending are logged and recorded on the tracing
    span instead of being propagated. Returns the ``repeat`` value.

    NOTE(review): the info log line includes the auth token in clear text.
    NOTE(review): ``alert.get('entity')`` may be None, in which case
    ``entity['id']`` raises before anything is sent — confirm callers
    always provide an entity.
    """
    current_span = extract_span_from_kwargs(**kwargs)

    url = cls._config.get('notifications.hipchat.url')
    token = kwargs.get('token', cls._config.get('notifications.hipchat.token'))
    repeat = kwargs.get('repeat', 0)
    notify = kwargs.get('notify', False)
    message_format = kwargs.get('message_format', 'html')
    alert_def = alert['alert_def']

    # Tracing metadata.
    current_span.set_tag('alert_id', alert_def['id'])
    entity = alert.get('entity')
    current_span.set_tag('entity', entity['id'])
    current_span.set_tag('alert_changed', bool(alert.get('alert_changed', False)))
    current_span.set_tag('is_alert', alert.get('is_alert', False))
    current_span.log_kv({'room': kwargs.get('room')})

    # An alert that is not (or no longer) raised is always shown in green.
    if alert and not alert.get('is_alert'):
        color = 'green'
    else:
        color = kwargs.get('color', 'red')

    message_text = cls._get_subject(alert, custom_message=kwargs.get('message'))

    if kwargs.get('link', False):
        zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
        alert_url = ''
        if zmon_host:
            alert_url = urlparse.urljoin(
                zmon_host, '/#/alert-details/{}'.format(alert_def['id']))
        link_text = kwargs.get('link_text', 'go to alert')
        if message_format == 'html':
            message_text += ' -- <a href="{}" target="_blank">{}</a>'.format(alert_url, link_text)
        else:
            message_text += ' -- {} - {}'.format(link_text, alert_url)

    message = {
        'message': message_text,
        'color': color,
        'notify': notify,
        'message_format': message_format
    }

    try:
        endpoint = '{}/v2/room/{}/notification'.format(url, urllib.quote(kwargs['room']))
        logger.info(
            'Sending to: ' + '{}?auth_token={}'.format(endpoint, token) + ' ' + json.dumps(message))
        r = requests.post(
            endpoint,
            json=message, params={'auth_token': token}, headers={'Content-type': 'application/json'})
        r.raise_for_status()
    except Exception:
        # Sending is best effort: record the failure, do not raise.
        current_span.set_tag('error', True)
        current_span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Hipchat write failed!')

    return repeat
示例13: show
def show(self):
    """Fill the IdP selector with the sorted IdP names and show the window.

    The names come from ``list_idps`` called with the SLCS login URL; the
    entry equal to ``self.settings.idp`` is made the active one.
    """
    login_url = urlparse.urljoin(self.settings.slcs, 'login')
    for name in sorted(list_idps(login_url).keys()):
        self.idps.append_text(name)
        if name == self.settings.idp:
            # The entry just appended is the last row of the model.
            self.idps.set_active(len(self.idps.get_model()) - 1)
    self.window.show_all()
示例14: relative_to_full_url
def relative_to_full_url(original_url, url):
    """
    Resolve ``url`` against ``original_url``.

    A ``url`` that already names a host (non-empty netloc) is returned
    unchanged. Anything else is joined onto ``original_url`` as a whole, so
    its query string and fragment are preserved and scheme-only URLs such
    as ``mailto:`` links are left intact.

    :param original_url: URL of the document in which ``url`` was found
    :param url: the possibly relative URL to resolve
    :returns: the resolved URL string
    """
    try:
        from urllib2 import urlparse  # Python 2
    except ImportError:
        from urllib import parse as urlparse  # Python 3

    if urlparse.urlparse(url).netloc:
        return url
    # Join the complete reference, not just its path component; joining
    # only the path would drop "?query" and "#fragment".
    return urlparse.urljoin(original_url, url)
示例15: urlIterator
def urlIterator(startUrl, nextCssSelector):
    '''Yield the URL of each page for as long as a "next" link is found.

    Starting at ``startUrl``, every page is searched with
    ``nextCssSelector``. The first matching element that is an ``<a>`` with
    an href, or that contains one, supplies the next URL. The href is
    resolved against the page it was found on, so relative links keep
    working after the first page. Iteration ends when no such link exists.
    '''
    # This function takes time because it has to parse the DOM to get the next url
    url = startUrl
    while url:
        yield url
        currentUrl = url
        url = None
        for candidate in getElementsFromUrl(currentUrl, nextCssSelector):
            # The selector may hit the anchor itself or a wrapper around it.
            anchor = candidate if candidate.tag == 'a' else candidate.find('a')
            if anchor is None:
                continue
            href = anchor.get('href')
            if not href:
                # An anchor without href cannot name a next page; joining it
                # would send the crawl back to an already visited URL.
                continue
            url = urlparse.urljoin(currentUrl, href)
            break