This article collects typical usage examples of Python's urllib.basejoin function. If you have been wondering how exactly basejoin is used, how to call it, or what real-world basejoin code looks like, the hand-picked examples below should help.
A total of 15 basejoin code examples are shown below, sorted by popularity by default.
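Before the examples, here is a minimal sketch of what urllib.basejoin does: it resolves a (possibly relative) URL against a base URL. In Python 2 it is simply an alias for urlparse.urljoin (Python 3 dropped it in favour of urllib.parse.urljoin). The URLs below are illustrative only:

# Python 2 -- minimal illustration of urllib.basejoin (alias of urlparse.urljoin)
import urllib

base = 'http://example.com/videos/index.html'               # illustrative base URL
print urllib.basejoin(base, 'clip.html')                     # http://example.com/videos/clip.html
print urllib.basejoin(base, '/search?q=python')              # http://example.com/search?q=python
print urllib.basejoin(base, 'http://other.example.org/x')    # absolute URLs pass through unchanged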
Example 1: findVideoFrameLink
def findVideoFrameLink(page, data):
    minheight = 300
    minwidth = 300

    frames = findFrames(data)
    if not frames:
        return None

    iframes = re.findall("(frame[^>]* height=[\"']*(\d+)[\"']*[^>]*>)", data)
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = re.findall("[\"' ]width=[\"']*(\d+[%]*)[\"']*", iframe[0])
                if m:
                    if m[0] == '100%':
                        width = minwidth + 1
                    else:
                        width = int(m[0])
                    if width > minwidth:
                        m = re.findall('[\'"\s]src=["\']*\s*([^"\' ]+)\s*["\']*', iframe[0])
                        if m:
                            link = m[0]
                            if not link.startswith('http://'):
                                up = urlparse.urlparse(urllib.unquote(page))
                                if link.startswith('/'):
                                    link = urllib.basejoin(up[0] + '://' + up[1], link)
                                else:
                                    link = urllib.basejoin(up[0] + '://' + up[1] + '/' + up[2], link)
                            return link.strip()

    # Alternative 1
    iframes = re.findall("(frame[^>]*[\"; ]height:\s*(\d+)[^>]*>)", data)
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = re.findall("[\"; ]width:\s*(\d+)", iframe[0])
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = re.findall('[ ]src=["\']*\s*([^"\' ]+)\s*["\']*', iframe[0])
                        if m:
                            link = m[0]
                            if not link.startswith('http://'):
                                link = urllib.basejoin(page, link)
                            return link.strip()

    # Alternative 2 (Frameset)
    iframes = re.findall('<FRAMESET[^>]+100%[^>]+>\s*<FRAME[^>]+src="([^"]+)"', data)
    if iframes:
        link = iframes[0]
        if not link.startswith('http://'):
            link = urllib.basejoin(page, link)
        return link.strip()

    return None
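For reference, this is how basejoin itself distinguishes root-relative from document-relative links when handed a full page URL, which is the distinction Example 1 handles explicitly (the URL below is illustrative, not taken from the example):

# Python 2 -- basejoin with root-relative vs document-relative links
import urllib

page = 'http://example.com/videos/player.php'    # illustrative page URL
print urllib.basejoin(page, '/embed/42')          # http://example.com/embed/42
print urllib.basejoin(page, 'embed/42')           # http://example.com/videos/embed/42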
Example 2: _parseLinks
def _parseLinks(self, url):
    c = httplib2.Http()
    resp, content = c.request(url)
    tree = libxml2.htmlParseDoc(content, "utf-8")
    links = htmltools.find_elements(tree.getRootElement(), "a")
    reflink = ""
    magnet = None
    for i in links:
        if i.getContent().lstrip().rstrip() == "Download torrent":
            reflink = urllib.basejoin(url, i.prop('href'))
        if i.getContent().lstrip().rstrip() == "magnet link":
            magnet = urllib.basejoin(url, i.prop('href'))
            if "&" in magnet:
                j = magnet.index("&")
                magnet = magnet[:j]
    return reflink, magnet
Example 3: __init__
def __init__(self):
    baseurl = 'http://164.100.47.132/LssNew/psearch/'
    date2num = {
        (datetime.date(1998, 03, 23),
         datetime.date(1999, 04, 24)): 12,
        (datetime.date(1999, 10, 20),
         datetime.date(2004, 02, 05)): 13,
        (datetime.date(2004, 06, 02),
         datetime.date(2009, 02, 26)): 14,
        (datetime.date(2009, 06, 01),
         datetime.date(2014, 06, 01)): 15,
    }
    num2webform = {12: 'DebateAdvSearch12.aspx',
                   13: 'DebateAdvSearch13.aspx',
                   14: 'DebateAdvSearch14.aspx',
                   15: 'DebateAdvSearch15.aspx',
                  }
    num2dateqry = {12: 'DebateAdvSearch12.aspx',
                   13: 'DebateAdvSearch13.aspx',
                   14: 'DebateAdvSearch14.aspx',
                   15: 'DebateAdvSearch15.aspx',
                  }
    self.webformUrls = {}
    for k in date2num.keys():
        self.webformUrls[k] = urllib.basejoin(baseurl,
                                              num2webform[date2num[k]])
    self.dateqryUrls = {}
    for k in date2num.keys():
        self.dateqryUrls[k] = urllib.basejoin(baseurl,
                                              num2dateqry[date2num[k]])
Example 4: _get_magnet
def _get_magnet(self, url):
    i = len(url) - 1
    while url[i] != '/':
        i -= 1
    url = url[:i+1] + urllib.quote_plus(url[i+1:])
    c = httplib2.Http()
    resp, content = c.request(url)
    if "set-cookie" in resp:
        cookie = resp['set-cookie']
    else:
        cookie = None
    tree = libxml2.htmlParseDoc(content, "utf-8")
    form = htmltools.find_elements(tree.getRootElement(), "form", id="frmAdultDisclaimer")
    if form:
        form = form[0]
        inputs = htmltools.find_elements(form, "input")
        body = {}
        for i in inputs:
            body[i.prop('name')] = i.prop('value')
        del body['btn_Decline']
        body = urllib.urlencode(body)
        headers = {'Content-type': "application/x-www-form-urlencoded"}
        if cookie:
            headers['Cookie'] = cookie
        url = urllib.basejoin(url, form.prop('action'))
        resp, content = c.request(url, "POST", body, headers)
        if "set-cookie" in resp:
            cookie = resp['set-cookie']
        if cookie:
            headers['Cookie'] = cookie
        url = urllib.basejoin(url, resp["location"])
        resp, content = c.request(url, headers=headers)
        tree = libxml2.htmlParseDoc(content, "utf-8")
    return htmltools.find_elements(tree.getRootElement(), "a", **{'class': 'dwld_links'})[0].prop('href')
Example 5: search
def search(self, pattern=''):
    pattern = urllib.quote(pattern)
    url = '?s=%(pattern)s' % {'pattern': pattern}
    search = []
    search_elem = self.get_html_tree(url)
    if not search_elem or search_elem.find('div', {'class': 'alert alert-warning'}):
        # Sorry, no results were found.
        return search
    div_elems = search_elem.findAll(
        'div', {'class': 'col-lg-3 col-xs-3 col-sm-3 item'})
    for div_elem in div_elems:
        a_elem = div_elem.findAll('a')[-1]
        img_elem = div_elem.find('img')
        tv_show = re.sub(urllib.basejoin(self.main_url, 'watch-'), '',
                         a_elem.get('href'))
        item = {'label': a_elem.getText(),
                'tv_show': tv_show,
                'thumbnail': urllib.basejoin(self.main_url, img_elem.get('src'))}
        search.append(item)
    return search
Example 6: __iter__
def __iter__(self):
    if self.target:
        basepath = xmlrpclib.ServerProxy(self.target).getPhysicalPath()
    for item in self.previous:
        if not self.target:
            yield item
            continue
        keys = item.keys()
        type_, path = item.get(self.typekey(*keys)[0]), item.get(self.pathkey(*keys)[0])
        if not (type_ and path):  # not enough info
            yield item; continue
        #fti = self.ttool.getTypeInfo(type_)
        #if fti is None: # not an existing type
        #    msg = "constructor: no type found %s:%s" % (type_, path)
        #    logger.log(logging.ERROR, msg)
        #    yield item; continue
        elems = path.strip('/').rsplit('/', 1)
        for attempt in range(0, 3):
            try:
                url = urllib.basejoin(self.target, path)
                proxy = xmlrpclib.ServerProxy(url)
                container, id = (len(elems) == 1 and ('', elems[0]) or elems)
                #if id == 'index.html':
                try:
                    # test paths in case of acquisition
                    rpath = proxy.getPhysicalPath()
                    #TODO: should check type to see if it's correct?
                    rpath = rpath[len(basepath):]
                    if path == '/'.join(rpath):
                        self.logger.debug("%s already exists. Not creating" % ('/'.join(rpath)))
                        break
                except xmlrpclib.Fault:
                    # Doesn't already exist
                    pass
                purl = urllib.basejoin(self.target, container)
                pproxy = xmlrpclib.ServerProxy(purl)
                try:
                    pproxy.invokeFactory(type_, id)
                    self.logger.info("%s Created with type=%s" % (path, type_))
                except xmlrpclib.ProtocolError, e:
                    if e.errcode == 302:
                        pass
                    else:
                        raise
                except xmlrpclib.Fault:
                    self.logger.warning("Failure while creating '%s' of type '%s'" % (path, type_))
                    pass
                break
            except xmlrpclib.ProtocolError, e:
                if e.errcode == 503:
                    continue
                else:
                    raise
Example 7: _parse_result
def _parse_result(self, page_url, result_line):
    torrent_link, category, title, size, seeders, leechers, health = TorrentSearch.htmltools.find_elements(result_line, "td")
    torrent_url = urllib.basejoin(page_url, TorrentSearch.htmltools.find_elements(torrent_link, "a")[0].prop('href').replace('/torrent_download/', '/download/'))
    if len(TorrentSearch.htmltools.find_elements(title, "a")) == 2:
        details_link = TorrentSearch.htmltools.find_elements(title, "a")[0]
    else:
        details_link = TorrentSearch.htmltools.find_elements(title, "a")[1]
    title = details_link.getContent()
    details_link = urllib.basejoin(page_url, details_link.prop('href'))
    size = size.getContent()
    size = size[:-4] + " " + size[-2:]
    seeders = eval(seeders.getContent())
    leechers = eval(leechers.getContent())
    category = self._parse_category(TorrentSearch.htmltools.find_elements(category, "a")[0].prop('href').split('/')[-2])
    c = httplib2.Http()
    resp, content = self.http_queue_request(details_link)
    tree = libxml2.htmlParseDoc(content, "utf-8")
    lines = TorrentSearch.htmltools.find_elements(TorrentSearch.htmltools.find_elements(tree, "td", **{'class': 'tabledata0'})[0].parent.parent, "tr")
    for i in lines:
        cells = TorrentSearch.htmltools.find_elements(i, "td")
        if cells[0].getContent() == "Info hash:":
            hashvalue = cells[1].getContent()
        elif cells[0].getContent() == "Torrent added:":
            date = cells[1].getContent().split(" ")[0]
            date = time.strptime(date, "%Y-%m-%d")
            date = datetime.date(date.tm_year, date.tm_mon, date.tm_mday)
    self.add_result(ExtraTorrentPluginResult(title, date, size, seeders, leechers, torrent_url, hashvalue, category))
Example 8: paso_a_paso
def paso_a_paso():
    url = "%s/tips/recetas" % BASE_URL
    html = urllib.urlopen(url).read()
    dom = lxml.html.document_fromstring(html)
    answer = []
    serie = models.Serie()
    serie.title = 'Paso a paso'
    serie.description = "por Martiniano Molina"
    serie.url = 'rss://%s/content/elgourmet/paso_a_paso' % settings.MY_BASE_URL
    serie.thumbnail = dom.cssselect("#cab_logo img")[0].get("src")
    serie.episodes = []
    serie.show_name = 'paso_a_paso'
    for a in dom.cssselect("#contenedor a"):
        try:
            url2 = a.get('href')
            if not url2.startswith('receta'):
                continue
            url2 = urllib.basejoin(BASE_URL, url2)
            episode = models.Episode()
            episode.title = a.cssselect("h2")[0].text_content()
            print "\t%s" % episode.title
            html2 = urllib.urlopen(url2).read()
            episode.url = url2
            episode.thumbnail = urllib.basejoin(BASE_URL, dom.cssselect("img")[0].get('src'))
            x = re.findall('"file": ?"(.*?)"', html2)
            episode.video_url = get_video_url(x[0], STREAMING_URL)
            serie.episodes.append(episode)
        except Exception, e:
            print "Error: %s" % e
Example 9: _make_img_urls
def _make_img_urls(self, product_key, img_count):
    """
    The keyword `RLLZ` in the url means large size (about 800*1000); `RLLD` means small size (about 400*500).
    http://www.ruelala.com/images/product/131385/1313856984_RLLZ_1.jpg
    http://www.ruelala.com/images/product/131385/1313856984_RLLZ_2.jpg
    """
    urls = []
    prefix = 'http://www.ruelala.com/images/product/'
    for i in range(0, img_count):
        subfix = '%s/%s_RLLZ_%d.jpg' % (product_key[:6], product_key, i + 1)
        url = urllib.basejoin(prefix, subfix)
        urls.append(url)
    # If img_count is 0, the RLLZ and RLLA images are not available; probe RLLDE images instead.
    if img_count == 0:
        for j in xrange(0, 1000):
            sub = '%s/%s_RLLDE_%d.jpg' % (product_key[:6], product_key, j + 1)
            url = urllib.basejoin(prefix, sub)
            status = self.net.fetch_image(url)
            if status != 404:
                urls.append(url)
            else:
                return urls
    return urls
Example 10: __init__
def __init__(self, name, rawdir, metadir, statsdir, updateMeta=False):
    lobis.Lobis.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
    self.baseurl = 'http://lobis.nic.in/'
    self.courturl = urllib.basejoin(self.baseurl, '/phhc/')
    self.cookieurl = urllib.basejoin(self.baseurl,
                                     '/phhc/juddt.php?scode=28')
    self.dateurl = urllib.basejoin(self.baseurl,
                                   '/phhc/juddt1.php?dc=28&fflag=1')
Example 11: __init__
def __init__(self, name, rawdir, metadir, statsdir, updateMeta=False):
    utils.BaseCourt.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
    self.cookiefile = tempfile.NamedTemporaryFile()
    self.baseurl = 'http://ldemo.mp.nic.in'
    self.cookieurl = urllib.basejoin(self.baseurl,
                                     'causelist/ciskiosk/ordermain.php')
    self.dateurl = urllib.basejoin(self.baseurl,
                                   '/causelist/ciskiosk/order_action.php?as9=ok3')
Example 12: _run_search
def _run_search(self, pattern, href=None, page=0):
    if href == None:
        href = "http://mononoke-bt.org/browse2.php?search=" + urllib.quote_plus(pattern)
    resp, content = self.http_queue_request(href, headers={'Cookie': self._app.parse_cookie(self.login_cookie)})
    tree = libxml2.htmlParseDoc(content, "utf-8")
    pager = htmltools.find_elements(tree.getRootElement(), "div", **{'class': 'animecoversfan'})[0].parent.next
    try:
        data = htmltools.find_elements(pager, "b")[-1].getContent()
        i = len(data) - 1
        while data[i] in "0123456789":
            i -= 1
        self.results_count = eval(data[i+1:])
    except:
        pass
    restable = pager.next.next
    lines = htmltools.find_elements(restable, "tr", 1)[1:-2]
    for i in lines:
        try:
            cells = htmltools.find_elements(i, "td")
            team, show, stype, name, torrent_link, nbfiles, nbcmt, rate, date, size, views, dl, seeders, leechers, ratio = cells
            link = htmltools.find_elements(name, "a")[0]
            label = link.getContent()
            link = urllib.basejoin(href, link.prop('href'))
            torrent_link = urllib.basejoin(href, htmltools.find_elements(torrent_link, "a")[0].prop('href')) + "&r=1"
            date = htmltools.find_elements(date, "nobr")[0].children.getContent()
            date = time.strptime(date, "%Y-%m-%d")
            date = datetime.date(date.tm_year, date.tm_mon, date.tm_mday)
            strsize = ""
            cell = size.children
            while cell:
                if cell.name == "text":
                    if strsize:
                        strsize += " "
                    strsize += cell.getContent().upper()
                cell = cell.next
            size = strsize.replace('O', 'B')
            seeders = eval(seeders.getContent())
            leechers = eval(leechers.getContent())
            resp, content = self.http_queue_request(link, headers={'Cookie': self._app.parse_cookie(self.login_cookie)})
            itemtree = libxml2.htmlParseDoc(content, "utf-8")
            tds = htmltools.find_elements(itemtree.getRootElement(), "td")
            hashvalue = None
            for j in tds:
                if j.getContent() == "Info hash":
                    hashvalue = j.next.next.getContent()
            self.add_result(MononokeBTPluginResult(label, date, size, seeders, leechers, torrent_link, hashvalue))
        except:
            pass
        if self.stop_search:
            return
    if not self.stop_search:
        try:
            b = htmltools.find_elements(pager, "b")[-1]
            if b.parent.name == "a":
                url = "http://mononoke-bt.org/browse2.php?search=%s&page=%d" % (urllib.quote_plus(pattern), page + 1)
                self._run_search(pattern, url, page + 1)
        except:
            pass
Example 13: download_oneday
def download_oneday(self, relpath, dateobj):
    dateurl = urllib.basejoin(self.baseurl, '/hcjudge/date_output.php')
    postdata = [('d1', dateobj.day), ('m1', dateobj.month),
                ('y1', dateobj.year), ('d2', dateobj.day),
                ('m2', dateobj.month), ('y2', dateobj.year),
                ('button', 'Submit')]
    webpage = self.download_url(dateurl, postdata=postdata)
    if not webpage:
        self.logger.warning(u'No webpage for %s date: %s' %
                            (dateurl, dateobj))
        return []
    d = utils.parse_webpage(webpage)
    if not d:
        self.logger.error(u'HTML parsing failed for date: %s' % dateobj)
        return []
    newdls = []
    for link in d.findAll('a'):
        href = link.get('href')
        title = utils.get_tag_contents(link)
        if (not href) or (not title):
            self.logger.warning(u'Could not process %s' % link)
            continue
        words = href.split('/')
        filename = words[-1]
        url = urllib.basejoin(dateurl, href)
        self.logger.info(u'link: %s title: %s' % (href, title))
        relurl = os.path.join(relpath, filename)
        filepath = os.path.join(self.rawdir, relurl)
        metapath = os.path.join(self.metadir, relurl)
        if not os.path.exists(filepath):
            webpage = self.download_url(url)
            if not webpage:
                self.logger.warning(u'No webpage %s' % url)
            else:
                utils.save_file(filepath, webpage)
                self.logger.info(u'Saved %s' % url)
                newdls.append(relurl)
        if os.path.exists(filepath) and \
           (self.updateMeta or not os.path.exists(metapath)):
            metainfo = self.get_meta_info(title, dateobj)
            if metainfo:
                utils.print_tag_file(metapath, metainfo)
    return newdls
Example 14: __iter__
def __iter__(self):
    self.checkOptions()
    for item in self.previous:
        if not self.target:
            yield item
            continue
        keys = item.keys()
        # Apply defaultMatcher() function to extract necessary data:
        # 1) which item will be transitioned
        # 2) with which transition
        pathkey = self.pathkey(*keys)[0]
        transitionskey = self.transitionskey(*keys)[0]
        if not (pathkey and transitionskey):  # not enough info
            yield item
            continue
        path, transitions = item[pathkey], item[transitionskey]
        if isinstance(transitions, basestring):
            transitions = (transitions,)
        remote_url = urllib.basejoin(self.target, path)
        if not remote_url.endswith("/"):
            remote_url += "/"
        for transition in transitions:
            transition_trigger_url = urllib.basejoin(remote_url, "content_status_modify?workflow_action=" + transition)
            self.logger.info("%s performing transition '%s'" % (path, transition))
            from httplib import HTTPException
            try:
                f = urllib.urlopen(transition_trigger_url)
                data = f.read()
                # Use the Plone "not found" page signature to detect bad URLs
                if "Please double check the web address" in data:
                    import pdb; pdb.set_trace()
                    raise RuntimeError("Bad remote URL:" + transition_trigger_url)
            except HTTPException, e:
                # Anything other than HTTP 200 OK ends up here,
                # unless the URL is broken, in which case Plone shows
                # its "Your content was not found" page
                self.logger.error("fail")
                msg = "Remote workflow transition failed %s->%s" % (path, transition)
                self.logger.log(logging.ERROR, msg, exc_info=True)
        yield item
Example 15: _run_search
def _run_search(self, pattern, href=None):
    if href == None:
        href = "http://www.torrent411.com/search/" + urllib.quote_plus(pattern)
    resp, content = self.http_queue_request(href)
    content = _codecs.utf_8_encode(_codecs.latin_1_decode(content)[0])[0]
    tree = libxml2.htmlParseDoc(content, "utf-8")
    pager = htmltools.find_elements(htmltools.find_elements(tree.getRootElement(), "table", **{'class': 'NB-frame'})[1], "p")[0]
    try:
        b = htmltools.find_elements(pager, "b")[-1]
        data = b.getContent()
        i = len(data) - 1
        while data[i] in "0123456789":
            i -= 1
        self.results_count = eval(data[i+1:])
    except:
        pass
    restable = htmltools.find_elements(pager.next.next, "table")[0]
    restable = htmltools.find_elements(restable, "table")[1]
    body = htmltools.find_elements(restable, "tbody")[0]
    lines = htmltools.find_elements(body, "tr", 1)
    for i in lines:
        try:
            cat, link, a, date, b, c, d, e, f, g, h, i, size, j, seeders, leechers = htmltools.find_elements(i, "td")
            date = date.getContent().replace(chr(194) + chr(160) + "at" + chr(194) + chr(160), " ")
            date = time.strptime(date, "%Y-%m-%d %H:%M:%S")
            date = datetime.date(date.tm_year, date.tm_mon, date.tm_mday)
            size = size.getContent().replace(chr(194) + chr(160), " ")
            seeders = eval(seeders.getContent())
            leechers = eval(leechers.getContent())
            link = htmltools.find_elements(link, "a")[0]
            label = link.prop('title')
            link = urllib.basejoin("http://www.torrent411.com", link.prop('href'))
            resp, content = self.http_queue_request(link)
            content = _codecs.utf_8_encode(_codecs.latin_1_decode(content)[0])[0]
            itemtree = libxml2.htmlParseDoc(content, "utf-8")
            table = htmltools.find_elements(itemtree.getRootElement(), "table", **{'cellpadding': '3'})[1]
            desc, name, torrent, cat, siz, hashvalue = htmltools.find_elements(table, "tr")[:6]
            torrent = htmltools.find_elements(torrent, "a")[0].prop('href')
            hashvalue = htmltools.find_elements(hashvalue, "td")[1].getContent()
            self.add_result(Torrent411PluginResult(label, date, size, seeders, leechers, torrent, hashvalue))
        except:
            pass
        if self.stop_search:
            return
    if not self.stop_search:
        try:
            links = htmltools.find_elements(pager, "a")
            next_link = None
            for i in links:
                if i.getContent() == "Next" + chr(194) + chr(160) + ">>":
                    next_link = i
            if next_link:
                link = urllib.basejoin("http://www.torrent411.com", next_link.prop('href'))
                self._run_search(pattern, link)
        except:
            pass