本文整理汇总了 Python 中 Httpy.Httpy.between 方法的典型用法代码示例。如果您正苦于以下问题:Python Httpy.between 方法的具体用法?Python Httpy.between 怎么用?Python Httpy.between 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Httpy.Httpy 的用法示例。
在下文中一共展示了Httpy.between方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_urls_user_albums
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls_user_albums(self):
    '''
    Collect image entries from every album linked on the page at self.url.

    If self.url ends with '/all' the page lists images rather than albums,
    so the work is delegated to get_urls_user_images().

    Returns a list of the items produced by get_urls_album(); each item's
    'saveas' value is prefixed with the cover's 1-based position on the page
    and the album id.
    '''
    if self.url.endswith('/all'):
        # Images, not albums
        return self.get_urls_user_images()
    from Httpy import Httpy
    httpy = Httpy()
    r = httpy.get(self.url)
    result = []
    covers = httpy.between(r, '<div class="cover">', '</div>')
    for (index, cover) in enumerate(covers):
        if '<a href="' not in cover:
            continue
        album = httpy.between(cover, '<a href="', '"')[0]
        if album.startswith('//'):
            album = 'http:%s' % album
        # The album id is the fifth '/'-separated piece of the cover link
        albumid = album.split('/')[4]
        album = 'http://imgur.com/a/%s' % albumid
        for image in self.get_urls_album(album):
            # Tack this album's index/albumid to image
            image['saveas'] = '%03d_%s_%s' % (index + 1, albumid, image['saveas'])
            result.append(image)
        # Pause between album requests
        sleep(2)
        if len(result) > SiteBase.MAX_IMAGES_PER_RIP:
            break
    return result
示例2: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Walk the paginated thumbnail listing starting at self.url and start one
    worker thread per distinct thumbnail link.

    Each worker runs self.get_url_from_page(httpy, result, link). At most
    self.max_threads slots in self.threads are occupied at a time.

    Returns the shared `result` list once self.threads is empty.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    r = httpy.get(self.url)
    result = []
    # Set membership keeps the duplicate check O(1) per link
    already_got = set()
    while True:
        for chunk in httpy.between(r, '<a class="thumb', '>'):
            if 'href="' not in chunk:
                continue
            link = httpy.between(chunk, 'href="', '"')[0]
            if link in already_got:
                continue
            already_got.add(link)
            # Get image from page; wait until a thread slot is available
            while len(self.threads) >= self.max_threads:
                sleep(0.1)
            self.threads.append(None)
            t = Thread(target=self.get_url_from_page, args=(httpy, result, link,))
            t.start()
        # Go to next page
        nexts = httpy.between(r, '<li class="next">', '</li>')
        # The marker must be 'href="' (with '='); without it the check could
        # never match real markup and pagination always stopped at page one.
        if len(nexts) == 0 or 'href="' not in nexts[0]:
            break
        next_page = httpy.between(nexts[0], 'href="', '"')[0]
        if 'offset=' not in next_page:
            break
        r = httpy.get(next_page)
    # NOTE(review): presumably get_url_from_page removes an entry from
    # self.threads when it finishes; otherwise this wait never ends - confirm.
    while len(self.threads) > 0:
        sleep(0.1)
    return result
示例3: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Build one absolute URL per post found on the page at self.url.

    For every 'daposts' section the first href value is taken, unless it
    contains 'javascript:'. Returns the list of resulting URLs.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = httpy.get(self.url)
    urls = []
    for post in httpy.between(page, 'daposts">', '</div> </div> </div>'):
        hrefs = httpy.between(post, 'href="', '"')
        if len(hrefs) == 0:
            continue
        first = hrefs[0]
        if 'javascript:' in first:
            continue
        urls.append('http://www.chansluts.com%s' % first)
    return urls
示例4: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Return every link found inside the first <article class="..."> section
    of the page at self.url.

    Protocol-relative links ('//...') get an 'http:' prefix and spaces are
    percent-encoded. Raises Exception when the page has no such section.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = httpy.get(self.url)
    articles = httpy.between(page, '<article class="', '</article>')
    if len(articles) == 0:
        raise Exception('unable to find "article class" at %s '% self.url)
    article = articles[0]
    links = []
    for href in httpy.between(article, '<a href="', '"'):
        absolute = 'http:%s' % href if href.startswith('//') else href
        links.append(absolute.replace(' ', '%20'))
    return links
示例5: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Return, as a list, every value that follows '/img.php?path=' (up to the
    next double quote) on the page at self.url.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = httpy.get(self.url)
    return [path for path in httpy.between(page, '/img.php?path=', '"')]
示例6: sanitize_url
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def sanitize_url(self):
    '''
    Make sure self.url points at a gallery page.

    A single-image URL ('/image.php?id=') is replaced by the gallery link
    found on that image's page. Raises Exception if the image page has no
    gallery link, or if the final URL lacks '/gallery.php?id='.
    '''
    gallery_marker = 'View complete gallery: <a href="'
    if '/image.php?id=' in self.url:
        from Httpy import Httpy
        httpy = Httpy()
        page = httpy.get(self.url)
        if gallery_marker not in page:
            raise Exception('no gallery found at %s' % self.url)
        gallery_path = httpy.between(page, gallery_marker, '"')[0]
        self.url = 'http://imagearn.com/%s' % gallery_path
    if '/gallery.php?id=' not in self.url:
        raise Exception('expected /gallery.php?id= not found in URL')
示例7: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Follow the 'next' links starting from self.url and gather the image
    URL of every list entry on each page.

    '_stream' in an image URL is replaced with '_max' and protocol-relative
    URLs get an 'http:' prefix. Returns the list of image URLs.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    next_marker = '<li class="next"><a href="'
    current = self.url
    images = []
    while True:
        page = httpy.get(current)
        for entry in httpy.between(page, '<a name="', '</li>'):
            if '<img src="' in entry:
                src = httpy.between(entry, '<img src="', '"')[0]
                src = src.replace('_stream', '_max')
                if src.startswith('//'):
                    src = 'http:%s' % src
                images.append(src)
        # Stop when the page offers no link to a following page
        if next_marker not in page:
            break
        current = httpy.between(page, next_marker, '"')[0]
    return images
示例8: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Gather image URLs from a multi-page gallery starting at self.url.

    On each page only the first 'slideTool' section is scanned. Every image
    source has '_160.' swapped for '_1000.' and 'http://p2.' swapped for
    'http://up.'. Page N is addressed by replacing '.html' in self.url with
    '-N.html', and is fetched only if that address appears in the current
    page. Returns the list of image URLs.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    images = []
    page_number = 1
    html = httpy.get(self.url)
    while True:
        sections = httpy.between(html, "class='slideTool'", 'Related Galleries')
        # Only the first matching section is used
        for section in sections[:1]:
            for src in httpy.between(section, "' src='", "'"):
                src = src.replace('_160.', '_1000.')
                src = src.replace('http://p2.', 'http://up.')
                images.append(src)
        page_number += 1
        candidate = self.url.replace('.html', '-%d.html' % page_number)
        if candidate not in html:
            break
        html = httpy.get(candidate)
    return images
示例9: sanitize_url
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def sanitize_url(self):
    '''
    Make sure self.url is a gallery URL that ends with '/'.

    A single-image URL ('/image/') is replaced by the gallery link found on
    that image's page. Raises Exception if the image page has no gallery
    link, or if the final URL lacks '/gallery/'.
    '''
    title_marker = "class='gallery_title'><a href='"
    if '/image/' in self.url:
        from Httpy import Httpy
        httpy = Httpy()
        page = httpy.get(self.url)
        if title_marker not in page:
            raise Exception('no gallery found at %s' % self.url)
        self.url = httpy.between(page, title_marker, "'")[0]
    if '/gallery/' not in self.url:
        raise Exception('expected /gallery/ not found in URL')
    if not self.url.endswith('/'):
        self.url = self.url + '/'
示例10: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Return full-size image URLs for the thumbnails on the page at self.url.

    Only the part of the page from 'showMoreGalleries' onward is scanned.
    Each thumbnail source loses everything up to and including its first
    '.', has '/images/thumb/' replaced by '/images/full/' and is prefixed
    with 'http://'. Collection stops once the list holds more than
    SiteBase.MAX_IMAGES_PER_RIP entries.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = httpy.get(self.url)
    start = page.find('showMoreGalleries')
    page = page[start:] # To ignore user icon
    images = []
    for thumb in httpy.between(page, 'border=0 src="', '"'):
        after_first_dot = thumb[thumb.find('.') + 1:]
        full_path = after_first_dot.replace('/images/thumb/', '/images/full/')
        images.append('http://%s' % full_path)
        if len(images) > SiteBase.MAX_IMAGES_PER_RIP:
            break
    return images
示例11: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Returns list of URLs from album. Does not download them.

    Each <img src="..."> value on the page at self.url is appended to
    'http://hostname.com'. Collection stops once the list holds more than
    SiteBase.MAX_IMAGES_PER_RIP entries.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = httpy.get(self.url)
    urls = []
    for src in httpy.between(page, '<img src="', '"'):
        urls.append('http://hostname.com%s' % src)
        if len(urls) > SiteBase.MAX_IMAGES_PER_RIP:
            break
    return urls
示例12: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Return the image URLs taken from the data-cfsrc attributes on the page
    at self.url.

    Protocol-relative URLs get an 'http:' prefix and spaces are
    percent-encoded. When a URL contains '-cu_', everything from that marker
    on is replaced by '-me.' plus the text after the URL's last '.'.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = httpy.get(self.url)
    urls = []
    for src in httpy.between(page, 'data-cfsrc="', '"'):
        if src.startswith('//'):
            src = 'http:%s' % src
        src = src.replace(' ', '%20')
        marker_at = src.find('-cu_')
        if marker_at != -1:
            extension = src.split('.')[-1]
            src = '%s-me.%s' % (src[:marker_at], extension)
        urls.append(src)
    return urls
示例13: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls(self):
    '''
    Return the imgur image URLs referenced by src attributes on the page at
    self.url.

    Only values containing both 'http://' and 'imgur.com' are kept. If the
    character just before the last '.' is 'm', that character is dropped.
    Collection stops once the list holds more than
    SiteBase.MAX_IMAGES_PER_RIP entries.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = httpy.get(self.url)
    urls = []
    for src in httpy.between(page, 'src="', '"'):
        if 'http://' not in src or 'imgur.com' not in src:
            continue
        before_dot = src.rfind('.') - 1
        if src[before_dot] == 'm':
            with_suffix = src[before_dot:]
            without_suffix = src[before_dot + 1:]
            src = src.replace(with_suffix, without_suffix)
        urls.append(src)
        if len(urls) > SiteBase.MAX_IMAGES_PER_RIP:
            break
    return urls
示例14: get_urls_album_noscript
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls_album_noscript(url):
    '''
    Requires URL in the format: http://imgur.com/a/[albumid]

    Fetches '<url>/noscript' and returns a list with one 'http://i.'-prefixed
    link per image found on that page.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    r = httpy.get('%s/noscript' % url)
    result = []
    for link in httpy.between(r, 'img src="//i.', '"'):
        link = 'http://i.%s' % link
        try:
            # NOTE(review): `self` is not a parameter of this function, so
            # unless a global named `self` exists this call raises NameError,
            # which the handler below swallows - confirm the intended owner
            # of get_highest_res.
            link = self.get_highest_res(link)
        except Exception:
            # Image is gone.
            # Add it anyway so RipManager will mark the image as 'errored'
            pass
        result.append(link)
    # The collected links were previously built and then discarded
    return result
示例15: get_urls_subreddit
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import between [as 别名]
def get_urls_subreddit(self):
    '''
    Collect image links from '<self.url>/page/<n>' for n = 0, 1, 2, ...

    Paging stops when a page has no matching links or when a link that was
    already collected shows up again. Each link is passed through
    self.get_highest_res() before being stored.

    Returns the list of processed links.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    page = 0
    result = []
    # Holds both the raw links and their processed forms. Checking raw links
    # only against the processed list could miss a repeat whenever
    # get_highest_res() returns a different string.
    seen = set()
    while True:
        r = httpy.get('%s/page/%d' % (self.url, page))
        links = httpy.between(r, ' src="//i.', '"')
        if len(links) == 0:
            # Hit end of pages
            return result
        for link in links:
            if link in seen:
                # Pages started repeating
                return result
            seen.add(link)
            link = self.get_highest_res(link)
            seen.add(link)
            result.append(link)
        page += 1