本文整理汇总了Python中Httpy.Httpy.get方法的典型用法代码示例。如果您正苦于以下问题：Python Httpy.get方法的具体用法？Python Httpy.get怎么用？Python Httpy.get使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Httpy.Httpy的用法示例。
在下文中一共展示了Httpy.get方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls(self):
    '''
    Collect image URLs from every page of the gallery at self.url.

    Each unique thumbnail link found on a page is handed to a worker
    thread running self.get_url_from_page, which receives the shared
    `result` list. No more than self.max_threads slots are occupied in
    self.threads at a time.

    Returns:
        list - the shared result list that was passed to the workers.
    '''
    from Httpy import Httpy
    httpy = Httpy()
    r = httpy.get(self.url)
    result = []
    already_got = set()  # links already dispatched to a worker
    while True:
        for chunk in httpy.between(r, '<a class="thumb', '>'):
            if 'href="' not in chunk:
                continue
            link = httpy.between(chunk, 'href="', '"')[0]
            if link in already_got:
                continue
            already_got.add(link)
            # Get image from page; wait for a free slot first
            while len(self.threads) >= self.max_threads:
                sleep(0.1)
            # Reserve a slot. Presumably get_url_from_page releases it
            # when it finishes -- confirm against that method.
            self.threads.append(None)
            t = Thread(target=self.get_url_from_page, args=(httpy, result, link,))
            t.start()
        # Go to next page
        nexts = httpy.between(r, '<li class="next">', '</li>')
        # The attribute check must match the 'href="' marker extracted
        # below; the previous 'href"' test never matched a real link and
        # ended pagination after the first page.
        if len(nexts) == 0 or 'href="' not in nexts[0]:
            break
        next_page = httpy.between(nexts[0], 'href="', '"')[0]
        if 'offset=' not in next_page:
            break
        r = httpy.get(next_page)
    # Block until every reserved slot has been released
    while len(self.threads) > 0:
        sleep(0.1)
    return result
示例2: get_urls_user_albums
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls_user_albums(self):
    '''
    Collect the images of every album listed on a user's page.

    URLs ending in '/all' list images rather than albums and are
    delegated to self.get_urls_user_images().

    Returns:
        list - image entries from self.get_urls_album(), each with its
        'saveas' value prefixed by the album's 1-based index and album id.
    '''
    if self.url.endswith('/all'):
        # Images, not albums
        return self.get_urls_user_images()
    from Httpy import Httpy
    httpy = Httpy()
    r = httpy.get(self.url)
    result = []
    for (index, cover) in enumerate(httpy.between(r, '<div class="cover">', '</div>')):
        if '<a href="' not in cover:
            continue
        album = httpy.between(cover, '<a href="', '"')[0]
        if album.startswith('//'):
            # Protocol-relative link; give it a scheme before splitting
            album = 'http:%s' % album
        albumid = album.split('/')[4]
        album = 'http://imgur.com/a/%s' % albumid
        for image in self.get_urls_album(album):
            # Tack this album's index/albumid to image
            image['saveas'] = '%03d_%s_%s' % (index + 1, albumid, image['saveas'])
            result.append(image)
        # Pause between album requests
        sleep(2)
        if len(result) > SiteBase.MAX_IMAGES_PER_RIP:
            break
    return result
示例3: test
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def test():
    '''
    Verify the imgur ripper against the live site.

    Returns:
        None - when at least the expected number of images was found
        str  - a warning message when fewer images than expected came back
    Raises:
        Exception - when the imgur home page returns an empty response
    '''
    from Httpy import Httpy
    # Check we can hit the host
    url = 'http://imgur.com'
    home_page = Httpy().get(url)
    if not home_page.strip():
        raise Exception('unable to retrieve data from %s' % url)
    # Check ripper gets all images in an album
    url = SiteImgur.get_sample_url()
    urls = SiteImgur(url).get_urls()
    # Echo every result with its position
    for (position, entry) in enumerate(urls):
        print('%s %s' % (position, entry))
    expected = 4
    if len(urls) >= expected:
        return None
    return 'expected at least %d images, got %d. url: %s' % (expected, len(urls), url)
示例4: test
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def test():
    '''
    Verify that the ripper still works against the live site.
    StatusManager.py uses the results of this method to show what rippers
    are working/broken on the main page.

    Returns:
        None - if ripper is working as expected
        str  - Warning message if the ripper may not be working properly.
    Raises:
        Exception - if ripper is definitely broken. Exception message is
                    used to display on site.
    '''
    from Httpy import Httpy
    # Check we can hit the host
    url = 'http://hostname.com'
    home_page = Httpy().get(url)
    if not home_page.strip():
        # The site is *very* broken: nothing can be ripped from it if the
        # home page cannot be fetched, so fail hard.
        raise Exception('unable to retrieve data from %s' % url)
    # Check ripper gets all images in an album
    url = _SampleSite.get_sample_url()
    urls = _SampleSite(url).get_urls()
    expected = 10
    if len(urls) >= expected:
        # No issues found; the ripper is working as expected.
        return None
    # A non-None string marks a possibly transient problem: the album may
    # have been deleted while the ripper itself still works.
    return 'expected at least %d images, got %d. url: %s' % (expected, len(urls), url)
示例5: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls(self):
    '''
    Collect photo URLs by paging through the API URL built by
    self.get_api_url().

    Returns:
        list - the 'original_size' URL of every photo found.
    Raises:
        Exception - if no 'tumblr_key' is configured in the database.
    '''
    self.api_key = self.db.get_config('tumblr_key')
    if self.api_key is None:
        raise Exception('unable to rip album (%s), tumblr key not found in database' % self.url)
    from Httpy import Httpy
    httpy = Httpy()
    result = []
    offset = 0
    while True:
        url = self.get_api_url(offset=offset)
        r = httpy.get(url)
        data = loads(r)
        if 'response' not in data or 'posts' not in data['response']:
            # Nothing usable in this response; stop paging
            break
        posts = data['response']['posts']
        if len(posts) == 0:
            break
        for post in posts:
            # Posts without a 'photos' entry are skipped instead of
            # aborting the whole rip with a KeyError.
            for photo in post.get('photos', []):
                result.append(photo['original_size']['url'])
        # Presumably a single-post rip needs only the first response --
        # confirm against get_api_url.
        if self.post_type == 'post':
            break
        if len(result) > SiteBase.MAX_IMAGES_PER_RIP:
            break
        # Step size of 20 is presumably the API page size -- confirm.
        offset += 20
        sleep(1)
    return result
示例6: get_image_count_for_album
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_image_count_for_album(url):
    '''
    Count the images of an imgur album via its noscript page.

    The album id is taken from the third '/'-separated field of the URL
    once the scheme is removed and the mobile host is normalised.

    Returns:
        int - number of 'src="//i.imgur.com' occurrences in the page.
    '''
    bare = url.replace('m.imgur.com', 'imgur.com')
    for scheme in ('https://', 'http://'):
        bare = bare.replace(scheme, '')
    album_id = bare.split('/')[2]
    noscript_url = 'http://imgur.com/a/%s/noscript' % album_id
    page = Httpy().get(noscript_url)
    return page.count('src="//i.imgur.com')
示例7: test
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def test():
    '''
    Check that the vimeo home page can be fetched.

    Raises:
        Exception - if the response is empty. Errors raised by the HTTP
        request itself propagate unchanged, with their original traceback
        (the former catch-and-re-raise wrapper added nothing).
    '''
    from Httpy import Httpy
    httpy = Httpy()
    r = httpy.get('http://www.vimeo.com/')
    if len(r.strip()) == 0:
        raise Exception('empty response from vimeo.com')
示例8: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls(self):
    '''
    Return every value found between '/img.php?path=' and the next
    double quote in the page at self.url.

    Returns:
        list - the extracted values, in page order.
    '''
    from Httpy import Httpy
    client = Httpy()
    page = client.get(self.url)
    return [path for path in client.between(page, '/img.php?path=', '"')]
示例9: sanitize_url
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def sanitize_url(self):
    '''
    Normalise self.url so that it points at a gallery.

    A single-image URL ('/image.php?id=') is swapped for the gallery it
    belongs to by following the page's "View complete gallery" link.

    Raises:
        Exception - if the image page has no gallery link, or the
        resulting URL is not a '/gallery.php?id=' URL.
    '''
    gallery_marker = 'View complete gallery: <a href="'
    if '/image.php?id=' in self.url:
        from Httpy import Httpy
        client = Httpy()
        page = client.get(self.url)
        if gallery_marker not in page:
            raise Exception('no gallery found at %s' % self.url)
        gallery_path = client.between(page, gallery_marker, '"')[0]
        self.url = 'http://imagearn.com/%s' % gallery_path
    if '/gallery.php?id=' not in self.url:
        raise Exception('expected /gallery.php?id= not found in URL')
示例10: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls(self):
    '''
    Gather image links from a gallery that spans several pages.

    On each page only the first "class='slideTool'" section is scanned.
    Every thumbnail link in it is rewritten ('_160.' -> '_1000.',
    'http://p2.' -> 'http://up.') before being collected. The next page
    is fetched only while its URL appears in the current page.

    Returns:
        list - the rewritten image links, in page order.
    '''
    from Httpy import Httpy
    client = Httpy()
    images = []
    page_number = 1
    source = client.get(self.url)
    while True:
        for section in client.between(source, "class='slideTool'", 'Related Galleries'):
            for thumb in client.between(section, "' src='", "'"):
                full_size = thumb.replace('_160.', '_1000.')
                full_size = full_size.replace('http://p2.', 'http://up.')
                images.append(full_size)
            # Only the first section is of interest
            break
        page_number += 1
        following = self.url.replace('.html', '-%d.html' % page_number)
        if following not in source:
            break
        source = client.get(following)
    return images
示例11: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls(self):
    '''
    Collect the first link of every post on the page at self.url.

    Posts with no link, or whose first link is a 'javascript:' link, are
    skipped. Each link is prefixed with the site host.

    Returns:
        list - absolute URLs, one per qualifying post.
    '''
    from Httpy import Httpy
    client = Httpy()
    page = client.get(self.url)
    found = []
    for post in client.between(page, 'daposts">', '</div> </div> </div>'):
        hrefs = client.between(post, 'href="', '"')
        if len(hrefs) == 0:
            continue
        first = hrefs[0]
        if 'javascript:' in first:
            continue
        found.append('http://www.chansluts.com%s' % first)
    return found
示例12: sanitize_url
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def sanitize_url(self):
    '''
    Normalise self.url into a gallery URL that ends with '/'.

    A single-image URL ('/image/') is swapped for the gallery linked
    from the page's gallery title.

    Raises:
        Exception - if the image page has no gallery link, or the
        resulting URL does not contain '/gallery/'.
    '''
    title_marker = "class='gallery_title'><a href='"
    if '/image/' in self.url:
        from Httpy import Httpy
        client = Httpy()
        page = client.get(self.url)
        if title_marker not in page:
            raise Exception('no gallery found at %s' % self.url)
        self.url = client.between(page, title_marker, "'")[0]
    if '/gallery/' not in self.url:
        raise Exception('expected /gallery/ not found in URL')
    if not self.url.endswith('/'):
        self.url = self.url + '/'
示例13: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls(self):
    '''
    Load the posts of the thread referenced by self.url from the JSON API.

    The board and thread number are taken from the 4th and 6th
    '/'-separated fields of self.url.

    NOTE: this excerpt stops after the posts are loaded, so as shown the
    function returns nothing.

    Raises:
        Exception - if the API response cannot be fetched or parsed, or
        has no 'posts' entry.
    '''
    from Httpy import Httpy
    client = Httpy()
    parts = self.url.split('/')
    url = 'http://api.4chan.org/%s/res/%s.json' % (parts[3], parts[5])
    try:
        body = client.get(url)
        posts = loads(body)['posts']
    except Exception as e:
        raise Exception('failed to load %s: %s' % (url, str(e)))
示例14: get_urls
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def get_urls(self):
    '''
    Collect full-size image URLs from the page at self.url.

    Only the part of the page from 'showMoreGalleries' onwards is
    scanned. For each thumbnail, everything up to and including the
    first '.' is dropped, '/images/thumb/' becomes '/images/full/', and
    'http://' is prepended.

    Returns:
        list - rewritten URLs; collection stops once the list grows
        beyond SiteBase.MAX_IMAGES_PER_RIP.
    '''
    from Httpy import Httpy
    client = Httpy()
    page = client.get(self.url)
    # To ignore user icon
    page = page[page.find('showMoreGalleries'):]
    collected = []
    for thumb in client.between(page, 'border=0 src="', '"'):
        tail = thumb[thumb.find('.') + 1:]
        full_path = tail.replace('/images/thumb/', '/images/full/')
        collected.append('http://%s' % full_path)
        if len(collected) > SiteBase.MAX_IMAGES_PER_RIP:
            break
    return collected
示例15: test
# 需要导入模块: from Httpy import Httpy [as 别名]
# 或者: from Httpy.Httpy import get [as 别名]
def test():
    '''
    Verify the 4chan ripper against the live site.

    NOTE: this excerpt stops after the first thread number is read from
    the API, so as shown the function returns nothing.

    Raises:
        Exception - if the board page is empty, or the API response
        cannot be fetched, parsed, or lacks the expected fields.
    '''
    from Httpy import Httpy
    client = Httpy()
    # Check we can hit the host
    url = 'http://boards.4chan.org/b/'
    board_page = client.get(url)
    if not board_page.strip():
        raise Exception('unable to retrieve data from %s' % url)
    # Check ripper gets images from a random album
    try:
        # Get first thread on /r/
        url = 'http://api.4chan.org/r/1.json'
        listing = loads(client.get(url))
        thread = listing['threads'][0]['posts'][0]
        number = thread['no']
    except Exception as e:
        raise Exception('failed to load %s: %s' % (url, str(e)))