This article collects typical usage examples of the get_html function from Python's util.http module. If you have been wondering what get_html does and how to call it in practice, the curated examples below should help.
The following presents 15 code examples of the get_html function, ordered by popularity.
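Before the examples, a note on the helper itself. Judging only from the call sites below, get_html fetches a URL, treats extra keyword arguments as query-string parameters, sends a POST when post_data is supplied, and parses the response with lxml into an element that supports xpath(), cssselect(), and text_content(). Here is a minimal sketch under those assumptions; it is not the actual util.http source, and it omits extras the real helper evidently handles (such as the cookies=True flag seen in Example 5):

# Hypothetical reconstruction of util.http.get_html, inferred from the
# call sites in the examples below -- not the actual util.http source.
import urllib
import urllib2

import lxml.html

def get_html(url, post_data=None, **kwargs):
    # Extra keyword arguments become query-string parameters, so
    # get_html("http://orlydb.com/", q="term") requests http://orlydb.com/?q=term
    if kwargs:
        url += ('&' if '?' in url else '?') + urllib.urlencode(kwargs)
    # Passing data to urllib2.Request turns the request into a POST,
    # which matches the post_data usage in the beeradvocate example.
    request = urllib2.Request(url, post_data)
    body = urllib2.urlopen(request).read()
    # lxml returns an element tree supporting .xpath(), .cssselect(),
    # and .text_content(), which is all the examples below rely on.
    return lxml.html.fromstring(body)

Every example can be read against this model: build a URL, fetch and parse it in one call, then pull data out with XPath or CSS selectors.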
Example 1: snopes
def snopes(inp):
    ".snopes <topic> -- searches snopes for an urban legend about <topic>"
    search_page = http.get_html(search_url, sp_q=inp, sp_c="1")
    result_urls = search_page.xpath("//a[@target='_self']/@href")
    if not result_urls:
        return "no matching pages found"
    snopes_page = http.get_html(result_urls[0])
    snopes_text = snopes_page.text_content()
    claim = re.search(r"Claim: .*", snopes_text).group(0).strip()
    status = re.search(r"Status: .*", snopes_text)
    if status is not None:
        status = status.group(0).strip()
    else:  # new-style statuses
        status = "Status: %s." % re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED",
                                           snopes_text).group(0).title()
    claim = re.sub(r"[\s\xa0]+", " ", claim)  # compress whitespace
    status = re.sub(r"[\s\xa0]+", " ", status)
    return "%s %s %s" % (claim, status, result_urls[0])
Example 2: get_beer
def get_beer(inp):
    """ search beeradvocate.com """
    search_url = "http://beeradvocate.com/search"
    base_url = "http://beeradvocate.com"
    post_dict = {
        'q': inp,
        'qt': 'beer',
    }
    results = http.get_html(search_url, post_data=urlencode(post_dict))
    try:
        result = results.xpath("//div[@id='content']/div[@class='pageWidth']"
                               "/div[@class='pageContent']/div[@class='mainContainer']"
                               "/div[@class='mainContent']/fieldset"
                               "/div[@id='baContent']/div[2]/ul/li[1]")[0]
    except IndexError:
        return "No Results"
    page_url = base_url + result.xpath('a')[0].get('href')
    scores = http.get_html(page_url).cssselect('.BAscore_big')
    beer_info = [x.text_content() for x in result.xpath('a')]
    return "%s by %s :: Community Score: %s :: Bros Score: %s :: %s" % (
        beer_info[0], beer_info[1], scores[0].text_content(),
        scores[1].text_content(), page_url)
Example 3: man
def man(inp, say=''):
    """.man <command> [section] - Returns the man page for the specified command; section defaults to 1 if not specified."""
    raw = inp.split()
    command = raw[0]
    if len(raw) == 2 and raw[1].isdigit():
        page = raw[1]
    else:
        page = "1"
    try:
        manpage = str(http.get_html(base_url, topic=command, section=page))
        # If no man page was found in the requested section, retry across all sections
        if re.match(r'.+(\>No matches for ").+', manpage):
            page = "all"
            manpage = str(http.get_html(base_url, topic=command, section=page))
        # If a man page exists for the command
        if not re.match(r'.+(\>No matches for ").+', manpage):
            if page != "all":
                say("{} - {}({})".format(web.try_googl(base_url.format(command, page)),
                                         command, page))
            else:
                say("{} - {}({}) (No section {})".format(web.try_googl(base_url.format(command, page)),
                                                         command, page, raw[1]))
        else:
            system_manpage = get_system_manpage(command)
            if system_manpage:
                haste_url = web.haste(system_manpage, ext='txt')
                googl_url = web.try_googl(haste_url)
                say("{} - {}".format(googl_url, command))
            else:
                return "There is no man page for {}.".format(command)
    except Exception as e:  # (http.HTTPError, http.URLError) as e:
        print(">>> u'HTTP Error: {}'".format(e))
        return "HTTP Error, please try again in a few minutes."
Example 4: timefunction2
def timefunction2(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True
    if '@' in inp:
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not inp:
            if not location:
                notice(time.__doc__)
                return
        else:
            # if not location: save = True
            if " dontsave" in inp:
                save = False
            location = inp.split()[0]
    # now, to get the actual time
    try:
        url = "https://time.is/%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[@id='msgdiv']/h1/a/text()")[0].strip()
        curtime = html.xpath("//div[contains(@id,'twd')]/text()")[0].strip()
        ampm = html.xpath("//div[contains(@id,'twd')]/span/text()")[0].strip()
        date = html.xpath("//h2[contains(@id,'dd')]/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."
    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)
    return u'Time in {} is \x02{} {}\x02 [{}]'.format(prefix, curtime, ampm.upper(), date)
Example 5: forum_link
def forum_link(inp, bot=None):
    if 'sa_user' not in bot.config or \
       'sa_password' not in bot.config:
        return
    login(bot.config['sa_user'], bot.config['sa_password'])
    thread = http.get_html(showthread, threadid=inp.group(1), perpage='1',
                           cookies=True)
    breadcrumbs = thread.xpath('//div[@class="breadcrumbs"]//a/text()')
    if not breadcrumbs:
        return
    thread_title = breadcrumbs[-1]
    forum_title = forum_abbrevs.get(breadcrumbs[-2], breadcrumbs[-2])
    poster = thread.xpath('//dt[@class="author"]/text()')[0]
    # 1 post per page => n_pages = n_posts
    num_posts = thread.xpath('//a[@title="last page"]/@href')
    if not num_posts:
        num_posts = 1
    else:
        num_posts = int(num_posts[0].rsplit('=', 1)[1])
    return '\x02%s\x02 > \x02%s\x02 by \x02%s\x02, %s post%s' % (
        forum_title, thread_title, poster, num_posts,
        's' if num_posts > 1 else '')
Example 6: mtg
def mtg(inp):
    """.mtg <name> - Gets information about Magic the Gathering card <name>."""
    url = 'http://magiccards.info/query?v=card&s=cname'
    h = http.get_html(url, q=inp)
    name = h.find('body/table/tr/td/span/a')
    if name is None:
        return "no cards found"
    card = name.getparent().getparent().getparent()
    type = card.find('td/p').text.replace('\n', '')
    # this is ugly
    text = http.html.tostring(card.xpath("//p[@class='ctext']/b")[0])
    text = text.replace('<br>', '$')
    text = http.html.fromstring(text).text_content()
    text = re.sub(r'(\w+\s*)\$+(\s*\w+)', r'\1. \2', text)
    text = text.replace('$', ' ')
    text = re.sub(r'\(.*?\)', '', text)  # strip parenthetical explanations
    text = re.sub(r'\.(\S)', r'. \1', text)  # fix spacing
    name.make_links_absolute(base_url=url)
    link = name.attrib['href']
    name = name.text_content().strip()
    type = type.strip()
    text = ' '.join(text.split())
    return ' | '.join((' '.join(name.split()),
                       ' '.join(type.split()),
                       ' '.join(text.split()),
                       link))
Example 7: readtitle
def readtitle(match, say=None, nick=None):
    parsed_url = match.group().split(' ')[0]
    if any(word in parsed_url for word in skipurls):
        return
    try:
        request_url = http.get_html(parsed_url)
    except http.HTTPError as e:
        errors = {400: 'bad request (ratelimited?) 400',
                  401: 'unauthorized 401',
                  403: 'forbidden 403',
                  404: 'invalid user/id 404',
                  410: 'something something 410',
                  500: 'something is broken 500',
                  502: 'something is down ("getting upgraded?") 502',
                  503: 'something is overloaded 503'}
        if e.code == 404:
            return 'bad url?'
        if e.code in errors:
            return 'error: ' + errors[e.code]
        return 'error: unknown %s' % e.code
    try:
        titleget = request_url.xpath('//title/text()')[0]
        titleuni = " - " + unicode(titleget.strip())
    except IndexError:
        titleuni = ""
    shorturl = web.try_googl(parsed_url)
    say(shorturl + titleuni)
Example 8: fixchewy
def fixchewy(match, chan='', input=None):
    if match.group(1) is not None:
        if "Error" not in input.lastparam or "L: " in input.lastparam:
            return
        card = match.group(1)
    else:
        url = match.group(2)
        result = http.get_html(url)
        card = result.xpath('//title')[0].text.split("(")[0].strip()
    result = http.get_json("http://api.deckbrew.com/mtg/cards", name=card)
    if len(result) == 0:
        return
    for cards in result:
        if cards['name'].lower() == card.lower():
            card = cards
            break
    price = None  # guard against cards with no priced, non-promo edition
    for edition in card['editions']:
        if edition['set_id'][:1] != "p" and 'price' in edition:
            price = edition['price']
            break
    if price:
        prices = "L: $%s M: $%s H: $%s" % ('{:.2f}'.format(price['low'] / 100.),
                                           '{:.2f}'.format(price['median'] / 100.),
                                           '{:.2f}'.format(price['high'] / 100.))
        return "chewy's bot sucks here are prices: %s" % prices
Example 9: define
def define(text):
    """define <word> -- Fetches definition of <word>.
    :type text: str
    """
    url = 'http://ninjawords.com/'
    h = http.get_html(url + http.quote_plus(text))
    definition = h.xpath('//dd[@class="article"] | '
                         '//div[@class="definition"] | '
                         '//div[@class="example"]')
    if not definition:
        return 'No results for ' + text + ' :('
    result = format_output(h, definition, True)
    if len(result) > 450:
        result = format_output(h, definition, False)
        if len(result) > 450:
            result = result[:result.rfind(' ', 0, 450)]
            result = re.sub(r'[^A-Za-z]+\.?$', '', result) + ' ...'
    return result
Example 10: amazon
def amazon(inp):
    """az [query] -- Searches amazon for query"""
    href = "http://www.amazon.com/s/url=search-alias%3Daps&field-keywords={}".format(inp.replace(" ", "%20"))
    results = http.get_html(href)
    try:
        title = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    except IndexError:
        # fall back to the second result if the first is missing any field
        title = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    azid = re.match(r'^.*\/dp\/([\w]+)\/.*', url).group(1)
    star_count = round(float(rating.split(' ')[0]), 0)
    stars = ""
    for x in xrange(0, int(star_count)):
        stars = "{}{}".format(stars, '★')
    for y in xrange(int(star_count), 5):
        stars = "{}{}".format(stars, '☆')
    return '\x02{}\x02 - {} - \x034{}\x02 - http://amzn.com/{}'.format(title, stars, price, azid).decode('utf-8')
Example 11: time
def time(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True
    if '@' in inp:
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not inp:
            if not location:
                notice(time.__doc__)
                return
        else:
            if not location:
                save = True
            if " save" in inp:
                save = True
            location = inp.split()[0]
    # now, to get the actual time
    try:
        url = "https://www.google.com/search?q=time+in+%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[contains(@class,'vk_c vk_gy')]//span[@class='vk_gy vk_sh']/text()")[0].strip()
        curtime = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_bk vk_ans']/text()")[0].strip()
        day = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/text()")[0].strip()
        date = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/span/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."
    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)
    return u'{} is \x02{}\x02 [{} {}]'.format(prefix, curtime, day, date)
Example 12: predb
def predb(inp):
    '.predb <query> -- searches scene releases using orlydb.com'
    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except HTTPError:
        return 'orlydb seems to be down'
    results = h.xpath("//div[@id='releases']/div/span[@class='release']/..")
    if not results:
        return "zero results"
    result = results[0]
    date, time = result.xpath("span[@class='timestamp']/text()")[0].split()
    section, = result.xpath("span[@class='section']//text()")
    name, = result.xpath("span[@class='release']/text()")
    size = result.xpath("span[@class='inforight']//text()")
    if size:
        size = ' :: ' + size[0].split()[0]
    else:
        size = ''
    return '%s - %s - %s%s' % (date, section, name, size)
Example 13: ebay_url
def ebay_url(match, bot):
    apikey = bot.config.get("api_keys", {}).get("ebay")
    # if apikey:
    #     # ebay_item_re = (r'http:.+ebay.com/.+/(\d+).+', re.I)
    #     itemid = re.match('http:.+ebay.com/.+/(\d+).+', match, re.I)
    #     url = 'http://open.api.ebay.com/shopping?callname=GetSingleItem&responseencoding=JSON&appid={}&siteid=0&version=515&ItemID={}&IncludeSelector=Description,ItemSpecifics'.format(apikey, itemid.group(1))
    #     print url
    # else:
    print "No eBay api key set."
    item = http.get_html(match)
    title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
    price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
    if not price:
        price = item.xpath("//span[@id='prcIsum']/text()")
    if not price:
        price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
    if price:
        price = price[0].strip()
    else:
        price = '?'
    try:
        bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
    except IndexError:
        bids = "Buy It Now"
    feedback = item.xpath("//span[@class='w2b-head']/text()")
    if not feedback:
        feedback = item.xpath("//div[@id='si-fb']/text()")
    if feedback:
        feedback = feedback[0].strip()
    else:
        feedback = '?'
    return http.process_text("\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(title, price, bids, feedback))
Example 14: legal
def legal(inp):
    now = datetime.datetime.now()
    name = inp.replace(' ', '_')
    html = http.get_html('http://rottentomatoes.com/celebrity/%s/' % name)
    date = html.xpath('//dl[@class="bottom_divider"]/dd/text()')[0]
    info = date.split(' ')
    month = info[0]
    birth_day = info[1].strip(",")
    birth_year = info[2]
    birth_month = months[month]
    birthdate = datetime.date(int(birth_year), int(birth_month), int(birth_day))
    age = now.year - int(birth_year)
    if age >= 18:
        return "legal - is %s" % age
    else:
        year_18 = int(birth_year) + 18
        birthday_18 = "%s %s %s" % (birth_day, full_month[month], year_18)
        return "%s will be 18 in %s" % (inp, timesince.timeuntil(birthdate, now=birthday_18))
Example 15: urban
def urban(inp):
    '''.u/.urban <phrase> [#] -- looks up <phrase> with [#] definition on urbandictionary.com'''
    args = inp.split(" ")
    # Look for a trailing number to select among definitions, optionally
    if len(args) > 1:
        try:
            int(args[-1])  # validate first, so a non-number is kept as part of the phrase
            number = int(args.pop())
            index = number - 1
        except ValueError:
            index = 0
    else:
        index = 0
    args = " ".join(args)
    url = 'http://www.urbandictionary.com/define.php'
    page = http.get_html(url, term=args)
    words = page.xpath("//*[@id='entries']/div/span")
    defs = page.xpath("//div[@class='definition']")
    if not defs:
        return 'no definitions found'
    # Put together a string from the xpath results.
    out = words[index].text.strip() + ': ' + ' '.join(
        defs[index].text.split())
    if len(out) > 400:
        out = out[:out.rfind(' ', 0, 400)] + '...'
    return out