This article collects typical usage examples of the get_html function from Python's util.http module. If you have been wondering what get_html does and how to call it in practice, the curated examples below should help.
The following presents 15 code examples of the get_html function, ordered by popularity.
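Before the examples, a note on the helper itself. Judging only from the call sites below, get_html fetches a URL, treats extra keyword arguments as query-string parameters, sends a POST when post_data is supplied, and parses the response with lxml into an element that supports xpath(), cssselect(), and text_content(). Here is a minimal sketch under those assumptions; it is not the actual util.http source, and it omits extras the real helper evidently handles (such as the cookies=True flag seen in Example 5):

# Hypothetical reconstruction of util.http.get_html, inferred from the
# call sites in the examples below -- not the actual util.http source.
import urllib
import urllib2

import lxml.html

def get_html(url, post_data=None, **kwargs):
    # Extra keyword arguments become query-string parameters, so
    # get_html("http://orlydb.com/", q="term") requests http://orlydb.com/?q=term
    if kwargs:
        url += ('&' if '?' in url else '?') + urllib.urlencode(kwargs)
    # Passing data to urllib2.Request turns the request into a POST,
    # which matches the post_data usage in the beeradvocate example.
    request = urllib2.Request(url, post_data)
    body = urllib2.urlopen(request).read()
    # lxml returns an element tree supporting .xpath(), .cssselect(),
    # and .text_content(), which is all the examples below rely on.
    return lxml.html.fromstring(body)

Every example can be read against this model: build a URL, fetch and parse it in one call, then pull data out with XPath or CSS selectors.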
Example 1: snopes
def snopes(inp):
    ".snopes <topic> -- searches snopes for an urban legend about <topic>"
    search_page = http.get_html(search_url, sp_q=inp, sp_c="1")
    result_urls = search_page.xpath("//a[@target='_self']/@href")
    if not result_urls:
        return "no matching pages found"
    snopes_page = http.get_html(result_urls[0])
    snopes_text = snopes_page.text_content()
    claim = re.search(r"Claim: .*", snopes_text).group(0).strip()
    status = re.search(r"Status: .*", snopes_text)
    if status is not None:
        status = status.group(0).strip()
    else:  # new-style statuses
        status = "Status: %s." % re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED",
                                           snopes_text).group(0).title()
    claim = re.sub(r"[\s\xa0]+", " ", claim)  # compress whitespace
    status = re.sub(r"[\s\xa0]+", " ", status)
    return "%s %s %s" % (claim, status, result_urls[0])
Example 2: get_beer
def get_beer(inp):
    """ search beeradvocate.com """
    search_url = "http://beeradvocate.com/search"
    base_url = "http://beeradvocate.com"
    post_dict = {
        'q': inp,
        'qt': 'beer',
    }
    results = http.get_html(search_url, post_data=urlencode(post_dict))
    try:
        result = results.xpath("//div[@id='content']/div[@class='pageWidth']"
                               "/div[@class='pageContent']/div[@class='mainContainer']"
                               "/div[@class='mainContent']/fieldset"
                               "/div[@id='baContent']/div[2]/ul/li[1]")[0]
    except IndexError:
        return "No Results"
    page_url = base_url + result.xpath('a')[0].get('href')
    scores = http.get_html(page_url).cssselect('.BAscore_big')
    beer_info = [x.text_content() for x in result.xpath('a')]
    return "%s by %s :: Community Score: %s :: Bros Score: %s :: %s" % (
        beer_info[0], beer_info[1], scores[0].text_content(),
        scores[1].text_content(), page_url)
Example 3: man
def man(inp, say=''):
    """.man <command> [section] - Returns the man page for the specified command; section defaults to 1 if not specified."""
    raw = inp.split()
    command = raw[0]
    if len(raw) == 2 and raw[1].isdigit():
        page = raw[1]
    else:
        page = "1"
    try:
        manpage = str(http.get_html(base_url, topic=command, section=page))
        # If no man page was found in the requested section, retry across all sections
        if re.match(r'.+(\>No matches for ").+', manpage):
            page = "all"
            manpage = str(http.get_html(base_url, topic=command, section=page))
        # If a man page exists for the command
        if not re.match(r'.+(\>No matches for ").+', manpage):
            if page != "all":
                say("{} - {}({})".format(web.try_googl(base_url.format(command, page)),
                                         command, page))
            else:
                say("{} - {}({}) (No section {})".format(web.try_googl(base_url.format(command, page)),
                                                         command, page, raw[1]))
        else:
            system_manpage = get_system_manpage(command)
            if system_manpage:
                haste_url = web.haste(system_manpage, ext='txt')
                googl_url = web.try_googl(haste_url)
                say("{} - {}".format(googl_url, command))
            else:
                return "There is no man page for {}.".format(command)
    except Exception as e:  # (http.HTTPError, http.URLError) as e:
        print(">>> u'HTTP Error: {}'".format(e))
        return "HTTP Error, please try again in a few minutes."
Example 4: timefunction2
def timefunction2(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True
    if '@' in inp:
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not inp:
            if not location:
                notice(time.__doc__)
                return
        else:
            # if not location: save = True
            if " dontsave" in inp:
                save = False
            location = inp.split()[0]
    # now, to get the actual time
    try:
        url = "https://time.is/%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[@id='msgdiv']/h1/a/text()")[0].strip()
        curtime = html.xpath("//div[contains(@id,'twd')]/text()")[0].strip()
        ampm = html.xpath("//div[contains(@id,'twd')]/span/text()")[0].strip()
        date = html.xpath("//h2[contains(@id,'dd')]/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."
    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)
    return u'Time in {} is \x02{} {}\x02 [{}]'.format(prefix, curtime, ampm.upper(), date)
Example 5: forum_link
def forum_link(inp, bot=None):
    if 'sa_user' not in bot.config or \
       'sa_password' not in bot.config:
        return
    login(bot.config['sa_user'], bot.config['sa_password'])
    thread = http.get_html(showthread, threadid=inp.group(1), perpage='1',
                           cookies=True)
    breadcrumbs = thread.xpath('//div[@class="breadcrumbs"]//a/text()')
    if not breadcrumbs:
        return
    thread_title = breadcrumbs[-1]
    forum_title = forum_abbrevs.get(breadcrumbs[-2], breadcrumbs[-2])
    poster = thread.xpath('//dt[@class="author"]/text()')[0]
    # 1 post per page => n_pages = n_posts
    num_posts = thread.xpath('//a[@title="last page"]/@href')
    if not num_posts:
        num_posts = 1
    else:
        num_posts = int(num_posts[0].rsplit('=', 1)[1])
    return '\x02%s\x02 > \x02%s\x02 by \x02%s\x02, %s post%s' % (
        forum_title, thread_title, poster, num_posts,
        's' if num_posts > 1 else '')
Example 6: mtg
def mtg(inp):
    """.mtg <name> - Gets information about Magic the Gathering card <name>."""
    url = 'http://magiccards.info/query?v=card&s=cname'
    h = http.get_html(url, q=inp)
    name = h.find('body/table/tr/td/span/a')
    if name is None:
        return "no cards found"
    card = name.getparent().getparent().getparent()
    type = card.find('td/p').text.replace('\n', '')
    # this is ugly
    text = http.html.tostring(card.xpath("//p[@class='ctext']/b")[0])
    text = text.replace('<br>', '$')
    text = http.html.fromstring(text).text_content()
    text = re.sub(r'(\w+\s*)\$+(\s*\w+)', r'\1. \2', text)
    text = text.replace('$', ' ')
    text = re.sub(r'\(.*?\)', '', text)  # strip parenthetical explanations
    text = re.sub(r'\.(\S)', r'. \1', text)  # fix spacing
    name.make_links_absolute(base_url=url)
    link = name.attrib['href']
    name = name.text_content().strip()
    type = type.strip()
    text = ' '.join(text.split())
    return ' | '.join((' '.join(name.split()),
                       ' '.join(type.split()),
                       ' '.join(text.split()),
                       link))
Example 7: readtitle
def readtitle(match, say=None, nick=None):
    parsed_url = match.group().split(' ')[0]
    if any(word in parsed_url for word in skipurls):
        return
    try:
        request_url = http.get_html(parsed_url)
    except http.HTTPError as e:
        errors = {400: 'bad request (ratelimited?) 400',
                  401: 'unauthorized 401',
                  403: 'forbidden 403',
                  404: 'invalid user/id 404',
                  410: 'something something 410',
                  500: 'something is broken 500',
                  502: 'something is down ("getting upgraded?") 502',
                  503: 'something is overloaded 503'}
        if e.code == 404:
            return 'bad url?'
        if e.code in errors:
            return 'error: ' + errors[e.code]
        return 'error: unknown %s' % e.code
    try:
        titleget = request_url.xpath('//title/text()')[0]
        titleuni = " - " + unicode(titleget.strip())
    except IndexError:
        titleuni = ""
    shorturl = web.try_googl(parsed_url)
    say(shorturl + titleuni)
Example 8: fixchewy
def fixchewy(match, chan='', input=None):
    if match.group(1) is not None:
        if "Error" not in input.lastparam or "L: " in input.lastparam:
            return
        card = match.group(1)
    else:
        url = match.group(2)
        result = http.get_html(url)
        card = result.xpath('//title')[0].text.split("(")[0].strip()
    result = http.get_json("http://api.deckbrew.com/mtg/cards", name=card)
    if len(result) == 0:
        return
    for cards in result:
        if cards['name'].lower() == card.lower():
            card = cards
            break
    price = None  # guard against cards with no priced, non-promo edition
    for edition in card['editions']:
        if edition['set_id'][:1] != "p" and 'price' in edition:
            price = edition['price']
            break
    if price:
        prices = "L: $%s M: $%s H: $%s" % ('{:.2f}'.format(price['low'] / 100.),
                                           '{:.2f}'.format(price['median'] / 100.),
                                           '{:.2f}'.format(price['high'] / 100.))
        return "chewy's bot sucks here are prices: %s" % prices
Example 9: define
def define(text):
    """define <word> -- Fetches definition of <word>.
    :type text: str
    """
    url = 'http://ninjawords.com/'
    h = http.get_html(url + http.quote_plus(text))
    definition = h.xpath('//dd[@class="article"] | '
                         '//div[@class="definition"] | '
                         '//div[@class="example"]')
    if not definition:
        return 'No results for ' + text + ' :('
    result = format_output(h, definition, True)
    if len(result) > 450:
        result = format_output(h, definition, False)
        if len(result) > 450:
            result = result[:result.rfind(' ', 0, 450)]
            result = re.sub(r'[^A-Za-z]+\.?$', '', result) + ' ...'
    return result
Example 10: amazon
def amazon(inp):
    """az [query] -- Searches amazon for query"""
    href = "http://www.amazon.com/s/url=search-alias%3Daps&field-keywords={}".format(inp.replace(" ", "%20"))
    results = http.get_html(href)
    try:
        title = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    except IndexError:
        # fall back to the second result if the first is missing any field
        title = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    azid = re.match(r'^.*\/dp\/([\w]+)\/.*', url).group(1)
    star_count = round(float(rating.split(' ')[0]), 0)
    stars = ""
    for x in xrange(0, int(star_count)):
        stars = "{}{}".format(stars, '★')
    for y in xrange(int(star_count), 5):
        stars = "{}{}".format(stars, '☆')
    return '\x02{}\x02 - {} - \x034{}\x02 - http://amzn.com/{}'.format(title, stars, price, azid).decode('utf-8')
Example 11: time
def time(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True
    if '@' in inp:
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not inp:
            if not location:
                notice(time.__doc__)
                return
        else:
            if not location:
                save = True
            if " save" in inp:
                save = True
            location = inp.split()[0]
    # now, to get the actual time
    try:
        url = "https://www.google.com/search?q=time+in+%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[contains(@class,'vk_c vk_gy')]//span[@class='vk_gy vk_sh']/text()")[0].strip()
        curtime = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_bk vk_ans']/text()")[0].strip()
        day = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/text()")[0].strip()
        date = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/span/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."
    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)
    return u'{} is \x02{}\x02 [{} {}]'.format(prefix, curtime, day, date)
Example 12: predb
def predb(inp):
    '.predb <query> -- searches scene releases using orlydb.com'
    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except HTTPError:
        return 'orlydb seems to be down'
    results = h.xpath("//div[@id='releases']/div/span[@class='release']/..")
    if not results:
        return "zero results"
    result = results[0]
    date, time = result.xpath("span[@class='timestamp']/text()")[0].split()
    section, = result.xpath("span[@class='section']//text()")
    name, = result.xpath("span[@class='release']/text()")
    size = result.xpath("span[@class='inforight']//text()")
    if size:
        size = ' :: ' + size[0].split()[0]
    else:
        size = ''
    return '%s - %s - %s%s' % (date, section, name, size)
Example 13: ebay_url
def ebay_url(match, bot):
    apikey = bot.config.get("api_keys", {}).get("ebay")
    # if apikey:
    #     # ebay_item_re = (r'http:.+ebay.com/.+/(\d+).+', re.I)
    #     itemid = re.match('http:.+ebay.com/.+/(\d+).+', match, re.I)
    #     url = 'http://open.api.ebay.com/shopping?callname=GetSingleItem&responseencoding=JSON&appid={}&siteid=0&version=515&ItemID={}&IncludeSelector=Description,ItemSpecifics'.format(apikey, itemid.group(1))
    #     print url
    # else:
    print "No eBay api key set."
    item = http.get_html(match)
    title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
    price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
    if not price:
        price = item.xpath("//span[@id='prcIsum']/text()")
    if not price:
        price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
    if price:
        price = price[0].strip()
    else:
        price = '?'
    try:
        bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
    except IndexError:
        bids = "Buy It Now"
    feedback = item.xpath("//span[@class='w2b-head']/text()")
    if not feedback:
        feedback = item.xpath("//div[@id='si-fb']/text()")
    if feedback:
        feedback = feedback[0].strip()
    else:
        feedback = '?'
    return http.process_text("\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(title, price, bids, feedback))
Example 14: legal
def legal(inp):
    now = datetime.datetime.now()
    name = inp.replace(' ', '_')
    html = http.get_html('http://rottentomatoes.com/celebrity/%s/' % name)
    date = html.xpath('//dl[@class="bottom_divider"]/dd/text()')[0]
    info = date.split(' ')
    month = info[0]
    birth_day = info[1].strip(",")
    birth_year = info[2]
    birth_month = months[month]
    birthdate = datetime.date(int(birth_year), int(birth_month), int(birth_day))
    age = now.year - int(birth_year)
    if age >= 18:
        return "legal - is %s" % age
    else:
        year_18 = int(birth_year) + 18
        birthday_18 = "%s %s %s" % (birth_day, full_month[month], year_18)
        return "%s will be 18 in %s" % (inp, timesince.timeuntil(birthdate, now=birthday_18))
Example 15: urban
def urban(inp):
    '''.u/.urban <phrase> [#] -- looks up <phrase> with [#] definition on urbandictionary.com'''
    args = inp.split(" ")
    # Look for a trailing number to select among definitions, optionally
    if len(args) > 1:
        try:
            int(args[-1])  # validate first, so a non-number is kept as part of the phrase
            number = int(args.pop())
            index = number - 1
        except ValueError:
            index = 0
    else:
        index = 0
    args = " ".join(args)
    url = 'http://www.urbandictionary.com/define.php'
    page = http.get_html(url, term=args)
    words = page.xpath("//*[@id='entries']/div/span")
    defs = page.xpath("//div[@class='definition']")
    if not defs:
        return 'no definitions found'
    # Put together a string from the xpath results.
    out = words[index].text.strip() + ': ' + ' '.join(
        defs[index].text.split())
    if len(out) > 400:
        out = out[:out.rfind(' ', 0, 400)] + '...'
    return out