當前位置: 首頁>>代碼示例>>Python>>正文


Python html.parse方法代碼示例

本文整理匯總了Python中lxml.html.parse方法的典型用法代碼示例。如果您正苦於以下問題:Python html.parse方法的具體用法?Python html.parse怎麼用?Python html.parse使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在lxml.html的用法示例。


在下文中一共展示了html.parse方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: _parse_tables

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def _parse_tables(self, doc, match, attrs):
        """
        Return all tables from the parsed DOM.

        The body here only raises ``AbstractMethodError``; the contract
        described below is presumably the one overriding implementations
        are expected to honor.

        Parameters
        ----------
        doc : tree-like
            The DOM from which to parse the table element.

        match : str or regular expression
            The text to search for in the DOM tree.

        attrs : dict
            A dictionary of table attributes that can be used to disambiguate
            multiple tables on a page.

        Raises
        ------
        ValueError
            If `match` does not match any text in the document.
        AbstractMethodError
            Always, when this un-overridden implementation is called.

        Returns
        -------
        list of node-like
            HTML <table> elements to be parsed into raw data.
        """
        # Placeholder body: unconditionally raises, handing this instance to
        # the error.
        raise AbstractMethodError(self) 
開發者ID:Frank-qlu,項目名稱:recruit,代碼行數:27,代碼來源:html.py

示例2: _build_doc

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def _build_doc(self):
        """
        Return a tree-like object that can be used to iterate over the DOM.

        The body here only raises ``AbstractMethodError``; the return
        contract below is presumably meant for overriding implementations.

        Returns
        -------
        node-like
            The DOM from which to parse the table element.

        Raises
        ------
        AbstractMethodError
            Always, when this un-overridden implementation is called.
        """
        # Placeholder body: unconditionally raises, handing this instance to
        # the error.
        raise AbstractMethodError(self) 
開發者ID:Frank-qlu,項目名稱:recruit,代碼行數:12,代碼來源:html.py

示例3: _parse

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
    """
    Try each parser flavor in turn and convert the parsed tables.

    Parameters
    ----------
    flavor
        Passed through ``_validate_flavor``; the result is iterated and each
        item is resolved to a parser via ``_parser_dispatch``.
    io
        The input handed to each parser. If it exposes ``seekable()`` it is
        rewound after a failed attempt.
    match : str or compiled regular expression
        Compiled with ``re.compile`` and handed to each parser.
    attrs, encoding, displayed_only
        Forwarded unchanged to the parser constructor.
    **kwargs
        Forwarded to ``_data_to_frame`` for every table.

    Returns
    -------
    list
        One ``_data_to_frame`` result per table; tables for which it raises
        ``EmptyDataError`` are skipped.

    Raises
    ------
    ValueError
        If a flavor fails and `io` reports that it is not seekable.

    Notes
    -----
    If every flavor fails, the last caught exception is passed to
    ``raise_with_traceback``.
    """
    flavor = _validate_flavor(flavor)
    # re.compile accepts an already-compiled pattern as well as a string.
    pattern = re.compile(match)

    # Python 3 unbinds the ``except ... as`` name when the handler exits, so
    # the last failure is kept under a separate name for the for/else below.
    last_error = None
    for flav in flavor:
        parser_cls = _parser_dispatch(flav)
        candidate = parser_cls(io, pattern, attrs, encoding, displayed_only)

        try:
            tables = candidate.parse_tables()
        except Exception as caught:
            # Only io-like objects advertise seekable(); anything else is
            # simply handed to the next parser as-is.
            if hasattr(io, 'seekable'):
                if io.seekable():
                    # Rewind so the next parser starts from the beginning.
                    io.seek(0)
                else:
                    # The input is already consumed and cannot be replayed.
                    raise ValueError('The flavor {} failed to parse your input. '
                                     'Since you passed a non-rewindable file '
                                     'object, we can\'t rewind it to try '
                                     'another parser. Try read_html() with a '
                                     'different flavor.'.format(flav))

            last_error = caught
        else:
            # First flavor that parses successfully wins.
            break
    else:
        # No flavor succeeded: surface the most recent failure.
        raise_with_traceback(last_error)

    frames = []
    for table in tables:
        try:
            frame = _data_to_frame(data=table, **kwargs)
        except EmptyDataError:  # empty table
            continue
        frames.append(frame)
    return frames
開發者ID:Frank-qlu,項目名稱:recruit,代碼行數:40,代碼來源:html.py

示例4: fetch_through_redirects

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def fetch_through_redirects(url, max_redirects=20):
    """
    Fetch *url*, following zero-delay ``<meta>`` refresh redirects.

    Each page is requested with ``requests.get`` (streamed, 10 second
    timeout, certifi CA bundle, ``USER_AGENT`` header) and parsed with
    ``html.parse``. Every element returned by ``META_XPATH(tree)`` is checked
    for a ``content`` attribute of the form ``0; url='...'``; the first match
    becomes the next URL to fetch.

    Parameters
    ----------
    url : str
        The address to start from.
    max_redirects : int, optional
        Upper bound on the number of ``<meta>`` redirects followed, so that
        pages refreshing to each other cannot loop forever.

    Returns
    -------
    tuple
        ``(resp, tree)`` for the final page. ``resp`` has already been closed
        when it is returned.

    Raises
    ------
    Not200
        If any response has a status code other than 200.
    requests.exceptions.TooManyRedirects
        If more than `max_redirects` ``<meta>`` redirects are encountered.
    """
    from urllib.parse import urljoin

    tree = None
    for _ in range(max_redirects + 1):
        resp = requests.get(
            url,
            verify=certifi.where(),
            headers={"User-Agent": USER_AGENT},
            timeout=10,
            stream=True,
        )
        try:
            if resp.status_code != 200:
                raise Not200(resp.status_code)
            # Convince urllib3 to decode gzipped pages.
            resp.raw.decode_content = True
            tree = html.parse(resp.raw)
        finally:
            resp.close()
        # Check for sneaky <meta> redirects.
        for meta in META_XPATH(tree):
            # An element without a content attribute yields None, which
            # re.match() would reject with a TypeError.
            content = meta.get("content") or ""
            m = re.match(r"0;\s*url=['\"](.+?)['\"]", content)
            if m is not None:
                # The target may be relative; resolve it against the page
                # that was actually fetched.
                url = urljoin(resp.url, m.group(1))
                break
        else:
            # No redirect found on this page: it is the final destination.
            return resp, tree
    raise requests.exceptions.TooManyRedirects(
        "Exceeded {} <meta> refresh redirects".format(max_redirects)
    )
開發者ID:benjaminp,項目名稱:httpswatch,代碼行數:31,代碼來源:check_https.py

示例5: main

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def main():
    """
    Write the mangapanda alphabetical series index to ``mangalist.csv``.

    The page is parsed with ``parse`` and every link under
    ``//ul[@class='series_alpha']/li/a`` becomes one output line of the form
    ``"<name>", http://www.mangapanda.com<href>``, preceded by a header line.
    Double quotes are stripped from names so they cannot break the quoting.
    """
    # Fetch and parse first: if the download fails, an existing mangalist.csv
    # is left untouched instead of being truncated to an empty file.
    tree = parse("http://www.mangapanda.com/alphabetical")
    links = tree.xpath("//ul[@class='series_alpha']/li/a")

    with open("mangalist.csv", "w") as f:
        f.write("\"Manga Name\", URL\n")

        for link in links:
            href = link.get("href")
            if href is None:
                # A link without a target has no URL to record.
                continue
            # Read name and href from the same element so the two can never
            # drift out of step, even when a link has zero or several text
            # nodes.
            name = "".join(link.xpath("text()"))
            f.write("\"{0}\", http://www.mangapanda.com{1}\n".format(name.replace("\"", ""), href))
開發者ID:AnimeshShaw,項目名稱:MangaScrapper,代碼行數:11,代碼來源:MangaList.py

示例6: _parse_tables

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def _parse_tables(self, doc, match, attrs):
        """Return all tables from the parsed DOM.

        The body here only raises ``com.AbstractMethodError``; the contract
        described below is presumably the one overriding implementations
        are expected to honor.

        Parameters
        ----------
        doc : tree-like
            The DOM from which to parse the table element.

        match : str or regular expression
            The text to search for in the DOM tree.

        attrs : dict
            A dictionary of table attributes that can be used to disambiguate
            multiple tables on a page.

        Raises
        ------
        ValueError
            * If `match` does not match any text in the document.
        AbstractMethodError
            * Always, when this un-overridden implementation is called.

        Returns
        -------
        tables : list of node-like
            A list of <table> elements to be parsed into raw data.
        """
        # Placeholder body: unconditionally raises, handing this instance to
        # the error.
        raise com.AbstractMethodError(self) 
開發者ID:birforce,項目名稱:vnpy_crypto,代碼行數:28,代碼來源:html.py

示例7: _parse_tables

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def _parse_tables(self, doc, match, attrs):
        """Return all tables from the parsed DOM.

        The body here only raises ``NotImplementedError``; the contract
        described below is presumably the one overriding implementations
        are expected to honor.

        Parameters
        ----------
        doc : tree-like
            The DOM from which to parse the table element.

        match : str or regular expression
            The text to search for in the DOM tree.

        attrs : dict
            A dictionary of table attributes that can be used to disambiguate
            multiple tables on a page.

        Raises
        ------
        ValueError
            * If `match` does not match any text in the document.
        NotImplementedError
            * Always, when this un-overridden implementation is called.

        Returns
        -------
        tables : list of node-like
            A list of <table> elements to be parsed into raw data.
        """
        # Placeholder body: unconditionally raises.
        raise NotImplementedError 
開發者ID:ktraunmueller,項目名稱:Computable,代碼行數:28,代碼來源:html.py

示例8: get_lxml_elements

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def get_lxml_elements(url, element):
    """
    Parse *url* with lxml and return every descendant matching *element*.

    ``_skip_if_no('lxml')`` is called first, before lxml is imported. The
    lookup is the XPath query ``.//<element>`` evaluated on the parsed
    document.
    """
    _skip_if_no('lxml')
    # Imported lazily, only after the availability check above has run.
    from lxml.html import parse
    tree = parse(url)
    query = './/{0}'.format(element)
    return tree.xpath(query)
開發者ID:ktraunmueller,項目名稱:Computable,代碼行數:7,代碼來源:test_html.py

示例9: parse_rss

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def parse_rss(url=None, **kwargs):
    """
    Parse the feed at *url* with ``rssparser`` and return the result.

    The source is first opened with ``fetch(decode(url), **kwargs)``. If that
    raises ``ValueError`` or ``URLError``, the raw *url* is handed straight to
    ``rssparser.parse`` instead. Otherwise the parser receives the handle's
    full contents when ``speedparser`` is truthy, or the open handle itself
    when it is not.

    Parameters
    ----------
    url : optional
        The feed location; passed through ``decode`` before fetching.
    **kwargs
        Forwarded to ``fetch``.

    Returns
    -------
    The value returned by ``rssparser.parse``.
    """
    try:
        f = fetch(decode(url), **kwargs)
    except (ValueError, URLError):
        # Could not open the source ourselves; let the parser try the raw url.
        return rssparser.parse(url)

    # Reading happens inside the try so the handle is closed even when
    # f.read() itself fails, not only when parsing fails.
    try:
        content = f.read() if speedparser else f
        return rssparser.parse(content)
    finally:
        f.close()
開發者ID:nerevu,項目名稱:riko,代碼行數:16,代碼來源:parsers.py

示例10: xml2etree

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def xml2etree(f, xml=True, html5=False):
    """
    Parse *f* and return the resulting tree.

    Parameters
    ----------
    f
        The source handed to the selected parser's ``parse`` function.
    xml : bool, optional
        When truthy, ``etree.parse`` is used and the other options are
        ignored.
    html5 : bool, optional
        When truthy and ``html5parser`` is available, ``html5parser.parse``
        is used instead of ``html.parse``.

    Returns
    -------
    The parser's result. When ``html5parser`` is falsy, the result of
    ``html.parse`` is wrapped in ``ElementTree``.
    """
    if xml:
        return etree.parse(f)

    if html5parser:
        # html5parser is available: use it only when HTML5 parsing was
        # explicitly requested, otherwise use the regular html parser.
        chosen = html5parser if html5 else html
        return chosen.parse(f)

    # html5lib's parser returns an Element, so we must convert it into an
    # ElementTree
    return ElementTree(html.parse(f))
開發者ID:nerevu,項目名稱:riko,代碼行數:15,代碼來源:parsers.py

示例11: grab_trending_gif_urls

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def grab_trending_gif_urls():
    """
    Return rewritten image URLs for the first ten GIF links on giphy.com.

    The front page is parsed and the elements matching the CSS selector
    ``.gif-link img`` are collected, keeping at most ten. In each ``src``
    value the segment matching ``/<name>.gif`` is replaced with
    ``/giphy.gif`` and ``http:`` is prepended.
    """
    root = parse("http://giphy.com").getroot()
    images = root.cssselect(".gif-link img")[:10]
    return [
        "http:" + re.sub(r"\/([^./])*\.gif", "/giphy.gif", img.attrib['src'])
        for img in images
    ]
開發者ID:agermanidis,項目名稱:SnapchatBot,代碼行數:9,代碼來源:gifbot.py

示例12: parse_html

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def parse_html(fileobj, encoding):
    """
    Parse the file object *fileobj* and return the result of ``parse``.

    An ``HTMLParser`` is built with the given *encoding* and with
    ``remove_blank_text=True``, then used to parse *fileobj*.
    """
    return parse(
        fileobj,
        HTMLParser(encoding=encoding, remove_blank_text=True),
    )
開發者ID:datalib,項目名稱:libextract,代碼行數:9,代碼來源:core.py

示例13: extract

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def extract(response):
    """Parse *response* and return the XPath result for ``//h1/text()``."""
    return parse(response).xpath('//h1/text()')
開發者ID:eBay,項目名稱:wextracto,代碼行數:5,代碼來源:tutorial.py

示例14: extract

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def extract(response):
    """
    Yield ``(label, value)`` pairs extracted from *response*.

    The response is parsed once; each value is ``text()`` applied to the
    result of one XPath query, produced in the order name, country, region.
    """
    tree = parse(response)
    queries = (
        ("name", '//h1'),
        ("country", '//dd[@id="country"]'),
        ("region", '//dd[@id="region"]'),
    )
    # Each query is evaluated lazily, just before its pair is yielded.
    for label, query in queries:
        yield label, text(tree.xpath(query))
開發者ID:eBay,項目名稱:wextracto,代碼行數:7,代碼來源:tutorial.py

示例15: extract

# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import parse [as 別名]
def extract(response):
    """Parse *response* and return ``text()`` of its ``//h1/text()`` nodes."""
    heading_nodes = parse(response).xpath('//h1/text()')
    return text(heading_nodes)
開發者ID:eBay,項目名稱:wextracto,代碼行數:5,代碼來源:tutorial.py


注:本文中的lxml.html.parse方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。