当前位置: 首页>>代码示例>>Python>>正文


Python BeautifulSoup.find_all方法代码示例

本文整理汇总了Python中sigil_bs4.BeautifulSoup.find_all方法的典型用法代码示例。如果您正苦于以下问题:Python BeautifulSoup.find_all方法的具体用法?Python BeautifulSoup.find_all怎么用?Python BeautifulSoup.find_all使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sigil_bs4.BeautifulSoup的用法示例。


在下文中一共展示了BeautifulSoup.find_all方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: performNCXSourceUpdates

# 需要导入模块: from sigil_bs4 import BeautifulSoup [as 别名]
# 或者: from sigil_bs4.BeautifulSoup import find_all [as 别名]
def performNCXSourceUpdates(data, currentdir, keylist, valuelist):
    # rebuild serialized lookup dictionary
    updates = {}
    for i in range(0, len(keylist)):
        updates[ keylist[i] ] = valuelist[i]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                url = parts[0]
                fragment = ""
                if len(parts) > 1:
                    fragment = parts[1]
                bookrelpath = os.path.join(currentdir, unquoteurl(url))
                bookrelpath = os.path.normpath(bookrelpath)
                bookrelpath = bookrelpath.replace(os.sep, "/")
                if bookrelpath in updates:
                    attribute_value = updates[bookrelpath]
                    if fragment != "":
                        attribute_value = attribute_value + "#" + fragment
                    attribute_value = quoteurl(attribute_value)
                    tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata
开发者ID:uchuugaka,项目名称:Sigil,代码行数:29,代码来源:xmlprocessor.py

示例2: performPageMapUpdates

# 需要导入模块: from sigil_bs4 import BeautifulSoup [as 别名]
# 或者: from sigil_bs4.BeautifulSoup import find_all [as 别名]
def performPageMapUpdates(data, currentdir, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary of xml_updates properly adjusted
    updates = {}
    for i in range(0, len(keylist)):
        updates[ keylist[i] ] = "../" + valuelist[i]
    xml_empty_tags = ["page"]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all(["page"]):
        for att in ["href"]:
            if att in tag.attrs :
                ref = tag[att]
                if ref.find(":") == -1 :
                    parts = ref.split('#')
                    url = parts[0]
                    fragment = ""
                    if len(parts) > 1:
                        fragment = parts[1]
                    bookrelpath = os.path.join(currentdir, unquoteurl(url))
                    bookrelpath = os.path.normpath(bookrelpath)
                    bookrelpath = bookrelpath.replace(os.sep, "/")
                    if bookrelpath in updates:
                        attribute_value = updates[bookrelpath]
                        if fragment != "":
                            attribute_value = attribute_value + "#" + fragment
                        attribute_value = quoteurl(attribute_value)
                        tag[att] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata
开发者ID:hobo71,项目名称:Sigil,代码行数:34,代码来源:xmlprocessor.py

示例3: anchorNCXUpdates

# 需要导入模块: from sigil_bs4 import BeautifulSoup [as 别名]
# 或者: from sigil_bs4.BeautifulSoup import find_all [as 别名]
def anchorNCXUpdates(data, originating_filename, keylist, valuelist):
    # rebuild serialized lookup dictionary
    id_dict = {}
    for i in range(0, len(keylist)):
        id_dict[ keylist[i] ] = valuelist[i]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, builder=xmlbuilder)
    original_filename_with_relative_path = TEXT_FOLDER_NAME  + "/" + originating_filename
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                if (parts is not None) and (len(parts) > 1) and (parts[0] == original_filename_with_relative_path) and (parts[1] != ""):
                    fragment_id = parts[1]
                    if fragment_id in id_dict:
                        attribute_value = TEXT_FOLDER_NAME + "/" + quoteurl(id_dict[fragment_id]) + "#" + fragment_id
                        tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata
开发者ID:uchuugaka,项目名称:Sigil,代码行数:22,代码来源:xmlprocessor.py


注:本文中的sigil_bs4.BeautifulSoup.find_all方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。