本文整理汇总了Python中sigil_bs4.BeautifulSoup.decodexml方法的典型用法代码示例。如果您正苦于以下问题:Python BeautifulSoup.decodexml方法的具体用法?Python BeautifulSoup.decodexml怎么用?Python BeautifulSoup.decodexml使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sigil_bs4.BeautifulSoup的用法示例。
在下文中一共展示了BeautifulSoup.decodexml方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: performNCXSourceUpdates
# Required import: from sigil_bs4 import BeautifulSoup [as alias]
# Note: decodexml is a method of the BeautifulSoup class, so it cannot be imported on its own; call it on a BeautifulSoup instance.
def performNCXSourceUpdates(data, currentdir, keylist, valuelist):
    """Rewrite the ``src`` attribute of NCX ``content`` elements.

    Args:
        data: The NCX document handed to ``BeautifulSoup``.
        currentdir: Directory that each ``src`` value is joined onto
            before it is looked up.
        keylist: Lookup keys. Each is compared against the normalized,
            forward-slash form of ``currentdir`` joined with the unquoted
            URL part of a ``src`` value.
        valuelist: Replacement values, parallel to ``keylist``
            (``valuelist[i]`` replaces the match for ``keylist[i]``).

    Returns:
        The document as serialized by ``soup.decodexml``.
    """
    # The mapping arrives as two parallel sequences; pair them back up.
    replacements = {old: valuelist[idx] for idx, old in enumerate(keylist)}

    builder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, builder=builder)

    for node in soup.find_all("content"):
        if "src" not in node.attrs:
            continue
        href = node["src"]
        # Values containing ':' are left untouched
        # (presumably scheme-qualified URLs -- confirm against callers).
        if ":" in href:
            continue

        pieces = href.split('#')
        target = pieces[0]
        anchor = pieces[1] if len(pieces) > 1 else ""

        # Build the lookup key: join, normalize, then force '/' separators.
        lookup = os.path.normpath(os.path.join(currentdir, unquoteurl(target)))
        lookup = lookup.replace(os.sep, "/")
        if lookup not in replacements:
            continue

        new_src = replacements[lookup]
        if anchor != "":
            # Carry the original fragment over to the new location.
            new_src = new_src + "#" + anchor
        node["src"] = quoteurl(new_src)

    return soup.decodexml(indent_level=0, formatter='minimal', indent_chars=" ")
示例2: performPageMapUpdates
# Required import: from sigil_bs4 import BeautifulSoup [as alias]
# Note: decodexml is a method of the BeautifulSoup class, so it cannot be imported on its own; call it on a BeautifulSoup instance.
def performPageMapUpdates(data, currentdir, keylist, valuelist):
    """Rewrite the ``href`` attribute of ``page`` elements in a page map.

    Args:
        data: The page-map document as text; its XML header is removed and
            the remainder is encoded to UTF-8 before parsing.
        currentdir: Directory that each ``href`` value is joined onto
            before it is looked up.
        keylist: Lookup keys, compared against the normalized,
            forward-slash form of ``currentdir`` joined with the unquoted
            URL part of an ``href`` value.
        valuelist: Replacement values, parallel to ``keylist``. Each one is
            prefixed with ``"../"`` before use.

    Returns:
        The document as serialized by ``soup.decodexml``.
    """
    # lxml on a Mac does not seem to handle full unicode properly,
    # so the parser is fed UTF-8 bytes instead of text.
    encoded = _remove_xml_header(data).encode('utf-8')

    # Pair the two parallel sequences back up, adjusting every value
    # with a leading "../".
    replacements = {old: "../" + valuelist[idx] for idx, old in enumerate(keylist)}

    page_tags = ["page"]
    builder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=page_tags)
    soup = BeautifulSoup(encoded, features=None, from_encoding="utf-8", builder=builder)

    for node in soup.find_all(["page"]):
        for attr_name in ["href"]:
            if attr_name not in node.attrs:
                continue
            ref = node[attr_name]
            # Values containing ':' are left untouched
            # (presumably scheme-qualified URLs -- confirm against callers).
            if ":" in ref:
                continue

            pieces = ref.split('#')
            target = pieces[0]
            anchor = pieces[1] if len(pieces) > 1 else ""

            # Build the lookup key: join, normalize, then force '/' separators.
            lookup = os.path.normpath(os.path.join(currentdir, unquoteurl(target)))
            lookup = lookup.replace(os.sep, "/")
            if lookup not in replacements:
                continue

            new_ref = replacements[lookup]
            if anchor != "":
                # Carry the original fragment over to the new location.
                new_ref = new_ref + "#" + anchor
            node[attr_name] = quoteurl(new_ref)

    return soup.decodexml(indent_level=0, formatter='minimal', indent_chars=" ")
示例3: repairXML
# Required import: from sigil_bs4 import BeautifulSoup [as alias]
# Note: decodexml is a method of the BeautifulSoup class, so it cannot be imported on its own; call it on a BeautifulSoup instance.
def repairXML(data, self_closing_tags=ebook_xml_empty_tags, indent_chars=" "):
    """Parse ``data`` as XML and return it re-serialized.

    Args:
        data: XML text; its XML header is removed and the remainder is
            encoded to UTF-8 before parsing.
        self_closing_tags: Passed to the tree builder as
            ``empty_element_tags``.
        indent_chars: Passed through to ``decodexml`` as ``indent_chars``.

    Returns:
        The document as serialized by ``soup.decodexml``.
    """
    # lxml on a Mac does not seem to handle full unicode properly,
    # so the parser is fed UTF-8 bytes instead of text.
    encoded = _remove_xml_header(data).encode('utf-8')

    builder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=self_closing_tags)
    soup = BeautifulSoup(encoded, features=None, from_encoding="utf-8", builder=builder)
    return soup.decodexml(indent_level=0, formatter='minimal', indent_chars=indent_chars)
示例4: repairXML
# Required import: from sigil_bs4 import BeautifulSoup [as alias]
# Note: decodexml is a method of the BeautifulSoup class, so it cannot be imported on its own; call it on a BeautifulSoup instance.
def repairXML(data, mtype="", indent_chars=" "):
    """Clean up ``data``, parse it as XML and return it re-serialized.

    Args:
        data: XML text; it is passed through ``_remove_xml_header`` and
            ``_make_it_sane`` and then encoded to UTF-8 before parsing.
        mtype: Passed to ``get_void_tags`` to choose the tags handed to
            the tree builder as ``empty_element_tags``.
        indent_chars: Passed through to ``decodexml`` as ``indent_chars``.

    Returns:
        The document as serialized by ``soup.decodexml``.
    """
    cleaned = _make_it_sane(_remove_xml_header(data))
    empty_tags = get_void_tags(mtype)

    # lxml on a Mac does not seem to handle full unicode properly,
    # so the parser is fed UTF-8 bytes instead of text.
    encoded = cleaned.encode('utf-8')

    builder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=empty_tags)
    soup = BeautifulSoup(encoded, features=None, from_encoding="utf-8", builder=builder)
    return soup.decodexml(indent_level=0, formatter='minimal', indent_chars=indent_chars)
示例5: repairXML
# Required import: from sigil_bs4 import BeautifulSoup [as alias]
# Note: decodexml is a method of the BeautifulSoup class, so it cannot be imported on its own; call it on a BeautifulSoup instance.
def repairXML(data, mtype="", indent_chars=" "):
    """Return ``data`` unchanged if well-formed, otherwise a repaired copy.

    Args:
        data: The XML document to check and, if needed, repair.
        mtype: Media type. ``"application/oebps-package+xml"`` triggers an
            extra rebuild through ``Opf_Parser``; the value is also passed
            to ``get_void_tags``.
        indent_chars: Passed through to ``decodexml`` as ``indent_chars``.

    Returns:
        The original ``data`` object (XML header included) when the
        header-stripped text is already well-formed; otherwise the
        document as serialized by ``soup.decodexml``.
    """
    candidate = _remove_xml_header(data)

    # If well-formed - don't mess with it: hand back the caller's input.
    if _well_formed(candidate):
        return data

    candidate = _make_it_sane(candidate)
    if not _well_formed(candidate):
        candidate = _reformat(candidate)
        # NOTE(review): the source this was recovered from had lost its
        # indentation. The OPF branch is placed inside the _reformat
        # fallback because it calls .decode(), which presumably needs the
        # bytes that _reformat returns -- confirm against upstream.
        if mtype == "application/oebps-package+xml":
            opf_text = candidate.decode('utf-8')
            candidate = Opf_Parser(opf_text).rebuild_opfxml()

    # lxml requires utf-8 on Mac, won't work with unicode.
    if isinstance(candidate, str):
        candidate = candidate.encode('utf-8')

    empty_tags = get_void_tags(mtype)
    builder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=empty_tags)
    soup = BeautifulSoup(candidate, features=None, from_encoding="utf-8", builder=builder)
    return soup.decodexml(indent_level=0, formatter='minimal', indent_chars=indent_chars)
示例6: anchorNCXUpdates
# Required import: from sigil_bs4 import BeautifulSoup [as alias]
# Note: decodexml is a method of the BeautifulSoup class, so it cannot be imported on its own; call it on a BeautifulSoup instance.
def anchorNCXUpdates(data, originating_filename, keylist, valuelist):
    """Repoint NCX ``content`` fragments that refer to one given file.

    Only ``src`` values of the form
    ``TEXT_FOLDER_NAME/originating_filename#fragment`` are considered.
    When ``fragment`` is one of the keys, the file part is replaced by
    the (URL-quoted) mapped value and the fragment is kept.

    Args:
        data: The NCX document handed to ``BeautifulSoup``.
        originating_filename: File name that a ``src`` must point at,
            relative to ``TEXT_FOLDER_NAME``, to be eligible.
        keylist: Fragment identifiers to look for.
        valuelist: Replacement file names, parallel to ``keylist``.

    Returns:
        The document as serialized by ``soup.decodexml``.
    """
    # The mapping arrives as two parallel sequences; pair them back up.
    id_dict = {frag: valuelist[idx] for idx, frag in enumerate(keylist)}

    builder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, builder=builder)
    expected_path = TEXT_FOLDER_NAME + "/" + originating_filename

    for node in soup.find_all("content"):
        if "src" not in node.attrs:
            continue
        href = node["src"]
        # Values containing ':' are left untouched
        # (presumably scheme-qualified URLs -- confirm against callers).
        if ":" in href:
            continue

        pieces = href.split('#')
        # str.split always returns a list, so only the fragment presence,
        # the file part and a non-empty fragment need checking.
        if len(pieces) <= 1 or pieces[0] != expected_path or pieces[1] == "":
            continue

        anchor = pieces[1]
        if anchor in id_dict:
            node["src"] = TEXT_FOLDER_NAME + "/" + quoteurl(id_dict[anchor]) + "#" + anchor

    return soup.decodexml(indent_level=0, formatter='minimal', indent_chars=" ")
示例7: repairXML
# Required import: from sigil_bs4 import BeautifulSoup [as alias]
# Note: decodexml is a method of the BeautifulSoup class, so it cannot be imported on its own; call it on a BeautifulSoup instance.
def repairXML(data, self_closing_tags=ebook_xml_empty_tags, indent_chars=" "):
    """Parse ``data`` as XML and return it re-serialized.

    Args:
        data: The XML document, handed to ``BeautifulSoup`` as-is.
        self_closing_tags: Passed to the tree builder as
            ``empty_element_tags``.
        indent_chars: Passed through to ``decodexml`` as ``indent_chars``.

    Returns:
        The document as serialized by ``soup.decodexml``.
    """
    builder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=self_closing_tags)
    soup = BeautifulSoup(data, features=None, builder=builder)
    return soup.decodexml(indent_level=0, formatter='minimal', indent_chars=indent_chars)