本文整理汇总了 Python 中 bs4.element.NavigableString 类的典型用法代码示例。如果您正苦于以下问题：Python element.NavigableString 类的具体用法？Python element.NavigableString 怎么用？Python element.NavigableString 使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 bs4.element 的用法示例。
在下文中一共展示了 element.NavigableString 类的 6 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: replace_code_tags
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import NavigableString [as 别名]
def replace_code_tags(self, soup: BeautifulSoup) -> None:
    """
    Recursively replace code nodes with navigable strings whose values are unique IDs.

    Every ``<code>`` element found below ``soup`` is serialized with ``str()``,
    passed to ``self.store`` and swapped in the tree for a ``NavigableString``
    holding the value ``self.store`` returned. The inside of a replaced
    ``<code>`` element is not searched any further.

    Arguments:
        soup: The root tag of a BeautifulSoup HTML tree. It is modified in place.
    """
    def recursive_replace(tag):
        # Nodes without a ``contents`` attribute have no children to visit.
        if not hasattr(tag, "contents"):
            return
        # Walk a snapshot of the children: replace_with() edits tag.contents
        # while we are looping.
        for child in list(tag.contents):
            if child.name == "code":
                placeholder = NavigableString(self.store(str(child)))
                # Use the tree API instead of assigning into tag.contents so
                # that the placeholder gets proper parent/sibling links and
                # the old <code> node is fully detached from the document.
                child.replace_with(placeholder)
            else:
                recursive_replace(child)

    recursive_replace(soup)
示例2: get_content
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import NavigableString [as 别名]
def get_content(self, tag):
    """Return the text found between ``tag`` and the next h2 element.

    The siblings following ``tag`` are visited in document order. String
    nodes are ignored, the walk stops at the first element named ``h2``, and
    the ``.text`` of every other element is collected.

    :param tag: element whose following siblings are inspected.
    :return: the collected texts joined with newlines; a result of 1024
        characters or more is cut to 1021 characters followed by ``...``.
    """
    texts = []
    for elem in tag.next_siblings:
        # Keep only tag elements. isinstance() also filters out subclasses of
        # NavigableString (e.g. comments), which an exact type comparison
        # would let through.
        if isinstance(elem, NavigableString):
            continue
        # It's a tag: the next h2 marks the end of this section.
        if elem.name == 'h2':
            break
        texts.append(elem.text)

    content = '\n'.join(texts)
    if len(content) >= 1024:
        content = content[:1021] + '...'
    return content
示例3: text
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import NavigableString [as 别名]
def text(self, target=None, ignore_pureascii_words=False):
    """
    Collect the text fragments of an HTML tree, skipping scripts and comments.

    A string node is skipped when it is a doctype or a comment, when its
    parent is a ``script`` or ``style`` element, or when the parent's inline
    ``style`` attribute contains ``none`` or ``font-size:0px``. The remaining
    strings are stripped and empty results are dropped.

    :param target: the BeautifulSoup object to walk, default self.b
    :param ignore_pureascii_words: if set True, only return fragments that
        contain at least one non-ASCII character, e.g. Chinese characters
        (may be useful for English version websites)
    :return: list of str
    """
    if target is None:
        target = self.b
    from bs4 import Comment
    from bs4.element import NavigableString, Doctype

    fragments = []
    for node in target.descendants:
        # Only string nodes carry text.
        if not isinstance(node, NavigableString):
            continue
        if isinstance(node, Doctype):
            continue
        if node.parent.name in ["script", "style"]:
            continue
        if isinstance(node, Comment):
            continue
        # Inline style hints that the parent element is not displayed.
        inline_style = node.parent.get("style", "")
        if "none" in inline_style or "font-size:0px" in inline_style:
            continue

        stripped = node.strip()
        if len(stripped) == 0:
            continue
        if ignore_pureascii_words and not any(ord(ch) > 127 for ch in stripped):
            continue
        fragments.append(stripped.encode() if PY2 else stripped)
    return fragments
示例4: insert_escaped_tags
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import NavigableString [as 别名]
def insert_escaped_tags(tags, label=None):
    """For each tag in "tags", insert contextual tags (e.g., <p> </p>) as escaped text
    so that these tags are still there when html markup is stripped out.

    An opening marker is inserted at the start of the parent of the tag's
    first string and a closing marker is appended to the parent of its last
    string. Tags that contain no strings are left untouched.

    :param tags: iterable of BeautifulSoup tags; the tree is modified in place.
    :param label: name to use inside the markers; when None the name of each
        tag is used.
    :return: True if markers were inserted for at least one tag, else False.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            continue
        marker = tag.name if label is None else label
        strs[0].parent.insert(0, NavigableString("<" + marker + ">"))
        strs[-1].parent.append(NavigableString("</" + marker + ">"))
        found = True
    return found
示例5: insert_escaped_tags
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import NavigableString [as 别名]
def insert_escaped_tags(self, tags):
    """Surround the text of every tag in "tags" with its own markup, written as text.

    For each tag that contains at least one string, ``<name>`` is inserted at
    the start of the parent of its first string and ``</name>`` is appended to
    the parent of its last string, where ``name`` is the tag's name. The
    markers are string nodes, so they remain once html markup is stripped out.

    Returns True if at least one tag received markers, False otherwise.
    """
    found = False
    for tag in tags:
        text_nodes = list(tag.strings)
        if not text_nodes:
            # Nothing to wrap for a tag without any text.
            continue
        first, last = text_nodes[0], text_nodes[-1]
        name = tag.name
        first.parent.insert(0, NavigableString("<"+name+">"))
        last.parent.append(NavigableString("</"+name+">"))
        found = True
    return found
示例6: study
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import NavigableString [as 别名]
def study(bs_node, parent=None, keyword=""):
    """Locate the places inside ``bs_node`` where ``keyword`` appears.

    The node is only examined when ``keyword`` occurs in the lower-cased
    string form of the node. Depending on the node type, the following
    entries (dicts) are produced, each one at most once:

    * tag, keyword in an attribute value:
      ``{"type": "attrval", "name": <attr>, "tag": <tag name>, "noscript": ...}``
    * tag, keyword in an attribute name:
      ``{"type": "attrname", "name": <attr>, "tag": <tag name>, "noscript": ...}``
    * tag, keyword not in the attributes but in the tag name:
      ``{"type": "tag", "value": <tag name>, "noscript": ...}``
    * comment: ``{"type": "comment", "parent": <parent name>, "noscript": ...}``
    * other string: ``{"type": "text", "parent": <parent name>, "noscript": ...}``

    The ``noscript`` value is whatever ``close_noscript(bs_node)`` returns.
    Children of a tag are studied recursively with the tag as their parent.

    :param bs_node: BeautifulSoup node to inspect.
    :param parent: node whose ``name`` is reported for comment and text hits.
    :param keyword: string to look for.
    :return: list of entry dicts, without duplicates, in discovery order.
    """
    entries = []

    def remember(entry):
        # Record an entry only the first time it is seen.
        if entry not in entries:
            entries.append(entry)

    # Cheap pre-filter: nothing to do when the keyword is absent from the node.
    if keyword not in str(bs_node).lower():
        return entries

    if isinstance(bs_node, element.Tag):
        if keyword in str(bs_node.attrs):
            for attr_name, attr_value in bs_node.attrs.items():
                # NOTE(review): bs4 stores multi-valued attributes such as
                # "class" as lists, in which case this is a membership test
                # rather than a substring test - confirm this is intended.
                if keyword in attr_value:
                    noscript = close_noscript(bs_node)
                    remember({"type": "attrval", "name": attr_name, "tag": bs_node.name, "noscript": noscript})

                if keyword in attr_name:
                    noscript = close_noscript(bs_node)
                    remember({"type": "attrname", "name": attr_name, "tag": bs_node.name, "noscript": noscript})
        elif keyword in bs_node.name:
            noscript = close_noscript(bs_node)
            remember({"type": "tag", "value": bs_node.name, "noscript": noscript})

        # recursively search injection points for the same variable
        for child in bs_node.contents:
            for found in study(child, parent=bs_node, keyword=keyword):
                remember(found)
    elif isinstance(bs_node, element.Comment):
        # Comment must be tested before NavigableString, its base class.
        noscript = close_noscript(bs_node)
        remember({"type": "comment", "parent": parent.name, "noscript": noscript})
    elif isinstance(bs_node, element.NavigableString):
        noscript = close_noscript(bs_node)
        remember({"type": "text", "parent": parent.name, "noscript": noscript})

    return entries
# generate a list of payloads based on where in the webpage the js-code will be injected