本文整理汇总了Python中bs4.Tag.insert方法的典型用法代码示例。如果您正苦于以下问题:Python Tag.insert方法的具体用法?Python Tag.insert怎么用?Python Tag.insert使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类bs4.Tag的用法示例。
在下文中一共展示了Tag.insert方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add_latex
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def add_latex(self, html):
    """Clean up LaTeX fragments in ``html`` and return the parsed soup.

    The markup is run through an ordered list of regular-expression
    substitutions, written to ``index2.txt`` in the current working
    directory, and parsed with ``html.parser``.  Every ``CData`` text node
    is then wrapped in a new ``<math>`` tag, and ``self.add_clean_markdown``
    is called for the math spans/divs and the stem/answer/feedback divs.

    :param html: markup to pre-process
    :type html: str
    :return: the modified ``BeautifulSoup`` tree
    """
    import re

    # Applied strictly in this order; later rules see the output of earlier
    # ones.  Each entry is (pattern, replacement template).
    substitutions = (
        ("________", ""),
        (r"\\rm", r""),  # r"\\text{", html)
        # NOTE(review): as written this replaces "-" with "-" (a no-op);
        # an entity or en dash may have been lost from the pattern - confirm.
        (r"-", r"-"),
        (r"\\gt", r'>'),
        (r"\\lt", r'<'),
        (r"2!", r'2\!'),
        (r",!", r",\!"),
        (r"c!", r"c\!"),
        # The original pattern r"!\cdot!" / replacement r"\!-\cdot-\!" used
        # the escape "\c", which ``re`` rejects on current Python 3 (it raises
        # re.error).  Older interpreters treated "\c" in the pattern as a
        # plain "c" and kept it verbatim in the replacement; that meaning is
        # spelled out explicitly here.
        # NOTE(review): confirm a literal backslash was not intended in the
        # pattern.
        (r"!cdot!", r"\\!-\\cdot-\\!"),
        # Raw string avoids the invalid "\!" string escape.  "$" is still the
        # regex end anchor, so this rewrites a "!" at the very end of the
        # input to "$$".
        # NOTE(review): possibly meant to match a literal "\!$$" - confirm.
        (r"\!$$", "$$"),
        (r"2\\\\!\\!", "2"),
        # NOTE(review): replaces "|" with "|" as written (a no-op) - confirm.
        (r"\|", r"|"),
        (r"(% ((<)|<)!\[CDATA\[\n)", r""),
        (r"( %]](>|>))", r""),
    )
    for pattern, replacement in substitutions:
        html = re.sub(pattern, replacement, html)

    # Dump of the pre-processed markup (overwritten on every call).
    with open('index2.txt', 'w+') as output:
        output.write(html)

    soup = BeautifulSoup(html, 'html.parser')
    # soup(text=True) returns a list, so replacing nodes while looping is safe.
    for nav_string in soup(text=True):
        if isinstance(nav_string, CData):
            tag = Tag(soup, name="math")
            # nav_string[:] is a plain-string copy of the CDATA text.
            tag.insert(0, nav_string[:])
            nav_string.replace_with(tag)

    self.add_clean_markdown(soup, 'span', attribs={"data-math": True})
    self.add_clean_markdown(soup, 'div', attribs={"data-math": True})
    self.add_clean_markdown(soup, 'div', tag_class='stem_text')
    self.add_clean_markdown(soup, 'div', tag_class='answer_text')
    self.add_clean_markdown(soup, 'div', tag_class='feedback')
    return soup
示例2: get_svg
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def get_svg(xml,sketchID,version):
    """Render the ``KObject`` elements of ``xml`` as a prettified ``<g>`` tree.

    Each ``KObject`` becomes either a stroke tag (when it has a
    ``strokeData`` child) or a group tag, built by the ``ksketchsvg``
    helpers.  Objects whose parent id is ``"0"`` are placed at the top
    level; others are placed inside the already-created ``<g>`` carrying the
    parent's id and are dropped when no such group exists.  The result is
    wrapped in a ``<g id="0">``, stored through ``SVGCache.addSVGData`` and
    returned as a string.

    :param xml: XML document text containing ``KObject`` elements
    :param sketchID: passed through to ``SVGCache.addSVGData``
    :param version: passed through to ``SVGCache.addSVGData``
    :return: prettified markup of the wrapping ``<g>`` tag
    """
    root = ET.fromstring(xml)
    result_soup = BeautifulSoup()

    def place(node, parent_id):
        # New nodes go right after the container's existing direct <g> children.
        if parent_id == "0":
            container = result_soup
        else:
            container = result_soup.find('g', {'id': parent_id})
            if not container:
                return
        slot = len(container.find_all('g', recursive=False))
        container.insert(slot, node)

    for kobject in root.findall('.//KObject'):
        objectID = kobject.attrib['id']
        parentID = kobject.find('parent').attrib['id']
        stroke = kobject.find('strokeData')
        if stroke is None:
            node = ksketchsvg.createGroup(objectID)
        else:
            path = ksketchsvg.get_polyline(stroke)
            color = ksketchsvg.convert_color(stroke.attrib['color'])
            thickness = stroke.attrib['thickness']
            node = ksketchsvg.createTag(
                objectID, path, color, thickness, kobject.attrib['centroid']
            )
        place(node, parentID)

    wrapper = Tag(BeautifulSoup(), name='g')
    wrapper['id'] = "0"
    wrapper.insert(0, result_soup)
    SVGCache.addSVGData(sketchID, version, wrapper.prettify())
    return wrapper.prettify()
示例3: add_latex
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def add_latex(self, html):
    """Clean up LaTeX fragments in ``html`` and return prettified markup.

    The markup is run through an ordered list of regular-expression
    substitutions, written to ``index2.txt`` in the current working
    directory, and parsed with ``html.parser``.  Every ``CData`` text node
    is then wrapped in a new ``<math>`` tag, and ``self.add_clean_markdown``
    is called for the math spans/divs and the stem/answer/feedback divs.

    :param html: markup to pre-process
    :type html: str
    :return: ``soup.prettify(formatter='html')`` of the modified tree
    :rtype: str
    """
    import re

    # Applied strictly in this order; later rules see the output of earlier
    # ones.  Each entry is (pattern, replacement template).
    substitutions = (
        ("________", ""),
        (r"\\rm", r""),  # r"\\text{", html)
        (r"–", r"-"),  # en dash -> ASCII hyphen
        (r"\\gt", r'>'),
        (r"\\lt", r'<'),
        (r"2!", r'2\!'),
        (r",!", r",\!"),
        (r"c!", r"c\!"),
        # The original pattern r"!\cdot!" / replacement r"\!-\cdot-\!" used
        # the escape "\c", which ``re`` rejects on current Python 3 (it raises
        # re.error).  Older interpreters treated "\c" in the pattern as a
        # plain "c" and kept it verbatim in the replacement; that meaning is
        # spelled out explicitly here.
        # NOTE(review): confirm a literal backslash was not intended in the
        # pattern.
        (r"!cdot!", r"\\!-\\cdot-\\!"),
        # Raw string avoids the invalid "\!" string escape.  "$" is still the
        # regex end anchor, so this rewrites a "!" at the very end of the
        # input to "$$".
        # NOTE(review): possibly meant to match a literal "\!$$" - confirm.
        (r"\!$$", "$$"),
        (r"2\\\\!\\!", "2"),
        # NOTE(review): replaces "|" with "|" as written (a no-op) - confirm.
        (r"\|", r"|"),
        (r"(% ((<)|<)!\[CDATA\[\n)", r""),
        (r"( %]](>|>))", r""),
    )
    for pattern, replacement in substitutions:
        html = re.sub(pattern, replacement, html)

    # Dump of the pre-processed markup (overwritten on every call).
    with open('index2.txt', 'w+') as output:
        output.write(html)

    soup = BeautifulSoup(html, 'html.parser')
    # soup(text=True) returns a list, so replacing nodes while looping is safe.
    for nav_string in soup(text=True):
        if isinstance(nav_string, CData):
            tag = Tag(soup, name="math")
            # nav_string[:] is a plain-string copy of the CDATA text.
            tag.insert(0, nav_string[:])
            nav_string.replace_with(tag)

    # The inline per-tag loops that used to live here (commented out in the
    # original) are superseded by add_clean_markdown.
    self.add_clean_markdown(soup, 'span', attribs={"data-math": True})
    self.add_clean_markdown(soup, 'div', attribs={"data-math": True})
    self.add_clean_markdown(soup, 'div', tag_class='stem_text')
    self.add_clean_markdown(soup, 'div', tag_class='answer_text')
    self.add_clean_markdown(soup, 'div', tag_class='feedback')
    return soup.prettify(formatter='html')
示例4: _rewrite_html_with_local_files
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def _rewrite_html_with_local_files(self, index_path, ad_hash, base_url, logger=None):
    """Rewrite the HTML file at ``index_path`` in place.

    The file is first renamed to ``index_path + '.original'`` and read back
    from there.  Local stylesheets (``<link rel="stylesheet">`` whose
    ``href`` does not start with ``http``) are inlined as ``<style>`` tags;
    the others are handed to ``self._enqueue_external_css_resource``.  The
    ``_fetch_and_rewrite_*`` helpers are then run over links, backgrounds,
    images and style tags, the asset queue is processed, and the result is
    written to ``index_path`` as UTF-8.

    This function relies on the Python 2 builtin ``unicode``.

    :param index_path: path of the HTML file to rewrite
    :param ad_hash: currently unused (the watermark comment it fed is
        disabled)
    :param base_url: passed to the enqueue / fetch-and-rewrite helpers
    :param logger: optional list-like object; dicts with ``text`` and
        ``code`` keys are appended to it
    :return: None
    """
    original_path = index_path + '.original'
    # Rename through the OS instead of interpolating the path into a shell
    # ``mv`` command, which broke on (or executed) shell metacharacters.
    os.rename(index_path, original_path)
    # rfind() gives -1 for a bare file name, making ``folder`` the empty
    # string instead of raising like rindex() did.
    folder = index_path[:index_path.rfind('/') + 1]

    with codecs.open(original_path, mode='r') as fp:
        markup = fp.read()
    # NOTE(review): as written this replaces "&" with "&" (a no-op); the
    # replacement was probably an entity that got decoded - confirm.
    markup = re.sub(r'&(?!nbsp;)', r'&', markup)
    soup = BeautifulSoup(markup, "html.parser")

    # Merge local CSS files into the HTML document.
    for s in soup.findAll("link", {"rel": "stylesheet"}):
        if s["href"] and not s["href"].startswith("http"):
            if logger is not None:
                logger.append({'text': "Embed CSS file.", 'code': " %s " % s["href"]})
            with open(folder + s["href"]) as css_file:
                css = css_file.read()
            # bs4's Tag takes (parser, builder, name, ...) positionally, so
            # the old ``Tag(soup, "style", [...])`` call bound "style" to
            # ``builder``.  Pass the name by keyword and set the attribute
            # afterwards.
            style_tag = Tag(soup, name="style")
            style_tag["type"] = "text/css"
            style_tag.insert(0, css)
            s.replaceWith(style_tag)
        else:
            # External stylesheet: queue it to be fetched; it cannot be
            # embedded here.
            self._enqueue_external_css_resource(base_url, s)
            if logger is not None:
                message = "External CSS file, which should be fetched first. Unable to embed it."
                logger.append({'text': message, 'code': " %s " % s["href"]})

    self._fetch_and_rewrite_all(soup.findAll('a'), 'href', base_url, image=False, logger=logger)
    self._fetch_and_rewrite_all(soup.findAll('table'), 'background', base_url, image=True, logger=logger)
    self._fetch_and_rewrite_all(soup.findAll('td'), 'background', base_url, image=True, logger=logger)
    self._fetch_and_rewrite_all(soup.findAll('link'), 'href', base_url, image=False, logger=logger)
    self._fetch_and_rewrite_all(soup.findAll('img'), 'src', base_url, image=True, logger=logger)
    self._fetch_and_rewrite_css(soup.findAll('style'), base_url, logger=logger)
    self._process_assets_queue()

    # NOTE(review): replaces "&" with "&" as written (a no-op) - confirm.
    rendered = re.sub(r'&', r'&', unicode(soup))
    with codecs.open(index_path, mode='w', encoding='utf-8') as out:
        out.write(rendered)
示例5: add_scribble_canvas
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def add_scribble_canvas(soup_obj, user_id, scribble_id):
    """Add three ``<script>`` tags to ``soup_obj.body``.

    In order: a tag loading jQuery 1.7.2, an inline tag defining ``BASEURL``
    from ``HOME_URL``, and an inline tag holding the contents of
    ``scribble/static/js/load_canvas.js`` formatted with ``user_id`` and
    ``scribble_id``.  Every tag is inserted at position ``-1`` of the body.

    :param soup_obj: parsed document whose ``body`` receives the scripts
    :param user_id: first positional value for the script's ``format`` call
    :param scribble_id: second positional value for the ``format`` call
    """
    def attach(script, inline_source=None):
        # Optionally fill the script with inline code, then add it to <body>.
        if inline_source is not None:
            script.insert(0, NavigableString(inline_source))
        soup_obj.body.insert(-1, script)

    jquery = Tag(soup_obj, name="script")
    jquery['src'] = '//code.jquery.com/jquery-1.7.2.min.js'
    attach(jquery)

    attach(Tag(soup_obj, name="script"), "var BASEURL='%s'" % HOME_URL)

    with open("scribble/static/js/load_canvas.js") as scribjs:
        loader = Tag(soup_obj, name="script")
        attach(loader, scribjs.read().format( user_id, scribble_id))
示例6: createTag
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def createTag(objectID, path, color, width, centroid):
    """Return a detached ``<g>`` tag containing one ``<path>`` tag.

    :param objectID: id of the group; the path's id is ``"p" + objectID``
    :param path: value for the path's ``d`` attribute
    :param color: value for the path's ``stroke`` attribute
    :param width: value for the path's ``stroke-width`` attribute
    :param centroid: ``"x,y"`` string; both parts are converted with
        ``float`` and stored as ``centreX`` / ``centreY`` on the group
    :return: the ``<g>`` tag, created hidden (``opacity:0;``)
    """
    owner = BeautifulSoup()

    group = Tag(owner, name='g')
    centre_x, centre_y = tuple(float(part) for part in centroid.split(','))
    group_attrs = (
        ('centreX', centre_x),
        ('centreY', centre_y),
        ('id', objectID),
        ('style', "opacity:0;"),
    )
    for attr_name, attr_value in group_attrs:
        group[attr_name] = attr_value

    stroke_path = Tag(owner, name='path')
    path_attrs = (
        ('id', "p" + objectID),
        ('stroke', color),
        ('stroke-width', width),
        ('fill', 'none'),
        ('d', path),
        ('stroke-linecap', 'round'),
        ('stroke-linejoin', 'round'),
    )
    for attr_name, attr_value in path_attrs:
        stroke_path[attr_name] = attr_value

    group.insert(0, stroke_path)
    return group
示例7: parse_cms_template
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def parse_cms_template(html, dictionary, parent_namespace='', publish=False, request=dum_request):
"""
Refer to tests for cms syntax
:param html: Html to be parsed using cms syntax
:type html: str
:param dictionary: Dictionary that is to be used to parse the cms attributes in template
:type dictionary: dict
:param parent_namespace: Namespace of the html content to be parsed (if any)
:type parent_namespace: str
:param publish: This will hide sensitive info while rendering for public usage
:type publish: bool
:rtype : str
"""
soup = BeautifulSoup(html, features=HTML_PARSER)
for tag in soup.find_all(attrs={'data-cms-include': include_html_re}):
namespace = get_namespace(tag, parent_namespace=parent_namespace)
include_value = tag.attrs.pop('data-cms-include')
if ':' in include_value:
local_namespace, default_template_name = include_value.split(':', 1)
else:
try:
local_namespace = tag.attrs['data-cms-namespace']
except KeyError:
raise TemplateSyntaxError(
'value of data-cms-include should be of the form {namespace}:{template path}'
'if namespace is not specified then another attribute data-cms-namespace should be defined'
)
else:
if not namespace_re.match(local_namespace):
raise TemplateSyntaxError(
'"{}" is not a valid value for data-cms-namespace'.format(local_namespace)
)
else:
default_template_name = include_value
namespace += NAMESPACE_DELIMITER + local_namespace if namespace else local_namespace
template_name = dictionary.get(namespace, default_template_name)
if template_name.endswith('.html'):
template_name = template_name[:-5]
try:
include_template = validate_and_get_template(template_name)
except ValidationError:
include_template = validate_and_get_template(
default_template_name[:-5] if default_template_name.endswith('.html') else default_template_name
)
include_html = include_template.render(request=request)
tag.attrs['data-cms-namespace'] = local_namespace
if not publish:
tag.attrs['data-cms-include'] = template_name
new_tag = Tag(soup, name=tag.name, attrs=tag.attrs)
new_tag.insert(0, BeautifulSoup(include_html, features=HTML_PARSER))
tag.replaceWith(new_tag)
# soup does not recognize the changes made in above loop unless I do this
# Also do not move it inside the loop. It will mess up the variable scoping
soup = BeautifulSoup(soup.encode_contents(), features=HTML_PARSER)
for tag in soup.find_all(attrs={'data-cms-attr': attr_re}):
_ns = get_namespace(tag, parent_namespace=parent_namespace)
attrs = tag['data-cms-attr'].split('|')
for attr in attrs:
attr_name, key = attr.split(':', 1)
key = _ns + NAMESPACE_DELIMITER + key if _ns else key
if key in dictionary:
tag[attr_name] = dictionary[key]
soup = BeautifulSoup(soup.encode_contents(), features=HTML_PARSER)
for tag in soup.find_all(attrs={'data-cms-content': content_re}):
_ns = get_namespace(tag, parent_namespace=parent_namespace)
key = tag['data-cms-content']
md = False
if key.startswith('md:'):
key = key[3:]
md = True
key = _ns + NAMESPACE_DELIMITER + key if _ns else key
if key in dictionary:
content = dictionary[key]
else:
content = tag.encode_contents()
if not any(_ in content for _ in CMS_ATTRIBUTES):
continue
new_tag = Tag(soup, name=tag.name, attrs=tag.attrs)
if any(_ in content for _ in CMS_ATTRIBUTES):
content = parse_cms_template(content, dictionary, parent_namespace=key, request=request)
#.........这里部分代码省略.........
示例8: parse_cms_template
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import insert [as 别名]
def parse_cms_template(html, cms_context, parent_namespace='', public=False,
request=dum_request, template_context=None):
"""
Refer to tests for cms syntax
:param html: Html to be parsed using cms syntax
:type html: str
:param cms_context: Dictionary that is to be used to parse the
cms attributes in template
:type cms_context: dict
:param parent_namespace: Namespace of the html content to be parsed (if any)
:type parent_namespace: str
:param public: Renders the page for public usage
:type public: bool
:param request: Request object to be used for template context
:param template_context: Template context to be used for rendering the
base and included templates
:type template_context: dict
:rtype : str
"""
soup = BeautifulSoup(html, features=HTML_PARSER)
for tag in soup.find_all(attrs={INCLUDE_TAG: include_html_re}):
namespace = get_namespace(tag, parent_namespace=parent_namespace)
include_value = tag.attrs.pop(INCLUDE_TAG)
if ':' in include_value:
local_namespace, default_template_name = include_value.split(':', 1)
else:
try:
local_namespace = tag.attrs[NAMESPACE_TAG]
except KeyError:
raise TemplateSyntaxError(
'value of data-cms-include should be of the form '
'{namespace}:{template path}.'
'if namespace is not specified then another attribute '
'data-cms-namespace should be defined'
)
else:
if not namespace_re.match(local_namespace):
raise TemplateSyntaxError(
'"{}" is not a valid value for {}'.format(
local_namespace, NAMESPACE_TAG
)
)
else:
default_template_name = include_value
if namespace:
namespace += NAMESPACE_DELIMITER + local_namespace
else:
namespace = local_namespace
template_name = cms_context.get(namespace, default_template_name)
if template_name.endswith('.html'):
template_name = template_name[:-5]
try:
include_template = validate_and_get_template(template_name)
except ValidationError:
include_template = validate_and_get_template(default_template_name)
include_html = include_template.render(template_context, request)
tag.attrs[NAMESPACE_TAG] = local_namespace
if not public:
tag.attrs[INCLUDE_TAG] = template_name
new_tag = Tag(soup, name=tag.name, attrs=tag.attrs)
new_tag.insert(0, BeautifulSoup(include_html, features=HTML_PARSER))
tag.replaceWith(new_tag)
# soup does not recognize the changes made in above loop unless I do this
# Also do not move it inside the loop. It will mess up the variable scoping
soup = BeautifulSoup(soup.encode_contents(), features=HTML_PARSER)
for tag in soup.find_all(attrs={ATTR_TAG: attr_re}):
_ns = get_namespace(tag, parent_namespace=parent_namespace)
attrs = tag[ATTR_TAG].split('|')
for attr in attrs:
attr_name, key = attr.split(':', 1)
key = _ns + NAMESPACE_DELIMITER + key if _ns else key
if key in cms_context:
tag[attr_name] = cms_context[key]
soup = BeautifulSoup(soup.encode_contents(), features=HTML_PARSER)
for tag in soup.find_all(attrs={CONTENT_TAG: content_re}):
_ns = get_namespace(tag, parent_namespace=parent_namespace)
key = tag[CONTENT_TAG]
md = False
if key.startswith('md:'):
key = key[3:]
md = True
key = _ns + NAMESPACE_DELIMITER + key if _ns else key
#.........这里部分代码省略.........