本文整理汇总了 Python 中 bs4.Tag.append 方法的典型用法代码示例。如果您正苦于以下问题：Python Tag.append 方法的具体用法？Python Tag.append 怎么用？Python Tag.append 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 bs4.Tag 的用法示例。
在下文中一共展示了Tag.append方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: construct_xml
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def construct_xml(self):
    """Build the skeleton of an ``OTA_AirLowFareSearchRQ`` request document.

    The root element receives its namespace/version attributes, followed by a
    ``POS > Source > RequestorID`` subtree and an
    ``OriginDestinationInformation`` element (``RPH="1"``) that holds a still
    empty ``DepartureDateTime`` child.

    Returns:
        The ``BeautifulSoup`` document that owns the request tree.
    """
    soup = BeautifulSoup(etree.tostring(etree.Element('OTA_AirLowFareSearchRQ')), 'xml')
    query = soup.contents[0]
    query.attrs = {
        'xmlns': 'http://www.opentravel.org/OTA/2003/05',
        'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
        'PrimaryLangId': 'en',
        'Version': '2.001',
        'TimeStamp': str(datetime.datetime.now().isoformat()),
        'EchoToken': str(time.mktime(time.gmtime())),
        'xsi:schemaLocation': 'http://www.opentravel.org/2006A/OTA_AirLowFareSearchRQ.xsd',
    }

    # <POS><Source><RequestorID .../></Source></POS>
    t_req = Tag(name='RequestorID')
    t_req.attrs = {
        'ID': 'weathersick',
        'URL': 'http://www.weathersick.com',
        'Type': '18',
    }
    t_source = Tag(name='Source')
    t_source.append(t_req)
    t_pos = Tag(name='POS')
    t_pos.append(t_source)
    query.append(t_pos)

    # <OriginDestinationInformation RPH="1"><DepartureDateTime/></...>
    t_odinf = Tag(name='OriginDestinationInformation')
    t_odinf.attrs = {'RPH': '1'}
    # TODO: the earlier draft stopped before giving DepartureDateTime any
    # content; fill in the departure value once the request data is available.
    t_deptime = Tag(name='DepartureDateTime')
    t_odinf.append(t_deptime)
    query.append(t_odinf)

    return soup
示例2: heading2table
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def heading2table(soup, table, row):
    """Append a header row to ``table`` with one ``<th>`` cell per item in ``row``."""
    header_row = Tag(soup, name="tr")
    table.append(header_row)
    for label in row:
        header_cell = Tag(soup, name="th")
        header_row.append(header_cell)
        header_cell.append(label)
示例3: row2table
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def row2table(soup, table, row):
    """Append a data row to ``table`` with one ``<td>`` cell per item in ``row``."""
    data_row = Tag(soup, name="tr")
    table.append(data_row)
    for value in row:
        data_cell = Tag(soup, name="td")
        data_row.append(data_cell)
        data_cell.append(value)
示例4: tag
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def tag(self):
    """Return a ``<table>`` tag with one ``<tr>`` per entry of ``self.cells``.

    Each cell object contributes whatever its own ``tag()`` method returns.
    """
    table = Tag(name='table')
    for row_cells in self.cells:
        row = Tag(name='tr')
        for cell in row_cells:
            row.append(cell.tag())
        table.append(row)
    return table
示例5: replace_dot_code
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def replace_dot_code(dot_code: Tag) -> None:
    """Replace the contents of ``dot_code`` with the SVG rendered from its text.

    The text of ``dot_code`` is passed to ``dot``; the resulting markup is
    parsed as XML and its ``<svg>`` element becomes the only child of
    ``dot_code``. The SVG's ``width``/``height`` attributes are removed and
    re-expressed through an inline ``style`` capped at ``max-width: 100%``.
    """
    rendered = BeautifulSoup(dot(dot_code.text), 'xml').svg
    assert 'style' not in rendered.attrs

    # Pop width first, then height: same order as the attributes are consumed
    # when the style string is assembled.
    width = rendered.attrs.pop("width")
    height = rendered.attrs.pop("height")
    rendered.attrs['style'] = (
        f'max-width: 100%; '
        f'width: {width}; '
        f'height: {height};'
    )

    dot_code.clear()
    dot_code.append(rendered)
示例6: clone
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def clone(el):
    """Recursively copy a bs4 element.

    Strings are re-created through their own class; tags are rebuilt by hand
    with copied attributes and cloned children.
    """
    if isinstance(el, NavigableString):
        return type(el)(el)

    # The tag is constructed manually to work around a bug where there is no
    # builder set: https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    duplicate = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
    duplicate.attrs = dict(el.attrs)
    duplicate.can_be_empty_element = el.can_be_empty_element
    duplicate.hidden = el.hidden

    for node in el.contents:
        duplicate.append(clone(node))
    return duplicate
示例7: soup
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def soup(self):
    '''
    Build this component as a BeautifulSoup ``Tag`` and return it.

    The tag is named ``self.tagname`` and carries ``self.attributes``. Each
    flattened entry of ``self.components`` that has a ``soup`` attribute
    contributes the result of calling it; plain ``str`` entries are appended
    as text. Anything else is left out.
    '''
    root = Tag(name=self.tagname, builder=BUILDER)
    root.attrs = self.attributes

    for part in flatten(self.components):
        if hasattr(part, 'soup'):
            root.append(part.soup())
            continue
        # Exact-type check on purpose: subclasses of str are not integrated.
        if type(part) is str:
            root.append(str(part))

    return root
示例8: clone_bs4_elem
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def clone_bs4_elem(el):
    """Return a recursive copy of a bs4 element, for use before modifying it.

    Adapted from
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup
    """
    if isinstance(el, NavigableString):
        string_class = type(el)
        return string_class(el)

    # Construct the tag manually to work around a bug where there is no
    # builder set: https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    new_tag = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
    new_tag.attrs = dict(el.attrs)
    new_tag.can_be_empty_element = el.can_be_empty_element
    new_tag.hidden = el.hidden

    for child_copy in map(clone_bs4_elem, el.contents):
        new_tag.append(child_copy)
    return new_tag
示例9: get_markdown_page_index_objects
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def get_markdown_page_index_objects(content: Tag, url: str, page_path: str, title: str, page_type: str,
                                    page_views: int) -> List[Dict]:
    """Split a page into one group of search-index records per heading section.

    The direct element children of ``content`` are walked in order. Every
    ``h1``/``h2``/``h3`` starts a new section; the elements that follow it, up
    to the next heading, are passed to ``get_valuable_content`` and each
    returned part becomes one record anchored at ``url + '#' + <heading id>``.

    Pages with no element children, or whose first element is not a heading,
    are delegated unchanged to ``get_page_index_objects``.

    Args:
        content: Parsed page body.
        url: Page URL; the heading ``id`` is appended as a fragment.
        page_path: Only forwarded to ``get_page_index_objects``.
        title: Stored as ``pageTitle`` in every record.
        page_type: Stored as ``type`` in every record.
        page_views: Stored as ``pageViews`` in every record.

    Returns:
        The list of index records, in document order.

    Raises:
        TypeError: If a heading has no ``id`` attribute (``str + None``).
    """
    headers = ('h1', 'h2', 'h3')
    children = [element for element in content.children if isinstance(element, Tag)]

    # An empty page used to crash on children[0]; treat it like any other
    # page that cannot be split into heading sections.
    if not children or children[0].name not in headers:
        return get_page_index_objects(content, url, page_path, title, page_type, page_views)

    index_objects = []

    def flush_section(section_url, heading, elements):
        # Turn one finished section into index records.
        for ind, page_part in enumerate(get_valuable_content(elements)):
            index_objects.append({
                'url': section_url, 'objectID': section_url + str(ind), 'content': page_part,
                'headings': heading, 'pageTitle': title, 'type': page_type,
                'pageViews': page_views,
            })

    block_title = ""
    url_with_href = ""
    # Kept separate from the ``content`` parameter instead of rebinding it.
    section_elements = []
    for child in children:
        if child.name in headers:
            if block_title != '':
                flush_section(url_with_href, block_title, section_elements)
            url_with_href = url + '#' + child.get('id')
            block_title = child.text
            section_elements = []
        else:
            section_elements.append(child)

    # The last section has no following heading to trigger its flush, so emit
    # it here; previously its content was silently dropped.
    if block_title != '':
        flush_section(url_with_href, block_title, section_elements)

    return index_objects
示例10: printhtml
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def printhtml(csvdiffs):
    """Print ``csvdiffs`` as an HTML document.

    The first cell of rows 0 and 1 become two paragraphs, row 3 becomes the
    table heading and rows 4 onward become the table body (cells are
    converted with ``str``).
    """
    soup = BeautifulSoup()
    html = Tag(soup, name="html")
    soup.append(html)

    # Two leading paragraphs taken from the first cell of rows 0 and 1.
    for text in (csvdiffs[0][0], csvdiffs[1][0]):
        paragraph = Tag(soup, name="p")
        paragraph.append(text)
        html.append(paragraph)

    table = Tag(soup, name="table")
    table.attrs["border"] = "1"
    html.append(table)

    heading2table(soup, table, csvdiffs[3])
    for cells in csvdiffs[4:]:
        row2table(soup, table, [str(cell) for cell in cells])

    print(soup)
示例11: _extract_article_body
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def _extract_article_body(page):
    """Collect the article content of ``page`` under a temporary wrapper tag.

    The children of ``#artikel .content`` are visited in order:

    * ``<form>`` elements (the comment form) and whitespace-only strings are
      skipped;
    * any other non-``div`` node is moved into the result as is;
    * ``div`` elements are handled by class: the introduction goes through
      ``_extract_paragraph``, regular images become ``<p>`` blocks (image plus
      optional caption text) inside a ``<div class="image">``, website fields
      become a bold label followed by the field items; every other ``div``
      is ignored.

    Returns:
        A ``<temporary_tag>`` ``Tag`` holding the extracted content.
    """
    article = page.find(id='artikel').find(class_='content')
    body = Tag(name='temporary_tag')

    # Iterate over a snapshot: the loop body extracts nodes from ``article``,
    # which would disturb a live iteration over its children.
    for element in list(article.children):
        # Ignore the comment form
        if element.name == 'form':
            continue

        # Ignore whitespace
        if element.name is None and re.search(r'\S', str(element)) is None:
            continue

        # Nor div, nor form, nor whitespace: probably article content
        if element.name != 'div':
            body.append(element.extract())
            continue

        # A div without a class attribute is simply one of the "other divs"
        # below; look the classes up once without raising KeyError.
        classes = element.get('class', [])

        # TODO uncomment me when the app is ready to support subtitles
        # Oh, and change the next if with an elif
        # if 'field-field-ondertitel' in classes:
        #     paragraph = _extract_paragraph(element, 'subtitle')
        #     body.append(paragraph)
        if 'field-field-inleiding' in classes:
            paragraph = _extract_paragraph(element, 'introduction')
            body.append(paragraph)
        elif 'field-field-img-regulier' in classes:
            images_div = Tag(name='div', attrs={'class': 'image'})
            for image_and_caption in element(id='image-and-caption'):
                image = image_and_caption.img
                caption = image_and_caption.find(class_='caption-text')

                paragraph = Tag(name='p')
                paragraph.append(image)
                if caption is not None:
                    paragraph.append(caption.text)
                images_div.append(paragraph)
            body.append(images_div)
        elif 'field-field-website' in classes:
            label = element.find(class_='field-label').text
            label_s = Tag(name='strong')
            label_s.append(label)
            label_p = Tag(name='p')
            label_p.append(label_s)
            body.append(label_p)

            # Snapshot again: appending moves each node out of ``contents``.
            websites = element.find(class_='field-item').contents
            for website in list(websites):
                body.append(website)
        # Any other div is ignored.

    return body
示例12: Tag
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
# Post-process every figure element in divFigures. This is an excerpt: soup,
# divFigures, clone and internalImagesPath are defined outside this view.
for fig in divFigures:
figCaption = fig.p
# Turn the caption <p> into a <div> for CSS formatting
# note: the games chapter needs some caption work
if figCaption is not None:
figCaption.name = "div"
# [zach] -- this is to make images that are not full width, have captions below the image
div = Tag(soup, None, "div")
# NOTE(review): "image" is not a CSS declaration; a class name may have been
# intended here (the previous value was "clear:both") -- confirm.
div['style'] = "image" #"clear:both"
# Put a clone of the image inside the new div, then swap the original image
# for that div.
div.append(clone(fig.img))
fig.img.replace_with(div)
# Images have been stored in ./CHAPTER_NAME/images/ relative
# to the chapter html, but image references in the html are
# to ./images/. Modify the image tags:
div.img["src"] = internalImagesPath + "/" + div.img["src"]
# Turn the figure image into a hyperlink that points to the
# full resolution version of the image
imgHyperlink = soup.new_tag("a", href=fig.img["src"])
fig.img.wrap(imgHyperlink)
fig['class'] = "inner"
# NOTE(review): divWhole is created but not used within this excerpt;
# presumably it is attached by code past the end of this view -- confirm.
divWhole = Tag(soup, None, "div")
divWhole['class'] = "figure"
示例13: Tag
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
# for attr in mem_attr:
# th = Tag(soup, None, "th")
# tr.append(th)
# th.append(attr)
# print soup.prettify()
# Build one table-of-contents list per chapter: a <ul> whose single <li>
# links to the chapter page and, when the chapter has inner tags, contains a
# nested <ul> with one anchor link per tag.
for c in chapterTags:
    ul = Tag(soup, None, "ul")
    li = Tag(soup, None, "li")
    a = Tag(soup, None, "a")
    a['href'] = "chapters/" + c['path'] + ".html"
    a.string = c['title']
    li.append(a)
    ul.append(li)

    # Only add the nested list when the chapter actually has inner tags.
    # The previous check, len(['innerTags']), measured a one-item list
    # literal and was therefore always true.
    if c['innerTags']:
        ulInner = Tag(soup, None, "ul")
        li.append(ulInner)

        for tag in c['innerTags']:
            liInner = Tag(soup, None, "li")
            ulInner.append(liInner)
            a = Tag(soup, None, "a")
            # The anchor id is the tag text with spaces removed.
            tagNoSpaces = tag.replace(" ", "")
            a['href'] = "chapters/" + c['path'] + ".html#" + tagNoSpaces
            a['target'] = "_top"
            a.string = tag
            # NOTE(review): the excerpt ends here; attaching `a` to `liInner`
            # (and `ul` to the page) presumably happens in the lines that
            # follow -- confirm.
示例14: len
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
# Restyle every figure: the caption <p> becomes a <span>, and the image is
# moved into a clearing <div> so that images which are not full width get
# their caption below the image. (Looping over an empty divFigures is a
# no-op, so no separate length check is needed.)
for fig in divFigures:
    figCaption = fig.p
    # note: the games chapter needs some caption work
    if figCaption is not None:
        figCaption.name = "span"

    # [zach] -- wrapper div that clears floats around the image
    div = Tag(soup, None, "div")
    div['style'] = "clear:both"
    div.append(clone(fig.img))
    fig.img.replace_with(div)

    # Images have been stored in ./CHAPTER_NAME/images/ relative
    # to the chapter html, but image references in the html are
    # to ./images/. Modify the image tags:
    div.img["src"] = internalImagesPath + "/" + div.img["src"]

# Make all hyperlinks in the chapter target a new window/tab
hyperlinkTags = soup.find_all("a")
for hyperlinkTag in hyperlinkTags:
    hyperlinkTag["target"] = "_blank"

html = str(soup)
# The file is opened in binary mode, so it must be given bytes: writing the
# str above raises TypeError on Python 3. soup.encode() yields UTF-8 bytes.
with open(destChapterPath, "wb") as destFile:
    destFile.write(soup.encode("utf-8"))
示例15: rebuild_rss
# 需要导入模块: from bs4 import Tag [as 别名]
# 或者: from bs4.Tag import append [as 别名]
def rebuild_rss(url, output, selectors, replace = None, pretty = False, raw = False):
    """Rebuild an RSS feed whose item descriptions hold the linked pages' content.

    The feed at ``url`` is parsed, its channel and item elements are copied
    into a fresh ``<rss version="2.0">`` document via ``putback_elems``, and
    every entry's link is downloaded. The markup matched by ``selectors`` on
    the downloaded page becomes the item's ``<description>``.

    Args:
        url: Location of the source feed, passed to ``feedparser.parse``.
        output: Destination file path, or ``'-'`` for standard output.
        selectors: CSS selectors applied, in order, to each linked page.
        replace: Optional pair; ``replace[0]`` is compiled as a regular
            expression and handed to ``replace_urls`` together with
            ``replace[1]`` for every selected tag list.
        pretty: Serialize with ``prettify()`` instead of the compact form.
        raw: Parse the response bytes (``r.content``) instead of the decoded
            text (``r.text``).

    Returns:
        None. The UTF-8 encoded feed is written to ``output``.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u'')

    source = feedparser.parse(url)

    # Prefer the XML builder; fall back to the default parser when the
    # feature is not available.
    try:
        soup = BeautifulSoup('<rss version="2.0" />', 'xml')
        rss = soup.rss
        has_lxml = True
    except FeatureNotFound:
        rss = BeautifulSoup('<rss version="2.0" />').rss
        has_lxml = False

    channel = Tag(name = 'channel')
    rss.append(channel)
    putback_elems(source.feed, channel_required, channel)
    putback_elems(source.feed, channel_optional, channel)

    build_date = Tag(name = 'lastBuildDate')
    build_date.string = time.strftime('%a, %d %b %Y %H:%M:%S +0000', time.gmtime())
    channel.append(build_date)

    generator = Tag(name = 'generator')
    if hasattr(source.feed, 'generator'):
        generator.string = source.feed.generator + ' & RSS Rebuilder'
    else:
        generator.string = 'RSS Rebuilder'
    channel.append(generator)

    regexp = re.compile(replace[0]) if replace else None

    for entry in source.entries:
        item = Tag(name = 'item')
        channel.append(item)

        putback_elems(entry, item_required, item)
        putback_elems(entry, item_optional, item)

        r = requests.get(entry.link)
        html = r.content if raw else r.text
        linked_html = BeautifulSoup(html, 'lxml') if has_lxml else BeautifulSoup(html)

        # Gather the serialized tags and join once instead of repeatedly
        # concatenating strings.
        parts = []
        for selector in selectors:
            tags = linked_html.select(selector)
            if replace:
                tags = replace_urls(tags, regexp, replace[1])
            parts.extend(text_type(tag) for tag in tags)

        desc = Tag(name = 'description')
        desc.string = ''.join(parts)
        item.append(desc)

    def render(node):
        # Serialize ``node`` to UTF-8 bytes.
        text = node.prettify() if pretty else text_type(node)
        return text.encode('utf-8')

    # Render everything before touching the destination so a serialization
    # error cannot leave a half-written file behind.
    if has_lxml:
        payload = render(soup)
    else:
        payload = b'<?xml version="1.0" encoding="UTF-8" ?>\n' + render(rss)
    payload += b'\n'

    if output == '-':
        # On Python 3 sys.stdout is a text stream; its ``buffer`` accepts
        # bytes. Python 2's sys.stdout takes the byte string directly.
        stream = getattr(sys.stdout, 'buffer', sys.stdout)
        stream.write(payload)
        stream.flush()
    else:
        # ``with`` closes the file even if the write fails.
        with open(output, 'wb') as out_file:
            out_file.write(payload)