This article collects typical usage examples of the Python method bs4.Tag.string. If you have been wondering what exactly Tag.string does, how to call it, or what it looks like in real-world code, the curated examples below should help. You can also explore the other methods of the containing class, bs4.Tag.
Below are 13 code examples of the Tag.string method, sorted by popularity by default.
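As a warm-up before the collected examples, here is a minimal sketch of what Tag.string does (the HTML snippet is invented for illustration): reading .string returns a tag's single text child, and assigning to it replaces the tag's contents.

from bs4 import BeautifulSoup

soup = BeautifulSoup("<p><b>old text</b></p>", "html.parser")
print(soup.b.string)        # -> old text
soup.b.string = "new text"  # replaces the tag's contents
print(soup)                 # -> <p><b>new text</b></p>

# Freshly created tags work the same way; soup.new_tag() is the
# factory the bs4 documentation recommends.
title = soup.new_tag("title")
title.string = "Hello"
print(title)                # -> <title>Hello</title>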
Example 1: apply_solution

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def apply_solution(self, flux_dict, color_map=default_color_map):
    self.object_styles.clear()
    fluxes = dict((i, flux_dict[i]) for i in self.included_reactions.intersection(flux_dict))
    abs_fluxes = [min(abs(i), 20) for i in fluxes.values()]
    x_min = min(abs_fluxes)
    x_max = max(abs_fluxes)
    scale_func = lambda value: min(1, (abs(value) - x_min) / (x_max - x_min) * 3)
    for reaction, value in fluxes.items():
        # Append the flux value to the reaction's existing <title> tooltip
        self._rxn_layer.findChild("g", id=reaction).title.string += "\n%.2f" % value
        try:
            # Attach a new <title> with the flux value to the reaction label
            t = Tag(name="title")
            t.string = "%.2f" % value
            self._rxn_label_layer.findChild(name="text", text=re.compile(reaction)).append(t)
        except Exception:
            pass
        if str(reaction) in self.included_reactions:
            self.set_object_color(str(reaction), color_map(scale_func(value)))
            # Hide the arrowhead on the inactive end of the reaction
            if value < 0:
                self.object_styles["%s .end" % str(reaction)] = {"marker-end": "none"}
            if value > 0:
                self.object_styles["%s .start" % str(reaction)] = {"marker-start": "none"}
    # Reactions with no flux in the solution are painted black
    for reaction in self.included_reactions.difference(flux_dict.keys()):
        self.set_object_color(reaction, (0, 0, 0))
    self._update_svg()
    return self
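The core pattern in Example 1 — building a bare Tag, setting its .string, and appending it to a parsed element — can be tried in isolation. A minimal sketch, with a made-up SVG snippet standing in for the pathway map:

from bs4 import BeautifulSoup, Tag

svg = BeautifulSoup('<svg><g id="R1"><title>R1</title></g></svg>', "html.parser")
# Append a flux value to the existing <title> tooltip, as apply_solution does
svg.find("g", id="R1").title.string += "\n%.2f" % 1.5
# Or attach a brand-new <title> element
t = Tag(name="title")
t.string = "%.2f" % 1.5
svg.find("g", id="R1").append(t)
print(svg.find("g"))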
Example 2: makeTag

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def makeTag(name, string=None, **kwargs):
    # Normalize None attribute values to empty strings before building the tag
    for key in kwargs:
        if kwargs[key] is None:
            kwargs[key] = ""
    tag = Tag(name=name, attrs=kwargs)
    if string:
        tag.string = string
    return tag
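A quick usage sketch of the helper above (the attribute values are invented; note how a None attribute is normalized to an empty string):

link = makeTag("a", string="Read more", href="https://example.com", rel=None)
print(link)  # -> <a href="https://example.com" rel="">Read more</a>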
Example 3: putback_elems

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def putback_elems(source, elems, xml_elem):
    for elem in elems:
        if isinstance(elem, tuple):
            # (attribute name on the source object, tag name to emit)
            attr, tag = elem
        else:
            attr = tag = elem
        if hasattr(source, attr):
            e = Tag(name=tag)
            e.string = getattr(source, attr)
            xml_elem.append(e)
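putback_elems copies attributes from a feedparser-style object into child tags of an XML element. A minimal sketch using a stand-in object (SimpleNamespace here only mimics feedparser's attribute access):

from types import SimpleNamespace
from bs4 import Tag

feed = SimpleNamespace(title="My feed", link="https://example.com")
channel = Tag(name="channel")
# A bare string maps attribute and tag 1:1; a tuple renames the tag
putback_elems(feed, ["title", ("link", "link")], channel)
print(channel)  # -> <channel><title>My feed</title><link>https://example.com</link></channel>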
Example 4: transform_item_in_feed

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def transform_item_in_feed(item):
    """Transform an <item>"""
    link = item.link.text
    print('Processing {}'.format(link))
    # Ignore empty articles
    if item.description is None or len(item.description.contents) == 0:
        print('Empty article body, ignoring...')
        item.decompose()
        return
    # Ignore articles without a title
    if item.title is None or len(item.title) == 0:
        print('Article without title, ignoring...')
        item.decompose()
        return
    # Parse the article content as HTML
    article = read_html_from_string(item.description.contents[0])
    # The creator in the RSS is a username, so try first to parse it from the HTML.
    html_authors = _parse_article_authors(article)
    if html_authors is not None:
        item.creator.string = html_authors
        # Remove the authors from the article text itself
        article.find('div', class_='field-name-field-auteurs').decompose()
    # Get the category
    category_tag = Tag(name='category')
    category_node = article.select_one('div.field-name-field-rubriek a')
    if category_node is not None:
        category_tag.string = category_node.text.strip()
        category_tag['domain'] = category_node['href']
        # Remove the category from the article body
        article.find('div', class_='field-name-field-rubriek').decompose()
    item.append(category_tag)
    # Remove the edition from the article body if present
    edition_node = article.find('div', class_='field-name-field-editie')
    if edition_node is not None:
        edition_node.decompose()
    encoded = article.find('body').decode_contents(formatter='html')
    item.description.contents = [CData(htmlmin.minify(encoded, remove_optional_attribute_quotes=False))]
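The closing CData trick in Example 4 — storing the article HTML as a CDATA node so it survives XML serialization unescaped — can be isolated. A minimal sketch with an invented feed snippet; clear()/append() is used here as the tidier equivalent of assigning to .contents:

from bs4 import BeautifulSoup, CData

rss = BeautifulSoup("<rss><item><description>old</description></item></rss>", "html.parser")
desc = rss.find("description")
desc.clear()
desc.append(CData("<p>some <b>html</b></p>"))
print(desc)  # -> <description><![CDATA[<p>some <b>html</b></p>]]></description>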
Example 5: home

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def home():
    if request.method == "GET":
        return render_template("scrapermaker_form.html")
    elif request.method == "POST":
        url = request.form['url']
        r = requests.get(url)
        bs = BeautifulSoup(r.content)
        # Build a <script> tag tied to this soup and inject the payload into <body>
        js_tag = Tag(bs, name="script")
        js_tag.string = payload
        body = bs.find("body")
        body.append(js_tag)
        return str(bs)
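Example 5 passes the soup as the first positional argument to Tag, an older calling convention. The same injection can be written with new_tag, the factory the bs4 documentation recommends; a sketch with a stand-in payload:

from bs4 import BeautifulSoup

payload = "console.log('injected');"  # stand-in for the example's payload
bs = BeautifulSoup("<html><body><p>hi</p></body></html>", "html.parser")
js_tag = bs.new_tag("script")  # inherits the soup's builder
js_tag.string = payload
bs.body.append(js_tag)
print(bs)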
Example 6: find_parents

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
### the id of the parents is identical to the documentation_units
# TODO: this is a dirty way and needs to be improved
all_parents = find_parents(
    methods + functions + attributes + classmethods + staticmethods + sections + classes +
    exceptions + describtions + datas)
all_parents_copy = copy.copy(all_parents)
### define a placeholder and replace nested elements to avoid double rating
placeholder = '[something removed here]'
for parent in methods + functions + attributes + classmethods + staticmethods + sections + classes \
        + exceptions + describtions:
    ### put placeholders into duplicated dl-elements ###
    for elem in parent.find_all('dl', {
            'class': ['method', 'function', 'attribute', 'classmethod', 'staticmethod',
                      'section', 'class', 'exception', 'describe', 'data']}):
        tag = Tag(name='p')
        tag.string = placeholder
        elem.replace_with(tag)
    ### same thing with sections ###
    for elem in parent.find_all('div', {'class': 'section'}):
        tag = Tag(name='p')
        tag.string = placeholder
        elem.replace_with(tag)
    ### summarize placeholders ###
    summarize_placeholders(parent, placeholder)
results = methods + functions + attributes + classmethods + staticmethods + sections + classes \
    + exceptions + describtions + datas
# ######### STORE SOMETHING INTO THE DATABASE #############
conn = psycopg2.connect("host=127.0.0.1 dbname=mydb user=sven password=Schwen91")
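The replace-with-placeholder move that Example 6 applies to documentation pages is easy to demonstrate on its own; a minimal sketch with an invented snippet:

from bs4 import BeautifulSoup, Tag

soup = BeautifulSoup('<div><dl class="method"><dt>f()</dt></dl></div>', "html.parser")
placeholder = '[something removed here]'
for elem in soup.find_all('dl', {'class': 'method'}):
    tag = Tag(name='p')
    tag.string = placeholder
    elem.replace_with(tag)
print(soup)  # -> <div><p>[something removed here]</p></div>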
Example 7: enumerate

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
# make sidebar for chapters
# -----------------------------------------------------
for idx, chapter in enumerate(chapterDicts):
    soup = Soup()
    soupFromFile = Soup(open(chapter['destChapterPath']).read())
    # Create previous/next links in the footer
    nextChapterDiv = soupFromFile.find(id='next_chapter')
    prevChapterDiv = soupFromFile.find(id='prev_chapter')
    if idx != 0:
        prevLink = Tag(soup, None, "a")
        prevLink['href'] = chapterDicts[idx-1]['href']
        prevLink.string = "< " + chapterDicts[idx-1]['title']
        prevChapterDiv.append(prevLink)
    if idx != len(chapterDicts) - 1:
        nextLink = Tag(soup, None, "a")
        nextLink['href'] = chapterDicts[idx+1]['href']
        nextLink.string = chapterDicts[idx+1]['title'] + " >"
        nextChapterDiv.append(nextLink)
    # Find the navbar UL
    navbar = soupFromFile.find_all("ul", {"id": "nav-parts"})[0]
    # Run through the chapter groups
Example 8: Tag

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
# table.append(tr)
# for attr in mem_attr:
#     th = Tag(soup, None, "th")
#     tr.append(th)
#     th.append(attr)
# print soup.prettify()
for c in chapterTags:
    ul = Tag(soup, None, "ul")
    li = Tag(soup, None, "li")
    a = Tag(soup, None, "a")
    a['href'] = "chapters/" + c['path'] + ".html"
    a.string = c['title']
    li.append(a)
    ul.append(li)
    # Only build a nested list when the chapter has inner tags
    if len(c['innerTags']):
        ulInner = Tag(soup, None, "ul")
        li.append(ulInner)
        for tag in c['innerTags']:
            liInner = Tag(soup, None, "li")
            ulInner.append(liInner)
            a = Tag(soup, None, "a")
            tagNoSpaces = tag.replace(" ", "")
            a['href'] = "chapters/" + c['path'] + ".html#" + tagNoSpaces
            a['target'] = "_top"
Example 9: format_links

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def format_links(html):
    '''
    This monster of a function takes in the HTML from a post and returns a dict
    containing html, text, and summary.
    Uses opengraph to try to get titles for all untitled links, and tries to
    hyperlink everything.
    '''
    edit_html = html
    # Unescape quoted entities, then re-escape bare ampersands so the parser
    # receives well-formed markup.
    html = html.replace('&quot;', '"')
    soup = BeautifulSoup(re.sub(r'&(?!amp;)', r'&amp;', html))
    reformat_str = ''.join(random.sample(string.ascii_uppercase, 10)) + '__'
    reformat_dict = {}
    videos = []
    image = None
    # Set aside all <img> tags, because we need to treat them special and will add them in later.
    for tag_index, img_tag in enumerate(soup.find_all('img')):
        key = reformat_str + 'img' + str(tag_index)
        img_tag.replace_with(key)
        # handle the case where a user inputs a non-http link
        if img_tag.has_attr('src') and not img_tag['src'].startswith('http'):
            new_src = 'http://' + img_tag['src']
            img_tag['src'] = new_src
        if not image:
            image = img_tag['src']
        reformat_dict[key] = img_tag
    # Set aside all <a> tags, because we need to treat them special and will add them in later.
    for tag_index, a_tag in enumerate(soup.find_all('a')):
        key = reformat_str + 'a' + str(tag_index)
        a_tag.replace_with(key)
        # handle the case where a user inputs a non-http link
        if a_tag.has_attr('href'):
            new_href = a_tag['href'].strip()
            if not new_href.startswith('http'):
                new_href = 'http://' + a_tag['href']
            a_tag['href'] = new_href
            embed_link = get_embed_link(new_href)
            if embed_link:
                videos.append(embed_link)
        a_tag['target'] = '_blank'
        try:
            if a_tag.string and a_tag['href'] and a_tag.string in a_tag['href']:
                og_title = get_opengraph(a_tag['href'], params=['title']).get('title')
                a_tag.string = og_title.strip()
        except Exception:
            pass
        reformat_dict[key] = a_tag
    mentions = []
    # Find all mentions and format them
    mention_regex = re.compile(r'(@\S+(?:\s\S+)?)')
    for mention_index, mention_str in enumerate(soup(text=mention_regex)):
        key = reformat_str + 'm' + str(mention_index)
        mention_split_list = mention_regex.split(mention_str)
        parent_tag = Tag(name='span')
        for piece in mention_split_list:
            if type(piece) in [unicode, str]:
                s = mention_regex.search(piece)
                if s:
                    first_letter = re.search(r"@\S+", piece).group()[1]
                    names = [u.name for u in User.objects(name__istartswith=first_letter)]
                    # Try progressively shorter prefixes until exactly one user matches
                    for i in range(len(piece) - 1):
                        query_name = re.compile(piece[1:len(piece) - i], flags=re.IGNORECASE)
                        matches = len([name for name in names if query_name.match(name)])
                        if matches == 1:
                            a_tag = Tag(name='a')
                            target_user = User.objects(name=query_name).get()
                            a_tag['href'] = '/profile/%s' % str(target_user.id)
                            a_tag['target'] = '_blank'
                            a_tag['mention'] = 'Yes'
                            a_tag.string = '@' + query_name.pattern
                            parent_tag.append(a_tag)
                            parent_tag.append(NavigableString(piece[len(piece) - i:]))
                            mentions.append(str(target_user.id))
                            break
                    else:
                        # for/else structure:
                        # catch an @ that didn't match any users
                        parent_tag.append(NavigableString(piece))
                else:
                    parent_tag.append(NavigableString(piece))
        reformat_dict[key] = parent_tag
        mention_str.replace_with(key)
    opengraph_index = 0
    opengraph_objects = []
    # Find all plaintext links and format them.
    for p in soup.find_all('p'):
        p_text = unicode(p.text)
        if link_regex.search(p_text):
# ......... remainder of the function omitted .........
Example 10: enumerate

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
# make sidebar for chapters
# -----------------------------------------------------
for idx, chapter in enumerate(chapterDicts):
    soup = Soup()
    soupFromFile = Soup(open(chapter['destChapterPath'], "rb").read(), "html.parser")
    # Create previous/next links in the footer
    nextChapterDiv = soupFromFile.find(id='next_chapter')
    prevChapterDiv = soupFromFile.find(id='prev_chapter')
    if idx != 0:
        prevLink = Tag(soup, None, "a")
        prevLink['href'] = chapterDicts[idx-1]['href']
        prevLink.string = "< " + chapterDicts[idx-1]['title']
        prevChapterDiv.append(prevLink)
    if idx != len(chapterDicts) - 1:
        nextLink = Tag(soup, None, "a")
        nextLink['href'] = chapterDicts[idx+1]['href']
        nextLink.string = chapterDicts[idx+1]['title'] + " >"
        nextChapterDiv.append(nextLink)
    # Find the navbar UL
    navbar = soupFromFile.find_all("ul", {"id": "nav-parts"})[0]
    # Run through the chapter groups
Example 11: text_tag

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def text_tag(self):
    t = Tag(name='span')
    t['class'] = self.cls + ['n']
    t.string = self.text
    return t
Example 12: rebuild_rss

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def rebuild_rss(url, output, selectors, replace=None, pretty=False, raw=False):
    source = feedparser.parse(url)
    try:
        soup = BeautifulSoup('<rss version="2.0" />', 'xml')
        rss = soup.rss
        has_lxml = True
    except FeatureNotFound:
        rss = BeautifulSoup('<rss version="2.0" />').rss
        has_lxml = False
    channel = Tag(name='channel')
    rss.append(channel)
    putback_elems(source.feed, channel_required, channel)
    putback_elems(source.feed, channel_optional, channel)
    build_date = Tag(name='lastBuildDate')
    build_date.string = time.strftime('%a, %d %b %Y %H:%M:%S +0000', time.gmtime())
    channel.append(build_date)
    generator = Tag(name='generator')
    generator.string = source.feed.generator + ' & RSS Rebuilder' if hasattr(source.feed, 'generator') else 'RSS Rebuilder'
    channel.append(generator)
    if replace:
        regexp = re.compile(replace[0])
    for entry in source.entries:
        item = Tag(name='item')
        channel.append(item)
        putback_elems(entry, item_required, item)
        putback_elems(entry, item_optional, item)
        r = requests.get(entry.link)
        html = r.content if raw else r.text
        linked_html = BeautifulSoup(html, 'lxml') if has_lxml else BeautifulSoup(html)
        content = ''
        for selector in selectors:
            tags = linked_html.select(selector)
            if replace:
                tags = replace_urls(tags, regexp, replace[1])
            content = reduce(lambda s, tag: s + unicode(tag), tags, content)
        desc = Tag(name='description')
        desc.string = content
        item.append(desc)
    out_func = lambda x: (x.prettify() if pretty else unicode(x)).encode('utf-8')
    if output == '-':
        out_file = sys.stdout
        close_file = lambda: None
    else:
        out_file = open(output, 'w')
        close_file = out_file.close
    if has_lxml:
        out_file.write(out_func(soup))
    else:
        # lxml is unavailable, so emit the XML declaration by hand
        out_file.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
        out_file.write(out_func(rss))
    out_file.write('\n')
    close_file()
Example 13: new_tag

# Required import: from bs4 import Tag [as alias]
# Or: from bs4.Tag import string [as alias]
def new_tag(tag, string):
    t = Tag(name=tag)
    t.string = string
    return t
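A usage sketch of this last helper (values invented). When a soup object is at hand, soup.new_tag(name) does the same job and additionally inherits the soup's builder:

guid = new_tag("guid", "https://example.com/item/1")
print(guid)  # -> <guid>https://example.com/item/1</guid>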