本文整理汇总了Python中bs4.element.Tag类的典型用法代码示例。如果您正苦于以下问题:Python element.Tag类的具体用法?Python element.Tag怎么用?Python element.Tag使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块bs4.element的用法示例。
在下文中一共展示了element.Tag类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_build_notes
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def parse_build_notes(h: Tag) -> None:
    """Gather the paragraphs that follow ``h`` and post them to the bugs webhook.

    Every element after ``h`` (in document order) is inspected. The text of
    each non-empty ``<p>`` is collected until a ``<p>`` whose ``class``
    contains ``posted-in`` is met, which stops the walk. The collected text
    is posted as a rich embed, but only when ``bugs_webhook_id`` is configured.

    :param h: Tag from which the walk over following elements starts.
    """
    paragraphs = []
    for node in h.next_elements:
        if not isinstance(node, Tag) or node.name != 'p':
            continue
        # A "posted-in" paragraph ends the walk.
        if 'posted-in' in node.attrs.get('class', []):
            break
        if node.text:
            paragraphs.append(node.text)

    description = '\n'.join(paragraphs)
    # The announcement link is looked up whether or not a webhook is configured.
    announcement_url = fetcher.find_announcements()[0]
    embed = {
        'title': 'MTGO Build Notes',
        'type': 'rich',
        'description': description,
        'url': announcement_url,
    }

    if configuration.get_optional_str('bugs_webhook_id') is None:
        return
    fetch_tools.post_discord_webhook(
        configuration.get_str('bugs_webhook_id'),
        configuration.get_str('bugs_webhook_token'),
        embeds=[embed],
        username='Magic Online Announcements',
        avatar_url='https://magic.wizards.com/sites/mtg/files/styles/auth_small/public/images/person/wizards_authorpic_larger.jpg'
    )
示例2: parse_changelog
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def parse_changelog(collapsible_block: Tag) -> None:
    """Reconcile every ``<li>`` under the block's ``<ul>`` lists with the tracker.

    For each list item the text is cleaned with ``strings.remove_smartquotes``
    and then:

    * if ``find_issue_by_code`` finds an issue, the 'From Bug Blog' label is
      added unless ``repo.is_issue_from_bug_blog`` already reports it;
    * otherwise, if ``find_issue_by_name`` finds something, nothing is done;
    * otherwise a new issue labelled 'From Bug Blog' is created.

    :param collapsible_block: Tag whose ``<ul>``/``<li>`` descendants are read.
    """
    # They never show Fixed bugs in the Bug Blog anymore. Fixed bugs are now
    # listed on the Build Notes section of MTGO weekly announcements.
    # This is frustrating.
    for bullet_list in collapsible_block.find_all('ul'):
        for entry in bullet_list.find_all('li'):
            print(entry)
            bbt = strings.remove_smartquotes(entry.get_text())

            issue = find_issue_by_code(bbt)
            if issue is not None:
                # Known issue: only make sure it carries the label.
                if not repo.is_issue_from_bug_blog(issue):
                    print('Adding Bug Blog to labels')
                    issue.add_to_labels('From Bug Blog')
                continue

            if find_issue_by_name(bbt):
                print('Already exists.')
                continue

            print('Creating new issue')
            text = 'From Bug Blog.\nBug Blog Text: {0}'.format(bbt)
            repo.get_repo().create_issue(
                bbt,
                body=strings.remove_smartquotes(text),
                labels=['From Bug Blog'],
            )
示例3: check_if_removed_from_bugblog
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def check_if_removed_from_bugblog(bbt: Match, b: Tag, issue: Issue) -> None:
    """Close ``issue`` when its bug blog text no longer appears in table ``b``.

    Does nothing when ``bbt`` is ``None``. Otherwise the first capture group of
    ``bbt`` is compared with the second ``<td>`` of every ``<tr>`` in ``b``:

    * an exact match means the bug is still listed, so nothing changes;
    * a match that differs only by what ``strip_squarebrackets`` removes
      rewrites the issue body with the row's text and leaves a comment;
    * if no row matches, the issue is commented on and closed.

    :param bbt: Match whose group 1 holds the issue's bug blog text, or None.
    :param b: Tag containing the table rows to search.
    :param issue: Issue to update or close.
    """
    if bbt is None:
        return

    wanted = strings.remove_smartquotes(bbt.group(1).strip())
    for row in b.find_all('tr'):
        cells = row.find_all('td')
        candidate = strings.remove_smartquotes(cells[1].text.strip())
        if candidate == wanted:
            # Still listed verbatim.
            return
        if strip_squarebrackets(candidate) != strip_squarebrackets(wanted):
            continue

        # Fix this
        print("Issue #{id}'s bug blog text has differing autocard notation.".format(id=issue.number))
        old_bbt = strings.get_body_field(issue.body, 'Bug Blog Text')
        body = re.sub(BBT_REGEX, 'Bug Blog Text: {0}'.format(candidate), issue.body, flags=re.MULTILINE)
        new_bbt = strings.get_body_field(body, 'Bug Blog Text')
        issue.edit(body=body)
        print('Updated to `{0}`'.format(candidate))
        issue.create_comment(f'Changed bug blog text from `{old_bbt}` to `{new_bbt}`')
        return

    # Reached only when no row matched.
    print('{id} is fixed!'.format(id=issue.number))
    repo.create_comment(issue, 'This bug has been removed from the bug blog!')
    issue.edit(state='closed')
示例4: get_tag_from_bs
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def get_tag_from_bs(cls, soup):
    """Recursively build a ``cls`` tree that mirrors a BeautifulSoup node.

    NOTE(review): ``cls`` is used both as a constructor and to call this
    function again, so this is presumably a ``@classmethod``; no decorator is
    visible here -- confirm.

    :param soup: A ``BeautifulSoup`` object (its first tag is used) or a
        bs4 ``Tag``. Anything else yields ``None``.
    :return: ``cls(name, text, attrs)`` with one ``append`` call per child of
        the source tag, or ``None`` when there is no named tag to convert.
    """
    from bs4 import BeautifulSoup as bs
    from bs4.element import Tag as bs_tag

    if isinstance(soup, bs):
        root = soup.find()
    elif isinstance(soup, bs_tag):
        root = soup
    else:
        root = None

    if not (root and root.name):
        return None

    node = cls(root.name, root.text, root.attrs)
    for child in root.children:
        # NOTE(review): a child that is neither of the two accepted types makes
        # the recursive call return None, and that None is appended too --
        # confirm ``append`` expects this.
        node.append(cls.get_tag_from_bs(child))
    return node
示例5: ensure_soup
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def ensure_soup(value, parser=None):
    """Convert ``value`` into a Tag, or a list of values into a list of Tags.

    :param value: String, BeautifulSoup, Tag, or list of the above
    :param str parser: Parser passed to BeautifulSoup as ``features`` when
        ``value`` has to be parsed; ``None`` leaves the choice to BeautifulSoup
    :return: Tag or list of Tags; for a BeautifulSoup object or parsed
        markup this is the result of ``find()`` on the document
    """
    if isinstance(value, list):
        # Lists are converted element by element.
        return [ensure_soup(element, parser=parser) for element in value]

    if isinstance(value, BeautifulSoup):
        document = value
    elif isinstance(value, Tag):
        return value
    else:
        document = BeautifulSoup(value, features=parser)
    return document.find()
示例6: decrypt_woff_tag
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def decrypt_woff_tag(tag, TTGlyphs, d_list):
    """Return the text of ``tag`` with its obfuscated child tags substituted.

    The direct children of ``tag`` are processed in order:

    * string children are kept unchanged;
    * a child tag whose name is in ``decrypt_tags`` has its text passed
      through ``dec``; for every entry of ``TTGlyphs`` that contains the
      result, the entry of ``d_list`` at the same index is selected (the
      last matching index wins);
    * any other child tag, a tag with no match, or a selected replacement
      that is not a ``str`` contributes nothing.

    ``tag`` is not modified: its children are iterated, not popped out of
    ``tag.contents``.

    :param tag: Tag whose direct children are read.
    :param TTGlyphs: Sequence of glyph names, index-aligned with ``d_list``.
    :param d_list: Replacement values, index-aligned with ``TTGlyphs``.
    :return: The concatenation of the kept and substituted strings.
    """
    pieces = []
    for child in tag.contents:
        piece = child
        if isinstance(child, Tag):
            if child.name not in decrypt_tags:
                continue
            decoded = dec(child.text)
            for index, glyph_name in enumerate(TTGlyphs):
                if decoded in glyph_name:
                    piece = d_list[index]
        # Unmatched tags and non-string replacements are dropped.
        if isinstance(piece, str):
            pieces.append(piece)
    return ''.join(pieces)
示例7: _match_end_tag
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def _match_end_tag(tag: Tag) -> bool:
    """Return True if `tag` has a class listed in `SEARCH_END_TAG_ATTRS` or is a table."""
    classes = tag.get("class", ())
    if any(attr in classes for attr in SEARCH_END_TAG_ATTRS):
        return True
    return tag.name == "table"
示例8: parse_article_item_extended
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def parse_article_item_extended(a: Tag) -> Tuple[Tag, str]:
    """Extract the title tag and absolute link from one article item.

    :param a: Tag containing at least one ``<h3>`` and one ``<a href=...>``.
    :return: ``(first <h3> tag, 'http://magic.wizards.com' + first link's href)``.
    :raises IndexError: if ``a`` has no ``<h3>`` or no ``<a>`` descendant.
    """
    headings = a.find_all('h3')
    title = headings[0]
    anchors = a.find_all('a')
    href = anchors[0]['href']
    return (title, 'http://magic.wizards.com' + href)
示例9: get_article_archive
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def get_article_archive() -> List[Tuple[Tag, str]]:
    """Fetch the article archive page and parse every article item on it.

    The archive URL ending in ``184956`` is tried first; if that raises
    ``fetch_tools.FetchException`` the plain archive URL is fetched instead.

    :return: One ``parse_article_item_extended`` result per
        ``<div class="article-item-extended">`` found, in document order.
    """
    try:
        html = fetch_tools.fetch('http://magic.wizards.com/en/articles/archive/184956')
    except fetch_tools.FetchException:
        html = fetch_tools.fetch('http://magic.wizards.com/en/articles/archive/')

    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_='article-item-extended')
    return list(map(parse_article_item_extended, items))
示例10: parse_header
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def parse_header(h: Tag) -> None:
    """Dispatch a header tag to the parser that matches the start of its text.

    Text starting with 'Downtime' goes to ``parse_downtimes``; text starting
    with 'Build Notes' goes to ``parse_build_notes``. Any other header is
    ignored.

    :param h: Header tag to inspect.
    """
    heading = h.text
    if heading.startswith('Downtime'):
        parse_downtimes(h)
        return
    if heading.startswith('Build Notes'):
        parse_build_notes(h)
示例11: parse_downtimes
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def parse_downtimes(h: Tag) -> None:
    """Write the text of the first non-empty tag after ``h`` to ``downtimes.txt``.

    The text is stripped and one fixed boilerplate sentence is removed before
    it is printed and written. The file is overwritten. Nothing happens when
    no tag with text follows ``h``.

    :param h: Tag from which the walk over following elements starts.
    """
    first_with_text = next(
        (node for node in h.next_elements if isinstance(node, Tag) and node.text),
        None,
    )
    if first_with_text is None:
        return

    with open('downtimes.txt', 'w', encoding='utf-8') as f:
        txt = first_with_text.text.strip()
        txt = txt.replace("Please note that there are no more 'extended' or 'normal' downtimes; in the new world with fewer downtimes, they're all the same length of time.", '')
        print(txt)
        f.write(txt)
示例12: check_for_missing_bugs
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def check_for_missing_bugs(b: Tag) -> None:
    """Make sure every row of table ``b`` has an issue, and try to categorise it.

    For each ``<tr>`` the second ``<td>`` is the row text and the third is
    matched against ``strings.REGEX_BBCAT``:

    * the row whose text is exactly 'Description' is skipped;
    * if ``find_issue_by_code`` finds nothing, a new issue labelled
      'From Bug Blog' is created;
    * if the issue already has a label in ``strings.METACATS``, or the third
      cell does not match the regex, the row is left alone;
    * otherwise regex group 1, then group 2 (when present), is added as a
      label if it is in ``strings.METACATS``; failing both, the unknown
      category is printed.

    :param b: Tag containing the table rows to check.
    """
    for row in b.find_all('tr'):
        cells = row.find_all('td')
        row_text = cells[1].text.strip()
        if row_text == 'Description':
            # BS4 is bad.
            continue

        issue = find_issue_by_code(row_text)
        if not issue:
            print('Could not find issue for `{row}`'.format(row=row_text))
            text = 'From Bug Blog.\nBug Blog Text: {0}'.format(row_text)
            repo.get_repo().create_issue(
                strings.remove_smartquotes(row_text),
                body=strings.remove_smartquotes(text),
                labels=['From Bug Blog'],
            )
            continue

        # Already categorised: nothing to add.
        if any(label.name in strings.METACATS for label in issue.labels):
            continue

        bbcat = re.match(strings.REGEX_BBCAT, cells[2].text.strip())
        if bbcat is None:
            continue

        primary = bbcat.group(1).strip()
        if primary in strings.METACATS:
            issue.add_to_labels(primary)
            continue

        secondary = bbcat.group(2)
        if secondary is not None and secondary.strip() in strings.METACATS:
            issue.add_to_labels(secondary.strip())
            continue

        print(f'Unknown BBCat: {bbcat.group(0)}')
示例13: find_bbt_in_issue_title
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def find_bbt_in_issue_title(issue: Issue, known_issues: Tag) -> None:
    """Append a 'Bug Blog Text' line to ``issue`` if its title matches a table row.

    The issue title and the second ``<td>`` of each ``<tr>`` are compared after
    ``strip_squarebrackets`` and with all spaces removed. On the first match
    the row's (stripped) text is appended to the issue body as
    ``Bug Blog Text: ...`` and the search stops. Nothing happens when no row
    matches.

    :param issue: Issue whose title is looked up and whose body may be edited.
    :param known_issues: Tag containing the table rows to search.
    """
    title = strip_squarebrackets(issue.title).replace(' ', '')
    for row in known_issues.find_all('tr'):
        cells = row.find_all('td')
        cell_text = cells[1].text.strip()
        if strip_squarebrackets(cell_text).replace(' ', '') != title:
            continue
        # An issue with no description can have ``body`` set to None;
        # treat that as an empty body instead of failing on concatenation.
        existing_body = issue.body or ''
        issue.edit(body=existing_body + '\nBug Blog Text: {0}'.format(cell_text))
        return
示例14: _coordinates_from_HTML
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def _coordinates_from_HTML(
    self, page: Tag, page_num: int
) -> Tuple[
    List[PdfWord], Dict[PdfWordId, Bbox],
]:
    """Extract the words of one page together with their bounding boxes.

    Walks ``block`` -> ``line`` -> ``word`` elements under ``page``. Each
    word's text is split with ``self.separators`` and every non-empty piece
    becomes one entry with id ``(page_num, running index)``. A piece's box
    takes its two vertical values from the enclosing line and its two
    horizontal values from the word; all values are truncated to ``int``.

    :param page: Tag holding the ``block``/``line``/``word`` elements.
    :param page_num: Page number stored in every word id and ``Bbox``.
    :return: ``(pdf_word_list, coordinate_map)`` where the list holds
        ``(word_id, text)`` pairs sorted by block top, block left, word top,
        then word left, and the map gives the ``Bbox`` of each word id.
    """
    pdf_word_list: List[PdfWord] = []
    coordinate_map: Dict[PdfWordId, Bbox] = {}
    block_coordinates: Dict[PdfWordId, Tuple[int, int]] = {}

    def _int_attr(element, attribute):
        # Attribute values are parsed as floats, then truncated to int.
        return int(float(element.get(attribute)))

    for block in page.find_all("block"):
        x_min_block = _int_attr(block, "xmin")
        y_min_block = _int_attr(block, "ymin")
        for line in block.find_all("line"):
            y_min_line = _int_attr(line, "ymin")
            y_max_line = _int_attr(line, "ymax")
            for word in line.find_all("word"):
                xmin = _int_attr(word, "xmin")
                xmax = _int_attr(word, "xmax")
                for content in self.separators.split(word.getText()):
                    if len(content) == 0:  # Ignore empty characters
                        continue
                    # One entry is appended per id, so the list length is
                    # the next free index on this page.
                    word_id: PdfWordId = (page_num, len(pdf_word_list))
                    pdf_word_list.append((word_id, content))
                    coordinate_map[word_id] = Bbox(
                        page_num, y_min_line, y_max_line, xmin, xmax,
                    )
                    block_coordinates[word_id] = (y_min_block, x_min_block)

    def _reading_order(entry):
        # Block (top, left) first, then the word's own (top, left).
        word_id = entry[0]
        bbox = coordinate_map[word_id]
        return block_coordinates[word_id] + (bbox.top, bbox.left)

    pdf_word_list.sort(key=_reading_order)
    return pdf_word_list, coordinate_map
示例15: lowercase_attr_names
# 需要导入模块: from bs4 import element [as 别名]
# 或者: from bs4.element import Tag [as 别名]
def lowercase_attr_names(tag):
    """Replace the tag's attributes with a copy whose names are lower-cased.

    Note: ``tag.attrs`` is reassigned in place; nothing is returned. If two
    names differ only by case, the one iterated last keeps its value.

    :param tag: BeautifulSoup tag
    """
    # Plain loop rather than a dict comprehension, for 2.6 support
    lowered = {}
    for name, value in iteritems(tag.attrs):
        lowered[name.lower()] = value
    tag.attrs = lowered