本文整理汇总了Python中bs4.Tag.find_all方法的典型用法代码示例。如果您正苦于以下问题：Python Tag.find_all方法的具体用法？Python Tag.find_all怎么用？Python Tag.find_all使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类bs4.Tag的用法示例。
在下文中一共展示了Tag.find_all方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_phone_data
# 需要导入模块: from bs4 import Tag [as 别名]
# 注意: find_all 是 Tag 的实例方法, 不能单独导入; 导入 Tag 后通过 tag.find_all(...) 调用
def get_phone_data(table_tag: "Tag | None") -> list:
    """
    Read the brand and model strings out of a table element.

    The brand is taken from the first ``td`` of the second ``tr`` and the
    model from the first ``td`` of the third ``tr``; the first row is ignored.
    The site watermark text is removed from each value and surrounding
    whitespace is stripped. A row without a ``td`` contributes nothing, so
    the result may hold fewer than two entries.

    :param table_tag: table element to read; ``None`` yields an empty list
    :return: list of the values found, in the order [brand, model]
    """
    data = []
    if table_tag is None:
        return data
    # Rows 1 and 2 carry the brand and the model respectively.
    for tr in table_tag.find_all('tr')[1:3]:
        td = tr.find('td')
        if td is None:
            continue
        text = str(td.get_text())
        # Drop the longer watermark first so its 'IMEIdb' prefix is not
        # removed on its own, leaving the rest of the watermark behind.
        data.append(text.replace('IMEIdb.com免费查询', '').replace('IMEIdb', '').strip())
    return data
示例2: download_images
# 需要导入模块: from bs4 import Tag [as 别名]
# 注意: find_all 是 Tag 的实例方法, 不能单独导入; 导入 Tag 后通过 tag.find_all(...) 调用
def download_images(post: Tag) -> None:
    """
    Download every gallery image linked from ``post`` into the working directory.

    Each element in ``post`` whose ``href`` matches ``/galleri/visabild`` is
    turned into a full link with ``LINK_PREFIX``. The numeric image id is
    pulled from the ``id=<digits>&`` part of that link, the image is fetched
    from ``IMG_URL.format(image_id)`` and written to ``<image_id>.jpg``.

    :param post: element whose descendants are searched for gallery links
    :return: None; files are written and progress is printed as side effects
    """
    image_links = [LINK_PREFIX + link.attrs.get('href') for link in post.find_all(href=re.compile('/galleri/visabild'))]
    for url in image_links:
        id_match = re.search(r'id=([0-9]+)[&]', url)
        if id_match is None:
            # Without an id there is nothing to request; report and move on
            # instead of failing with AttributeError on ``None.group``.
            print(' Skipping link without image id: ' + url)
            continue
        image_id = id_match.group(1)
        image_url = IMG_URL.format(image_id)
        print(' Downloading image: ' + image_url)
        # A streamed response keeps its connection open until it is closed,
        # so use it as a context manager to release the connection.
        with requests.get(image_url, stream=True) as r:
            with open(image_id + '.jpg', 'wb') as fd:
                for chunk in r.iter_content(128):
                    fd.write(chunk)
示例3: parse_match
# 需要导入模块: from bs4 import Tag [as 别名]
# 注意: find_all 是 Tag 的实例方法, 不能单独导入; 导入 Tag 后通过 tag.find_all(...) 调用
def parse_match(match: Tag) -> dict:
    """Pull link, thumbnail, size and similarity out of one IQDB match element.

    The values are located purely by row position (rows 1, 3 and 4 of the
    ``tr`` elements found in ``match``), so this is fragile: any change to
    the page layout will raise or produce wrong values.

    :param match: element containing the ``tr`` rows of a single match
    :return: dict with keys ``"link"``, ``"thumb"``, ``"size"`` (tuple of
        ints parsed from the ``×``-separated text) and ``"similarity"``
        (int parsed from the text before ``%``)
    """
    table_rows = match.find_all('tr')
    parsed = {}
    # Row 1: the anchor's target and the thumbnail image source.
    parsed["link"] = table_rows[1].td.a.get("href")
    parsed["thumb"] = table_rows[1].td.img.get("src")
    # Row 3: the text before the first space is the '×'-separated size.
    size_text = table_rows[3].td.get_text()
    dimensions = size_text.split(' ')[0]
    parsed["size"] = tuple(map(int, dimensions.split('×')))
    # Row 4: the number in front of the percent sign.
    similarity_text = table_rows[4].get_text()
    parsed["similarity"] = int(similarity_text.split('%')[0])
    return parsed
示例4: autoid_elements
# 需要导入模块: from bs4 import Tag [as 别名]
# 注意: find_all 是 Tag 的实例方法, 不能单独导入; 导入 Tag 后通过 tag.find_all(...) 调用
def autoid_elements(soup: Tag) -> Tag:
    """
    Give an ``id`` to every element of interest that does not have one yet.

    Elements are selected with ``soup.find_all(_ELEMENT_OF_INTEREST)``
    (per the original documentation: definition terms and headers). The id
    is derived from the element's text via ``xfrm_ids``. Elements that
    already carry an ``id`` attribute are left untouched.

    For example, the ``h1`` below would receive an id based on "Foo":

        <div class="slide">
        <h1>Foo</h1>
        […]
        </div>

    :param soup: tree to modify in place
    :return: the same ``soup`` object, for chaining
    """
    for element in soup.find_all(_ELEMENT_OF_INTEREST):
        if 'id' in element.attrs:
            continue
        text = element.string
        if text is None:
            # ``.string`` is None when the element has several children (or
            # none); fall back to the concatenated text so the id is not
            # built from the literal string "None".
            text = element.get_text()
        element.attrs['id'] = xfrm_ids(str(text))
    return soup
示例5: _extract_hit
# 需要导入模块: from bs4 import Tag [as 别名]
# 注意: find_all 是 Tag 的实例方法, 不能单独导入; 导入 Tag 后通过 tag.find_all(...) 调用
def _extract_hit(tag: Tag):
    """
    Convert one hit element into a plain dict of its URL and metadata.

    The result carries ``url`` (the element's ``url`` attribute),
    ``url_hash`` (``hash_url`` of that URL) and ``features``, which maps the
    ``name`` attribute of every ``Meta`` descendant to a one-element list
    holding its value. The value is the element's raw text, except for
    ``lastmodifieddate``, whose stripped text is parsed as
    ``%m/%d/%Y %H:%M:%S`` into a ``datetime``.

    NOTE(review): ``Dict`` is presumably an attribute-access dict that
    creates nested keys such as ``features`` on first use — confirm.

    :param tag: hit element with a ``url`` attribute and ``Meta`` children
    :return: the result of ``meta_doc.to_dict()``
    :raises ValueError: if a ``lastmodifieddate`` text does not match the
        expected date format
    """
    meta_doc = Dict()
    meta_doc.url = tag['url']
    meta_doc.url_hash = hash_url(tag['url'])
    # Plain loop: the work here is done for its side effect on meta_doc,
    # not to build a list.
    for meta_tag in tag.find_all("Meta"):
        if meta_tag['name'] == 'lastmodifieddate':
            meta_doc.features[meta_tag['name']] = [datetime.strptime(
                meta_tag.text.strip(), "%m/%d/%Y %H:%M:%S"
            )]
        else:
            meta_doc.features[meta_tag['name']] = [meta_tag.text]
    return meta_doc.to_dict()