本文整理汇总了Python中bs4.FeatureNotFound异常的典型用法代码示例。如果您正苦于以下问题:Python bs4.FeatureNotFound的具体用法?Python bs4.FeatureNotFound怎么用?Python bs4.FeatureNotFound使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该异常所在模块bs4的用法示例。
在下文中一共展示了bs4.FeatureNotFound异常的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import FeatureNotFound [as 别名]
def __init__(self, source_data, logger=None):
    """Parse ``source_data`` as HTML and keep the resulting soup.

    The parent initializer is called with ``source_data`` first.  The
    markup is parsed with the ``lxml`` backend; if Beautiful Soup reports
    that backend as unavailable, ``html.parser`` is used instead.

    Args:
        source_data: HTML text to parse.
        logger: Logger to report through.  Any falsy value is replaced
            by ``NullSourceLogger(None)``.

    Raises:
        DataError: If ``typepy.is_null_string(source_data)`` is true.
    """
    super().__init__(source_data)

    self.__logger = logger or NullSourceLogger(None)
    self.__table_id = None

    if typepy.is_null_string(source_data):
        raise DataError

    # NOTE(review): self._source_data is presumably set by the parent
    # initializer above -- confirm against the base class.
    try:
        parsed = bs4.BeautifulSoup(self._source_data, "lxml")
    except bs4.FeatureNotFound:
        # lxml is not available; use the html.parser backend instead.
        parsed = bs4.BeautifulSoup(self._source_data, "html.parser")
    self.__soup = parsed
示例2: __init__
# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import FeatureNotFound [as 别名]
def __init__(self, markup, parsers, **kwargs):
    """Initialize the soup with the first parser in ``parsers`` that works.

    Args:
        markup: Markup passed through to the parent initializer.
        parsers: Iterable of parser names, tried in order.
        **kwargs: Extra keyword arguments forwarded to the parent
            initializer.  ``features`` and ``builder`` are not accepted.

    Raises:
        ValueError: If ``parsers`` contains a feature name instead of a
            parser name, or if ``features`` / ``builder`` is supplied.
        FeatureNotFound: If none of the requested parsers could be used.
    """
    # Only concrete parser names are accepted, never feature names.
    feature_names = {'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}
    if feature_names & set(parsers):
        raise ValueError('Features not allowed, only parser names')

    # These keyword arguments are rejected outright.
    if 'features' in kwargs:
        raise ValueError('Cannot use features kwarg')
    if 'builder' in kwargs:
        raise ValueError('Cannot use builder kwarg')

    # Stop at the first parser the parent initializer accepts.
    for candidate in parsers:
        try:
            super(ParserBeautifulSoup, self).__init__(markup, candidate, **kwargs)
        except FeatureNotFound:
            continue
        else:
            return

    # Every candidate was rejected (or ``parsers`` was empty).
    raise FeatureNotFound
示例3: test_backend_parsers
# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import FeatureNotFound [as 别名]
def test_backend_parsers():
    """
    Check that the back-end parser can be chosen by the user and that an
    invalid parser name raises an error.
    """
    def read_with(backend):
        # Same read call every time; only the requested parser varies.
        return Table.read('data/html2.html', format='ascii.html',
                          htmldict={'parser': backend}, guess=False)

    for backend in ('lxml', 'xml', 'html.parser', 'html5lib'):
        try:
            read_with(backend)
        except FeatureNotFound:
            # A missing optional dependency is tolerated for every
            # backend except 'html.parser', whose failure is re-raised.
            if backend == 'html.parser':
                raise

    # An unknown parser name must make the read fail.
    with pytest.raises(FeatureNotFound):
        read_with('foo')
示例4: get_available_parsers
# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import FeatureNotFound [as 别名]
def get_available_parsers():
    """Return the entries of ``PARSERS`` that Beautiful Soup can use.

    Each candidate is probed by building a soup from an empty document;
    candidates that raise ``bs4.FeatureNotFound`` are left out.  The
    order of ``PARSERS`` is preserved.
    """
    def _is_usable(name):
        try:
            bs4.BeautifulSoup("", name)
        except bs4.FeatureNotFound:
            return False
        return True

    return [name for name in PARSERS if _is_usable(name)]
示例5: create_node
# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import FeatureNotFound [as 别名]
def create_node(self, html, loc, meta=None):
    """Build a search node for one HTML page and append it to ``self.json_nodes``.

    The page is parsed twice, once restricted by ``self.only_text`` and
    once by ``self.only_title``, using ``lxml`` and falling back to
    ``html.parser`` when ``lxml`` is not available.

    Args:
        html: HTML markup of the page.
        loc: Location of the page.  It is joined onto ``self.siteurl``
            when that is non-empty, otherwise used unchanged.
        meta: Optional mapping of page metadata.  Only the ``'category'``
            key is read; it becomes the node's ``'tags'`` value.

    The appended node is a dict with the keys ``'title'``, ``'text'``,
    ``'tags'`` and ``'loc'``.  A page without a ``<div id="text">`` gets
    an empty ``'text'`` instead of raising ``AttributeError``.
    """
    # Avoid a shared mutable default argument.
    if meta is None:
        meta = {}

    try:
        soup = BeautifulSoup(html, 'lxml', parse_only=self.only_text)
        soup_title = BeautifulSoup(html, 'lxml', parse_only=self.only_title)
    except FeatureNotFound:
        soup = BeautifulSoup(html, 'html.parser', parse_only=self.only_text)
        soup_title = BeautifulSoup(html, 'html.parser', parse_only=self.only_title)

    # find() returns None when the element is missing; guard before use.
    text_div = soup.find("div", {"id": "text"})
    if text_div is None:
        page_text = ''
    else:
        page_text = text_div.get_text(' ', strip=True)
        # Strip what look like LaTeX math delimiters from the text.
        for marker in ('\\(', '\\)', '\\[', '\\]', '$$'):
            page_text = page_text.replace(marker, '')
        # NOTE(review): the original ended with .replace('^', '^'), a
        # no-op that was probably an HTML-entity escape for the caret
        # before extraction mangled it -- confirm against upstream.

    # Pages without a <title> get an empty title.
    title_tag = soup_title.title
    if title_tag is not None:
        page_title = '{0}'.format(title_tag.string)
    else:
        page_title = ''

    # No category in the metadata means an empty tag string.
    page_category = meta['category'] if 'category' in meta else ''

    if self.siteurl != '':
        page_url = urljoin(self.siteurl, loc)
    else:
        page_url = loc

    node = {'title': page_title,
            'text': page_text,
            'tags': page_category,
            'loc': page_url}
    self.json_nodes.append(node)