本文整理汇总了 Python 中 html.parser.HTMLParser.__init__ 方法的典型用法代码示例。如果您正苦于以下问题:Python HTMLParser.__init__ 方法的具体用法?Python HTMLParser.__init__ 怎么用?Python HTMLParser.__init__ 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 html.parser.HTMLParser 的用法示例。
在下文中一共展示了HTMLParser.__init__方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(
    self,
    decode_html_entities=False,
    data_separator=' ',
):
    """Initialise the parser with empty table, row and cell buffers.

    Args:
        decode_html_entities: Passed to ``HTMLParser`` as its
            ``convert_charrefs`` option.
        data_separator: Stored as ``_data_separator`` (presumably used
            to join text fragments of one cell -- confirm against the
            data handlers, which are not visible here).
    """
    HTMLParser.__init__(self, convert_charrefs=decode_html_entities)

    self._data_separator = data_separator

    # Flags for the <td>/<th> state; both start cleared.
    self._in_td = self._in_th = False

    # Accumulators, each starting as its own empty list.
    self._current_table, self._current_row, self._current_cell = [], [], []
    self.tables = []
示例2: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, start_tag, start_attr, end_tag, end_attr):
    """Configure the start/end markers that delimit the text to extract.

    Usage is fairly simple:

        html = '<div class="declare" id="J-declare">声明:百科词条人人可编辑。<a class="declare-details"></a>'
        p = ParseHtml('div', ['class="declare"'], 'a', ['class="declare-details"'])
        p.feed(html)
        print(p.get_data())  # 声明:百科词条人人可编辑。

    :param start_tag: Opening tag name; required.
    :param start_attr: Attributes of the opening tag. Must be a list;
        pass an empty list ``[]`` when there are none.
    :param end_tag: Closing-marker tag name; required.
    :param end_attr: Attributes of the closing-marker tag. Must be a
        list; pass an empty list ``[]`` when there are none.
    """
    HTMLParser.__init__(self)

    # Collected text and the collection flag both start "off".
    self._data = ''
    self._flag = False

    # Tag names are stored as given; attribute lists go through
    # self._split first (start attributes before end attributes).
    self._start_tag = start_tag
    self._start_attr = self._split(start_attr)
    self._end_tag = end_tag
    self._end_attr = self._split(end_attr)
示例3: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, *args, **kwargs):
    '''
        __init__ - Create an AdvancedTagSlim object.

            @see AdvancedTag

            Extra arguments:

                slimSelfClosing <bool> default False - When True, self-closing
                    tags use the slim ending, i.e. <br/> rather than <br />.
                    This may break xhtml compatibility, but modern browsers
                    are okay with it.
    '''
    # Remove our own option before delegating, so AdvancedTag never sees it.
    slim_self_closing = kwargs.pop('slimSelfClosing', False)

    AdvancedTag.__init__(self, *args, **kwargs)

    # Set through object.__setattr__ rather than plain attribute assignment.
    object.__setattr__(self, 'slimSelfClosing', slim_self_closing)
示例4: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, data):
    """
    Create a node that holds character data.

    Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom  = html.feed(data)
        x    = dom.fst('em')
        x.append(Data('\\nbeta'))

    It outputs.

        <body ><em >alpha
        beta</em></body>
    """
    # The Root initialiser is called with the DATA constant.
    Root.__init__(self, DATA)

    self.data = data
示例5: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, obj_type, results, heading, metadata):
    """Store the pieces that make up a report.

    Args:
        obj_type: String object type name (as returned by
            device_type).
        results: A list of Result objects to include in the
            report.
        heading: String heading describing the report.
        metadata: Dictionary of other data you want to output.
            key: Heading name.
            val: Another dictionary, with:
                key: Subheading name.
                val: String of data to print.
    """
    # Arguments are stored as given; nothing is copied or validated.
    self.obj_type, self.results = obj_type, results
    self.heading, self.metadata = heading, metadata
示例6: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, *args):
    """Read the SMTP settings from the rule and normalise recipients.

    After the base initialiser runs, the SMTP options are copied from
    ``self.rule`` onto the instance, the ``email``, ``cc`` and ``bcc``
    entries are wrapped in a list when given as a single string, and
    ``email_add_domain`` gets a leading ``@`` if it lacks one.
    """
    super(EmailAlerter, self).__init__(*args)

    self.smtp_host = self.rule.get('smtp_host', 'localhost')
    self.smtp_ssl = self.rule.get('smtp_ssl', False)
    self.from_addr = self.rule.get('from_addr', 'ElastAlert')
    self.smtp_port = self.rule.get('smtp_port')

    auth_file = self.rule.get('smtp_auth_file')
    if auth_file:
        self.get_account(self.rule['smtp_auth_file'])

    self.smtp_key_file = self.rule.get('smtp_key_file')
    self.smtp_cert_file = self.rule.get('smtp_cert_file')

    # 'email' is read with [] (no default): a single address becomes
    # a one-element list.
    recipients = self.rule['email']
    if isinstance(recipients, str):
        self.rule['email'] = [recipients]

    # The optional cc / bcc entries get the same treatment, cc first.
    for field in ('cc', 'bcc'):
        extra = self.rule.get(field)
        if extra and isinstance(extra, str):
            self.rule[field] = [extra]

    # Make sure a configured domain suffix begins with '@'.
    domain = self.rule.get('email_add_domain')
    if domain and not domain.startswith('@'):
        self.rule['email_add_domain'] = '@' + domain
示例7: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, fileepub):
    """Open an EPUB archive and locate its package document and TOC.

    Args:
        fileepub: Path to the EPUB (zip) file.

    Sets ``path``, ``file`` (the ``ZipFile``, left open for later
    reads), ``rootfile``, ``rootdir``, ``version``, ``contents`` and
    ``toc_entries``. ``toc`` is set only when the package version is
    exactly "2.0" or "3.0".

    Raises:
        Whatever ``zipfile`` / ``ElementTree`` raise for a missing or
        malformed member; the archive is closed before re-raising.
    """
    self.path = os.path.abspath(fileepub)
    self.file = zipfile.ZipFile(fileepub, "r")
    try:
        # Member handles are closed as soon as they have been parsed.
        with self.file.open("META-INF/container.xml") as container:
            cont = ET.parse(container)
        self.rootfile = cont.find(
            "CONT:rootfiles/CONT:rootfile",
            self.NS
        ).attrib["full-path"]

        # Directory prefix of the package document, "" when it sits
        # at the archive root.
        rootdir = os.path.dirname(self.rootfile)
        self.rootdir = rootdir + "/" if rootdir != "" else ""

        with self.file.open(self.rootfile) as package:
            cont = ET.parse(package)

        self.version = cont.getroot().get("version")
        if self.version == "2.0":
            # EPUB2: the NCX is found by media type, not by item id.
            self.toc = self.rootdir\
                + cont.find(
                    "OPF:manifest/*[@media-type='application/x-dtbncx+xml']",
                    self.NS
                ).get("href")
        elif self.version == "3.0":
            # EPUB3: the navigation document carries properties='nav'.
            self.toc = self.rootdir\
                + cont.find(
                    "OPF:manifest/*[@properties='nav']",
                    self.NS
                ).get("href")
        # NOTE(review): any other version string leaves self.toc unset.
    except Exception:
        # Do not leave the archive open behind a half-built object.
        self.file.close()
        raise

    self.contents = []
    self.toc_entries = []
示例8: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self):
    """Initialise the underlying HTMLParser with its default options."""
    HTMLParser.__init__(self)
示例9: get_links
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def get_links(html):
    """Return the non-empty ``href`` values of all ``<a>`` tags in *html*.

    Values are returned in document order, unmodified.
    """

    class URLSeeker(HTMLParser):
        """Parser that records the href of every anchor start tag."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            # Only anchors contribute links.
            if tag != 'a':
                return
            target = dict(attrs).get('href')
            if target:
                self.urls.append(target)

    seeker = URLSeeker()
    seeker.feed(html)
    return seeker.urls
示例10: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self):
    """Initialise the parser and reset every collection buffer and flag."""
    HTMLParser.__init__(self)

    # Text fields start empty.
    self._data_flag = self.desc = ''

    # All state flags start cleared.
    self._desc_flag = self._info_name = self._info_value = False

    # Containers: two separate lists and one dict.
    self.info_name, self.info_value = [], []
    self.info = {}
示例11: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self):
    """Initialise the parser with cleared flags and empty result fields."""
    HTMLParser.__init__(self)

    # Both flags start cleared.
    self.weather_7d = self.name = False

    # Result fields: two empty strings and an empty list.
    self.addr = self.string = ''
    self.data_7d = []
示例12: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, base_url, concurrency=10):
    """Create the crawler state for *base_url*.

    Args:
        base_url: Stored as ``base_url``.
        concurrency: Stored as ``concurrency``; defaults to 10.
    """
    # Configuration supplied by the caller.
    self.base_url = base_url
    self.concurrency = concurrency

    # Two queues and two URL sets, all initially empty.
    self.q, self.q2 = queues.Queue(), queues.Queue()
    self.fetching, self.fetched = set(), set()

    # Creation timestamp and a counter starting at zero.
    self.start = time.time()
    self.i = 0
示例13: get_links
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def get_links(self, html):
    """Collect the non-empty ``href`` of each ``<a>`` tag found in *html*.

    The result is a list in document order. ``self`` is not used.
    """

    class URLSeeker(HTMLParser):
        """HTMLParser subclass that accumulates anchor hrefs in ``urls``."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            link = dict(attrs).get('href')
            is_anchor = tag == 'a'
            if is_anchor and link:
                self.urls.append(link)

    parser = URLSeeker()
    parser.feed(html)
    return parser.urls
示例14: get_links
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def get_links(html):  # parse the HTML
    """Parse *html* and return the non-empty hrefs of its ``<a>`` tags.

    Links are returned as a list, in the order the tags appear.
    """

    class URLSeeker(HTMLParser):
        """Gathers the href attribute of each anchor tag it encounters."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            address = dict(attrs).get('href')
            # Skip non-anchors and anchors with a missing/empty href.
            if not address or tag != 'a':
                return
            self.urls.append(address)

    finder = URLSeeker()
    finder.feed(html)
    return finder.urls
示例15: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, v, typ=None):
    """Store a value with surrounding whitespace removed, plus its type.

    Args:
        v: String value; leading and trailing newlines, carriage
            returns, tabs and spaces are stripped before storing.
        typ: Optional type tag, stored unchanged as ``type``.
    """
    trimmed = v.strip("\n\r\t ")
    self.type = typ
    self.value = trimmed