当前位置: 首页>>代码示例>>Python>>正文


Python HTMLParser.__init__方法代码示例

本文整理汇总了 Python 中 html.parser.HTMLParser.__init__ 方法的典型用法代码示例。如果您正苦于以下问题:Python HTMLParser.__init__ 方法的具体用法是什么?Python HTMLParser.__init__ 怎么用?有哪些 Python HTMLParser.__init__ 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 html.parser.HTMLParser 的用法示例。


在下文中一共展示了 HTMLParser.__init__ 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(
        self,
        decode_html_entities=False,
        data_separator=' ',
    ):
        """Set up the parser with empty table-collection state.

        Args:
            decode_html_entities: Passed straight through to HTMLParser as
                its ``convert_charrefs`` flag.
            data_separator: Kept on ``self._data_separator`` (how it is
                used is not visible in this method).
        """
        HTMLParser.__init__(self, convert_charrefs=decode_html_entities)

        # Finished tables, plus the table / row / cell currently being built.
        self.tables = []
        self._current_table, self._current_row, self._current_cell = [], [], []

        # Neither a <td> nor a <th> is open yet.
        self._in_td = self._in_th = False

        self._data_separator = data_separator
开发者ID:schmijos,项目名称:html-table-parser-python3,代码行数:18,代码来源:parser.py

示例2: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, start_tag, start_attr, end_tag, end_attr):
        """Configure the start and end markers for extraction.

        Usage is fairly simple:

        html = '<div class="declare" id="J-declare">Notice: anyone can edit entries.<a class="declare-details"></a>'
        p = ParseHtml('div', ['class="declare"'], 'a', ['class="declare-details"'])
        p.feed(html)
        print(p.get_data())  # Notice: anyone can edit entries.

        :param start_tag: opening tag name; required
        :param start_attr: attributes of the opening tag; must be a list,
            pass an empty list [] when there are none
        :param end_tag: closing tag name; required
        :param end_attr: attributes of the closing tag; must be a list,
            pass an empty list [] when there are none
        """
        HTMLParser.__init__(self)

        # Nothing collected yet, and the capture flag starts off.
        self._data, self._flag = '', False

        # Opening marker: tag name plus its attributes run through _split.
        self._start_tag = start_tag
        self._start_attr = self._split(start_attr)

        # Closing marker, prepared the same way.
        self._end_tag = end_tag
        self._end_attr = self._split(end_attr)
开发者ID:jtyoui,项目名称:Jtyoui,代码行数:22,代码来源:HTML.py

示例3: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, *args, **kwargs):
        '''
            __init__ - Construct an AdvancedTagSlim object.

                All positional and keyword arguments are handed on to
                AdvancedTag.__init__, except for the extra option below,
                which is removed from kwargs first.

                @see AdvancedTag

                Extra arguments:

                  slimSelfClosing <bool> default False - If True, will use slim-endings on self-closing tags,

                    i.e. <br/> instead of <br />

                    This may break xhtml compatibility but modern browsers are okay with it.
        '''
        # Take our own option out so the parent constructor never sees it.
        slimSelfClosing = kwargs.pop('slimSelfClosing', False)

        AdvancedTag.__init__(self, *args, **kwargs)

        # Stored through object.__setattr__ rather than plain assignment
        # (presumably AdvancedTag customises __setattr__ -- TODO confirm).
        object.__setattr__(self, 'slimSelfClosing', slimSelfClosing)
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:23,代码来源:Formatter.py

示例4: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, data):
        """
        Build a node that carries character data.

        Root.__init__ is invoked with the DATA constant, and the given
        characters are kept unchanged on self.data.

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)
        x = dom.fst('em')
        x.append(Data('\nbeta'))

        The output is:

        <body ><em >alpha
        beta</em></body>
        """

        Root.__init__(self, DATA)

        # The raw characters held by this node.
        self.data = data
开发者ID:iogf,项目名称:ehp,代码行数:22,代码来源:ehp.py

示例5: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, obj_type, results, heading, metadata):
        """Store the pieces that make up one report.

        Args:
            obj_type: String object type name (as returned by
                device_type).
            results: A list of Result objects to include in the
                report.
            heading: String heading describing the report.
            metadata: Dictionary of other data you want to output.
                key: Heading name.
                val: Another dictionary, with:
                    key: Subheading name.
                    val: String of data to print.
        """
        # Every argument is kept as-is under an attribute of the same name.
        self.obj_type, self.results = obj_type, results
        self.heading, self.metadata = heading, metadata
开发者ID:jssimporter,项目名称:Spruce,代码行数:21,代码来源:spruce.py

示例6: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, *args):
        """Read the SMTP settings from the rule and normalise recipients.

        All positional arguments go to the parent initialiser. After that
        the 'email', 'cc' and 'bcc' entries of self.rule are turned into
        lists when they were given as a single string, and
        'email_add_domain' gets a leading '@' when it lacks one.
        """
        super(EmailAlerter, self).__init__(*args)

        # Connection settings; host, ssl and from address have defaults.
        self.smtp_host = self.rule.get('smtp_host', 'localhost')
        self.smtp_ssl = self.rule.get('smtp_ssl', False)
        self.from_addr = self.rule.get('from_addr', 'ElastAlert')
        self.smtp_port = self.rule.get('smtp_port')

        # Credentials are only loaded when an auth file is configured.
        auth_file = self.rule.get('smtp_auth_file')
        if auth_file:
            self.get_account(auth_file)

        self.smtp_key_file = self.rule.get('smtp_key_file')
        self.smtp_cert_file = self.rule.get('smtp_cert_file')

        # 'email' is read with [] so a rule without it raises KeyError.
        recipients = self.rule['email']
        if isinstance(recipients, str):
            self.rule['email'] = [recipients]

        # cc / bcc are optional; wrap a lone non-empty string in a list.
        for field in ('cc', 'bcc'):
            listed = self.rule.get(field)
            if listed and isinstance(listed, str):
                self.rule[field] = [listed]

        # Make sure the domain suffix starts with '@'.
        domain = self.rule.get('email_add_domain')
        if domain and not domain.startswith('@'):
            self.rule['email_add_domain'] = '@' + domain
开发者ID:Yelp,项目名称:elastalert,代码行数:27,代码来源:alerts.py

示例7: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, fileepub):
        """Open an EPUB archive and locate its package file and TOC.

        Args:
            fileepub: Path of the .epub (zip) file to open.

        Sets path, file, rootfile, rootdir, version, contents and
        toc_entries. ``toc`` is set only when the package version is
        "2.0" or "3.0".
        """
        self.path = os.path.abspath(fileepub)
        self.file = zipfile.ZipFile(fileepub, "r")

        # container.xml names the package (rootfile) document.
        container = ET.parse(self.file.open("META-INF/container.xml"))
        rootfile_node = container.find("CONT:rootfiles/CONT:rootfile", self.NS)
        self.rootfile = rootfile_node.attrib["full-path"]

        # Directory prefix of the package file, "" when it sits at the top.
        parent = os.path.dirname(self.rootfile)
        self.rootdir = parent + "/" if parent != "" else ""

        package = ET.parse(self.file.open(self.rootfile))
        self.version = package.getroot().get("version")

        # Manifest query for the TOC item, by package version
        # (EPUB2 uses the NCX item, EPUB3 the item flagged as nav).
        toc_queries = {
            "2.0": "OPF:manifest/*[@media-type='application/x-dtbncx+xml']",
            "3.0": "OPF:manifest/*[@properties='nav']",
        }
        query = toc_queries.get(self.version)
        if query is not None:
            self.toc = self.rootdir + package.find(query, self.NS).get("href")

        self.contents = []
        self.toc_entries = []
开发者ID:wustho,项目名称:epr,代码行数:31,代码来源:epr.py

示例8: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self):
        """Initialise the parser by delegating to HTMLParser with its defaults."""
        HTMLParser.__init__(self)
开发者ID:graalvm,项目名称:mx,代码行数:4,代码来源:mx_mxtests.py

示例9: get_links

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def get_links(html):
    """Return the href of every <a> start tag in *html*, in document order.

    Anchors with no href, or with an empty one, are skipped.
    """
    class AnchorCollector(HTMLParser):
        """Parser that records non-empty href values of <a> tags."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            # Only anchors are of interest.
            if tag != 'a':
                return
            target = dict(attrs).get('href')
            if target:
                self.urls.append(target)

    collector = AnchorCollector()
    collector.feed(html)
    return collector.urls
开发者ID:tao12345666333,项目名称:tornado-zh,代码行数:16,代码来源:webspider.py

示例10: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self):
        """Initialise HTMLParser and reset every collection attribute.

        Text attributes start as '', flags as False, the name and value
        lists empty, and ``info`` as an empty dict.
        """
        HTMLParser.__init__(self)

        # Text attributes.
        self._data_flag = self.desc = ''

        # State flags, all off to begin with.
        self._desc_flag = self._info_name = self._info_value = False

        # Two separate lists for names and values, plus the info mapping.
        self.info_name, self.info_value = [], []
        self.info = {}
开发者ID:jtyoui,项目名称:Jtyoui,代码行数:12,代码来源:Encyclopedias.py

示例11: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self):
        """Initialise HTMLParser and clear the weather/address state.

        Both flags start as False, both strings as '', and ``data_7d`` as
        an empty list.
        """
        HTMLParser.__init__(self)

        # Flags.
        self.weather_7d = self.name = False

        # Text attributes.
        self.addr = self.string = ''

        # Collected entries.
        self.data_7d = []
开发者ID:jtyoui,项目名称:Jtyoui,代码行数:9,代码来源:WeatherAddress.py

示例12: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, base_url, concurrency=10):
        """Prepare the crawler's queues, bookkeeping sets and settings.

        Args:
            base_url: Stored on ``self.base_url``.
            concurrency: Stored on ``self.concurrency``; defaults to 10.
        """
        # Two independent queues.
        self.q, self.q2 = queues.Queue(), queues.Queue()

        # Wall-clock timestamp taken at construction.
        self.start = time.time()

        # URL bookkeeping sets, both empty at first.
        self.fetching, self.fetched = set(), set()

        self.base_url, self.concurrency = base_url, concurrency

        # Counter starting at zero.
        self.i = 0
开发者ID:makelove,项目名称:Python_Master_Courses,代码行数:11,代码来源:tornado-crawler-demo2.py

示例13: get_links

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def get_links(self, html):
        """Return the href of every <a> start tag in *html*, in order.

        Anchors with no href, or with an empty one, are left out.
        """
        found = []

        class LinkScanner(HTMLParser):
            """Parser that appends anchor hrefs to the enclosing list."""

            def handle_starttag(self, tag, attrs):
                href = dict(attrs).get('href')
                if tag == 'a' and href:
                    found.append(href)

        LinkScanner().feed(html)
        return found
开发者ID:makelove,项目名称:Python_Master_Courses,代码行数:16,代码来源:tornado-crawler-demo2.py

示例14: get_links

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def get_links(html):  # parse the HTML
    """Collect the href of each <a> start tag found in *html*.

    The result is a list in document order; anchors with a missing or
    empty href are ignored.
    """
    class AnchorHrefs(HTMLParser):
        """Parser that gathers anchor targets in ``self.urls``."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            link = dict(attrs).get('href')
            is_anchor = tag == 'a'
            if is_anchor and link:
                self.urls.append(link)

    parser = AnchorHrefs()
    parser.feed(html)
    return parser.urls
开发者ID:makelove,项目名称:Python_Master_Courses,代码行数:16,代码来源:tornado-crawler-demo1.py

示例15: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import __init__ [as 别名]
def __init__(self, v, typ=None):
        """Keep *typ* as ``type`` and the trimmed text as ``value``.

        Only newline, carriage return, tab and space are trimmed from the
        two ends of *v*.
        """
        edge_chars = "\n\r\t "
        self.type = typ
        self.value = v.strip(edge_chars)
开发者ID:manatlan,项目名称:vbuild,代码行数:5,代码来源:__init__.py


注:本文中的html.parser.HTMLParser.__init__方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。