当前位置: 首页>>代码示例>>Python>>正文


Python SGMLParser.__init__方法代码示例

本文整理汇总了Python中sgmllib.SGMLParser.__init__方法的典型用法代码示例。如果您正苦于以下问题:Python SGMLParser.__init__方法的具体用法?Python SGMLParser.__init__怎么用?Python SGMLParser.__init__使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sgmllib.SGMLParser的用法示例。


在下文中一共展示了SGMLParser.__init__方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self):
    """Initialise the SGML parser and clear all parsing state.

    After the base-class constructor runs, every state flag is set to 0
    and every text buffer is set to the empty string.
    """
    SGMLParser.__init__(self, False)

    # Container for the gathered data.
    self.information = []

    # State flags, all starting at 0.
    for flag_name in ('document_id', 'date', 'ipc_data', 'usc_data',
                      'title', 'inventor', 'assignee', 'abstract',
                      'readable'):
        setattr(self, flag_name, 0)

    # Text buffers, all starting empty.
    for buffer_name in ('docid_buffer', 'date_buffer', 'ipc_buffer',
                        'usc_buffer', 'title_buffer', 'inventor_buffer',
                        'assignee_buffer', 'abstract_buffer'):
        setattr(self, buffer_name, '')
开发者ID:hopped,项目名称:uspto-patents-parsing-tools,代码行数:25,代码来源:uspto-sgml-parser.py

示例2: __init__

# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, tag="a", attr="href", unique=False, process_value=None,
                 strip=True, canonicalized=False):
        """Configure the (deprecated) SGML-based link extractor.

        tag / attr: either a callable used directly as a predicate, or a
            value that scanned tags / attributes are compared against
            with ``==``.
        unique, strip: stored unchanged on the instance.
        process_value: callable applied to values; the identity function
            is used when it is None.
        canonicalized: when true, links are keyed by ``link.url`` as-is;
            otherwise by ``canonicalize_url(link.url, keep_fragments=True)``.

        Emits a ScrapyDeprecationWarning on every construction.
        """
        warnings.warn(
            "BaseSgmlLinkExtractor is deprecated and will be removed in future releases. "
            "Please use scrapy.linkextractors.LinkExtractor",
            ScrapyDeprecationWarning, stacklevel=2,
        )
        SGMLParser.__init__(self)

        # Turn plain tag/attribute values into equality predicates.
        if callable(tag):
            self.scan_tag = tag
        else:
            self.scan_tag = lambda t: t == tag

        if callable(attr):
            self.scan_attr = attr
        else:
            self.scan_attr = lambda a: a == attr

        # Fall back to the identity function when no processor is given.
        if process_value is None:
            self.process_value = lambda v: v
        else:
            self.process_value = process_value

        self.current_link = None
        self.unique = unique
        self.strip = strip

        # Choose how a link is reduced to a comparison key.
        if not canonicalized:
            self.link_key = lambda link: canonicalize_url(link.url,
                                                          keep_fragments=True)
        else:
            self.link_key = lambda link: link.url
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:21,代码来源:sgml.py

示例3: __init__

# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, parser, name, attrs=None, parent=None,
                 previous=None):
        """Basic constructor.

        parser: object supplying ``isSelfClosingTag(name)`` and the
            ``convertHTMLEntities`` / ``convertXMLEntities`` /
            ``escapeUnrecognizedEntities`` settings copied onto this tag.
            Only its class is kept, not the object itself.
        name: the tag name.
        attrs: None (no attributes), a dict, or an iterable of
            ``(key, value)`` pairs.
        parent, previous: forwarded to ``self.setup``.

        After construction ``self.attrs`` is a list of ``(key, value)``
        tuples whose values have had entity references passed through
        ``self._convertEntities``.
        """

        # We don't actually store the parser object: that lets extracted
        # chunks be garbage-collected
        self.parserClass = parser.__class__
        self.isSelfClosing = parser.isSelfClosingTag(name)
        self.name = name
        if attrs is None:
            attrs = []
        elif isinstance(attrs, dict):
            attrs = attrs.items()
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
        self.hidden = False
        self.containsSubstitutions = False
        self.convertHTMLEntities = parser.convertHTMLEntities
        self.convertXMLEntities = parser.convertXMLEntities
        self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities

        # Convert any HTML, XML, or numeric entities in the attribute values.
        # A list comprehension is used instead of a tuple-unpacking lambda
        # (that syntax was removed by PEP 3113), and the pattern is a raw
        # string so the regex escapes are not treated as string escapes.
        self.attrs = [(key, re.sub(r"&(#\d+|#x[0-9a-fA-F]+|\w+);",
                                   self._convertEntities,
                                   val))
                      for key, val in self.attrs]
开发者ID:Autodesk,项目名称:arnold-usd,代码行数:30,代码来源:__init__.py

示例4: reset

# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def reset(self):
        """Return this parser object to its initial, empty state.

        Re-runs ``Tag.__init__`` on this object (passing itself as the
        parser and ``ROOT_TAG_NAME`` as the tag name), resets the
        underlying ``SGMLParser``, clears the per-parse bookkeeping
        attributes, and finally calls ``pushTag`` with this object.
        """
        Tag.__init__(self, self, self.ROOT_TAG_NAME)
        # NOTE(review): presumably keeps the root tag itself out of
        # rendered output -- confirm against the code that reads `hidden`.
        self.hidden = 1
        SGMLParser.reset(self)
        # Fresh bookkeeping for a new parse.
        self.currentData = []
        self.currentTag = None
        self.tagStack = []
        self.quoteStack = []
        # NOTE(review): presumably makes this object the first entry on
        # tagStack and the current tag -- verify in pushTag.
        self.pushTag(self)
开发者ID:Autodesk,项目名称:arnold-usd,代码行数:11,代码来源:__init__.py

示例5: __init__

# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, parser, name, attrs=None, parent=None,
                 previous=None):
        """Basic constructor.

        parser: object supplying ``isSelfClosingTag(name)`` and the
            ``convertHTMLEntities`` / ``convertXMLEntities`` /
            ``escapeUnrecognizedEntities`` settings copied onto this tag.
            Only its class is kept, not the object itself.
        name: the tag name.
        attrs: None (no attributes) or an iterable of ``(key, value)``
            pairs.
        parent, previous: forwarded to ``self.setup``.

        After construction ``self.attrs`` is a list of ``(key, value)``
        tuples whose values have had entity references passed through
        ``self._convertEntities``.
        """

        # We don't actually store the parser object: that lets extracted
        # chunks be garbage-collected
        self.parserClass = parser.__class__
        self.isSelfClosing = parser.isSelfClosingTag(name)
        self.name = name
        # Identity test: an equality test could be answered by a custom
        # __eq__ on the attrs object.
        if attrs is None:
            attrs = []
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
        self.hidden = False
        self.containsSubstitutions = False
        self.convertHTMLEntities = parser.convertHTMLEntities
        self.convertXMLEntities = parser.convertXMLEntities
        self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities

        # Convert any HTML, XML, or numeric entities in the attribute values.
        # A list comprehension is used instead of a tuple-unpacking lambda
        # (that syntax was removed by PEP 3113), and the pattern is a raw
        # string so the regex escapes are not treated as string escapes.
        self.attrs = [(key, re.sub(r"&(#\d+|#x[0-9a-fA-F]+|\w+);",
                                   self._convertEntities,
                                   val))
                      for key, val in self.attrs]
开发者ID:skarlekar,项目名称:faces,代码行数:28,代码来源:_bsoup.py

示例6: __init__

# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, parser, name, attrs=None, parent=None,
                 previous=None):
        """Basic constructor.

        parser: object supplying ``isSelfClosingTag(name)`` and the
            ``convertHTMLEntities`` / ``convertXMLEntities`` /
            ``escapeUnrecognizedEntities`` settings copied onto this tag.
            Only its class is kept, not the object itself.
        name: the tag name.
        attrs: None (no attributes) or an iterable of ``(key, value)``
            pairs.
        parent, previous: forwarded to ``self.setup``.

        After construction ``self.attrs`` is a list of ``(key, value)``
        tuples whose values have had entity references passed through
        ``self._convertEntities``.
        """

        # We don't actually store the parser object: that lets extracted
        # chunks be garbage-collected
        self.parserClass = parser.__class__
        self.isSelfClosing = parser.isSelfClosingTag(name)
        self.name = name
        if attrs is None:
            attrs = []
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
        self.hidden = False
        self.containsSubstitutions = False
        self.convertHTMLEntities = parser.convertHTMLEntities
        self.convertXMLEntities = parser.convertXMLEntities
        self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities

        # Convert any HTML, XML, or numeric entities in the attribute values.
        # A list comprehension is used instead of a tuple-unpacking lambda
        # (that syntax was removed by PEP 3113), and the pattern is a raw
        # string so the regex escapes are not treated as string escapes.
        self.attrs = [(key, re.sub(r"&(#\d+|#x[0-9a-fA-F]+|\w+);",
                                   self._convertEntities,
                                   val))
                      for key, val in self.attrs]
开发者ID:joxeankoret,项目名称:nightmare,代码行数:28,代码来源:BeautifulSoup.py


注:本文中的sgmllib.SGMLParser.__init__方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。