本文整理汇总了 Python 中 sgmllib.SGMLParser.__init__ 方法的典型用法代码示例。如果您正苦于以下问题:Python SGMLParser.__init__ 方法的具体用法?Python SGMLParser.__init__ 怎么用?Python SGMLParser.__init__ 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sgmllib.SGMLParser 的用法示例。
在下文中一共展示了SGMLParser.__init__方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self):
    """Set up the parser with an empty result list, cleared flags and empty buffers.

    The base ``SGMLParser`` initialiser is called with ``False`` as its
    second positional argument (presumably the stdlib ``verbose`` flag --
    confirm against the sgmllib version in use).
    """
    SGMLParser.__init__(self, False)

    # Records gathered while parsing.
    self.information = []

    # State flags, one per section of interest; every flag starts at 0.
    for flag_name in ('document_id', 'date', 'ipc_data', 'usc_data',
                      'title', 'inventor', 'assignee', 'abstract',
                      'readable'):
        setattr(self, flag_name, 0)

    # Text buffers, one per captured field; every buffer starts empty.
    for field in ('docid', 'date', 'ipc', 'usc',
                  'title', 'inventor', 'assignee', 'abstract'):
        setattr(self, field + '_buffer', '')
示例2: __init__
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, tag="a", attr="href", unique=False, process_value=None,
             strip=True, canonicalized=False):
    """Configure the (deprecated) SGML-based link extractor.

    Emits a ``ScrapyDeprecationWarning`` on every construction, then
    initialises the underlying ``SGMLParser``.

    :param tag: tag name to match, or a callable used as the tag predicate.
    :param attr: attribute name to match, or a callable used as the
        attribute predicate.
    :param unique: stored as ``self.unique``.
    :param process_value: callable stored as ``self.process_value``;
        the identity function is used when it is ``None``.
    :param strip: stored as ``self.strip``.
    :param canonicalized: when true, ``self.link_key`` returns ``link.url``
        unchanged; otherwise it returns
        ``canonicalize_url(link.url, keep_fragments=True)``.
    """
    # Must be called directly from this frame so stacklevel=2 points at
    # the code that instantiated the extractor.
    warnings.warn(
        "BaseSgmlLinkExtractor is deprecated and will be removed in future releases. "
        "Please use scrapy.linkextractors.LinkExtractor",
        ScrapyDeprecationWarning, stacklevel=2,
    )
    SGMLParser.__init__(self)

    # Tag predicate: use a callable as-is, otherwise compare by equality.
    if callable(tag):
        self.scan_tag = tag
    else:
        self.scan_tag = lambda t: t == tag

    # Attribute predicate: same rule as for tags.
    if callable(attr):
        self.scan_attr = attr
    else:
        self.scan_attr = lambda a: a == attr

    # Value post-processing defaults to the identity function.
    if process_value is None:
        self.process_value = lambda v: v
    else:
        self.process_value = process_value

    self.current_link = None
    self.unique = unique
    self.strip = strip

    # Key function applied to a link: canonicalise its URL unless the
    # caller states the URLs are canonical already.
    if not canonicalized:
        self.link_key = lambda link: canonicalize_url(link.url,
                                                      keep_fragments=True)
    else:
        self.link_key = lambda link: link.url
示例3: __init__
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, parser, name, attrs=None, parent=None,
             previous=None):
    """Basic constructor.

    :param parser: object providing ``isSelfClosingTag(name)`` and the
        ``convertHTMLEntities`` / ``convertXMLEntities`` /
        ``escapeUnrecognizedEntities`` settings; only its class and these
        values are kept.
    :param name: tag name, stored as ``self.name``.
    :param attrs: ``None`` (treated as no attributes), a dict, or an
        iterable of ``(key, value)`` pairs.
    :param parent: forwarded to ``self.setup``.
    :param previous: forwarded to ``self.setup``.
    """
    # We don't actually store the parser object: that lets extracted
    # chunks be garbage-collected
    self.parserClass = parser.__class__
    self.isSelfClosing = parser.isSelfClosingTag(name)
    self.name = name
    if attrs is None:
        attrs = []
    elif isinstance(attrs, dict):
        attrs = attrs.items()
    self.attrs = attrs
    self.contents = []
    self.setup(parent, previous)
    self.hidden = False
    self.containsSubstitutions = False
    self.convertHTMLEntities = parser.convertHTMLEntities
    self.convertXMLEntities = parser.convertXMLEntities
    self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities

    # Convert any HTML, XML, or numeric entities in the attribute values.
    # A list comprehension replaces the tuple-parameter lambda (a syntax
    # error on Python 3) and guarantees a real list instead of a lazy
    # map object; the raw string avoids invalid-escape warnings.
    entity_pattern = r"&(#\d+|#x[0-9a-fA-F]+|\w+);"
    self.attrs = [
        (key, re.sub(entity_pattern, self._convertEntities, value))
        for key, value in self.attrs
    ]
示例4: reset
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def reset(self):
    """Return the object to its initial parsing state.

    Re-runs ``Tag.__init__`` with this object acting as both parser and
    tag (named ``ROOT_TAG_NAME``), marks it hidden, resets the underlying
    ``SGMLParser`` and clears the parse bookkeeping before pushing this
    object as the first tag.
    """
    # This object is passed as its own parser.
    Tag.__init__(self, self, self.ROOT_TAG_NAME)
    self.hidden = 1
    SGMLParser.reset(self)

    # Fresh bookkeeping for a new parse.
    self.currentTag = None
    self.currentData = []
    self.tagStack = []
    self.quoteStack = []

    self.pushTag(self)
示例5: __init__
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, parser, name, attrs=None, parent=None,
             previous=None):
    """Basic constructor.

    :param parser: object providing ``isSelfClosingTag(name)`` and the
        ``convertHTMLEntities`` / ``convertXMLEntities`` /
        ``escapeUnrecognizedEntities`` settings; only its class and these
        values are kept.
    :param name: tag name, stored as ``self.name``.
    :param attrs: ``None`` (treated as no attributes) or an iterable of
        ``(key, value)`` pairs.
    :param parent: forwarded to ``self.setup``.
    :param previous: forwarded to ``self.setup``.
    """
    # We don't actually store the parser object: that lets extracted
    # chunks be garbage-collected
    self.parserClass = parser.__class__
    self.isSelfClosing = parser.isSelfClosingTag(name)
    self.name = name
    # Identity check: ``== None`` would call a custom ``__eq__`` on attrs.
    if attrs is None:
        attrs = []
    self.attrs = attrs
    self.contents = []
    self.setup(parent, previous)
    self.hidden = False
    self.containsSubstitutions = False
    self.convertHTMLEntities = parser.convertHTMLEntities
    self.convertXMLEntities = parser.convertXMLEntities
    self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities

    # Convert any HTML, XML, or numeric entities in the attribute values.
    # A list comprehension replaces the tuple-parameter lambda (a syntax
    # error on Python 3) and guarantees a real list instead of a lazy
    # map object; the raw string avoids invalid-escape warnings.
    entity_pattern = r"&(#\d+|#x[0-9a-fA-F]+|\w+);"
    self.attrs = [
        (key, re.sub(entity_pattern, self._convertEntities, value))
        for key, value in self.attrs
    ]
示例6: __init__
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import __init__ [as 别名]
def __init__(self, parser, name, attrs=None, parent=None,
             previous=None):
    """Basic constructor.

    :param parser: object providing ``isSelfClosingTag(name)`` and the
        ``convertHTMLEntities`` / ``convertXMLEntities`` /
        ``escapeUnrecognizedEntities`` settings; only its class and these
        values are kept.
    :param name: tag name, stored as ``self.name``.
    :param attrs: ``None`` (treated as no attributes) or an iterable of
        ``(key, value)`` pairs.
    :param parent: forwarded to ``self.setup``.
    :param previous: forwarded to ``self.setup``.
    """
    # We don't actually store the parser object: that lets extracted
    # chunks be garbage-collected
    self.parserClass = parser.__class__
    self.isSelfClosing = parser.isSelfClosingTag(name)
    self.name = name
    if attrs is None:
        attrs = []
    self.attrs = attrs
    self.contents = []
    self.setup(parent, previous)
    self.hidden = False
    self.containsSubstitutions = False
    self.convertHTMLEntities = parser.convertHTMLEntities
    self.convertXMLEntities = parser.convertXMLEntities
    self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities

    # Convert any HTML, XML, or numeric entities in the attribute values.
    # A list comprehension replaces the tuple-parameter lambda (a syntax
    # error on Python 3) and guarantees a real list instead of a lazy
    # map object; the raw string avoids invalid-escape warnings.
    entity_pattern = r"&(#\d+|#x[0-9a-fA-F]+|\w+);"
    self.attrs = [
        (key, re.sub(entity_pattern, self._convertEntities, value))
        for key, value in self.attrs
    ]