本文整理汇总了Python中whoosh.analysis.Token.feature方法的典型用法代码示例。如果您正苦于以下问题:Python Token.feature方法的具体用法?Python Token.feature怎么用?Python Token.feature使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.analysis.Token的用法示例。
在下文中一共展示了Token.feature方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __call__
# Required import: from whoosh.analysis import Token
# Note: `feature` is not importable; it is an attribute assigned on a Token instance (t.feature = ...).
def __call__(self, value, positions=False, chars=False,
             keeporiginal=False, removestops=True,
             start_pos=0, start_char=0,
             tokenize=True, mode='', **kwargs):
    """Yield tokens for ``value`` as segmented by ``self.tagger.parse``.

    One ``Token`` object is created up front and re-yielded with updated
    attributes for every segment, so a consumer that wants to keep a
    token must copy it before advancing the generator.

    :param value: text to analyse; must be an instance of ``text_type``.
    :param positions: when true, set ``pos`` on each token, counting up
        from ``start_pos``.
    :param chars: when true, set ``startchar``/``endchar`` on each token,
        shifted by ``start_char``.
    :param keeporiginal: when true, also store the token text in
        ``original``.
    :param removestops: passed through to the ``Token`` constructor.
    :param start_pos: position given to the first token.
    :param start_char: character offset added to every token start.
    :param tokenize: when false, yield ``value`` as a single unsplit token.
    :param mode: passed through to the ``Token`` constructor.
    :param kwargs: accepted and ignored.
    """
    assert isinstance(value, text_type), "%r is not unicode" % value
    token = Token(positions, chars, removestops=removestops, mode=mode)

    if not tokenize:
        # Whole-value mode: emit the input untouched as one token.
        token.original = token.text = value
        token.boost = 1.0
        if positions:
            token.pos = start_pos
        if chars:
            token.startchar = start_char
            token.endchar = start_char + len(value)
        yield token
        return

    next_pos = start_pos
    for morpheme in self.tagger.parse(value):
        surface = morpheme.surface
        token.text = surface
        # Carry the tagger's feature data along on the token.
        token.feature = morpheme.feature
        # TODO: use base form.
        token.boost = 1.0
        if keeporiginal:
            token.original = surface
        token.stopped = False
        if positions:
            token.pos = next_pos
            next_pos += 1
        if chars:
            # morpheme.start is taken as an offset into ``value``
            # (assumed to be in characters -- confirm against the tagger).
            begin = start_char + morpheme.start
            token.startchar = begin
            token.endchar = begin + len(surface)
        yield token
示例2: __call__
# Required import: from whoosh.analysis import Token
# Note: `feature` is not importable; it is an attribute assigned on a Token instance (t.feature = ...).
def __call__(self, value, positions=False, chars=False,
             keeporiginal=False, removestops=True,
             start_pos=0, start_char=0,
             tokenize=True, mode='', **kwargs):
    """Yield tokens for ``value`` by walking ``self.tagger.parseToNode``.

    The text is encoded to UTF-8 bytes before being handed to the tagger,
    and each node's byte ``surface`` is decoded back for the token text.
    One ``Token`` object is created up front and re-yielded with updated
    attributes for every node, so a consumer that wants to keep a token
    must copy it before advancing the generator.

    NOTE(review): the node API used here (``surface``, ``feature``,
    ``length``, ``rlength``, ``next``) looks like MeCab's -- confirm.

    :param value: text to analyse; must be an instance of ``text_type``.
    :param positions: when true, set ``pos`` on each token, counting up
        from ``start_pos``.
    :param chars: when true, set ``startchar``/``endchar`` on each token
        as character offsets, shifted by ``start_char``.
    :param keeporiginal: when true, also store the token text in
        ``original``.
    :param removestops: passed through to the ``Token`` constructor.
    :param start_pos: position given to the first token.
    :param start_char: character offset of the start of ``value``.
    :param tokenize: when false, yield ``value`` as a single unsplit token.
    :param mode: passed through to the ``Token`` constructor.
    :param kwargs: accepted and ignored.
    """
    assert isinstance(value, text_type), "%r is not unicode" % value
    token = Token(positions, chars, removestops=removestops, mode=mode)

    if not tokenize:
        # Whole-value mode: emit the input untouched as one token.
        token.original = token.text = value
        token.boost = 1.0
        if positions:
            token.pos = start_pos
        if chars:
            token.startchar = start_char
            token.endchar = start_char + len(value)
        yield token
        return

    next_pos = start_pos
    char_cursor = start_char  # character offset just past the last token
    byte_cursor = 0           # byte offset just past the last token
    # TODO: support other encodings
    encoded = value.encode('utf-8')
    node = self.tagger.parseToNode(encoded)
    while node:
        # Nodes with an empty surface produce no token; skip them.
        if len(node.surface) == 0:
            node = node.next
            continue
        token.text = node.surface.decode('utf-8')
        token.feature = node.feature
        # TODO: use base form.
        token.boost = 1.0
        if keeporiginal:
            token.original = token.text
        token.stopped = False
        if positions:
            token.pos = next_pos
            next_pos += 1
        if chars:
            # rlength - length bytes sit between the previous token and
            # this surface; decode that gap and the surface separately to
            # turn byte offsets into character offsets.
            surface_begin = byte_cursor + node.rlength - node.length
            surface_end = surface_begin + node.length
            gap_text = encoded[byte_cursor:surface_begin].decode('utf-8')
            token.startchar = char_cursor + len(gap_text)
            surface_text = encoded[surface_begin:surface_end].decode('utf-8')
            token.endchar = token.startchar + len(surface_text)
            char_cursor = token.endchar
            byte_cursor = surface_end
        # Advance before yielding, matching the original statement order.
        node = node.next
        yield token