本文整理汇总了Python中pygtrie.CharTrie类的典型用法代码示例。如果您正苦于以下问题：Python pygtrie.CharTrie类的具体用法？Python pygtrie.CharTrie怎么用？Python pygtrie.CharTrie使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pygtrie的用法示例。
在下文中一共展示了pygtrie.CharTrie类的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_traverse_ignore_subtrie
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def test_traverse_ignore_subtrie(self):
    # A node factory that returns None for every path starting with 'a'
    # without touching ``children``; the test expects the callback to be
    # invoked only three times in total for this trie.
    trie = pygtrie.CharTrie()
    trie.update({'aaa': 1, 'aab': 2, 'aac': 3, 'b': 4})

    # One-element list so the nested callback can mutate the counter.
    calls = [0]

    def make(path_conv, path, children, value=self._SENTINEL):
        calls[0] += 1
        if path and path[0] == 'a':
            # ``children`` is deliberately never iterated on this branch.
            return None
        kept = [child for child in children if child is not None]
        return self._TestNode(path_conv(path), kept, value)

    root = trie.traverse(make)

    # Result:
    #  <>
    #    b:4
    self.assertNode(root, '', 1)
    self.assertNode(root.children[0], 'b', 0, 4)
    # Presumably one call each for the root, 'a' and 'b' -- the three
    # keys below 'a' never reach the callback.
    self.assertEqual(3, calls[0])
示例2: initialize_trie
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def initialize_trie(self) -> None:
    """Rebuild ``self.trie`` from the documents stored in ``self.ix``.

    For every entry yielded by ``reader.iter_docs()`` the element at
    index 1 is read as the stored-field mapping.  Its ``'name'`` value is
    run through ``WhooshConstants.normalized_analyzer`` and the ``text``
    of the first element of the resulting list becomes the trie key; the
    ``'canonical_name'`` value becomes the trie value.
    """
    self.trie = pygtrie.CharTrie()
    with self.ix.reader() as reader:
        for doc in reader.iter_docs():
            fields = doc[1]
            canonical = fields['canonical_name']
            # The analyzer output is materialised into a list before
            # indexing, exactly as the one-line form did.
            tokens = list(WhooshConstants.normalized_analyzer(fields['name']))
            self.trie[tokens[0].text] = canonical
示例3: test_traverse_empty_tree
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def test_traverse_empty_tree(self):
    # Traversing a trie with no keys: the result is checked against an
    # empty path and 0 (presumably the expected number of children).
    empty_trie = pygtrie.CharTrie()
    root = empty_trie.traverse(self._make_test_node)
    self.assertNode(root, '', 0)
示例4: test_traverse_singleton_tree
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def test_traverse_singleton_tree(self):
    # Traversing a trie holding the single key 'a': the root is checked
    # for one child, and that child for path 'a', no children, value 10.
    # (assertNode's argument meaning is assumed from its usage here.)
    single = pygtrie.CharTrie()
    single.update({'a': 10})

    root = single.traverse(self._make_test_node)

    self.assertNode(root, '', 1)
    only_child = root.children[0]
    self.assertNode(only_child, 'a', 0, 10)
示例5: test_traverse
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def test_traverse(self):
    # Traverse a four-key trie with the shared ``_make_test_node``
    # factory and verify every node of the resulting tree, including
    # all three leaves below 'aa'.
    t = pygtrie.CharTrie()
    t.update({'aaa': 1, 'aab': 2, 'aac': 3, 'bb': 4})

    r = t.traverse(self._make_test_node)

    # Result:
    #  <>
    #    a
    #      aa
    #        aaa:1
    #        aab:2
    #        aac:3
    #    b
    #      bb:4
    self.assertNode(r, '', 2)

    # For some reason pylint thinks a_node et al. are strings.
    # pylint: disable=no-member

    a_node = self.assertNode(r.children[0], 'a', 1)
    aa_node = self.assertNode(a_node.children[0], 'aa', 3)
    self.assertNode(aa_node.children[0], 'aaa', 0, 1)
    # The middle child was previously left unchecked even though the
    # first and last ones were; verify it as well.
    self.assertNode(aa_node.children[1], 'aab', 0, 2)
    self.assertNode(aa_node.children[2], 'aac', 0, 3)

    b_node = self.assertNode(r.children[1], 'b', 1)
    self.assertNode(b_node.children[0], 'bb', 0, 4)
示例6: test_traverse_compressing
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def test_traverse_compressing(self):
    # Traverse with a node factory that collapses value-less nodes
    # having exactly one child into that child, then check the
    # compressed tree shape.
    trie = pygtrie.CharTrie()
    trie.update({'aaa': 1, 'aab': 2, 'aac': 3, 'bb': 4})

    def make(path_conv, path, children, value=self._SENTINEL):
        ordered = sorted(children)
        has_single_prefix = value is self._SENTINEL and len(ordered) == 1
        if has_single_prefix:
            # There is only one prefix: hand the child up unchanged.
            return ordered[0]
        return self._TestNode(path_conv(path), ordered, value)

    root = trie.traverse(make)

    # Result:
    #  <>
    #    aa
    #      aaa:1
    #      aab:2
    #      aac:3
    #    bb:4
    self.assertNode(root, '', 2)

    # For some reason pylint thinks a_node et al. are strings.
    # pylint: disable=no-member

    aa_node = self.assertNode(root.children[0], 'aa', 3)
    self.assertNode(aa_node.children[0], 'aaa', 0, 1)
    self.assertNode(aa_node.children[1], 'aab', 0, 2)
    self.assertNode(aa_node.children[2], 'aac', 0, 3)

    self.assertNode(root.children[1], 'bb', 0, 4)
示例7: __init__
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def __init__(self):
    """Create an empty ``CharTrie`` roster, then call ``self.update()``."""
    self._roster = pygtrie.CharTrie()
    # update() is defined outside this snippet; presumably it fills the
    # roster -- confirm against the enclosing class.
    self.update()
示例8: __init__
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def __init__(self):
    """Initialise the base class, then start with an empty ``CharTrie``."""
    super().__init__()
    # Character-keyed trie; nothing is inserted at construction time.
    self.trie = pygtrie.CharTrie()
示例9: compute_prefix_embeddings
# 需要导入模块: import pygtrie [as 别名]
# 或者: from pygtrie import CharTrie [as 别名]
def compute_prefix_embeddings(words, emb_info, dtype='float'):
    """Estimate embeddings for out-of-vocabulary words via prefix matches.

    - Words in the pre-trained embeddings: discarded in the output
    - Words not in the pre-trained embeddings: the embedding is computed
      as the mean of the embeddings of (at most) two matched vocabulary
      words: the shortest vocabulary word that has the input word as a
      prefix, and the longest vocabulary word that is a prefix of the
      input word.
    - Words with no matching prefix: discarded in the output

    Args:
        words: iterable of words to compute embeddings for.
        emb_info: pair ``(emb_words, emb)``; ``emb`` is indexed with
            ``emb[build_w2i(emb_words)[word]]`` (presumably ``emb_words``
            is the vocabulary and ``emb`` its embedding rows -- confirm
            against ``build_w2i``).
        dtype: dtype passed to ``np.array`` for the intermediate and
            output arrays.

    Returns:
        Tuple ``(output_words, output_emb)``: the words an embedding was
        computed for, and an array built from those embeddings in the
        same order.
    """
    emb_words, emb = emb_info
    emb_w2i = build_w2i(emb_words)

    emb_words_trie = pygtrie.CharTrie()
    for w in emb_words:
        emb_words_trie[w] = 1

    output_pairs = []
    for w in words:
        if w in emb_w2i:
            # Already has a pre-trained embedding: not part of the output.
            continue

        # Vocabulary words for which the input word is a prefix.
        # ``w`` is not itself a vocabulary word here, so ``has_key(w)``
        # could never be true; ``has_subtrie(w)`` is the check that tells
        # whether any vocabulary word extends ``w`` (and guarantees that
        # ``keys(w)`` will not raise ``KeyError``).
        longer_words = []
        if emb_words_trie.has_subtrie(w):
            longer_words = emb_words_trie.keys(w)

        # Vocabulary words which are prefixes of the input word.
        shorter_words = [x[0] for x in emb_words_trie.prefixes(w)]

        # Keep only the closest match on each side: the shortest longer
        # word and the longest shorter word.
        matched_words = []
        if longer_words:
            matched_words.append(min(longer_words, key=len))
        if shorter_words:
            matched_words.append(max(shorter_words, key=len))

        # Embedding of the word is the mean of the matched words'
        # embeddings; words without any match are skipped.
        if matched_words:
            w_emb = np.mean(
                np.array(
                    [emb[emb_w2i[mw]] for mw in matched_words],
                    dtype=dtype),
                axis=0
            )
            output_pairs.append((w, w_emb))

    output_words = [x[0] for x in output_pairs]
    output_emb = np.array([x[1] for x in output_pairs], dtype=dtype)

    return (output_words, output_emb)