本文整理汇总了 Python 中 corpus.Corpus.ngram_neighbours 方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.ngram_neighbours 方法的具体用法？Python Corpus.ngram_neighbours 怎么用？Python Corpus.ngram_neighbours 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 corpus.Corpus 的用法示例。
在下文中一共展示了 Corpus.ngram_neighbours 方法的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import ngram_neighbours [as 别名]
#.........这里部分代码省略.........
break
if not gain_for_every_word:
continue
else:
tgt_occ = tgt_index[tgt_toks[0][0]]
coocc = len(src_occ & tgt_occ)
# if results are ok, yield them with contingency table
if (coocc >= min_coocc and (max_coocc is None or coocc <= max_coocc)):
cont_table = self.contingency_table(None, src_occ_s=src_occ, tgt_occ_s=tgt_occ, coocc_c=coocc)
if reverse is False:
results.append( (set([(src_tok,)]), set([(tgt_tok[0],) for tgt_tok in tgt_toks]), cont_table) )
else:
results.append( (set([(tgt_tok[0],) for tgt_tok in tgt_toks]), set([(src_tok,)]), cont_table) )
else:
break
yield results
gc.enable()
def generate_unigram_set_pairs(self, min_coocc=1, max_coocc=None, min_len=1, max_len=3, both_ways=True):
    """
    Yield everything produced by the private unigram set pair generator.

    The private ``__generate_unigram_set_pairs`` generator is always run
    with ``reverse=False``; when ``both_ways`` is true it is run a second
    time with ``reverse=True`` once the first pass is exhausted.  All other
    arguments are forwarded to it unchanged.
    """
    # One pass per value of the ``reverse`` flag, non-reversed pass first.
    reverse_flags = (False, True) if both_ways else (False,)
    for reverse in reverse_flags:
        for result in self.__generate_unigram_set_pairs(min_coocc, max_coocc, min_len, max_len, reverse):
            yield result
def ngram_pair_neighbours(self, pair, ngram_indices=None, max_len=4):
    """
    Build every n-gram pair obtained by extending one side of ``pair`` by
    a single neighbouring token.

    :param pair: ``(src, tgt)`` tuple; both elements are n-gram tuples.
    :param ngram_indices: indices handed to ``ngram_neighbours`` of both
        sides.  When ``None``, the intersection of
        ``self._src.ngram_index(src)`` and ``self._tgt.ngram_index(tgt)``
        is used.
    :param max_len: a side is only extended while it is shorter than this.
    :return: list of ``((new_src, new_tgt), count, src_changed)`` tuples.
        Source-side extensions come first (``src_changed`` is ``True``),
        followed by target-side extensions (``src_changed`` is ``False``).
    """
    src, tgt = pair
    grow_src = len(src) < max_len
    grow_tgt = len(tgt) < max_len
    if not (grow_src or grow_tgt):
        # Neither side may grow, so the index and neighbour lookups would
        # be wasted work.
        return []

    if ngram_indices is None:
        ngram_indices = self._src.ngram_index(src) & self._tgt.ngram_index(tgt)

    all_new_ngram_pairs = []
    # Neighbours are only looked up for a side that is actually extended.
    if grow_src:
        for (neighbour, direction), count in self._src.ngram_neighbours(src, ngram_indices):
            # direction == 1 appends the neighbour, anything else prepends it
            if direction == 1:
                new_src_ngram = src + (neighbour,)
            else:
                new_src_ngram = (neighbour,) + src
            all_new_ngram_pairs.append(((new_src_ngram, tgt), count, True))
    if grow_tgt:
        for (neighbour, direction), count in self._tgt.ngram_neighbours(tgt, ngram_indices):
            if direction == 1:
                new_tgt_ngram = tgt + (neighbour,)
            else:
                new_tgt_ngram = (neighbour,) + tgt
            all_new_ngram_pairs.append(((src, new_tgt_ngram), count, False))
    return all_new_ngram_pairs
def generate_ngram_pairs(self, previous_ngram_pairs, min_coocc=1):
    """
    For each previously found n-gram pair, yield that pair together with
    all of its one-token extensions and their contingency tables.

    :param previous_ngram_pairs: iterable of ``((src, tgt), extra)`` items;
        ``extra`` is ignored here.
    :param min_coocc: accepted for interface compatibility; this method
        does not apply it.
    :return: generator yielding one list per input item.  The first element
        of the list is the bare ``(src, tgt)`` pair; each further element is
        ``((new_src, new_tgt), contingency_table)`` for one extension
        returned by ``self.ngram_pair_neighbours``.
    """
    for (src, tgt), _ in previous_ngram_pairs:
        src_occ = self._src.ngram_index(src)
        tgt_occ = self._tgt.ngram_index(tgt)
        ngram_indices = src_occ & tgt_occ
        # ngram_pair_neighbours unpacks its argument as ``src, tgt``, so it
        # must receive the bare pair, not the whole ``((src, tgt), extra)``
        # item.
        extensions = self.ngram_pair_neighbours((src, tgt), ngram_indices)
        results_for_pair = [(src, tgt)]
        for new_pair, _count, src_changed in extensions:
            new_src, new_tgt = new_pair
            # Hand over the occurrence set of the side that did NOT change
            # (presumably so contingency_table can reuse it -- confirm
            # against contingency_table).
            if src_changed:
                cont_table = self.contingency_table(new_pair, tgt_occ_s=tgt_occ)
            else:
                cont_table = self.contingency_table(new_pair, src_occ_s=src_occ)
            results_for_pair.append(((new_src, new_tgt), cont_table))
        yield results_for_pair
def contingency_table(self, ngram_pair, **kwargs):
"""