当前位置: 首页>>代码示例>>Python>>正文


Python Corpus.ngram_neighbours方法代码示例

本文整理汇总了Python中corpus.Corpus.ngram_neighbours方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.ngram_neighbours方法的具体用法是什么？Python Corpus.ngram_neighbours怎么用？有哪些Python Corpus.ngram_neighbours的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。


在下文中一共展示了Corpus.ngram_neighbours方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import ngram_neighbours [as 别名]

#.........这里部分代码省略.........
                                break
                        if not gain_for_every_word:
                            continue

                    else:
                        tgt_occ = tgt_index[tgt_toks[0][0]]
                    coocc = len(src_occ & tgt_occ)

                    # if results are ok, yield them with contingency table
                    if (coocc >= min_coocc and (max_coocc is None or coocc <= max_coocc)):
                        cont_table = self.contingency_table(None, src_occ_s=src_occ, tgt_occ_s=tgt_occ, coocc_c=coocc)
                        if reverse is False:
                            results.append( (set([(src_tok,)]), set([(tgt_tok[0],) for tgt_tok in tgt_toks]), cont_table) )
                        else:
                            results.append( (set([(tgt_tok[0],) for tgt_tok in tgt_toks]), set([(src_tok,)]), cont_table) )
                    else:
                        break
            yield results
        gc.enable()

    def generate_unigram_set_pairs(self, min_coocc=1, max_coocc=None, min_len=1, max_len=3, both_ways=True):
        """Yield unigram-set pair batches, optionally in both directions.

        Every batch produced by the private generator with ``reverse=False``
        is yielded first. When ``both_ways`` is truthy, the private generator
        is then run again with ``reverse=True`` and its batches follow.

        The four numeric arguments are forwarded unchanged to the private
        generator.
        """
        # The forward pass always runs; the reversed pass is appended on request.
        reverse_flags = (False, True) if both_ways else (False,)
        for reverse in reverse_flags:
            batches = self.__generate_unigram_set_pairs(min_coocc, max_coocc, min_len, max_len, reverse)
            for batch in batches:
                yield batch

    def ngram_pair_neighbours(self, pair, ngram_indices=None, max_len=4):
        """List the pairs obtained by growing one side of ``pair`` by one token.

        ``pair`` is a ``(src, tgt)`` 2-tuple of n-gram tuples. When
        ``ngram_indices`` is not supplied it is computed as the intersection
        of the source-side and target-side ``ngram_index`` results.

        Returns a list of ``((new_src, new_tgt), count, src_changed)`` triples:
        first the extensions of the source n-gram (``src_changed`` is True),
        then the extensions of the target n-gram (``src_changed`` is False).
        A side whose n-gram already has ``max_len`` or more tokens is not
        extended.
        """
        src, tgt = pair
        if ngram_indices is None:
            ngram_indices = self._src.ngram_index(src) & self._tgt.ngram_index(tgt)

        # Both sides are queried up front, regardless of the length limits below.
        src_neighbours = self._src.ngram_neighbours(src, ngram_indices)
        tgt_neighbours = self._tgt.ngram_neighbours(tgt, ngram_indices)

        def grow(ngram, token, direction):
            # direction == 1 appends the token; any other value prepends it.
            if direction == 1:
                return ngram + (token,)
            return (token,) + ngram

        candidates = []
        if len(src) < max_len:
            candidates.extend(
                ((grow(src, token, direction), tgt), count, True)
                for (token, direction), count in src_neighbours
            )
        if len(tgt) < max_len:
            candidates.extend(
                ((src, grow(tgt, token, direction)), count, False)
                for (token, direction), count in tgt_neighbours
            )
        return candidates

    def generate_ngram_pairs(self, previous_ngram_pairs, min_coocc=1):
        """Yield, for each previous pair, its one-token extensions with contingency tables.

        ``previous_ngram_pairs`` is an iterable of ``((src, tgt), extra)``
        items; ``extra`` is ignored here. For each item one list is yielded:
        its first element is the original ``(src, tgt)`` pair, followed by a
        ``((new_src, new_tgt), contingency_table)`` entry for every extension
        returned by ``ngram_pair_neighbours``.

        ``min_coocc`` is accepted for interface compatibility but is not
        used by this method.
        """
        for (src, tgt), _ in previous_ngram_pairs:
            src_occ = self._src.ngram_index(src)
            tgt_occ = self._tgt.ngram_index(tgt)
            ngram_indices = src_occ & tgt_occ
            # ngram_pair_neighbours unpacks its argument as ``src, tgt``, so it
            # must receive the bare pair, not the whole ``((src, tgt), extra)``
            # item.
            all_new_ngram_pairs = self.ngram_pair_neighbours((src, tgt), ngram_indices)

            results_for_pair = [(src, tgt)]
            for new_pair, _coocc, src_changed in all_new_ngram_pairs:
                new_src, new_tgt = new_pair
                # Only one side was extended; the occurrence set of the
                # unchanged side is handed over (presumably so that
                # contingency_table need not look it up again).
                if src_changed:
                    cont_table = self.contingency_table(new_pair, tgt_occ_s=tgt_occ)
                else:
                    cont_table = self.contingency_table(new_pair, src_occ_s=src_occ)
                results_for_pair.append(((new_src, new_tgt), cont_table))
            yield results_for_pair

    def contingency_table(self, ngram_pair, **kwargs):
        """
开发者ID:harshnisar,项目名称:hundict,代码行数:70,代码来源:bicorpus.py


注:本文中的corpus.Corpus.ngram_neighbours方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。