当前位置: 首页>>代码示例>>Python>>正文


Python kenlm.State类代码示例

本文整理汇总了Python中kenlm.State类的典型用法代码示例。如果您正苦于以下问题：Python kenlm.State类的具体用法是什么？Python kenlm.State怎么用？有哪些Python kenlm.State的使用示例？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块kenlm的用法示例。


下文共展示了kenlm.State类的9个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def __init__(self, lm_path, sos, eos, num_syms, lm_weight=0.1):
        """
        Load the KenLM model stored at lm_path and set up the scorer fields.

        Args:
            lm_path: path of the saved language model
            sos: start symbol
            eos: end symbol
            num_syms: number of classes
            lm_weight: weight of the language model
        """
        self.lang_model = kenlm.Model(lm_path)
        self.state_index = 0
        self.sos, self.eos = sos, eos
        self.num_syms = num_syms
        self.lm_weight = lm_weight

        # Every candidate slot starts out referencing the same
        # begin-of-sentence state object.
        initial_state = kenlm.State()
        self.lang_model.BeginSentenceWrite(initial_state)
        shared_row = [initial_state for _ in range(num_syms)]
        self.cand_kenlm_states = np.array([shared_row])
开发者ID:athena-team,项目名称:athena,代码行数:22,代码来源:lm_scorer.py

示例2: score_partial_

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def score_partial_(self, y, next_token, state, x):
        """Score candidate next tokens; shared by the full and partial scorers.

        Args:
            y: previously decoded tokens; only the last entry is looked up
            next_token: candidate tokens that need to be scored
            state: previous KenLM state
            x: encoded feature; only its dtype is read here

        Returns:
            tuple: the scores (allocated with ``torch.empty_like(next_token)``,
                one entry per candidate) and the KenLM state obtained after
                feeding the last token of ``y``.

        """
        advanced_state = kenlm.State()
        # "<s>" is fed instead of a dictionary entry unless y has more than
        # one element.
        if y.shape[0] > 1:
            last_word = self.chardict[y[-1]]
        else:
            last_word = "<s>"
        self.lm.BaseScore(state, last_word, advanced_state)

        scores = torch.empty_like(next_token, dtype=x.dtype, device=y.device)
        for position, token_id in enumerate(next_token):
            candidate_word = self.chardict[token_id]
            # self.tmpkenlmstate only receives the out-state; it is not returned.
            scores[position] = self.lm.BaseScore(
                advanced_state, candidate_word, self.tmpkenlmstate
            )
        return scores, advanced_state
开发者ID:espnet,项目名称:espnet,代码行数:26,代码来源:ngram.py

示例3: _infer_instance

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def _infer_instance(self, candidates: List[List[Tuple[float, str]]]):
        """Choose the best word sequence from per-position candidates by beam search.

        Each hypothesis score is the sum of the candidate scores and the
        ``self.lm.BaseScore`` values of every word it contains.

        Args:
            candidates: one list per position; every entry is a
                ``(score, text)`` pair whose ``text`` is split on whitespace
                into the words fed to the language model.

        Returns:
            The words of the top-ranked hypothesis without the trailing
            ``'</s>'`` entry that this method appends.
        """
        # Extra final step so that the sentence end is scored as well.
        candidates = candidates + [[(0, '</s>')]]
        start_state = kenlm.State()
        self.lm.BeginSentenceWrite(start_state)
        beam = [(0, start_state, [])]
        for sublist in candidates:
            new_beam = []
            for beam_score, beam_state, beam_words in beam:
                for score, candidate in sublist:
                    prev_state = beam_state
                    lm_score = 0
                    words = candidate.split()
                    for word in words:
                        next_state = kenlm.State()
                        lm_score += self.lm.BaseScore(prev_state, word, next_state)
                        prev_state = next_state
                    # Store prev_state rather than the inner loop's variable:
                    # if ``words`` is empty the hypothesis must keep its own
                    # beam_state, not a state left over from an earlier
                    # candidate.
                    new_beam.append(
                        (beam_score + score + lm_score, prev_state, beam_words + words)
                    )
            new_beam.sort(reverse=True)
            beam = new_beam[:self.beam_size]
        best_words = beam[0][2]
        return best_words[:-1]
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:23,代码来源:kenlm_elector.py

示例4: reset

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def reset(self):
        """
        Reset the lm so that it can predict on a new sequence.

        All self.num_syms candidate slots are pointed at one fresh
        begin-of-sentence state.
        """
        fresh_state = kenlm.State()
        self.lang_model.BeginSentenceWrite(fresh_state)
        shared_row = [fresh_state for _ in range(self.num_syms)]
        self.cand_kenlm_states = np.array([shared_row])
开发者ID:athena-team,项目名称:athena,代码行数:9,代码来源:lm_scorer.py

示例5: get_score

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def get_score(self, cand_parents, cand_syms, lang_model):
        """
        Score every symbol as a continuation of each selected candidate.

        Args:
            cand_parents: last selected top candidates
            cand_syms: last selected top char index
            lang_model: the language model
        Return:
            scores: array of shape (len(cand_syms), self.num_syms) holding
                the lm scores rescaled from log10 to natural log

        self.cand_kenlm_states is replaced by the newly produced states.
        """
        log10_to_ln = 1.0 / np.log10(np.e)

        # Symbol index -> token string passed to the language model; the
        # three special assignments are applied in this order.
        tokens = [str(idx) for idx in range(self.num_syms)]
        tokens[self.sos] = "<s>"
        tokens[self.eos] = "</s>"
        tokens[0] = "<space>"

        table_shape = (len(cand_syms), self.num_syms)
        scores = np.zeros(table_shape)
        next_states = np.zeros(table_shape, dtype=object)

        for row, chosen_sym in enumerate(cand_syms):
            parent_states = self.cand_kenlm_states[cand_parents[row]]
            source_state = parent_states[chosen_sym]
            for col, token in enumerate(tokens):
                successor = kenlm.State()
                scores[row, col] = log10_to_ln * lang_model.BaseScore(
                    source_state, token, successor
                )
                next_states[row, col] = successor

        self.cand_kenlm_states = next_states
        return scores
开发者ID:athena-team,项目名称:athena,代码行数:34,代码来源:lm_scorer.py

示例6: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def __init__(self, ngram_model, token_list):
        """Set up the n-gram scorer.

        Args:
            ngram_model: path of the ngram model
            token_list: token list from dict or model.json

        """
        # "<eos>" is stored under the spelling "</s>".
        self.chardict = ["</s>" if token == "<eos>" else token for token in token_list]
        self.charlen = len(self.chardict)
        self.lm = kenlm.LanguageModel(ngram_model)
        # Kept on the instance so callers can pass it as an out-state.
        self.tmpkenlmstate = kenlm.State()
开发者ID:espnet,项目名称:espnet,代码行数:14,代码来源:ngram.py

示例7: init_state

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def init_state(self, x):
        """Return a new KenLM state written by ``self.lm.NullContextWrite``.

        ``x`` is not used.
        """
        null_context = kenlm.State()
        self.lm.NullContextWrite(null_context)
        return null_context
开发者ID:espnet,项目名称:espnet,代码行数:7,代码来源:ngram.py

示例8: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def __init__(self, path):
        """Build an n-gram language model predictor backed by KenLM.

        Args:
            path (string): Path to the ARPA language model file

        Raises:
            NameError. If KenLM is not installed
        """
        super(KenLMPredictor, self).__init__()
        model = kenlm.Model(path)
        self.lm = model
        # Second state object allocated once at construction time.
        self.lm_state2 = kenlm.State()
开发者ID:ucam-smt,项目名称:sgnmt,代码行数:14,代码来源:ngram.py

示例9: _update_lm_state

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def _update_lm_state(self):
        """Rebuild ``self.lm_state`` from the most recent history entries.

        Starts from a begin-of-sentence state and feeds the last six items
        of ``self.history`` through ``self.lm.BaseScore``.
        """
        self.lm_state = kenlm.State()
        scratch = kenlm.State()
        self.lm.BeginSentenceWrite(self.lm_state)
        recent_words = self.history[-6:]
        for word in recent_words:
            self.lm.BaseScore(self.lm_state, word, scratch)
            # Exchange roles: the out-state becomes current and the old
            # current state is reused as the next out-state.
            previous = self.lm_state
            self.lm_state = scratch
            scratch = previous
开发者ID:ucam-smt,项目名称:sgnmt,代码行数:9,代码来源:ngram.py


注：本文中的kenlm.State类示例由纯净天空整理自GitHub、MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。