本文整理汇总了Python中kenlm.State方法的典型用法代码示例。如果您正苦于以下问题：Python kenlm.State方法的具体用法？Python kenlm.State怎么用？Python kenlm.State使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类kenlm的用法示例。
在下文中一共展示了kenlm.State方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def __init__(self, lm_path, sos, eos, num_syms, lm_weight=0.1):
    """
    Store the scorer settings and load the KenLM model located at lm_path.

    Args:
        lm_path: path passed to ``kenlm.Model`` to load the language model
        sos: start symbol
        eos: end symbol
        num_syms: number of classes
        lm_weight: the lm weight (0.1 unless overridden)
    """
    # Load the model first so a bad path fails before anything else is set.
    self.lang_model = kenlm.Model(lm_path)

    # Plain bookkeeping values, stored verbatim.
    self.state_index = 0
    self.sos, self.eos = sos, eos
    self.num_syms = num_syms
    self.lm_weight = lm_weight

    # One state written by BeginSentenceWrite seeds the candidate table:
    # a single row holding num_syms references to that same State object.
    start_state = kenlm.State()
    self.lang_model.BeginSentenceWrite(start_state)
    seed_row = [start_state] * num_syms
    self.cand_kenlm_states = np.array([seed_row])
示例2: score_partial_
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def score_partial_(self, y, next_token, state, x):
    """Score a set of candidate next tokens with the n-gram model.

    Args:
        y: previous tokens; only ``y.shape[0]`` and ``y[-1]`` are read here
        next_token: candidate token ids to score
        state: KenLM state preceding ``y[-1]``
        x: encoded feature; only its dtype is used

    Returns:
        tuple: ``(scores, out_state)`` where ``scores`` is allocated with
        ``torch.empty_like(next_token)`` (dtype of ``x``, device of ``y``)
        and filled entry by entry, and ``out_state`` is the KenLM state
        obtained after feeding the last token of ``y``.
    """
    # With at most one element in y there is no previous character yet,
    # so the "<s>" token is fed instead.
    if y.shape[0] > 1:
        prev_char = self.chardict[y[-1]]
    else:
        prev_char = "<s>"

    # Advance the incoming state by the previous character.
    advanced_state = kenlm.State()
    self.lm.BaseScore(state, prev_char, advanced_state)

    scores = torch.empty_like(next_token, dtype=x.dtype, device=y.device)
    for pos, token_id in enumerate(next_token):
        # self.tmpkenlmstate only receives the throwaway output state.
        scores[pos] = self.lm.BaseScore(
            advanced_state, self.chardict[token_id], self.tmpkenlmstate
        )
    return scores, advanced_state
示例3: _infer_instance
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def _infer_instance(self, candidates: List[List[Tuple[float, str]]]):
    """Pick the best word sequence through per-position candidate lists.

    Runs a beam search of width ``self.beam_size``. Each hypothesis is
    ranked by the sum of the candidate scores and the ``self.lm.BaseScore``
    values of every whitespace-separated token it contains.

    Args:
        candidates: one list per position, each holding ``(score, text)``
            pairs; ``text`` may contain several whitespace-separated tokens.

    Returns:
        The token list of the top hypothesis with the trailing ``'</s>'``
        sentinel removed.

    Raises:
        IndexError: if any position has an empty candidate list, because
            the beam is then empty when the best hypothesis is read.
    """
    # Extra final position so every hypothesis is also scored through '</s>'.
    candidates = candidates + [[(0, '</s>')]]
    start_state = kenlm.State()
    self.lm.BeginSentenceWrite(start_state)
    beam = [(0, start_state, [])]
    for sublist in candidates:
        new_beam = []
        for beam_score, beam_state, beam_words in beam:
            for score, candidate in sublist:
                tokens = candidate.split()
                # Start from this hypothesis' own state. If the candidate
                # has no tokens the hypothesis keeps this state, rather
                # than a stale one left over from an earlier iteration.
                lm_state = beam_state
                lm_score = 0
                for token in tokens:
                    next_state = kenlm.State()
                    lm_score += self.lm.BaseScore(lm_state, token, next_state)
                    lm_state = next_state
                new_beam.append(
                    (beam_score + score + lm_score, lm_state, beam_words + tokens)
                )
        # Highest total score first; keep only the beam_size best.
        new_beam.sort(reverse=True)
        beam = new_beam[:self.beam_size]
    best_words = beam[0][2]
    return best_words[:-1]
示例4: reset
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def reset(self):
    """
    Reinitialise the candidate-state table before predicting a new sequence.
    """
    # Fresh state written by BeginSentenceWrite.
    start_state = kenlm.State()
    self.lang_model.BeginSentenceWrite(start_state)
    # Single row with self.num_syms references to that same State object.
    seed_row = [start_state] * self.num_syms
    self.cand_kenlm_states = np.array([seed_row])
示例5: get_score
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def get_score(self, cand_parents, cand_syms, lang_model):
    """
    Score every symbol as a continuation of each selected candidate.

    Args:
        cand_parents: last selected top candidates (row indices into
            self.cand_kenlm_states)
        cand_syms: last selected top char index (column indices into
            self.cand_kenlm_states)
        lang_model: the language model; its BaseScore method is called
    Return:
        scores: array of shape (len(cand_syms), self.num_syms) with the
            lm scores multiplied by 1 / log10(e)
    Side effect:
        self.cand_kenlm_states is replaced by the table of output states
        produced while scoring.
    """
    # 1 / log10(e) == ln(10): rescales a log10 value to a natural log.
    log10_to_ln = 1.0 / np.log10(np.e)

    # Token string per symbol index. The assignment order matters: index 0
    # is written last and therefore always ends up as "<space>".
    vocab = [str(sym) for sym in range(self.num_syms)]
    vocab[self.sos] = "<s>"
    vocab[self.eos] = "</s>"
    vocab[0] = "<space>"

    num_cands = len(cand_syms)
    scores = np.zeros((num_cands, self.num_syms))
    next_states = np.zeros((num_cands, self.num_syms), dtype=object)

    for row in range(num_cands):
        # State reached by the parent hypothesis after emitting its symbol.
        parent_states = self.cand_kenlm_states[cand_parents[row]]
        prev_state = parent_states[cand_syms[row]]
        for col, token in enumerate(vocab):
            produced = kenlm.State()
            raw = lang_model.BaseScore(prev_state, token, produced)
            scores[row, col] = log10_to_ln * raw
            next_states[row, col] = produced

    self.cand_kenlm_states = next_states
    return scores
示例6: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def __init__(self, ngram_model, token_list):
    """Set up the n-gram scorer.

    Args:
        ngram_model: ngram model path, passed to ``kenlm.LanguageModel``
        token_list: token list from dict or model.json

    """
    # Copy the token list, replacing "<eos>" with "</s>".
    self.chardict = ["</s>" if token == "<eos>" else token for token in token_list]
    self.charlen = len(self.chardict)
    self.lm = kenlm.LanguageModel(ngram_model)
    # State object kept on the instance; its users are outside this block.
    self.tmpkenlmstate = kenlm.State()
示例7: init_state
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def init_state(self, x):
    """Return a new KenLM state written by ``self.lm.NullContextWrite``.

    ``x`` is accepted but not used.
    """
    null_state = kenlm.State()
    self.lm.NullContextWrite(null_state)
    return null_state
示例8: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def __init__(self, path):
    """Build an n-gram language model predictor.

    Args:
        path (string): Path to the ARPA language model file

    Raises:
        NameError. If KenLM is not installed (presumably because the
        ``kenlm`` name is then unbound; the import is outside this view)
    """
    super(KenLMPredictor, self).__init__()
    model = kenlm.Model(path)
    self.lm = model
    # Second State object kept on the instance; its users are outside this block.
    self.lm_state2 = kenlm.State()
示例9: _update_lm_state
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import State [as 别名]
def _update_lm_state(self):
    """Rebuild ``self.lm_state`` from the last six entries of ``self.history``.

    Starts from a state written by ``BeginSentenceWrite`` and feeds each
    of those words through ``BaseScore``; the returned scores are ignored.
    """
    self.lm_state = kenlm.State()
    scratch = kenlm.State()
    self.lm.BeginSentenceWrite(self.lm_state)
    for word in self.history[-6:]:
        # BaseScore writes the advanced state into `scratch`.
        self.lm.BaseScore(self.lm_state, word, scratch)
        # Swap roles: the advanced state becomes current and the old
        # current state is reused as the next scratch buffer.
        advanced = scratch
        scratch = self.lm_state
        self.lm_state = advanced