本文整理汇总了 Python 中 kenlm.Model 方法的典型用法代码示例。如果您正苦于以下问题：Python kenlm.Model 方法的具体用法？Python kenlm.Model 怎么用？Python kenlm.Model 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 kenlm 的用法示例。
在下文中一共展示了 kenlm.Model 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, lm_path, sos, eos, num_syms, lm_weight=0.1):
    """
    Store the decoding parameters and build the KenLM model from ``lm_path``.

    Args:
        lm_path: path of the saved language model, passed to ``kenlm.Model``
        sos: start symbol
        eos: end symbol
        num_syms: number of classes
        lm_weight: the lm weight
    """
    self.lang_model = kenlm.Model(lm_path)
    self.state_index = 0
    self.sos, self.eos = sos, eos
    self.num_syms = num_syms
    self.lm_weight = lm_weight

    # Initialise one state with the begin-of-sentence context ...
    initial_state = kenlm.State()
    self.lang_model.BeginSentenceWrite(initial_state)
    # ... and reference that same state object once per symbol in a single row.
    state_row = [initial_state] * num_syms
    self.cand_kenlm_states = np.array([state_row])
示例2: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self):
    """
    Load the Yelp evaluation resources bundled with this package.

    Populates:
        self.yelp_ref: list with two entries, the lines of ``yelp.refs.0``
            and of ``yelp.refs.1`` (in that order)
        self.classifier_yelp: fastText model loaded from ``acc_yelp.bin``
        self.yelp_ppl_model: KenLM model loaded from ``ppl_yelp.binary``
    """
    resource_package = __name__

    def resource_path(resource_name):
        # resource_filename returns a filesystem path directly, so no stream
        # is opened (and leaked) just to read its ``.name`` attribute.
        return pkg_resources.resource_filename(resource_package, resource_name)

    self.yelp_ref = []
    for ref_name in ('yelp.refs.0', 'yelp.refs.1'):
        with open(resource_path(ref_name), 'r') as fin:
            self.yelp_ref.append(fin.readlines())

    self.classifier_yelp = fasttext.load_model(resource_path('acc_yelp.bin'))
    self.yelp_ppl_model = kenlm.Model(resource_path('ppl_yelp.binary'))
示例3: train_ngram_lm
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(self, kenlm_path, data_path, output_path, n_gram):
    """
    Trains a modified Kneser-Ney n-gram KenLM from a text file.

    Runs ``lmplz`` to write an .arpa file to ``output_path``, then
    ``build_binary`` to write ``output_path + ".bin"``, and loads the binary.
    Both tools are run synchronously, one after the other, so the binary is
    only built from a complete .arpa file.

    Args:
        kenlm_path: KenLM checkout; the tools are taken from ``build/bin``
        data_path: training text file (relative paths resolve against the cwd)
        output_path: destination of the .arpa file (relative paths resolve
            against the cwd)
        n_gram: order of the language model

    Returns:
        The ``kenlm.Model`` loaded from the binary file.

    Raises:
        subprocess.CalledProcessError: if either KenLM tool exits non-zero.
        OSError: if the input/output files or the tools cannot be opened.
    """
    import kenlm
    import subprocess

    build_dir = os.path.abspath(os.path.join(kenlm_path, 'build'))
    corpus_path = os.path.abspath(data_path)
    arpa_path = os.path.abspath(output_path)
    binary_path = arpa_path + ".bin"

    # create .arpa file of n-grams; tool output is captured, not logged
    with open(corpus_path, 'rb') as corpus, open(arpa_path, 'wb') as arpa:
        subprocess.run(
            [os.path.join(build_dir, 'bin', 'lmplz'),
             '-o', str(n_gram), '--discount_fallback'],
            cwd=build_dir,
            stdin=corpus,
            stdout=arpa,
            stderr=subprocess.PIPE,
            check=True,
        )

    # create .bin file from the finished .arpa file
    subprocess.run(
        [os.path.join(build_dir, 'bin', 'build_binary'),
         '-s', arpa_path, binary_path],
        cwd=build_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )

    # create language model
    model = kenlm.Model(binary_path)
    return model
示例4: train_ngram_lm
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(kenlm_path, data_path, output_path, N):
    """
    Trains a modified Kneser-Ney n-gram KenLM from a text file.
    Creates a .arpa file to store n-grams.

    Args:
        kenlm_path: KenLM checkout; ``build/bin/lmplz`` is executed
        data_path: training text file (relative paths resolve against the cwd)
        output_path: destination of the .arpa file (relative paths resolve
            against the cwd)
        N: order of the language model

    Returns:
        The ``kenlm.Model`` loaded from ``output_path``.

    Raises:
        subprocess.CalledProcessError: if ``lmplz`` exits non-zero.
        OSError: if the files or the ``lmplz`` binary cannot be opened.
    """
    import subprocess

    # create .arpa file of n-grams
    curdir = os.path.abspath(os.path.curdir)
    build_dir = os.path.abspath(os.path.join(kenlm_path, 'build'))
    corpus_path = os.path.join(curdir, data_path)
    arpa_path = os.path.join(curdir, output_path)

    # Passing an argument list and real file handles avoids the shell, so
    # paths containing spaces or shell metacharacters are handled correctly.
    with open(corpus_path, 'rb') as corpus, open(arpa_path, 'wb') as arpa:
        subprocess.run(
            [os.path.join(build_dir, 'bin', 'lmplz'), '-o', str(N)],
            cwd=build_dir,
            stdin=corpus,
            stdout=arpa,
            check=True,
        )

    load_kenlm()
    # create language model
    model = kenlm.Model(output_path)
    return model
示例5: train_ngram_lm
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(kenlm_path, data_path, output_path, N):
    """
    Trains a modified Kneser-Ney n-gram KenLM from a text file.
    Creates a .arpa file to store n-grams.

    Args:
        kenlm_path: KenLM checkout; ``build/bin/lmplz`` is executed
        data_path: training text file (relative paths resolve against the cwd)
        output_path: destination of the .arpa file (relative paths resolve
            against the cwd); must be non-empty
        N: order of the language model

    Returns:
        The ``kenlm.Model`` loaded from ``output_path``.

    Raises:
        AssertionError: if ``output_path`` is empty (the original comment
            says this is captured by a try..except block outside).
        subprocess.CalledProcessError: if ``lmplz`` exits non-zero.
        OSError: if the files or the ``lmplz`` binary cannot be opened.
    """
    import subprocess

    # Raised explicitly rather than via ``assert`` so the check survives
    # ``python -O``, and before any work is done on a bogus destination.
    if not output_path:
        raise AssertionError("output_path must be a non-empty path")

    # create .arpa file of n-grams
    curdir = os.path.abspath(os.path.curdir)
    build_dir = os.path.abspath(os.path.join(kenlm_path, 'build'))
    lmplz_cmd = [os.path.join(build_dir, 'bin', 'lmplz'), '-o', str(N)]

    # No shell is involved, so unusual characters in the paths are safe.
    with open(os.path.join(curdir, data_path), 'rb') as corpus, \
            open(os.path.join(curdir, output_path), 'wb') as arpa:
        subprocess.run(lmplz_cmd, cwd=build_dir, stdin=corpus, stdout=arpa,
                       check=True)

    load_kenlm()
    # create language model
    model = kenlm.Model(output_path)
    return model
示例6: wrap_existing_kenlm_model
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def wrap_existing_kenlm_model(self, kenlm_model):
    """
    Copy an existing binary KenLM model into a fresh temp dir and load it.

    Args:
        kenlm_model: path of the model file; it must end with ``.binary`` or
            contain ``.binlm``

    Raises:
        Exception: if the path does not have an accepted extension.
        subprocess.CalledProcessError: if the ``cp`` command fails.
    """
    if not (kenlm_model.endswith('.binary') or '.binlm' in kenlm_model):
        raise Exception('expected file with .binlm* or .binary extension')
    self.tmpdir = tempfile.mkdtemp(dir=TEMP_DIR)
    model_binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed to cp verbatim.
    subprocess.check_call(['cp', kenlm_model, model_binary_path])
    self.kenlm_model = kenlm.Model(model_binary_path)
示例7: train
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train(self, path_to_text):
    """
    Train a 5-gram KenLM model on a text file and load the binary result.

    The .arpa file and the binary model are both written to a new temp
    directory (stored in ``self.tmpdir``); the .arpa file is removed once the
    binary has been built.

    Args:
        path_to_text: path of the training corpus, fed to ``lmplz`` on stdin

    Raises:
        subprocess.CalledProcessError: if ``lmplz`` or ``build_binary`` exits
            non-zero.
        OSError: if the corpus or the output files cannot be opened.
    """
    # also stores binary in temp directory
    self.tmpdir = tempfile.mkdtemp(dir=TEMP_DIR)
    model_arpa_path = os.path.join(self.tmpdir, 'kenlm_model.arpa')
    model_binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')

    # from kenlm exception: --skip_symbols: to avoid this exception:
    # Special word </s> is not allowed in the corpus. I plan to support models containing <unk> in the future.
    # Pass --skip_symbols to convert these symbols to whitespace.
    # Arguments are passed as a list (same tokens as before) so that a
    # KENLM_PATH containing spaces is not split apart.
    lmplz_cmd = [
        os.path.join(KENLM_PATH, 'bin/lmplz'),
        '-o', '5', '-S', '40%', '--skip_symbols', '</s>', '<unk>',
    ]
    with open(path_to_text) as corpus, open(model_arpa_path, 'w') as arpa:
        subprocess.check_call(lmplz_cmd, stdin=corpus, stdout=arpa)

    # convert arpa to binary
    subprocess.check_call([
        os.path.join(KENLM_PATH, 'bin/build_binary'),
        model_arpa_path,
        model_binary_path,
    ])

    # remove arpa file
    os.remove(model_arpa_path)

    self.kenlm_model = kenlm.Model(model_binary_path)
示例8: load
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def load(self, model_file_name):
    """Unpack ``model_file_name`` into a temp dir and load its KenLM binary.

    The directory returned by ``_unzip_to_tempdir`` is kept in
    ``self.tmpdir``; the model is read from ``kenlm_model.binary`` inside it.
    """
    unpacked_dir = _unzip_to_tempdir(model_file_name)
    self.tmpdir = unpacked_dir
    binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')
    self.kenlm_model = kenlm.Model(binary_path)
示例9: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, name, path, normalize=False, debpe=False):
    """Open the KenLM model at ``path`` with the LAZY load method.

    Args:
        name: stored as ``self.name``
        path: language model file, stored as ``self.path``
        normalize: stored as ``self.normalize``
        debpe: stored as ``self.debpe``
    """
    self.path = path

    config = kenlm.Config()
    config.load_method = kenlm.LoadMethod.LAZY
    self.model = kenlm.Model(path, config)

    self.name = name
    self.normalize = normalize
    self.debpe = debpe

    order_text = str(self.model.order)
    logger.info('Intialized ' + order_text + "-gram language model: " + path)
示例10: set_language_model_path
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def set_language_model_path(self, path):
    """Load the KenLM model at ``path`` into ``self.lm``.

    ``self.check_detector_initialized()`` is called first; ``kenlm`` is
    imported at call time rather than at module import.
    """
    self.check_detector_initialized()
    import kenlm
    language_model = kenlm.Model(path)
    self.lm = language_model
    logger.debug('Loaded language model: %s' % path)
示例11: get_model
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def get_model():
    """Return the module-level KenLM model, loading it on the first call."""
    global MODEL
    if MODEL is not None:
        return MODEL
    # Alternative model file previously used here: './lm/timit-lm.klm'
    MODEL = kenlm.Model('./lm/libri-timit-lm.klm')
    return MODEL
示例12: get_model
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def get_model():
    """Return the module-level KenLM model, creating it if it is still None."""
    global MODEL
    already_loaded = MODEL is not None
    if not already_loaded:
        MODEL = kenlm.Model('./data/lm/lm.binary')
    return MODEL
示例13: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, binary_path):
    """Load the KenLM model stored at ``binary_path`` into ``self.model``."""
    language_model = kenlm.Model(binary_path)
    self.model = language_model
示例14: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, load_path: Path, beam_size: int = 4, *args, **kwargs):
    """Load the KenLM model at ``load_path`` and remember the beam size.

    Args:
        load_path: model location; passed through ``expand_path`` and
            converted to ``str`` before being handed to ``kenlm.Model``
        beam_size: stored as ``self.beam_size``
        *args, **kwargs: accepted and not used by this method
    """
    self.beam_size = beam_size
    model_location = str(expand_path(load_path))
    self.lm = kenlm.Model(model_location)
示例15: __init__
# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, path):
    """Creates a new n-gram language model predictor.

    The model is loaded into ``self.lm`` and a fresh ``kenlm.State`` is
    stored in ``self.lm_state2``.

    Args:
        path (string): Path to the ARPA language model file

    Raises:
        NameError. If KenLM is not installed
    """
    super(KenLMPredictor, self).__init__()
    language_model = kenlm.Model(path)
    scratch_state = kenlm.State()
    self.lm = language_model
    self.lm_state2 = scratch_state