当前位置: 首页>>代码示例>>Python>>正文


Python kenlm.Model方法代码示例

本文整理汇总了Python中kenlm.Model方法的典型用法代码示例。如果您正苦于以下问题：Python kenlm.Model方法的具体用法？Python kenlm.Model怎么用？Python kenlm.Model使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块kenlm的用法示例。


在下文中一共展示了kenlm.Model方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, lm_path, sos, eos, num_syms, lm_weight=0.1):
        """
        Set up the scorer: load the KenLM model saved at ``lm_path`` and
        record the basic scoring parameters.

        Args:
            lm_path: path of the saved language model
            sos: start symbol
            eos: end symbol
            num_syms: number of classes
            lm_weight: the language model weight
        """
        # The model is loaded first; nothing else is stored if loading fails.
        self.lang_model = kenlm.Model(lm_path)
        self.state_index = 0
        self.sos = sos
        self.eos = eos
        self.num_syms = num_syms
        self.lm_weight = lm_weight

        # A single begin-of-sentence state; the very same State object is
        # referenced num_syms times inside one outer list.
        initial_state = kenlm.State()
        self.lang_model.BeginSentenceWrite(initial_state)
        candidate_row = [initial_state] * num_syms
        self.cand_kenlm_states = np.array([candidate_row])
开发者ID:athena-team,项目名称:athena,代码行数:22,代码来源:lm_scorer.py

示例2: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self):
        """
        Load the Yelp evaluation resources shipped inside this package.

        Populates:
            self.yelp_ref: list with two entries, the lines of
                ``yelp.refs.0`` and ``yelp.refs.1`` (in that order).
            self.classifier_yelp: fastText model loaded from ``acc_yelp.bin``.
            self.yelp_ppl_model: KenLM model loaded from ``ppl_yelp.binary``.
        """
        resource_package = __name__

        def resource_path(resource_name):
            # Only the on-disk file name is needed, so the stream is closed
            # immediately instead of leaking one open handle per resource.
            with pkg_resources.resource_stream(resource_package, resource_name) as stream:
                return stream.name

        # Resolve every resource up front (same order as before) so that a
        # missing file fails before any model is loaded.
        yelp_acc_path = resource_path('acc_yelp.bin')
        yelp_ppl_path = resource_path('ppl_yelp.binary')
        yelp_ref0_path = resource_path('yelp.refs.0')
        yelp_ref1_path = resource_path('yelp.refs.1')

        self.yelp_ref = []
        with open(yelp_ref0_path, 'r') as fin:
            self.yelp_ref.append(fin.readlines())
        with open(yelp_ref1_path, 'r') as fin:
            self.yelp_ref.append(fin.readlines())
        self.classifier_yelp = fasttext.load_model(yelp_acc_path)
        self.yelp_ppl_model = kenlm.Model(yelp_ppl_path)
开发者ID:plkmo,项目名称:NLP_Toolkit,代码行数:24,代码来源:evaluator.py

示例3: train_ngram_lm

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(self, kenlm_path, data_path, output_path, n_gram):
        """
        Trains a modified Kneser-Ney n-gram KenLM from a text file.

        Runs ``bin/lmplz`` to write the .arpa file to ``output_path``, then
        ``bin/build_binary`` to write ``output_path + ".bin"``, and finally
        loads that binary file.

        Args:
            kenlm_path: KenLM directory; the tools are run from its ``build``
                sub-directory.
            data_path: training text, joined onto the current directory.
            output_path: destination of the .arpa file. The tools run inside
                the ``build`` directory, so a relative path is resolved
                against that directory (same as the previous ``cd ... &&``
                shell pipeline); the binary is then looked up at
                ``output_path + ".bin"`` from the current directory.
            n_gram: model order, passed to ``lmplz -o``.

        Returns:
            The ``kenlm.Model`` loaded from ``output_path + ".bin"``.

        Raises:
            OSError: if the corpus or the .arpa destination cannot be opened.
            RuntimeError: if a tool exits with a non-zero status, or the
                binary model does not exist afterwards. (Previously these
                cases retried in an endless loop.)
        """
        import kenlm
        import subprocess

        build_dir = os.path.abspath(os.path.join(kenlm_path, 'build'))
        corpus_path = os.path.join(os.path.abspath(os.path.curdir), data_path)
        arpa_path = os.path.join(build_dir, output_path)
        binary_path = output_path + ".bin"

        def run_tool(args, **streams):
            # Argument list + cwd instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through untouched.
            proc = subprocess.run(args, cwd=build_dir, stderr=subprocess.PIPE, **streams)
            if proc.returncode != 0:
                raise RuntimeError("{} failed with exit status {}: {}".format(
                    args[0], proc.returncode,
                    proc.stderr.decode(errors="replace").strip()))

        # create .arpa file of n-grams (runs to completion before continuing)
        lmplz_args = [os.path.join(build_dir, 'bin', 'lmplz'),
                      '-o', str(n_gram), '--discount_fallback']
        with open(corpus_path, 'rb') as corpus, open(arpa_path, 'wb') as arpa:
            run_tool(lmplz_args, stdin=corpus, stdout=arpa)

        # create .bin file from the .arpa file
        binarize_args = [os.path.join(build_dir, 'bin', 'build_binary'),
                         '-s', output_path, binary_path]
        run_tool(binarize_args, stdout=subprocess.DEVNULL)

        if not os.path.exists(binary_path):
            raise RuntimeError(
                "build_binary finished but {} does not exist".format(binary_path))

        # create language model
        return kenlm.Model(binary_path)
开发者ID:williamSYSU,项目名称:TextGAN-PyTorch,代码行数:27,代码来源:ppl.py

示例4: train_ngram_lm

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(kenlm_path, data_path, output_path, N):
    """
    Train a modified Kneser-Ney n-gram KenLM from a text file.

    ``bin/lmplz`` is run from ``<kenlm_path>/build``; it reads ``data_path``
    and writes the .arpa n-gram file to ``output_path`` (both joined onto the
    current directory). The model loaded from ``output_path`` is returned.
    """
    working_dir = os.path.abspath(os.path.curdir)
    corpus_file = os.path.join(working_dir, data_path)
    arpa_file = os.path.join(working_dir, output_path)

    # lmplz invocation, relative to the build directory entered below
    lmplz_call = "bin/lmplz -o " + str(N) + " <" + corpus_file + " >" + arpa_file
    build_dir = os.path.join(kenlm_path, 'build')
    os.system("cd " + build_dir + " && " + lmplz_call)

    load_kenlm()
    # The model is opened with the path exactly as the caller passed it.
    return kenlm.Model(output_path)
开发者ID:jakezhaojb,项目名称:ARAE,代码行数:19,代码来源:utils.py

示例5: train_ngram_lm

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(kenlm_path, data_path, output_path, N):
    """
    Train a modified Kneser-Ney n-gram KenLM from a text file.

    Writes the n-grams to an .arpa file at ``output_path`` by running
    ``bin/lmplz`` from ``<kenlm_path>/build``, then returns the model loaded
    from ``output_path``.
    """
    here = os.path.abspath(os.path.curdir)

    # Pieces of the lmplz command line, concatenated without separators.
    lmplz_parts = [
        "bin/lmplz -o ", str(N),
        " <", os.path.join(here, data_path),
        " >", os.path.join(here, output_path),
    ]
    command = "".join(lmplz_parts)
    os.system("".join(["cd ", os.path.join(kenlm_path, 'build'), " && ", command]))

    load_kenlm()
    # An empty output_path fails here; per the original note, the resulting
    # AssertionError is captured by a try..except block outside.
    assert output_path
    language_model = kenlm.Model(output_path)
    return language_model
开发者ID:jakezhaojb,项目名称:ARAE,代码行数:20,代码来源:utils.py

示例6: wrap_existing_kenlm_model

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def wrap_existing_kenlm_model(self, kenlm_model):
        """
        Copy an existing binary KenLM model into a fresh temp directory and
        load the copy.

        Sets ``self.tmpdir`` (the new directory under ``TEMP_DIR``) and
        ``self.kenlm_model`` (the loaded model).

        Args:
            kenlm_model: path of the model file; it must end with ``.binary``
                or contain ``.binlm``.

        Raises:
            Exception: if the path has neither expected extension.
            subprocess.CalledProcessError: if the ``cp`` command fails.
        """
        if not (kenlm_model.endswith('.binary') or '.binlm' in kenlm_model):
            raise Exception('expected file with .binlm* or .binary extension')

        self.tmpdir = tempfile.mkdtemp(dir=TEMP_DIR)

        model_binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')

        # Argument list, no shell: paths containing spaces or shell
        # metacharacters are copied verbatim instead of being re-parsed.
        subprocess.check_call(['cp', kenlm_model, model_binary_path])

        self.kenlm_model = kenlm.Model(model_binary_path)
开发者ID:thompsonb,项目名称:DL4MT,代码行数:13,代码来源:lm.py

示例7: train

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train(self, path_to_text):
        """
        Train a 5-gram KenLM model on ``path_to_text`` and load it.

        A fresh temp directory is created under ``TEMP_DIR`` and stored in
        ``self.tmpdir``. ``lmplz`` writes an ARPA file there, ``build_binary``
        converts it to ``kenlm_model.binary`` in the same directory, the ARPA
        file is removed, and the binary is loaded into ``self.kenlm_model``.

        The tools' exit statuses are not checked (unchanged behaviour); a
        failed run is expected to surface when the binary is loaded.

        Args:
            path_to_text: path of the training corpus, fed to ``lmplz`` on
                stdin.
        """
        # also stores binary in temp directory
        self.tmpdir = tempfile.mkdtemp(dir=TEMP_DIR)
        model_arpa_path = os.path.join(self.tmpdir, 'kenlm_model.arpa')
        model_binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')

        # --skip_symbols avoids this kenlm exception:
        # Special word </s> is not allowed in the corpus.  I plan to support models containing <unk> in the future.
        # Pass --skip_symbols to convert these symbols to whitespace.
        # No shell is involved, so '</s>' and '<unk>' are passed as literal
        # arguments, exactly as the previously split command line did.
        lmplz_args = [os.path.join(KENLM_PATH, 'bin/lmplz'),
                      '-o', '5', '-S', '40%', '--skip_symbols', '</s>', '<unk>']

        # Both files are closed as soon as lmplz has finished.
        with open(path_to_text) as myinput, open(model_arpa_path, 'w') as myoutput:
            subprocess.call(lmplz_args, stdin=myinput, stdout=myoutput)

        # convert arpa to binary; argument lists keep paths containing
        # whitespace intact
        subprocess.call([os.path.join(KENLM_PATH, 'bin/build_binary'),
                         model_arpa_path, model_binary_path])

        # remove arpa file
        subprocess.call(['rm', model_arpa_path])

        self.kenlm_model = kenlm.Model(model_binary_path)
开发者ID:thompsonb,项目名称:DL4MT,代码行数:32,代码来源:lm.py

示例8: load

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def load(self, model_file_name):
        """
        Pass ``model_file_name`` to ``_unzip_to_tempdir`` and load the
        ``kenlm_model.binary`` file from the directory it returns.

        Sets ``self.tmpdir`` and ``self.kenlm_model``.
        """
        self.tmpdir = _unzip_to_tempdir(model_file_name)
        binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')
        self.kenlm_model = kenlm.Model(binary_path)
开发者ID:thompsonb,项目名称:DL4MT,代码行数:5,代码来源:lm.py

示例9: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, name, path, normalize=False, debpe=False):
        """
        Open the KenLM model at ``path`` with the LAZY load method and store
        the feature settings.

        Args:
            name: stored as ``self.name``
            path: language model file; stored as ``self.path``
            normalize: stored as ``self.normalize``
            debpe: stored as ``self.debpe``
        """
        self.path = path

        # Configure kenlm for lazy loading before opening the model.
        lazy_config = kenlm.Config()
        lazy_config.load_method = kenlm.LoadMethod.LAZY
        self.model = kenlm.Model(path, lazy_config)

        self.name = name
        self.normalize = normalize
        self.debpe = debpe

        order_text = str(self.model.order)
        message = 'Intialized ' + order_text + "-gram language model: " + path
        logger.info(message)
开发者ID:nusnlp,项目名称:crosentgec,代码行数:11,代码来源:features.py

示例10: set_language_model_path

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def set_language_model_path(self, path):
        """Load the KenLM model at ``path`` and store it as ``self.lm``."""
        self.check_detector_initialized()
        # kenlm is imported at call time, inside the method.
        from kenlm import Model as KenlmModel
        self.lm = KenlmModel(path)
        message = 'Loaded language model: %s' % path
        logger.debug(message)
开发者ID:shibing624,项目名称:pycorrector,代码行数:7,代码来源:detector.py

示例11: get_model

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def get_model():
    """Return the module-level MODEL, loading it the first time it is None."""
    global MODEL
    if MODEL is not None:
        return MODEL
    # A timit-only model ('./lm/timit-lm.klm') was used here previously.
    MODEL = kenlm.Model('./lm/libri-timit-lm.klm')
    return MODEL
开发者ID:robmsmt,项目名称:KerasDeepSpeech,代码行数:8,代码来源:text.py

示例12: get_model

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def get_model():
    """Return the module-level MODEL, creating it from './data/lm/lm.binary' when it is None."""
    global MODEL
    if MODEL is not None:
        # Already loaded by an earlier call.
        return MODEL
    MODEL = kenlm.Model('./data/lm/lm.binary')
    return MODEL
开发者ID:pandeydivesh15,项目名称:AVSR-Deep-Speech,代码行数:7,代码来源:spell.py

示例13: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, binary_path):
        """Load the KenLM model at ``binary_path`` and keep it as ``self.model``."""
        language_model = kenlm.Model(binary_path)
        self.model = language_model
开发者ID:iotayo,项目名称:aivivn-tone,代码行数:4,代码来源:lm.py

示例14: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, load_path: Path, beam_size: int = 4, *args, **kwargs):
        """
        Load a KenLM model and remember the beam size.

        Args:
            load_path: model location; passed through ``expand_path`` and
                converted to ``str`` before being handed to kenlm
            beam_size: stored as ``self.beam_size``
            *args, **kwargs: accepted but not used here
        """
        model_file = str(expand_path(load_path))
        self.lm = kenlm.Model(model_file)
        self.beam_size = beam_size
开发者ID:deepmipt,项目名称:DeepPavlov,代码行数:5,代码来源:kenlm_elector.py

示例15: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, path):
        """Build a predictor backed by an n-gram language model.

        Args:
            path (string): Location of the ARPA language model file

        Raises:
            NameError. If KenLM is not installed
        """
        super(KenLMPredictor, self).__init__()
        # Load the model first, then allocate the extra kenlm state object.
        language_model = kenlm.Model(path)
        self.lm = language_model
        self.lm_state2 = kenlm.State()
开发者ID:ucam-smt,项目名称:sgnmt,代码行数:14,代码来源:ngram.py


注：本文中的kenlm.Model方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。