Python kenlm.Model方法代码示例

本文整理汇总了Python中kenlm.Model方法的典型用法代码示例。如果您正苦于以下问题：Python kenlm.Model方法的具体用法？Python kenlm.Model怎么用？Python kenlm.Model使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类kenlm的用法示例。

在下文中一共展示了kenlm.Model方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, lm_path, sos, eos, num_syms, lm_weight=0.1):
        """
        Store the scorer settings and load the KenLM model found at lm_path.

        Args:
            lm_path: path of the saved language model, handed to kenlm.Model
            sos: start symbol
            eos: end symbol
            num_syms: number of classes
            lm_weight: the lm weight
        """
        # Load the model first so a bad path fails before anything else is set.
        self.lang_model = kenlm.Model(lm_path)

        self.state_index = 0
        self.sos, self.eos = sos, eos
        self.num_syms, self.lm_weight = num_syms, lm_weight

        # A single begin-of-sentence state; the nested list below holds
        # num_syms references to this same State object.
        initial_state = kenlm.State()
        self.lang_model.BeginSentenceWrite(initial_state)
        self.cand_kenlm_states = np.array([[initial_state] * num_syms])

开发者ID:athena-team，项目名称:athena，代码行数:22，代码来源:lm_scorer.py

示例2: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self):
        """
        Load the Yelp evaluation resources that ship inside this package.

        Sets:
            self.yelp_ref: two-element list; element i holds the lines read
                from the packaged file 'yelp.refs.<i>'
            self.classifier_yelp: model returned by fasttext.load_model for
                'acc_yelp.bin'
            self.yelp_ppl_model: kenlm.Model built from 'ppl_yelp.binary'
        """
        resource_package = __name__

        # Ask pkg_resources for filesystem paths directly. The earlier code
        # opened every resource with resource_stream just to read its .name,
        # which left four open handles for the garbage collector to close and
        # relies on the stream being a real file object (a non-file stream has
        # no .name attribute).
        yelp_acc_path = pkg_resources.resource_filename(resource_package, 'acc_yelp.bin')
        yelp_ppl_path = pkg_resources.resource_filename(resource_package, 'ppl_yelp.binary')
        yelp_ref_paths = [
            pkg_resources.resource_filename(resource_package, 'yelp.refs.0'),
            pkg_resources.resource_filename(resource_package, 'yelp.refs.1'),
        ]

        self.yelp_ref = []
        for ref_path in yelp_ref_paths:
            with open(ref_path, 'r') as fin:
                self.yelp_ref.append(fin.readlines())

        self.classifier_yelp = fasttext.load_model(yelp_acc_path)
        self.yelp_ppl_model = kenlm.Model(yelp_ppl_path)

开发者ID:plkmo，项目名称:NLP_Toolkit，代码行数:24，代码来源:evaluator.py

示例3: train_ngram_lm

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(self, kenlm_path, data_path, output_path, n_gram):
        """
        Trains a modified Kneser-Ney n-gram KenLM from a text file.
        Creates a .arpa file to store n-grams and a binary file next to it.

        Args:
            kenlm_path: KenLM directory; the tools are run from its 'build'
                sub-directory as bin/lmplz and bin/build_binary
            data_path: training text, resolved against the current directory
            output_path: destination of the lmplz output, resolved against the
                current directory; the binary model is written to
                output_path + ".bin"
            n_gram: value passed to lmplz through -o

        Returns:
            kenlm.Model loaded from output_path + ".bin"
        """
        import kenlm
        import subprocess

        curdir = os.path.abspath(os.path.curdir)
        # Both commands run after `cd <kenlm_path>/build`, so a relative
        # output_path used to be written under the build directory while the
        # existence check and the model load below looked under the caller's
        # directory -- the loop then never terminated. Anchor it to curdir,
        # exactly as data_path already is (absolute paths are left untouched
        # by os.path.join).
        output_path = os.path.join(curdir, output_path)
        binary_path = output_path + ".bin"

        # create .arpa and .bin file of n-grams
        cd_command = "cd " + os.path.join(kenlm_path, 'build')
        command_1 = "bin/lmplz -o {} <{} >{} --discount_fallback &".format(str(n_gram), os.path.join(curdir, data_path),
                                                                           output_path)
        command_2 = "bin/build_binary -s {} {} &".format(output_path, binary_path)

        # NOTE: retries until the binary file shows up; there is no upper bound.
        while True:
            subprocess.getstatusoutput(cd_command + " && " + command_1)  # call without logging output
            subprocess.getstatusoutput(cd_command + " && " + command_2)  # call without logging output
            if os.path.exists(binary_path):
                break

        # create language model
        model = kenlm.Model(binary_path)

        return model

开发者ID:williamSYSU，项目名称:TextGAN-PyTorch，代码行数:27，代码来源:ppl.py

示例4: train_ngram_lm

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(kenlm_path, data_path, output_path, N):
    """
    Trains a modified Kneser-Ney n-gram KenLM from a text file.
    Creates a .arpa file to store n-grams.

    The lmplz binary is invoked through the shell from <kenlm_path>/build,
    reading data_path and writing output_path (both resolved against the
    current directory). The resulting file is then loaded with kenlm.Model
    and returned.
    """
    working_dir = os.path.abspath(os.path.curdir)
    corpus_file = os.path.join(working_dir, data_path)
    arpa_file = os.path.join(working_dir, output_path)
    build_dir = os.path.join(kenlm_path, 'build')

    # create .arpa file of n-grams: corpus on stdin, model on stdout
    lmplz_call = "bin/lmplz -o {} <{} >{}".format(str(N), corpus_file, arpa_file)
    os.system("cd {} && {}".format(build_dir, lmplz_call))

    load_kenlm()
    # create language model
    return kenlm.Model(output_path)

开发者ID:jakezhaojb，项目名称:ARAE，代码行数:19，代码来源:utils.py

示例5: train_ngram_lm

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train_ngram_lm(kenlm_path, data_path, output_path, N):
    """
    Trains a modified Kneser-Ney n-gram KenLM from a text file.
    Creates a .arpa file to store n-grams.

    Runs bin/lmplz from <kenlm_path>/build via the shell, with data_path as
    input and output_path as output (both resolved against the current
    directory), then returns kenlm.Model(output_path).
    """
    here = os.path.abspath(os.path.curdir)

    # create .arpa file of n-grams
    lmplz_cmd = "".join([
        "bin/lmplz -o ", str(N),
        " <", os.path.join(here, data_path),
        " >", os.path.join(here, output_path),
    ])
    enter_build_dir = "cd " + os.path.join(kenlm_path, 'build')
    os.system(" && ".join([enter_build_dir, lmplz_cmd]))

    load_kenlm()

    # A falsy output_path raises AssertionError here; per the original note
    # this is caught by a try..except block outside this function.
    assert output_path
    # create language model
    return kenlm.Model(output_path)

开发者ID:jakezhaojb，项目名称:ARAE，代码行数:20，代码来源:utils.py

示例6: wrap_existing_kenlm_model

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def wrap_existing_kenlm_model(self, kenlm_model):
        """
        Copy an existing binary KenLM model into a new temp directory and load it.

        Args:
            kenlm_model: path of the model file; it must end with '.binary'
                or contain '.binlm'

        Sets:
            self.tmpdir: directory created with tempfile.mkdtemp under TEMP_DIR
            self.kenlm_model: kenlm.Model loaded from the copied file

        Raises:
            Exception: if the file name does not pass the extension check
            subprocess.CalledProcessError: if `cp` exits with a non-zero status
        """
        if not (kenlm_model.endswith('.binary') or '.binlm' in kenlm_model):
            raise Exception('expected file with .binlm* or .binary extension')

        self.tmpdir = tempfile.mkdtemp(dir=TEMP_DIR)

        model_binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')

        # Pass an argument list instead of a shell string so that paths with
        # spaces or shell metacharacters are copied literally; `--` keeps a
        # leading '-' in the path from being read as an option.
        subprocess.check_call(['cp', '--', kenlm_model, model_binary_path])

        self.kenlm_model = kenlm.Model(model_binary_path)

开发者ID:thompsonb，项目名称:DL4MT，代码行数:13，代码来源:lm.py

示例7: train

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def train(self, path_to_text):
        """
        Build a KenLM model from a text file and load it.

        Runs bin/lmplz (with `-o 5 -S 40%`) on path_to_text to produce an ARPA
        file in a new temp directory, converts it with bin/build_binary,
        removes the ARPA file and loads the binary with kenlm.Model.

        Args:
            path_to_text: path of the training text, fed to lmplz on stdin

        Sets:
            self.tmpdir: directory created with tempfile.mkdtemp under TEMP_DIR
            self.kenlm_model: kenlm.Model loaded from the binary file
        """
        # also stores binary in temp directory
        self.tmpdir = tempfile.mkdtemp(dir=TEMP_DIR)
        model_arpa_path = os.path.join(self.tmpdir, 'kenlm_model.arpa')
        model_binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')

        # from kenlm exception: --skip_symbols: to avoid this exception:
        # Special word </s> is not allowed in the corpus.  I plan to support models containing <unk> in the future.
        # Pass --skip_symbols to convert these symbols to whitespace.
        lmplz_args = shlex.split(os.path.join(KENLM_PATH, 'bin/lmplz') + ' -o 5 -S 40% --skip_symbols </s> <unk>')

        # The context manager closes both handles once lmplz has finished;
        # previously they were opened and never closed.
        with open(path_to_text) as corpus, open(model_arpa_path, 'w') as arpa_out:
            subprocess.call(lmplz_args, stdin=corpus, stdout=arpa_out)

        # convert arpa to binary (exit status is ignored, as before)
        subprocess.call(shlex.split('%s %s %s' % (os.path.join(KENLM_PATH, 'bin/build_binary'), model_arpa_path, model_binary_path)))

        # remove arpa file
        subprocess.call(shlex.split('rm %s' % model_arpa_path))

        self.kenlm_model = kenlm.Model(model_binary_path)

开发者ID:thompsonb，项目名称:DL4MT，代码行数:32，代码来源:lm.py

示例8: load

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def load(self, model_file_name):
        """
        Load a saved model.

        Stores the result of _unzip_to_tempdir(model_file_name) in self.tmpdir
        (presumably the directory the archive was extracted to -- confirm
        against that helper) and loads 'kenlm_model.binary' from it.
        """
        self.tmpdir = _unzip_to_tempdir(model_file_name)
        binary_path = os.path.join(self.tmpdir, 'kenlm_model.binary')
        self.kenlm_model = kenlm.Model(binary_path)

开发者ID:thompsonb，项目名称:DL4MT，代码行数:5，代码来源:lm.py

示例9: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, name, path, normalize=False, debpe=False):
        """
        Open the KenLM model at path with the LAZY load method and keep the
        feature settings.

        Args:
            name: stored as self.name
            path: model file path; also stored as self.path
            normalize: stored as self.normalize
            debpe: stored as self.debpe
        """
        self.path = path

        # The load method has to be set on the config before the model is opened.
        lazy_config = kenlm.Config()
        lazy_config.load_method = kenlm.LoadMethod.LAZY
        self.model = kenlm.Model(path, lazy_config)

        self.name, self.normalize, self.debpe = name, normalize, debpe

        message = 'Intialized ' + str(self.model.order) + "-gram language model: " + path
        logger.info(message)

开发者ID:nusnlp，项目名称:crosentgec，代码行数:11，代码来源:features.py

示例10: set_language_model_path

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def set_language_model_path(self, path):
        """
        Replace self.lm with the KenLM model stored at path.

        self.check_detector_initialized() is called first; kenlm is imported
        inside the method rather than at module level.
        """
        self.check_detector_initialized()
        import kenlm
        language_model = kenlm.Model(path)
        self.lm = language_model
        logger.debug('Loaded language model: %s' % path)

开发者ID:shibing624，项目名称:pycorrector，代码行数:7，代码来源:detector.py

示例11: get_model

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def get_model():
    """
    Return the module-wide KenLM model, loading it on the first call.

    The model is cached in the global MODEL; later calls return that object.
    """
    global MODEL
    if MODEL is not None:
        return MODEL
    # An earlier revision loaded './lm/timit-lm.klm' instead.
    MODEL = kenlm.Model('./lm/libri-timit-lm.klm')
    return MODEL

开发者ID:robmsmt，项目名称:KerasDeepSpeech，代码行数:8，代码来源:text.py

示例12: get_model

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def get_model():
    """
    Return the module-wide KenLM model, loading it on the first call.

    The model is cached in the global MODEL; later calls return that object.
    """
    global MODEL
    if MODEL is not None:
        return MODEL
    MODEL = kenlm.Model('./data/lm/lm.binary')
    return MODEL

开发者ID:pandeydivesh15，项目名称:AVSR-Deep-Speech，代码行数:7，代码来源:spell.py

示例13: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, binary_path):
        """Load the KenLM model stored at binary_path into self.model."""
        loaded_model = kenlm.Model(binary_path)
        self.model = loaded_model

开发者ID:iotayo，项目名称:aivivn-tone，代码行数:4，代码来源:lm.py

示例14: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, load_path: Path, beam_size: int = 4, *args, **kwargs):
        """
        Load the KenLM model and remember the beam size.

        Args:
            load_path: model location; passed through expand_path and str()
                before being handed to kenlm.Model
            beam_size: stored as self.beam_size
            *args, **kwargs: accepted and ignored
        """
        model_file = str(expand_path(load_path))
        self.lm = kenlm.Model(model_file)
        self.beam_size = beam_size

开发者ID:deepmipt，项目名称:DeepPavlov，代码行数:5，代码来源:kenlm_elector.py

示例15: __init__

# 需要导入模块: import kenlm [as 别名]
# 或者: from kenlm import Model [as 别名]
def __init__(self, path):
        """Creates a new n-gram language model predictor.

        Loads the model with kenlm.Model and allocates one kenlm.State,
        kept as self.lm_state2.

        Args:
            path (string): Path to the ARPA language model file

        Raises:
            NameError. If KenLM is not installed
        """
        super(KenLMPredictor, self).__init__()
        language_model = kenlm.Model(path)
        self.lm = language_model
        spare_state = kenlm.State()
        self.lm_state2 = spare_state

开发者ID:ucam-smt，项目名称:sgnmt，代码行数:14，代码来源:ngram.py

注：本文中的kenlm.Model方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

示例1: __init__

示例2: __init__