当前位置: 首页>>代码示例>>Python>>正文


Python HMM.train方法代码示例

本文整理汇总了Python中hmm.HMM.train方法的典型用法代码示例。如果您正苦于以下问题:Python HMM.train方法的具体用法?Python HMM.train怎么用?Python HMM.train使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在hmm.HMM的用法示例。


在下文中一共展示了HMM.train方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: hmm_train

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
def hmm_train(results):
	"""Train an HMM and store its parameters unless a model file is already present.

	Reads ``results.train`` and ``results.freq``, logs both at debug level and
	then looks for ``model/hmm-model``. Only when that path does not exist is a
	new ``HMM`` trained; the value returned by ``get_theta()`` is then handed
	to ``write_obj`` under the name ``'hmm-model'``.
	"""
	train_file, freq_file = results.train, results.freq

	option_lines = [
		'Started training HMM with options:',
		'training file:	' + str(train_file),
		'frequency file:' + str(freq_file),
	]
	logger.debug("\n".join(option_lines) + "\n")

	# Guard clause: an existing model file means there is nothing left to do.
	if os.path.exists('model/hmm-model'):
		logger.info('model already exists, nothing to do!')
		return

	tagger = HMM()
	tagger.train(train_file, freq_file)
	logger.info("Done Training, model is written in model file")
	write_obj(tagger.get_theta(), 'hmm-model')
开发者ID:tarekmehrez,项目名称:POSTagger,代码行数:19,代码来源:main.py

示例2: testSupervisedTraining

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
	def testSupervisedTraining(self):
		"""Fit an HMM on four odd/even labelled sequences and check one decoding.

		Each training sequence pairs five integer observations with 'odd' or
		'even' labels. After training, an unlabelled eight-item sequence is
		classified and the result is compared with a fixed expected label list.
		"""
		training_pairs = [
			(['odd', 'even', 'odd', 'even', 'odd'], [3, 2, 1, 4, 1]),
			(['even', 'even', 'odd', 'odd', 'even'], [2, 4, 1, 3, 2]),
			(['even', 'even', 'odd', 'odd', 'odd'], [1, 2, 3, 4, 3]),
			(['odd', 'odd', 'even', 'even', 'even'], [4, 3, 4, 1, 2]),
		]
		instances = [Instance(label = tags, data = values) for tags, values in training_pairs]

		model = HMM()
		model.train(instances)

		expected = ['even', 'odd', 'odd', 'even', 'even', 'even', 'odd', 'odd']
		predicted = model.classify_instance(Instance(data = [2, 1, 3, 4, 2, 2, 1, 3]))
		self.assertEqual(predicted, expected, 'NOOO')
开发者ID:flosincapite,项目名称:machine-learning,代码行数:20,代码来源:test_hmm.py

示例3: testViterbi

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
	def testViterbi(self):
		"""Train on four short sentences and check two emission entries for 'saw'.

		The words are passed as ``label`` and the tag strings as ``data``. No
		decoding call is made in this method; it only verifies that the row of
		``emission_matrix`` indexed by 'saw' is non-zero in columns 0 and 1.
		NOTE(review): the method name suggests a Viterbi check - confirm whether
		a decoding assertion was intended.
		"""
		corpus = [
			("he saw a dog", ['NN', 'VB', 'D', 'NN']),
			("the dog sees a saw", ['D', 'NN', 'VB', 'D', 'NN']),
			("the saw saw a dog", ['D', 'NN', 'VB', 'D', 'NN']),
			("he saw the saw", ['NN', 'VB', 'D', 'NN']),
		]
		instances = [Instance(label = sentence.split(), data = tags) for sentence, tags in corpus]

		model = HMM()
		model.train(instances)

		saw_index = model.label_alphabet.get_index('saw')
		for column in (0, 1):
			self.assertNotEqual(model.emission_matrix[saw_index, column], 0, 'WRONG')
开发者ID:flosincapite,项目名称:machine-learning,代码行数:20,代码来源:test_hmm.py

示例4: __load_dictionary

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
 def __load_dictionary(self, dir_name):
     """Read the ``.wav`` files under ``dir_name`` and train one HMM per sub-directory.

     Every immediate sub-directory of ``dir_name`` becomes a key of
     ``self.dictionary``. Its value is a list of ``(coefficients, sample_count)``
     tuples, one for each ``.wav`` file found directly inside that
     sub-directory. Afterwards a new ``HMM`` is created and trained for every
     key and appended to ``self.hmms``.

     Side effects: fills ``self.dictionary``, overwrites ``self.rate`` with the
     rate of the most recently read file, appends to ``self.hmms`` and prints
     progress messages.

     Args:
         dir_name: Path of the directory that holds one folder per entry.
     """
     print('Loading dictionary from ' + dir_name + "...")
     # The builtin next() takes only the first (top-level) result of the walk.
     # It works on Python 2.6+ and Python 3; the generator's .next() method
     # exists on Python 2 only.
     for entry_name in next(os.walk(dir_name))[1]:
         entry_dir = dir_name + '/' + entry_name
         self.dictionary[entry_name] = []
         for file_name in next(os.walk(entry_dir))[2]:
             if not file_name.endswith('.wav'):
                 continue
             rate, data = wavfile.read(entry_dir + '/' + file_name)
             # A rate mismatch is only reported; loading continues regardless.
             if self.rate is not None and self.rate != rate:
                 print('Error: Dictionary sampling rate not constant.')
             self.rate = rate
             coefficients = self.__get_features(data)
             self.dictionary[entry_name].append((coefficients, len(data)))
     print('Done.')
     print('Computing HMMs...')
     for key, recordings in self.dictionary.items():
         hmm = HMM(key)
         features = [recording[0] for recording in recordings]
         model_size = self.__get_model_size([len(feature) for feature in features])
         hmm.train(model_size, features)
         self.hmms.append(hmm)
         print('Trained {0} with {1} states'.format(key, model_size))
     print('Done.')
开发者ID:doannguyen94,项目名称:speech_recognition,代码行数:23,代码来源:speech_recognizer.py

示例5: TagHMM

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
class TagHMM(TestCase):
    """Train and test an HMM POS tagger."""

    def setUp(self):
        """Split the corpus and train a fresh HMM on the training part."""
        self.train, self.test = self.split_sents()
        self.hmm = HMM()
        self.hmm.train(self.train)

    def split_sents(self, train=0.95, total=3500,
                    document_class=TaggedSentence):
        """Return ``(train_docs, test_docs)`` built from the tagged corpus.

        Args:
            train: Fraction of the selected sentences used for training.
            total: Maximum number of sentences to take from the corpus;
                ``None`` takes all of them.
            document_class: Callable applied to every sentence.

        Returns:
            A pair of lists. Real lists are returned (rather than ``map``
            objects) because the training set is iterated more than once and
            ``map`` is a one-shot iterator on Python 3.
        """
        sents = tagged_corpus.tagged_sents()[:total]
        # Size the split from what was actually obtained: the corpus may hold
        # fewer than ``total`` sentences, which would otherwise push the
        # split point past the end and leave the test set empty.
        total = len(sents)
        i = int(round(train * total))
        j = i + int(round(total - train * total))
        return ([document_class(sent) for sent in sents[0:i]],
                [document_class(sent) for sent in sents[i:j]])

    def accuracy(self, test_sents, verbose=sys.stderr):
        """Compute accuracy of the HMM tagger on the given sentences.

        Args:
            test_sents: Iterable of documents exposing a ``label`` sequence.
            verbose: File-like object that receives the integer percentage,
                or a falsy value to stay silent.

        Returns:
            The fraction of correctly predicted tags as a float, or ``0.0``
            when no tags were produced at all.
        """
        total = correct = 0
        for sent in test_sents:
            tags = self.hmm.classify(sent)
            total += len(tags)
            for guess, tag in zip(tags, sent.label):
                correct += (guess == tag)
        if not total:
            # Nothing was tagged; avoid dividing by zero.
            return 0.0
        if verbose:
            # write() behaves the same on Python 2 and 3, unlike ``print >>``.
            verbose.write("%.2d%% " % (100 * correct // total))
        # Force true division: on Python 2 ``int / int`` truncates, which
        # would reduce every accuracy below 100% to 0.
        return float(correct) / total

    @skip("too slow")
    def test_tag_train(self):
        """Tag the training data"""
        self.assertGreater(self.accuracy(self.train), 0.85)

    def test_tag(self):
        """Tag the test data"""
        self.assertGreater(self.accuracy(self.test), 0.85)
开发者ID:KechenQin,项目名称:Hidden_Markov_Model,代码行数:39,代码来源:test_hmm.py

示例6: train_model

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
def train_model(model_name, num_states, data):
    """Train an ``HMM`` built with ``num_states`` on ``data`` and save the result.

    ``HMM.train`` is called with ``epsilon=0.01``; the four values it returns
    are unpacked and forwarded, in a different order, to ``save_model``.
    """
    trained = HMM(num_states).train(data, epsilon=0.01)
    word_map, initial, transition, emission = trained
    save_model(model_name, transition, emission, word_map, initial)
    
开发者ID:hatooku,项目名称:shakespeare-learn,代码行数:6,代码来源:hmm_helper.py

示例7: Retrainer

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
class Retrainer(object):
    def __init__(self, raw_segments, observation_sequences, label_sequences):
        """Store the inputs, create the helper objects and train immediately.

        Args:
            raw_segments: Iterated in parallel with ``label_sequences`` by the
                other methods; each item is passed to ``Tokens``.
            observation_sequences: Stored as given; replaced with the newly
                generated features by ``retrain_with_boosting_features``.
            label_sequences: One label sequence per raw segment.
        """
        super(Retrainer, self).__init__()
        self.raw_segments = raw_segments
        self.observation_sequences = observation_sequences
        self.label_sequences = label_sequences
        # Set by the retrain methods; None until one of them has run.
        self.hmm_new = None
        self.feature_entity_list = FeatureEntityList()
        self.lm = LanguageModel()
        self.boosting_feature_generator = BoostingFeatureGenerator()

        self.DOMINANT_RATIO = 0.85  # dominant label ratio: set empirically

        # Construction triggers training right away, so hmm_new is populated
        # by the time __init__ returns.
        self.retrain_with_boosting_features()
    
    def retrain(self):
        """Replace ``self.hmm_new`` with an HMM trained on the stored sequences."""
        model = HMM('retrainer', 6)
        self.hmm_new = model
        # The original author marks disabling the Laplace rule as important.
        model.train(
            self.observation_sequences,
            self.label_sequences,
            useLaplaceRule=False,
        )
    
    # With new features
    def retrain_with_boosting_features(self):
        """Rebuild the language model, then train a new HMM on boosting features.

        Every (token, label) pair of every segment is added to ``self.lm``.
        The resulting ``prettify_model`` is stored as ``self.token_BGM`` and
        ``self.pattern_BGM`` is reset to ``None``. One feature sequence per raw
        segment is then generated, used to train ``self.hmm_new`` and stored as
        ``self.observation_sequences``.
        """
        # Build the language model from the labelled tokens.
        for segment, labels in zip(self.raw_segments, self.label_sequences):
            for token, label in zip(Tokens(segment).tokens, labels):
                self.lm.add(token, label)
        self.lm.prettify()
        self.token_BGM = self.lm.prettify_model
        self.pattern_BGM = None

        # Retrain on features generated from the refreshed model.
        self.hmm_new = HMM('retrainer', 6)
        boosted_features = [
            BoostingFeatureGenerator(segment, self.token_BGM, self.pattern_BGM).features
            for segment in self.raw_segments
        ]
        self.hmm_new.train(boosted_features, self.label_sequences, useLaplaceRule=False)
        self.observation_sequences = boosted_features


    def run(self):
        i = 0
        self.new_labels = []
        for raw_segment, label_sequence in zip(self.raw_segments, self.label_sequences):
            new_labels = self.hmm_new.decode(raw_segment)[1]
            self.new_labels.append(new_labels)
            tokens = Tokens(raw_segment).tokens
            feature_vectors = FeatureGenerator(raw_segment).features
            print i, ':  ', raw_segment
            for token, old_label, new_label, feature_vector in zip(tokens, label_sequence, new_labels, feature_vectors):
                print to_label(old_label), '\t', to_label(new_label), '\t', token
                self.feature_entity_list.add_entity(feature_vector, old_label, token)   #???? Old label first
            print '\n'
            i+=1

    def find_pattern(self):
        """Print all entities held by ``self.hmm_new.feature_entity_list``.

        NOTE(review): the list is read from ``self.hmm_new``, not from
        ``self.feature_entity_list`` that ``run`` fills - confirm this is the
        intended object.
        """
        entity_list = self.hmm_new.feature_entity_list
        entity_list.print_all_entity()


    # Find the first tokens at VN boundaries
    def find_venue_boundary_tokens(self):
        recorder = {}
        for raw_segment, observation_sequence, label_sequence in zip(self.raw_segments, self.observation_sequences, self.label_sequences):
            first_target_label_flag = True
            tokens = Tokens(raw_segment).tokens
            for token, feature_vector, label in zip(tokens, observation_sequence, label_sequence):
                # First meet a VN label
                if label == 4 and first_target_label_flag:
                    key = token.lower()
                    if not key.islower():
                        continue
                    if recorder.has_key(key):
                        recorder[key] += 1
                    else:
                        recorder[key] = 1
                    first_target_label_flag = False

                elif (first_target_label_flag is False) and label in [0,1,3]:
                    first_target_label_flag = True

        for k,v in recorder.iteritems():
            print k, '\t', v
        return recorder


    # Learn the general order of structure of publications before moving forward
    def find_majority_structure(self):
        first_bit_counter = {'0': 0, '3': 0, '4':0, '5':0}
        overall_pattern_counter = {}
        for label_sequence in self.label_sequences:
            label = label_sequence[0]
            if label == 2:
                continue
            elif label == 5:
                continue
            elif label in [0,1]:
                first_bit_counter['0'] += 1
            else:
                first_bit_counter[str(label)] += 1

            pattern = []
            for label in label_sequence:
#.........这里部分代码省略.........
开发者ID:xiaoyao1991,项目名称:hmmpy,代码行数:103,代码来源:retrainer.py

示例8: HMM

# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
# Create a model and initialise it randomly.
# NOTE(review): 3 and 4 are presumably the state and symbol counts - confirm
# against HMM.init_random.
model = HMM()
model.init_random(3,4)

# Print the transition table followed by the sum of each of its rows.
print model.trans
for line in model.trans:
    print sum(line)

# Same check for the emission table.
print model.emit
for line in model.emit:
    print sum(line)

def dataFormatter(filename):
    """Read whitespace-separated letter sequences and encode them as integers.

    Every whitespace-delimited token in the file becomes one sequence, and
    each character is replaced by its offset from 'A' ('A' -> 0, 'B' -> 1).

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of lists of ints, one inner list per token. An empty or
        whitespace-only file yields ``[]``.
    """
    base = ord('A')
    with open(filename, 'r') as f:
        tokens = f.read().split()
    # Build real lists: map() is lazy on Python 3, so the previous version
    # returned one-shot iterators there instead of indexable sequences.
    return [[ord(ch) - base for ch in token] for token in tokens]

#sample = sample[0]
# Load the training sequences and show them.
sample = dataFormatter('hmm_test1.in')
print sample

# Train the randomly initialised model on the loaded sequences.
model.train(sample)

# Load a second file and print the model's evaluate() result for each sequence.
forEvaluation = dataFormatter('hmm_evaluation.in')

for seq in forEvaluation:
    print model.evaluate(seq)
开发者ID:uronce-cc,项目名称:Cpattern,代码行数:32,代码来源:hmm_test.py


注:本文中的hmm.HMM.train方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。