本文整理汇总了Python中hmm.HMM.train方法的典型用法代码示例。如果您正苦于以下问题:Python HMM.train方法的具体用法?Python HMM.train怎么用?Python HMM.train使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类hmm.HMM的用法示例。
在下文中一共展示了HMM.train方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: hmm_train
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
def hmm_train(results):
    """Train an HMM from parsed options unless a model file already exists.

    Reads ``results.train`` and ``results.freq``, logs both at debug level,
    and then checks for ``model/hmm-model`` on disk.  When that path is
    missing, a new ``HMM`` is trained on the two files and the value of
    ``get_theta()`` is handed to ``write_obj`` under the name ``hmm-model``.
    When the path exists, only an informational message is logged.

    Args:
        results: object exposing ``train`` and ``freq`` attributes.
    """
    train_file, freq_file = results.train, results.freq

    banner = "\n".join([
        'Started training HMM with options:',
        'training file: ' + str(train_file),
        'frequency file:' + str(freq_file),
        '',  # keeps the trailing newline of the original message
    ])
    logger.debug(banner)

    if os.path.exists('model/hmm-model'):
        logger.info('model already exists, nothing to do!')
        return

    classifier = HMM()
    classifier.train(train_file, freq_file)
    # The "written" message is logged before write_obj is actually called.
    logger.info("Done Training, model is written in model file")
    # NOTE(review): the existence check uses 'model/hmm-model' while write_obj
    # receives 'hmm-model' -- presumably write_obj adds the directory; confirm.
    write_obj(classifier.get_theta(), 'hmm-model')
示例2: testSupervisedTraining
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
def testSupervisedTraining(self):
    """Train on four labelled sequences and check the labels of a fifth.

    Each training ``Instance`` pairs five 'odd'/'even' labels with five
    integers.  After ``HMM.train`` the model classifies an unlabelled
    eight-item instance and the result is compared with a fixed list.
    """
    labelled_sequences = [
        (['odd', 'even', 'odd', 'even', 'odd'], [3, 2, 1, 4, 1]),
        (['even', 'even', 'odd', 'odd', 'even'], [2, 4, 1, 3, 2]),
        (['even', 'even', 'odd', 'odd', 'odd'], [1, 2, 3, 4, 3]),
        (['odd', 'odd', 'even', 'even', 'even'], [4, 3, 4, 1, 2]),
    ]
    instances = [Instance(label=tags, data=values)
                 for tags, values in labelled_sequences]

    hmm = HMM()
    hmm.train(instances)

    mystery = Instance(data=[2, 1, 3, 4, 2, 2, 1, 3])
    expected = ['even', 'odd', 'odd', 'even', 'even', 'even', 'odd', 'odd']
    labels = hmm.classify_instance(mystery)
    self.assertEqual(labels, expected, 'NOOO')
示例3: testViterbi
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
def testViterbi(self):
    """Train on four short tagged sentences and inspect the row for 'saw'.

    Each ``Instance`` is built with the sentence's words as ``label`` and the
    tag list as ``data``.  After training, the index of 'saw' is looked up in
    ``hmm.label_alphabet`` and the emission-matrix entries in columns 0 and 1
    of that row are asserted to be non-zero.  No decoding call is made here
    despite the method name.
    """
    tagged_sentences = [
        ("he saw a dog", ['NN', 'VB', 'D', 'NN']),
        ("the dog sees a saw", ['D', 'NN', 'VB', 'D', 'NN']),
        ("the saw saw a dog", ['D', 'NN', 'VB', 'D', 'NN']),
        ("he saw the saw", ['NN', 'VB', 'D', 'NN']),
    ]
    instances = [Instance(label=sentence.split(), data=tags)
                 for sentence, tags in tagged_sentences]

    hmm = HMM()
    hmm.train(instances)

    sawind = hmm.label_alphabet.get_index('saw')
    for column in (0, 1):
        self.assertNotEqual(hmm.emission_matrix[sawind, column], 0, 'WRONG')
示例4: __load_dictionary
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
def __load_dictionary(self, dir_name):
    """Load WAV samples grouped by sub-directory and train one HMM per group.

    Every immediate sub-directory of ``dir_name`` becomes a key of
    ``self.dictionary``.  Each ``.wav`` file directly inside it is read with
    ``wavfile.read``, turned into coefficients by ``self.__get_features`` and
    stored as a ``(coefficients, len(data))`` tuple.  Afterwards one ``HMM``
    per key is trained and appended to ``self.hmms``.

    A sampling-rate mismatch between files is only reported with a printed
    message; loading continues and ``self.rate`` is overwritten with the
    latest file's rate.

    Args:
        dir_name: path of the directory whose sub-directories hold the samples.

    Raises:
        StopIteration: if ``os.walk`` yields nothing for ``dir_name`` or for
            one of its sub-directories (e.g. the path does not exist).
    """
    print('Loading dictionary from ' + dir_name + "...")
    # Built-in next() works on Python 2.6+ and 3; the generator's .next()
    # method used previously exists on Python 2 only.
    entry_names = next(os.walk(dir_name))[1]
    for entry in entry_names:
        self.dictionary[entry] = []
        entry_path = dir_name + '/' + entry
        for file_name in next(os.walk(entry_path))[2]:
            if not file_name.endswith('.wav'):
                continue
            rate, data = wavfile.read(entry_path + '/' + file_name)
            if self.rate is not None and self.rate != rate:
                print('Error: Dictionary sampling rate not constant.')
            self.rate = rate
            coefficients = self.__get_features(data)
            self.dictionary[entry].append((coefficients, len(data)))
    print('Done.')

    print('Computing HMMs...')
    for key, value in self.dictionary.items():
        hmm = HMM(key)
        # Build the per-sample feature list once; it feeds both the size
        # estimate and the training call.
        features = [sample[0] for sample in value]
        model_size = self.__get_model_size([len(item) for item in features])
        hmm.train(model_size, features)
        self.hmms.append(hmm)
        print('Trained {0} with {1} states'.format(key, model_size))
    print('Done.')
示例5: TagHMM
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
class TagHMM(TestCase):
    """Train and test an HMM POS tagger."""

    def setUp(self):
        """Split the corpus and train a fresh ``HMM`` on the training part."""
        self.train, self.test = self.split_sents()
        self.hmm = HMM()
        self.hmm.train(self.train)

    def split_sents(self, train=0.95, total=3500,
                    document_class=TaggedSentence):
        """Return ``(train_docs, test_docs)`` built from the tagged corpus.

        Args:
            train: fraction of the retrieved sentences used for training.
            total: maximum number of sentences to take from
                ``tagged_corpus.tagged_sents()``; ``None`` takes all of them.
            document_class: callable applied to every sentence.

        Returns:
            A pair of lists of ``document_class`` results.
        """
        sents = tagged_corpus.tagged_sents()[:total]
        # Size the split from what was actually retrieved: if fewer than
        # `total` sentences are available, the requested count would push the
        # split point past the end and leave the test part empty.
        total = len(sents)
        i = int(round(train * total))
        j = i + int(round(total - train * total))
        # List comprehensions keep both parts re-iterable on any Python
        # version (a lazy map object could only be consumed once).
        return ([document_class(sent) for sent in sents[0:i]],
                [document_class(sent) for sent in sents[i:j]])

    def accuracy(self, test_sents, verbose=sys.stderr):
        """Compute accuracy of the HMM tagger on the given sentences.

        Args:
            test_sents: iterable of documents accepted by ``self.hmm.classify``
                and exposing the reference tags as ``.label``.
            verbose: file-like object receiving a running percentage after
                each sentence; any falsy value disables the output.

        Returns:
            Fraction of correctly guessed tags as a float, or ``0.0`` when no
            tags were produced at all.
        """
        total = correct = 0
        for sent in test_sents:
            tags = self.hmm.classify(sent)
            total += len(tags)
            correct += sum(guess == tag
                           for guess, tag in zip(tags, sent.label))
            # Skip the progress output while nothing has been counted yet so
            # the percentage never divides by zero.
            if verbose and total:
                print >> verbose, "%.2d%% " % (100 * correct / total),
        # Force float division: with Python 2 classic division the quotient of
        # two ints truncates to 0 for anything short of a perfect score.
        return float(correct) / total if total else 0.0

    @skip("too slow")
    def test_tag_train(self):
        """Tag the training data"""
        self.assertGreater(self.accuracy(self.train), 0.85)

    def test_tag(self):
        """Tag the test data"""
        self.assertGreater(self.accuracy(self.test), 0.85)
示例6: train_model
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
def train_model(model_name, num_states, data):
    """Train an ``HMM`` built from ``num_states`` and save the result.

    ``HMM.train`` is called with ``epsilon=0.01`` and its return value is
    unpacked into four items, which are passed on to ``save_model`` in a
    different order (third, fourth, first, second).

    Args:
        model_name: first argument forwarded to ``save_model``.
        num_states: sole constructor argument of ``HMM``.
        data: training data forwarded to ``HMM.train``.
    """
    trainer = HMM(num_states)
    fitted = trainer.train(data, epsilon=0.01)
    # NOTE(review): local names mirror the original wordmap/init/trans/emiss;
    # what each item actually holds is not visible from here.
    word_map, initial, transitions, emissions = fitted
    save_model(model_name, transitions, emissions, word_map, initial)
示例7: Retrainer
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
class Retrainer(object):
def __init__(self, raw_segments, observation_sequences, label_sequences):
    """Store the inputs, create the helper objects and retrain immediately.

    Construction ends by calling ``retrain_with_boosting_features()``, so an
    instance comes back with ``hmm_new`` already assigned by that call.

    Args:
        raw_segments: stored as ``self.raw_segments``.
        observation_sequences: stored as ``self.observation_sequences``.
        label_sequences: stored as ``self.label_sequences``.
    """
    super(Retrainer, self).__init__()

    # Inputs, kept exactly as passed in.
    (self.raw_segments,
     self.observation_sequences,
     self.label_sequences) = (raw_segments,
                              observation_sequences,
                              label_sequences)

    # Model slot and helper objects.
    self.hmm_new = None
    self.feature_entity_list = FeatureEntityList()
    self.lm = LanguageModel()
    self.boosting_feature_generator = BoostingFeatureGenerator()

    # Dominant label ratio: set empirically.
    self.DOMINANT_RATIO = 0.85

    self.retrain_with_boosting_features()
def retrain(self):
    """Replace ``self.hmm_new`` with a new HMM trained on the stored sequences.

    The model is created as ``HMM('retrainer', 6)`` and trained on
    ``self.observation_sequences`` and ``self.label_sequences``.
    """
    model = HMM('retrainer', 6)
    self.hmm_new = model
    # The original author flags it as important that the Laplace rule stays
    # switched off for this training call.
    model.train(self.observation_sequences,
                self.label_sequences,
                useLaplaceRule=False)
# With new features
def retrain_with_boosting_features(self):
    """Rebuild the language model, then retrain on boosting features.

    Steps:
      1. Add every (token, label) pair of every raw segment to ``self.lm``,
         call ``prettify()`` and keep ``prettify_model`` as
         ``self.token_BGM``; ``self.pattern_BGM`` is reset to ``None``.
      2. Create ``HMM('retrainer', 6)``, generate one feature sequence per raw
         segment with ``BoostingFeatureGenerator`` and train on them with the
         Laplace rule disabled.
      3. Overwrite ``self.observation_sequences`` with the generated features.
    """
    # Build language model
    for segment, labels in zip(self.raw_segments, self.label_sequences):
        for token, label in zip(Tokens(segment).tokens, labels):
            self.lm.add(token, label)
    self.lm.prettify()
    self.token_BGM = self.lm.prettify_model
    self.pattern_BGM = None

    # Retrain
    self.hmm_new = HMM('retrainer', 6)
    partial_features = [
        BoostingFeatureGenerator(segment,
                                 self.token_BGM,
                                 self.pattern_BGM).features
        for segment in self.raw_segments
    ]
    self.hmm_new.train(partial_features,
                       self.label_sequences,
                       useLaplaceRule=False)
    self.observation_sequences = partial_features
def run(self):
i = 0
self.new_labels = []
for raw_segment, label_sequence in zip(self.raw_segments, self.label_sequences):
new_labels = self.hmm_new.decode(raw_segment)[1]
self.new_labels.append(new_labels)
tokens = Tokens(raw_segment).tokens
feature_vectors = FeatureGenerator(raw_segment).features
print i, ': ', raw_segment
for token, old_label, new_label, feature_vector in zip(tokens, label_sequence, new_labels, feature_vectors):
print to_label(old_label), '\t', to_label(new_label), '\t', token
self.feature_entity_list.add_entity(feature_vector, old_label, token) #???? Old label first
print '\n'
i+=1
def find_pattern(self):
    """Print all entities held by ``self.hmm_new.feature_entity_list``."""
    entity_list = self.hmm_new.feature_entity_list
    entity_list.print_all_entity()
# Find the first tokens at VN boundaries
def find_venue_boundary_tokens(self):
recorder = {}
for raw_segment, observation_sequence, label_sequence in zip(self.raw_segments, self.observation_sequences, self.label_sequences):
first_target_label_flag = True
tokens = Tokens(raw_segment).tokens
for token, feature_vector, label in zip(tokens, observation_sequence, label_sequence):
# First meet a VN label
if label == 4 and first_target_label_flag:
key = token.lower()
if not key.islower():
continue
if recorder.has_key(key):
recorder[key] += 1
else:
recorder[key] = 1
first_target_label_flag = False
elif (first_target_label_flag is False) and label in [0,1,3]:
first_target_label_flag = True
for k,v in recorder.iteritems():
print k, '\t', v
return recorder
# Learn the general order of structure of publications before moving forward
def find_majority_structure(self):
first_bit_counter = {'0': 0, '3': 0, '4':0, '5':0}
overall_pattern_counter = {}
for label_sequence in self.label_sequences:
label = label_sequence[0]
if label == 2:
continue
elif label == 5:
continue
elif label in [0,1]:
first_bit_counter['0'] += 1
else:
first_bit_counter[str(label)] += 1
pattern = []
for label in label_sequence:
#.........这里部分代码省略.........
示例8: HMM
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import train [as 别名]
model = HMM()
model.init_random(3,4)
print model.trans
for line in model.trans:
print sum(line)
print model.emit
for line in model.emit:
print sum(line)
def dataFormatter(filename):
    """Read whitespace-separated words and encode each character as an int.

    Every character ``c`` becomes ``ord(c) - ord('A')``, so ``'A'`` maps to 0,
    ``'B'`` to 1, and so on.  Characters outside ``A``-``Z`` are not rejected;
    they simply yield values outside 0-25.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one list of ints per whitespace-separated word, in file
        order; an empty list for an empty or all-whitespace file.
    """
    base = ord('A')
    with open(filename, 'r') as f:
        words = f.read().split()
    # A nested comprehension always yields real lists; map() would hand back
    # lazy one-shot iterators on Python 3.
    return [[ord(ch) - base for ch in word] for word in words]
#sample = sample[0]
sample = dataFormatter('hmm_test1.in')
print sample
model.train(sample)
forEvaluation = dataFormatter('hmm_evaluation.in')
for seq in forEvaluation:
print model.evaluate(seq)