This page collects typical usage examples of the Python method hmm.HMM.decode. If you are wondering exactly how HMM.decode works, how to call it, or what it looks like in real code, the hand-picked examples below may help. You can also explore further usage examples of the enclosing class, hmm.HMM.
Four code examples of HMM.decode are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
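Note that the decode interface is not identical across the projects these snippets come from: in Example 1 it takes a list of words and returns a (probability, path) pair, while in Example 2 it takes the observation sequence together with its length and returns only the state path. Check the definition in your own hmm module before copying a snippet verbatim.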
Example 1: main
# Required import: from hmm import HMM [as alias]
# Or alternatively: from hmm.HMM import decode [as alias]
import sys  # needed for sys.argv below; `train` and `Reader` come from this example's own project (not shown)

def main():
    hmm = HMM(*train(sys.argv[1]))
    with open(sys.argv[2]) as f:
        correct = 0
        wrong = 0
        correct_sents = 0
        wrong_sents = 0
        correct_known = 0
        wrong_known = 0
        for i, sent in enumerate(Reader(f)):
            prob, path = hmm.decode([word for (word, pos) in sent])
            correct1 = 0
            wrong1 = 0
            for (gold, predicted) in zip(sent, path):
                if gold == predicted:
                    correct1 += 1
                else:
                    wrong1 += 1
            print('%e\t%.3f\t%s' % (prob, correct1 / (correct1 + wrong1), ' '.join('%s/%s' % pair for pair in path)))
            if prob > 0:
                correct_sents += 1
                correct_known += correct1
                wrong_known += wrong1
            else:
                wrong_sents += 1
            correct += correct1
            wrong += wrong1
    print("Correctly tagged words: %s" % (correct / (correct + wrong)))
    print("Sentences with non-zero probability: %s" % (correct_sents / (correct_sents + wrong_sents)))
    print("Correctly tagged words when only considering sentences with non-zero probability: %s" % (correct_known / (correct_known + wrong_known)))
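A zero path probability from decode usually means the sentence contains a word or tag transition that never occurred in the training data, which is why the script reports accuracy both overall and restricted to sentences the model could actually score. As a purely illustrative sketch (not part of the example), the per-sentence comparison in the inner loop can be factored into a helper; it assumes, as the '%s/%s' print format suggests, that both the gold sentence and the decoded path are sequences of (word, tag) pairs.

# Illustrative sketch: count agreement between gold and predicted (word, tag)
# pairs for one sentence, mirroring the inner loop of Example 1.
def sentence_accuracy(gold_sent, predicted_path):
    correct = sum(1 for g, p in zip(gold_sent, predicted_path) if g == p)
    return correct, len(gold_sent) - correct

print(sentence_accuracy([('the', 'DT'), ('dog', 'NN')],
                        [('the', 'DT'), ('dog', 'VB')]))   # -> (1, 1)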
Example 2: main
# Required import: from hmm import HMM [as alias]
# Or alternatively: from hmm.HMM import decode [as alias]
def main():
    hmm = HMM(3, ('up', 'down', 'unchanged'),
              initial_probability=[0.5, 0.2, 0.3],
              transition_probability=[[0.6, 0.2, 0.2],
                                      [0.5, 0.3, 0.2],
                                      [0.4, 0.1, 0.5]],
              observation_probability=[[0.7, 0.1, 0.2],
                                       [0.1, 0.6, 0.3],
                                       [0.3, 0.3, 0.4]])
    observation = ("up", "up", "unchanged", "down", "unchanged", "down", "up")
    ob_length = len(observation)
    p, _ = hmm.forward(observation, ob_length)
    path = hmm.decode(observation, ob_length)
    print("P{} = {:.13f}".format(tuple(observation), p))
    print("Decoded state sequence =", tuple(i + 1 for i in path))
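The constructor call above (a state count plus keyword probability tables) is specific to the hmm module used in that project. To sanity-check a decoded path without that module, the following is a minimal, self-contained Viterbi sketch run on the same tables. It assumes that the rows of the tables index the hidden states and that the observation symbols map to emission columns in the order up=0, down=1, unchanged=2; neither assumption is stated in the original example, so adjust the mapping to match your own model.

# Minimal Viterbi sketch (assumption: observation symbols map to emission
# columns as up=0, down=1, unchanged=2; this mapping is not stated above).
def viterbi(obs, start_p, trans_p, emit_p):
    n_states = len(start_p)
    # best[t][s] = probability of the best path ending in state s at time t
    best = [[start_p[s] * emit_p[s][obs[0]] for s in range(n_states)]]
    back = [[0] * n_states]
    for t in range(1, len(obs)):
        row, ptr = [], []
        for s in range(n_states):
            prob, prev = max(
                (best[t - 1][r] * trans_p[r][s] * emit_p[s][obs[t]], r)
                for r in range(n_states)
            )
            row.append(prob)
            ptr.append(prev)
        best.append(row)
        back.append(ptr)
    # Backtrack from the most probable final state.
    state = max(range(n_states), key=lambda s: best[-1][s])
    path = [state]
    for t in range(len(obs) - 1, 0, -1):
        state = back[t][state]
        path.append(state)
    return list(reversed(path))

symbol = {'up': 0, 'down': 1, 'unchanged': 2}   # assumed symbol order
obs = [symbol[o] for o in ("up", "up", "unchanged", "down", "unchanged", "down", "up")]
path = viterbi(obs,
               [0.5, 0.2, 0.3],
               [[0.6, 0.2, 0.2], [0.5, 0.3, 0.2], [0.4, 0.1, 0.5]],
               [[0.7, 0.1, 0.2], [0.1, 0.6, 0.3], [0.3, 0.3, 0.4]])
print(tuple(i + 1 for i in path))   # 1-based state indices, mirroring the example's output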
Example 3: HMMClassifier
# Required import: from hmm import HMM [as alias]
# Or alternatively: from hmm.HMM import decode [as alias]
import operator  # used in cross_correct below; HMM and Tokens come from this example's own project (not shown)

class HMMClassifier(object):
    def __init__(self, **kwarg):  # lname, url, other prior knowledge
        super(HMMClassifier, self).__init__()
        self.HMMauthor = HMM('author', 2)
        self.HMMvenue = HMM('venue', 2)    # Not important
        self.HMMentire = HMM('entire', 6)  # Set empirically
        self.observations_raw = []
        self.observation_sequences = []
        self.labels = []

    def predict(self, segment):
        author_likelihood = self.HMMauthor.evaluate(segment)
        venue_likelihood = self.HMMvenue.evaluate(segment)
        print(segment)
        print('author likelihood:\t', author_likelihood)
        print('venue likelihood:\t', venue_likelihood)

    def decode(self, segment):
        # print(segment)
        observation_sequence, decoded_sequence = self.HMMentire.decode(segment)
        self.observations_raw.append(segment)
        self.observation_sequences.append(observation_sequence)
        self.labels.append(decoded_sequence)
        # segment the labeling into parts
        author_field = []
        title_field = []
        venue_field = []
        year_field = []
        raw_tokens = Tokens(segment).tokens
        for i in range(len(decoded_sequence)):
            token_i = raw_tokens[i]
            label_i = decoded_sequence[i]
            if label_i in [0, 1]:
                author_field.append(token_i)
            if label_i == 2:
                continue
            if label_i == 3:
                title_field.append(token_i)
            if label_i == 4:
                venue_field.append(token_i)
            if label_i == 5:
                year_field.append(token_i)
        return ' '.join(author_field), ' '.join(title_field), ' '.join(venue_field), list(set(year_field))
    # Additional step: to calculate the overall sum of
    # P(X1|FN,LN,DL...) + P(X2|TI,TI,TI...) + P(X3|VN,VN,VN...) + P(X4|DT)
    # 1. Find boundaries:
    # boundaries = [[], [], []]
    # label_ranges = [[0,1,2], [3], [2,4,5]]
    # for i in range(len(label_ranges)):
    #     label_range = label_ranges[i]
    #     for j in range(len(decoded_sequence)):
    #         if decoded_sequence[j] in label_range:
    #             boundaries[i].append()
    def decode_without_constraints(self, segment):
        print(segment)
        observation_sequence, decoded_sequence = self.HMMentire.decode_without_constraints(segment)
        self.observations_raw.append(segment)
        self.observation_sequences.append(observation_sequence)
        self.labels.append(decoded_sequence)
        for vector, decoding, token in zip(observation_sequence, decoded_sequence, Tokens(segment).tokens):
            if decoding == 0:
                label = 'FN'
            elif decoding == 1:
                label = 'LN'
            elif decoding == 2:
                label = 'DL'
            elif decoding == 3:
                label = 'TI'
            elif decoding == 4:
                label = 'VN'
            elif decoding == 5:
                label = 'YR'
            else:
                label = str(decoding) + ', PROBLEM'
            print(vector, '\t', label, '\t', token)
        print('\n\n')
    def cross_correct(self):
        absolute_correct = []
        absolute_wrong = []
        # 1. Determine the dominant overall structure of publications in this specific domain
        counter = {}
        for l in self.labels:
            first_label = str(l[0])
            if first_label in counter:
                counter[first_label] += 1
            else:
                counter[first_label] = 1
        sorted_counter = sorted(counter.items(), key=operator.itemgetter(1), reverse=True)
        print('First labels distribution: ', sorted_counter)
# ......... the rest of this example is omitted here .........
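The decode method above routes label indices to bibliographic fields with a chain of if statements: 0 and 1 collect author tokens, 3 the title, 4 the venue, 5 the year, and 2 is treated as a delimiter. As an illustration only (not part of the original class), the same label-to-field grouping can be written as a small lookup-table helper; the tokens and labels below are hypothetical stand-ins for Tokens(segment).tokens and the decoded sequence.

# Illustrative sketch of the label-to-field mapping used above; the label
# indices are taken from Example 3, everything else is made up for the demo.
FIELD_OF_LABEL = {0: 'author', 1: 'author', 3: 'title', 4: 'venue', 5: 'year'}

def group_fields(tokens, labels):
    fields = {'author': [], 'title': [], 'venue': [], 'year': []}
    for token, label in zip(tokens, labels):
        field = FIELD_OF_LABEL.get(label)   # label 2 (delimiter) is simply skipped
        if field is not None:
            fields[field].append(token)
    return fields

print(group_fields(['A.', 'Smith', ',', 'A', 'Title', '2001'],
                   [0, 1, 2, 3, 3, 5]))
# -> {'author': ['A.', 'Smith'], 'title': ['A', 'Title'], 'venue': [], 'year': ['2001']}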
Example 4: Retrainer
# Required import: from hmm import HMM [as alias]
# Or alternatively: from hmm.HMM import decode [as alias]
# FeatureEntityList, LanguageModel, BoostingFeatureGenerator, Tokens, FeatureGenerator and to_label come from this example's own project (not shown)
class Retrainer(object):
    def __init__(self, raw_segments, observation_sequences, label_sequences):
        super(Retrainer, self).__init__()
        self.raw_segments = raw_segments
        self.observation_sequences = observation_sequences
        self.label_sequences = label_sequences
        self.hmm_new = None
        self.feature_entity_list = FeatureEntityList()
        self.lm = LanguageModel()
        self.boosting_feature_generator = BoostingFeatureGenerator()
        self.DOMINANT_RATIO = 0.85  # dominant label ratio: set empirically
        self.retrain_with_boosting_features()

    def retrain(self):
        self.hmm_new = HMM('retrainer', 6)
        self.hmm_new.train(self.observation_sequences, self.label_sequences, useLaplaceRule=False)  # important: Laplace smoothing must be disabled

    # With new features
    def retrain_with_boosting_features(self):
        # Build language model
        for raw_segment, label_sequence in zip(self.raw_segments, self.label_sequences):
            for token, label in zip(Tokens(raw_segment).tokens, label_sequence):
                self.lm.add(token, label)
        self.lm.prettify()
        self.token_BGM = self.lm.prettify_model
        self.pattern_BGM = None
        # Retrain
        self.hmm_new = HMM('retrainer', 6)
        partial_features = []
        for raw_segment in self.raw_segments:
            partial_features.append(BoostingFeatureGenerator(raw_segment, self.token_BGM, self.pattern_BGM).features)
        self.hmm_new.train(partial_features, self.label_sequences, useLaplaceRule=False)
        self.observation_sequences = partial_features
    def run(self):
        i = 0
        self.new_labels = []
        for raw_segment, label_sequence in zip(self.raw_segments, self.label_sequences):
            new_labels = self.hmm_new.decode(raw_segment)[1]
            self.new_labels.append(new_labels)
            tokens = Tokens(raw_segment).tokens
            feature_vectors = FeatureGenerator(raw_segment).features
            print(i, ': ', raw_segment)
            for token, old_label, new_label, feature_vector in zip(tokens, label_sequence, new_labels, feature_vectors):
                print(to_label(old_label), '\t', to_label(new_label), '\t', token)
                self.feature_entity_list.add_entity(feature_vector, old_label, token)  # ???? Old label first
            print('\n')
            i += 1
    def find_pattern(self):
        self.hmm_new.feature_entity_list.print_all_entity()

    # Find the first tokens at VN boundaries
    def find_venue_boundary_tokens(self):
        recorder = {}
        for raw_segment, observation_sequence, label_sequence in zip(self.raw_segments, self.observation_sequences, self.label_sequences):
            first_target_label_flag = True
            tokens = Tokens(raw_segment).tokens
            for token, feature_vector, label in zip(tokens, observation_sequence, label_sequence):
                # First time a VN label is encountered
                if label == 4 and first_target_label_flag:
                    key = token.lower()
                    if not key.islower():
                        continue
                    if key in recorder:
                        recorder[key] += 1
                    else:
                        recorder[key] = 1
                    first_target_label_flag = False
                elif (first_target_label_flag is False) and label in [0, 1, 3]:
                    first_target_label_flag = True
        for k, v in recorder.items():
            print(k, '\t', v)
        return recorder
    # Learn the general order of structure of publications before moving forward
    def find_majority_structure(self):
        first_bit_counter = {'0': 0, '3': 0, '4': 0, '5': 0}
        overall_pattern_counter = {}
        for label_sequence in self.label_sequences:
            label = label_sequence[0]
            if label == 2:
                continue
            elif label == 5:
                continue
            elif label in [0, 1]:
                first_bit_counter['0'] += 1
            else:
                first_bit_counter[str(label)] += 1
            pattern = []
            for label in label_sequence:
# ......... the rest of this example is omitted here .........
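find_majority_structure tallies the first label of each decoded sequence by hand with a dict (as cross_correct does in Example 3). Purely as a sketch, and assuming the same label conventions (0/1 author, 3 title, 4 venue, with 2 and 5 skipped), the same tally can be expressed with collections.Counter:

# Sketch only: tally the first decoded label of each sequence, folding label 1
# into 0 as the example does and skipping labels 2 and 5.
from collections import Counter

def first_label_distribution(label_sequences):
    firsts = []
    for seq in label_sequences:
        label = seq[0]
        if label in (2, 5):
            continue
        firsts.append(0 if label in (0, 1) else label)
    return Counter(firsts)

print(first_label_distribution([[0, 1, 3], [3, 4], [1, 0], [5, 0]]))
# -> Counter({0: 2, 3: 1})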