本文整理汇总了Python中viterbi.viterbi函数的典型用法代码示例。如果您正苦于以下问题:Python viterbi函数的具体用法?Python viterbi怎么用?Python viterbi使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了viterbi函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: learning
def learning(T):
    """Train a structured perceptron tagger for T epochs.

    Reads `word_tag` tokens line-by-line from stdin, collects the tag set and
    observed tag transitions, then repeatedly decodes each sentence with
    Viterbi and nudges the weights toward the gold feature vector.

    Returns (weight, possible_tags, transition).
    """
    # Weights start as small random values; unseen features get a fresh draw.
    weight = defaultdict(lambda: uniform(-1.0, 1.0))
    corpus = []
    possible_tags = set(["<s>", "</s>"])
    transition = set()
    for line in iter(sys.stdin.readline, ""):
        words, tags = [], []
        prev = "<s>"
        for token in line.rstrip().split():
            word, tag = token.split('_')
            words.append(word)
            tags.append(tag)
            possible_tags.add(tag)
            transition.add(prev + " " + tag)
            prev = tag
        transition.add(prev + " " + "</s>")
        corpus.append((words, tags))
    total = len(corpus)
    for epoch in range(T):
        for idx, (words, gold) in enumerate(corpus):
            sys.stdout.write("\rIteration %d, linenum %d / %d" % (epoch + 1, idx + 1, total))
            sys.stdout.flush()
            predicted = viterbi(weight, words, possible_tags, transition)
            gold_features = create_feature(words, gold)
            predicted_features = create_feature(words, predicted)
            update_weight(weight, gold_features, predicted_features)
    return (weight, possible_tags, transition)
示例2: evaluate
def evaluate(model, examples, gold, label=None):
    """Summarise tagging accuracy with and without Viterbi smoothing.

    Runs the model over *examples*, takes the per-position argmax as the
    greedy prediction, re-decodes the raw scores with Viterbi, and returns
    both classification summaries joined into one report string.
    """
    raw_output = list(model.predict({'input': examples },
                                    batch_size=config.batch_size)['output'])
    greedy = np.argmax(np.asarray(raw_output), axis=2).flatten()
    smoothed = viterbi.viterbi(np.concatenate(raw_output), *viterbi_probabilities)
    report = common.classification_summary(gold, greedy)
    report += '\n' + 'w/viterbi ' + common.classification_summary(gold, smoothed)
    return report
示例3: seg
def seg(inp):
    """segmenter main function

    Splits *inp* into 2-character wubi codes (an odd trailing character is
    carried separately as ``tail``), enumerates every candidate segmentation,
    scores each with the Viterbi decoder, and returns the path of the
    best-scoring segmentation, or False when nothing decodes.
    """
    tail = ""
    # An odd-length input keeps its last character out of the 2-char codes.
    if len(inp) % 2 != 0:
        tail = inp[-1]
        inp = inp[:-1]
    # load wubi map: code -> frequency
    for line in wubi:
        wubi_map[line[0]] = int(line[1])
    # all candidate segmentations are collected here
    # (renamed from `seg`, which shadowed this function's own name)
    segments = []
    all_seg(inp, segments, 0, tail)
    # find viterbi path for every segmentation; keep those that decode
    scored = []
    for segline in segments:
        ans = vt.viterbi(segline, uni_map, big_map, wd_map, wubi)
        if ans is not None:  # fixed: identity comparison, not `== None`
            scored.append(ans)
    # sort ascending by score (path[1]) and return the best path
    scored.sort(key=lambda path: path[1], reverse=False)
    if len(scored) > 0:
        return scored[-1][0]
    else:
        return False
示例4: viterbi_decode
def viterbi_decode(self, seq):
    """Return a copy of *seq* whose labels are the Viterbi-optimal path."""
    nodes, edges = self.build_potentials(seq)
    best_path, _ = viterbi(nodes, edges)
    decoded = seq.copy_sequence()
    decoded.y = best_path
    return decoded
示例5: decode
def decode(self, initials):
    """Viterbi-decode a word sequence for the given initial letters.

    Candidate states are the union of all words indexed by each observed
    letter; timing and state-space size are logged for diagnostics.
    """
    timer = Timer()
    candidate_states = set()
    for letter in initials:
        candidate_states.update(self.words_by_letter[letter])
    logger.info("Searching %s possible states", len(candidate_states))
    decoded = viterbi.viterbi(initials, candidate_states, self.start_p,
                              self.transition_p, self.emission_p)
    logger.info("Decoding %r took %s s", initials, timer.elapsed())
    return decoded
示例6: t_BMES
def t_BMES():
    """Demo: decode the sample sentences with a BMES-tagged HMM (Python 2)."""
    # build() returns initial probs PI, transition matrix A, emission matrix B.
    PI, A, B = build()
    S = B.keys()
    # States absent from the initial distribution get probability 0.
    for k in S:
        if k not in PI:
            PI[k] = 0.0
    for sen in samples:
        Y = tuple(sen)
        prob, X = viterbi(Y, S, PI, A, B)
        # Print the sentence with '|' inserted after word-final tags (E or S).
        print u''.join(sen[i] + (X[i] in 'ES' and '|' or '') for i in xrange(len(sen)))
示例7: test_trellis
def test_trellis(self):
    """Check the Viterbi trellis (log-prob matrix) against known values.

    The expected matrix is an LMatrix over states H/L and observation
    positions; entries are compared to 3 decimal places.
    """
    # viterbi returns (path, trellis, backpointers); only the trellis is checked.
    _, actual, _ = viterbi(self.obs, self.A, self.B, self.pi)
    expected = LMatrix(("H", "L"),
                       xrange(len(self.obs)),
                       data = np.array([
                           [ -2.737, -5.474, -8.211, -11.533, -14.007, -17.329, -19.54, -22.862, -25.657],
                           [ -3.322, -6.059, -8.796, -10.948, -14.007, -16.481, -19.54, -22.014, -24.487]
                       ])
                       )
    for s in actual.rlabels:
        for t in actual.clabels:
            self.assertAlmostEqual(actual[s,t], expected[s,t], 3)
示例8: evaluate
def evaluate():
    """Viterbi-tag the test corpus and write word/POS/gold/predicted lines.

    Reads the CoNLL-style test file selected by ``sys.argv[2]``, decodes each
    sentence with ``viterbi.viterbi``, and writes one line per token to the
    result file. ``progress_test.txt`` is rewritten per sentence with an ETA
    estimate based on a running average of per-sentence decode time.
    """
    global possible_tags
    global strings
    global cca_length
    # Populate the module-level model tables used by the decoder.
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    get_cca()
    cca_length = 20  # NOTE(review): hard-coded; an earlier version used len(cca1['amended'])
    out_path = 'outputs_cca_pos_egw30_rounding_currentOnly/result_{0}_{1}.txt'.format(sys.argv[2], sys.argv[1])
    # Context managers close both files even if decoding raises
    # (the original leaked the `data` and `output` handles).
    with open('inputs/eng.test{0}'.format(sys.argv[2]), 'r') as data, \
            open(out_path, 'w') as output:
        line = data.readline()
        output.write('{0}\n\n'.format(line.strip()))
        line = data.readline()  # second header line is read and discarded
        vals = get_sentence(data)
        sentence = vals[0]
        correct_tags = vals[1]
        POS = vals[2]
        count = 0
        time1 = 0.0
        time2 = 0.0
        avg_time = 0.0
        time_val = 0.0
        first = True
        while sentence:
            # ------- running-average timing / ETA bookkeeping -------
            count += 1
            time2 = time()
            if not first:
                avg_time = (avg_time*(count-1)+(time2-time1))/count
                time_val = int((avg_time)*(number_of_sentences-count))
            first = False
            with open('progress_test.txt', 'w') as progress:
                progress.write('Percent complete:\n{0}/{1} = {2}%\n\nTime remaining: \n{3} h {4} min {5} sec'.format(int(count), int(number_of_sentences), float(count*100)/float(number_of_sentences), time_val/3600, (time_val%3600)/60, time_val%60))
            time1 = time2
            # ------- decode and emit one sentence -------
            tags = viterbi.viterbi(sentence, POS, phi, possible_tags, alpha, strings, Words, regExp, codes, cca1, cca_length)
            for i in range(len(sentence)):
                output.write('{0} {1} {2} {3}\n'.format(sentence[i], POS[i][0], correct_tags[i], tags[i]))
            output.write('\n')
            vals = get_sentence(data)
            sentence = vals[0]
            correct_tags = vals[1]
            POS = vals[2]
示例9: perceptron
def perceptron(print_alpha = 0, mult = 0, import_alpha = 0):
    """Train the tagger's alpha weights with a (averaged) perceptron loop (Python 2).

    print_alpha  -- when truthy, dump the weights after each epoch.
    mult         -- when truthy, update weights multiplicatively (divide wrong
                    features by mult_factor, multiply correct ones); otherwise
                    use additive +/- add_factor updates.
    import_alpha -- when truthy, warm-start the weights via read_alpha().

    Side effects only: mutates the module-level `alpha` / `alpha_average`.
    """
    global alpha
    global alpha_average
    global possible_tags
    global strings
    global strings_abr
    global add_factor
    global mult_factor
    init_phi_alpha(mult)
    get_strings()
    if import_alpha:
        read_alpha()
    # alpha_average accumulates alpha after every sentence (averaged perceptron).
    alpha_average = copy.deepcopy(alpha)
    for t in range(T_DEFAULT):
        print '---{0}---'.format(t)
        sys.stdout.flush()
        # dont_repeat stays True only if the whole epoch decodes perfectly.
        dont_repeat = True
        data = open(sys.argv[2], 'r')
        vals = get_sentence_and_tags(data)
        j = 0  # count of sentences decoded correctly this epoch
        while vals:
            sentence = vals[0]
            correct_tags = vals[1]
            result = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, mult)
            z = result[0]        # predicted tag sequence
            indices = result[1]  # feature indices fired by the prediction
            if not z == correct_tags:
                dont_repeat = False
                correct_indices = get_indices(sentence, correct_tags)
                if mult:
                    # Multiplicative update: shrink wrong features, grow correct ones.
                    for i in indices:
                        alpha[i] = float(alpha[i])/mult_factor
                    for i in correct_indices:
                        alpha[i] = float(alpha[i])*mult_factor
                else:
                    # Additive perceptron update.
                    for i in indices:
                        alpha[i] += -1*add_factor
                    for i in correct_indices:
                        alpha[i] += add_factor
            else:
                j += 1
            # Accumulate the running average after every sentence.
            for i in range(len(alpha)):
                alpha_average[i] += alpha[i]
            vals = get_sentence_and_tags(data)
        data.close()
        if dont_repeat:
            # Converged: the epoch produced no mistakes.
            print 'SUCCESS!!!'
            break
        # print 'number correct: {0}'.format(j)
        if print_alpha:
            write_alpha(t)
示例10: run
def run(self):
    """Train an HMM and Viterbi-decode either the test file or a CV split (Python 2).

    In test mode returns (predicted, actual, tokens); in cross-validation
    mode splits the training data, trains on the larger part, and returns
    (predicted, actual).
    """
    if self.isTest:
        print "Running HMM"
        h = HiddenMarkovModel(self.train_file,smoothed=self.smoothing)
        print "Running Viterbi"
        # time.clock() wall-clocks just the decode step.
        toc = time.clock()
        predicted = viterbi(h,self.test_file, test = False)
        tic = time.clock()
        print "Viterbi ran in %f seconds"%(tic-toc)
        actual, tokens = zip(*self.parse_file(self.test_answers))
        return (predicted,actual,tokens)
    else:
        print "Splitting Data"
        (train,test) = self.splitCV(self.parse_file(self.train_file),self.cv_validation_percentage)
        print "Converting Lists"
        # Re-serialise the split pairs into the text formats the HMM and
        # decoder expect: "tag token" lines for training, bare tokens for test.
        train_text = "".join(["%s %s\n" % (p,t) for [p,t] in train])
        test_text = "".join(["%s\n" % t for [p,t] in test])
        print "Running HMM"
        h = HiddenMarkovModel(text=train_text, smoothed=self.smoothing)
        print "Running Viterbi"
        predicted = viterbi(h,text=test_text, test=False)
        actual = self.getActual(test)
        return (predicted,actual)
示例11: t_wordseg
def t_wordseg():
    """Demo: word segmentation via HMM Viterbi over word-boundary states (Python 2).

    Rewrites the emission table so states ending in '|' (word boundary) emit
    their bare form, and all other states get a '|'-suffixed twin.
    """
    PI, A, B = build(True)
    # NOTE: mutating B while iterating B.keys() is safe only on Python 2,
    # where keys() returns a list snapshot; on Python 3 this would raise.
    for k in B.keys():
        if '|' == k[-1]:
            B[k[:-1]] = {k[:-1]: 1.0}
        else:
            B[k + '|'] = B[k]
    S = B.keys()
    # States absent from the initial distribution get probability 0.
    for k in S:
        if k not in PI:
            PI[k] = 0.0
    for sen in samples:
        Y = tuple(sen)
        prob, X = viterbi(Y, S, PI, A, B)
        print u''.join(X)
示例12: __cut
def __cut(sentence):
    """Segment *sentence* into (word, flag) pairs via HMM Viterbi POS decoding.

    Each element of pos_list is a (BMES-position, POS-tag) pair. Characters
    from a 'B' up to the next 'E' are joined into one word; an 'S' character
    stands alone; any trailing span never closed by 'E'/'S' is flushed with
    the tag of its first remaining character.

    Yields `pair` objects.
    """
    prob, pos_list = viterbi.viterbi(sentence,char_state_tab_P, start_P, trans_P, emit_P)
    # renamed from `next`, which shadowed the builtin
    begin, next_idx = 0, 0
    for i,char in enumerate(sentence):
        pos = pos_list[i][0]
        if pos=='B':
            begin = i
        elif pos=='E':
            yield pair(sentence[begin:i+1], pos_list[i][1])
            next_idx = i+1
        elif pos=='S':
            yield pair(char,pos_list[i][1])
            next_idx = i+1
    # Flush any tail the B/E bracketing never closed.
    if next_idx<len(sentence):
        yield pair(sentence[next_idx:], pos_list[next_idx][1] )
示例13: viterbi_run
def viterbi_run(training, test_file):
    """Build HMM tables from *training* and Viterbi-tag each sentence of *test_file*.

    NOTE(review): the per-sentence results (``vit_sent``) and the
    ``error_list`` / ``error_list_i`` / ``new_sentences`` / ``count``
    accumulators are never used or returned — this example appears truncated.
    """
    #returns a list of sentence list containing tuples (word,part of speech)
    corpus_list = viterbi.corpus_list(training)
    #creates a dictionary of corpus part of speech tag : occurences
    corpus_dictionary = viterbi.corpus_dictionary(training)
    #pos_keys
    keys = viterbi.key_list(corpus_dictionary)
    #creates the prior_probabilities transitions table for the entire corpus
    prior_probabilities_table = viterbi.transition_table(corpus_dictionary,corpus_list)
    #creates a word dictionary
    #word: list of part of speeches and increment occurences of word as part of speech
    word_dic = viterbi.word_dic(corpus_list,keys)
    #word_keys
    words = viterbi.key_list(word_dic)
    #likelihood_table
    likelihood_table = viterbi.word_freq(corpus_dictionary,word_dic)
    #Emissions and Transitions
    sentences = viterbi.corpus_list_2(test_file)
    error_list = []
    error_list_i = []
    new_sentences = []
    count = 0
    for sentence in sentences:
        # Per-sentence tables: candidate tags, their transition and emission
        # probabilities restricted to this sentence, then the Viterbi decode.
        trans = viterbi.sentence_tag(sentence,keys,words,likelihood_table)
        s_pos = viterbi.sentence_pos(trans)
        transition_table = viterbi.transition_probabilities(trans,s_pos,prior_probabilities_table,keys)
        observed_like = viterbi.observed_likelihoods(sentence,s_pos,trans,likelihood_table,words,keys)
        vit_sent = viterbi.viterbi(observed_like,sentence,s_pos,transition_table)
示例14: evaluate
def evaluate():
    """Viterbi-tag every sentence of the input file and write `word tag` lines.

    Reads sentences from the file named by ``sys.argv[4]`` and writes one
    `word tag` line per token (blank line between sentences) to the file
    named by ``sys.argv[5]``.
    """
    global possible_tags
    global strings
    global strings_abr
    # Populate the module-level model tables used by the decoder.
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    # Context managers close both files even if decoding raises
    # (the original leaked the `data` and `output` handles).
    with open(sys.argv[4], 'r') as data, open(sys.argv[5], 'w') as output:
        sentence = get_sentence(data)
        while sentence:
            tags = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, Words, regExp)
            for i in range(len(sentence)):
                output.write('{} {}\n'.format(sentence[i], tags[i]))
            output.write('\n')
            sentence = get_sentence(data)
示例15: gen_couplet
def gen_couplet(transition_prob_tree, output_prob_tree, unigram_freq, first_half):
    """Generate candidate second halves for a couplet via Viterbi (Python 2).

    For each visible character of *first_half*, top_k_word hidden candidates
    and their emission probabilities are drawn from output_prob_tree, then the
    best paths through the candidate grid are decoded, de-duplicated, scored
    and returned as a ranked list of (score, second_half) tuples.

    Returns '' (empty string) when a character has no candidates or model
    initialisation fails.
    """
    assert type(first_half) == unicode
    couplet_length = len(first_half)
    visible_words = np.array([first_half[i] for i in range (couplet_length)])
    # Candidate grid: top_k_word rows x couplet_length columns, pre-filled
    # with placeholder spaces; emission probs start random and are overwritten.
    hidden_candidate_words = np.array([u' ' for _ in range(top_k_word*couplet_length)]).reshape(top_k_word, couplet_length)
    output_prob = np.random.rand(top_k_word, couplet_length)
    for i in range(couplet_length):
        key = first_half[i]
        if not output_prob_tree.has_key(key):
            print '%s, Cannot generate couplet' % key
            return ''
        hash_leaf = output_prob_tree[key]
        hidden_candidate_words[:,i], output_prob[:,i] = gen_candidates(first_half, hash_leaf, top_k_word)
    # NOTE(review): `candidate` built below is never used — dead code?
    for i in range(couplet_length):
        candidate = u''
        for j in range(top_k_word):
            candidate += hidden_candidate_words[j, i]
    try:
        transition_prob, init_prob = init_model(transition_prob_tree, unigram_freq, hidden_candidate_words, top_k_word)
    except:
        # NOTE(review): bare except swallows all errors, including typos;
        # consider catching the specific exception init_model raises.
        return ''
    optimal_path, prob = viterbi(transition_prob, output_prob, init_prob, [], visible_words, top_k_word, top_k_candidate)
    optimal_path = deal_repeat(first_half, optimal_path)
    results = []
    # Reassemble each decoded path into a second-half string and score it.
    for i in range(optimal_path.shape[0]):
        second_half = ''
        for j in range(optimal_path.shape[1]):
            second_half += hidden_candidate_words[optimal_path[i, j], j]
        score = ranking_function(output_prob_tree, first_half, second_half)
        results.append((score, second_half))
    results = sorted(results, reverse=True)[:top_k_output]
    return results