当前位置: 首页>>代码示例>>Python>>正文


Python viterbi.viterbi函数代码示例

本文整理汇总了Python中viterbi.viterbi函数的典型用法代码示例。如果您正苦于以下问题:Python viterbi函数的具体用法?Python viterbi怎么用?Python viterbi使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了viterbi函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: learning

def learning(T):
    """Train perceptron-style tagging weights from tagged text on stdin.

    Every input line is a whitespace-separated sequence of ``word_tag``
    tokens.  The whole input is read first to collect the sentences, the
    tag set and the observed tag-to-tag transitions (including the
    ``<s>`` / ``</s>`` boundary tags); then ``T`` passes are made over the
    data, decoding each sentence with ``viterbi`` and calling
    ``update_weight`` with the gold and predicted feature vectors.

    Args:
        T: number of passes over the training data.

    Returns:
        A ``(weight, possible_tags, transition)`` tuple.

    Raises:
        ValueError: if a token contains no underscore at all.
    """
    # Unseen features get a random initial weight in [-1.0, 1.0].
    weight = defaultdict(lambda: uniform(-1.0, 1.0))
    data = []
    possible_tags = set(["<s>", "</s>"])
    transition = set()
    for line in iter(sys.stdin.readline, ""):
        X, Y = [], []
        pre_y = "<s>"
        for x_y in line.rstrip().split():
            # Split on the LAST underscore only, so a word that itself
            # contains "_" (e.g. "New_York_NNP") no longer raises ValueError.
            x, y = x_y.rsplit("_", 1)
            X.append(x)
            Y.append(y)
            possible_tags.add(y)
            transition.add(" ".join([pre_y, y]))
            pre_y = y
        transition.add(" ".join([pre_y, "</s>"]))
        data.append((X, Y))
    data_size = len(data)
    for t in range(T):
        for line_num, (X, Y_prime) in enumerate(data):
            # "\r" rewrites the same console line to show progress.
            sys.stdout.write("\rIteration %d, linenum %d / %d" % (t+1, line_num+1, data_size))
            sys.stdout.flush()
            Y_hat = viterbi(weight, X, possible_tags, transition)
            phi_prime = create_feature(X, Y_prime)
            phi_hat = create_feature(X, Y_hat)
            update_weight(weight, phi_prime, phi_hat)
    return (weight, possible_tags, transition)
开发者ID:biligee,项目名称:nlp_programming_tutorial,代码行数:26,代码来源:learning.py

示例2: evaluate

def evaluate(model, examples, gold, label=None):
    """Summarize model predictions against ``gold``, with and without Viterbi.

    The model output is scored twice: once using the argmax over axis 2 of
    the raw output, and once after passing the concatenated output through
    ``viterbi.viterbi`` together with ``viterbi_probabilities``.

    Args:
        model: object whose ``predict`` returns a mapping with an
            ``'output'`` entry.
        examples: inputs passed to the model under the ``'input'`` key.
        gold: reference labels handed to ``common.classification_summary``.
        label: accepted but not used by this function.

    Returns:
        Two classification summaries joined by a newline; the second one is
        prefixed with ``'w/viterbi '``.
    """
    predictions = model.predict({'input': examples},
                                batch_size=config.batch_size)
    output = list(predictions['output'])
    pred = np.asarray(output).argmax(axis=2).flatten()
    vpred = viterbi.viterbi(np.concatenate(output), *viterbi_probabilities)
    plain_summary = common.classification_summary(gold, pred)
    viterbi_summary = common.classification_summary(gold, vpred)
    return plain_summary + '\n' + 'w/viterbi ' + viterbi_summary
开发者ID:billy322,项目名称:BioNLP-2016,代码行数:7,代码来源:rnn.py

示例3: seg

def seg(inp):
    """Segmenter main function: return the best-scoring Viterbi path.

    If ``inp`` has odd length, its last element is split off as ``tail``
    and passed separately to ``all_seg``.  Every candidate segmentation
    produced by ``all_seg`` is decoded with ``vt.viterbi``; candidates for
    which the decoder returns ``None`` are discarded.

    Args:
        inp: the input sequence to segment (must support ``len`` and
            slicing).

    Returns:
        Element ``[0]`` of the candidate whose element ``[1]`` is largest,
        or ``False`` when no candidate yielded a path.
    """
    tail = ""
    if len(inp) % 2 != 0:
        tail = inp[-1]
        inp = inp[:-1]

    # Load the wubi map (module-level ``wubi_map`` is updated in place).
    for line in wubi:
        wubi_map[line[0]] = int(line[1])

    # Collect all segmentations; ``all_seg`` fills the list it is given.
    segments = []
    all_seg(inp, segments, 0, tail)

    # Find the Viterbi path for every segmentation.
    candidates = []
    for segline in segments:
        ans = vt.viterbi(segline, uni_map, big_map, wd_map, wubi)
        if ans is not None:
            candidates.append(ans)

    if not candidates:
        return False

    # Stable ascending sort + last element keeps the original tie-breaking
    # (the latest candidate among equal top scores wins).
    candidates.sort(key=lambda path: path[1])
    return candidates[-1][0]
开发者ID:ChestnutHeng,项目名称:Wubi-Kernel,代码行数:34,代码来源:segment.py

示例4: viterbi_decode

 def viterbi_decode(self, seq):
     """Return a copy of ``seq`` whose ``y`` holds the Viterbi path.

     The potentials come from ``self.build_potentials(seq)``; the second
     value returned by ``viterbi`` is ignored.
     """
     node_scores, edge_scores = self.build_potentials(seq)
     best_path, _unused = viterbi(node_scores, edge_scores)
     decoded = seq.copy_sequence()
     decoded.y = best_path
     return decoded
开发者ID:Joao-M-Almeida,项目名称:lxmls-toolkit,代码行数:7,代码来源:hmm_n_order.py

示例5: decode

 def decode(self, initials):
     """Run Viterbi over ``initials`` and return its result.

     The candidate state set is the union of ``self.words_by_letter[obs]``
     for every observation in ``initials``.  The number of states and the
     elapsed time are logged at INFO level.
     """
     timer = Timer()
     states = set().union(
         *(self.words_by_letter[obs] for obs in initials)
     )
     logger.info("Searching %s possible states", len(states))
     result = viterbi.viterbi(
         initials,
         states,
         self.start_p,
         self.transition_p,
         self.emission_p,
     )
     logger.info("Decoding %r took %s s", initials, timer.elapsed())
     return result
开发者ID:shirlston,项目名称:initialisms,代码行数:9,代码来源:decode.py

示例6: t_BMES

def t_BMES():
    """Decode every sample with ``viterbi`` and print it with '|' markers.

    States taken from ``B`` that are missing in ``PI`` get probability 0.0.
    For each sample, a ``'|'`` is appended after every element whose
    decoded state is found in ``'ES'``; the marked sample is printed.
    """
    PI, A, B = build()
    S = B.keys()
    for state in S:
        if state not in PI:
            PI[state] = 0.0
    for sen in samples:
        prob, X = viterbi(tuple(sen), S, PI, A, B)
        marked = [
            ch + ('|' if X[i] in 'ES' else '')
            for i, ch in enumerate(sen)
        ]
        # Single-argument parenthesised print behaves the same on Python 2.
        print(u''.join(marked))
开发者ID:Catentropy,项目名称:mylab,代码行数:10,代码来源:t_wordseg.py

示例7: test_trellis

 def test_trellis(self):
     """Compare the trellis from ``viterbi`` with reference values.

     Only the second of the three values returned by ``viterbi`` is
     checked; every cell must match the reference to 3 decimal places.
     """
     _first, trellis, _last = viterbi(self.obs, self.A, self.B, self.pi)
     first_row = [-2.737, -5.474, -8.211, -11.533, -14.007,
                  -17.329, -19.54, -22.862, -25.657]
     second_row = [-3.322, -6.059, -8.796, -10.948, -14.007,
                   -16.481, -19.54, -22.014, -24.487]
     expected = LMatrix(("H", "L"),
                        xrange(len(self.obs)),
                        data=np.array([first_row, second_row]))
     for row_label in trellis.rlabels:
         for col_label in trellis.clabels:
             self.assertAlmostEqual(trellis[row_label, col_label],
                                    expected[row_label, col_label],
                                    places=3)
开发者ID:xiaohan2012,项目名称:irem,代码行数:12,代码来源:t_viterbi.py

示例8: evaluate

def evaluate():
    """Tag every sentence of the test file and write the results to disk.

    Reads ``inputs/eng.test<argv[2]>`` and writes one
    ``word POS gold predicted`` line per token to
    ``outputs_cca_pos_egw30_rounding_currentOnly/result_<argv[2]>_<argv[1]>.txt``,
    with a blank line after each sentence.  After every sentence a progress
    report (percentage done and estimated time remaining) is rewritten to
    ``progress_test.txt``.

    The model state is loaded into module globals by the ``get_*`` helpers;
    ``cca_length`` is set to 20.  All files are opened with ``with`` so
    they are closed (and the output flushed) even if tagging raises.
    """
    global possible_tags
    global strings
    global cca_length
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    # NOTE(review): get_codeWords() was commented out in the original even
    # though ``codes`` is still passed to viterbi below -- confirm where
    # ``codes`` is populated.
    get_cca()
    # The original had ``cca_length = len(cca1['amended'])`` commented out
    # in favour of this fixed value.
    cca_length = 20
    input_path = 'inputs/eng.test{0}'.format(sys.argv[2])
    output_path = 'outputs_cca_pos_egw30_rounding_currentOnly/result_{0}_{1}.txt'.format(sys.argv[2], sys.argv[1])
    with open(input_path, 'r') as data:
        with open(output_path, 'w') as output:
            # The first line is copied to the output; the line after it is
            # read and discarded.
            line = data.readline()
            output.write('{0}\n\n'.format(line.strip()))
            data.readline()
            vals = get_sentence(data)
            sentence = vals[0]
            correct_tags = vals[1]
            POS = vals[2]
            count = 0
            time1 = 0.0
            time2 = 0.0
            avg_time = 0.0
            time_val = 0.0
            first = True
            while sentence:
                # ------- time stats -------
                count += 1
                time2 = time()
                if not first:
                    # Running mean of the per-sentence wall time, then the
                    # projected number of seconds left.
                    avg_time = (avg_time*(count-1)+(time2-time1))/count
                    time_val = int((avg_time)*(number_of_sentences-count))
                first = False
                with open('progress_test.txt', 'w') as progress:
                    # Floor division keeps h/min whole numbers on Python 3
                    # as well; on Python 2 the values are unchanged.
                    progress.write('Percent complete:\n{0}/{1} = {2}%\n\nTime remaining: \n{3} h {4} min {5} sec'.format(int(count), int(number_of_sentences), float(count*100)/float(number_of_sentences), time_val//3600, (time_val%3600)//60, time_val%60))
                time1 = time2
                # --------------------------
                tags = viterbi.viterbi(sentence, POS, phi, possible_tags, alpha, strings, Words, regExp, codes, cca1, cca_length)
                for i in range(len(sentence)):
                    output.write('{0} {1} {2} {3}\n'.format(sentence[i], POS[i][0], correct_tags[i], tags[i]))
                output.write('\n')
                vals = get_sentence(data)
                sentence = vals[0]
                correct_tags = vals[1]
                POS = vals[2]
开发者ID:NweZinOo,项目名称:perceptron-crf,代码行数:53,代码来源:perceptron_test.py

示例9: perceptron

def perceptron(print_alpha=0, mult=0, import_alpha=0):
    """Train the global weight vector ``alpha`` with perceptron updates.

    Makes up to ``T_DEFAULT`` passes over the training file named by
    ``sys.argv[2]``.  Each sentence is decoded with ``viterbi.viterbi``;
    when the decoded tags differ from the gold tags, the weights at the
    predicted feature indices are penalised and those at the gold feature
    indices are rewarded.  After every sentence the current ``alpha`` is
    accumulated into ``alpha_average``.  Training stops early after a pass
    with no mistakes.

    Args:
        print_alpha: if truthy, call ``write_alpha(t)`` after each pass
            that contained at least one mistake.
        mult: if truthy, use multiplicative updates (divide / multiply by
            ``mult_factor``); otherwise additive updates with
            ``add_factor``.  Also forwarded to ``init_phi_alpha`` and
            ``viterbi.viterbi``.
        import_alpha: if truthy, call ``read_alpha()`` before training.
    """
    global alpha
    global alpha_average
    global possible_tags
    global strings
    global strings_abr
    global add_factor
    global mult_factor
    init_phi_alpha(mult)
    get_strings()
    if import_alpha:
        read_alpha()
    alpha_average = copy.deepcopy(alpha)
    for t in range(T_DEFAULT):
        # Single-argument parenthesised print behaves the same on Python 2.
        print('---{0}---'.format(t))
        sys.stdout.flush()
        dont_repeat = True
        # ``with`` guarantees the training file is closed even if decoding
        # or an update raises part-way through the pass.
        with open(sys.argv[2], 'r') as data:
            vals = get_sentence_and_tags(data)
            while vals:
                sentence = vals[0]
                correct_tags = vals[1]
                result = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, mult)
                z = result[0]
                indices = result[1]
                if z != correct_tags:
                    dont_repeat = False
                    correct_indices = get_indices(sentence, correct_tags)
                    if mult:
                        for i in indices:
                            alpha[i] = float(alpha[i])/mult_factor
                        for i in correct_indices:
                            alpha[i] = float(alpha[i])*mult_factor
                    else:
                        for i in indices:
                            alpha[i] += -1*add_factor
                        for i in correct_indices:
                            alpha[i] += add_factor
                # Accumulate after every sentence, updated or not.
                for i in range(len(alpha)):
                    alpha_average[i] += alpha[i]
                vals = get_sentence_and_tags(data)
        if dont_repeat:
            print('SUCCESS!!!')
            break
        if print_alpha:
            write_alpha(t)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:51,代码来源:perceptron.py

示例10: run

 def run(self):
     """Train an HMM and tag with Viterbi, in test or cross-validation mode.

     When ``self.isTest`` is truthy the model is built from
     ``self.train_file`` and applied to ``self.test_file``; the Viterbi run
     is timed and ``(predicted, actual, tokens)`` is returned, with the
     last two taken from ``self.test_answers``.

     Otherwise the parsed training file is split by ``self.splitCV``, the
     model is built from the training part as text, and
     ``(predicted, actual)`` is returned.
     """
     if not self.isTest:
         print("Splitting Data")
         parsed = self.parse_file(self.train_file)
         (train, test) = self.splitCV(parsed, self.cv_validation_percentage)
         print("Converting Lists")
         train_text = "".join("%s %s\n" % (p, t) for p, t in train)
         test_text = "".join("%s\n" % t for p, t in test)
         print("Running HMM")
         h = HiddenMarkovModel(text=train_text, smoothed=self.smoothing)
         print("Running Viterbi")
         predicted = viterbi(h, text=test_text, test=False)
         actual = self.getActual(test)
         return (predicted, actual)

     print("Running HMM")
     h = HiddenMarkovModel(self.train_file, smoothed=self.smoothing)
     print("Running Viterbi")
     started = time.clock()
     predicted = viterbi(h, self.test_file, test=False)
     finished = time.clock()
     print("Viterbi ran in %f seconds" % (finished - started))
     actual, tokens = zip(*self.parse_file(self.test_answers))
     return (predicted, actual, tokens)
开发者ID:amm385,项目名称:POS,代码行数:23,代码来源:Analyzer.py

示例11: t_wordseg

def t_wordseg():
    """Decode every sample with ``viterbi`` and print the joined states.

    Before decoding, ``B`` is extended: for every key ending in ``'|'`` an
    entry for the key without the bar is added that maps only to itself
    with value 1.0; for every other key, ``key + '|'`` is made an alias of
    the same entry.  States missing from ``PI`` get probability 0.0.
    """
    PI, A, B = build(True)
    # NOTE(review): B is modified while looping over B.keys(); this relies
    # on keys() returning a snapshot list (Python 2 behaviour) -- confirm.
    for key in B.keys():
        if key[-1] != '|':
            B[key + '|'] = B[key]
            continue
        stem = key[:-1]
        B[stem] = {stem: 1.0}
    S = B.keys()
    for state in S:
        if state not in PI:
            PI[state] = 0.0
    for sen in samples:
        prob, X = viterbi(tuple(sen), S, PI, A, B)
        # Single-argument parenthesised print behaves the same on Python 2.
        print(u''.join(X))
开发者ID:Catentropy,项目名称:mylab,代码行数:15,代码来源:t_wordseg.py

示例12: __cut

def __cut(sentence):
    """Yield ``pair(word, tag)`` items for ``sentence`` from Viterbi output.

    Each decoded entry is indexed as ``entry[0]`` (position state) and
    ``entry[1]`` (tag).  State ``'B'`` marks where a word starts, ``'E'``
    closes the word begun at the last ``'B'``, and ``'S'`` emits the
    current character on its own.  Anything left after the last emitted
    word is yielded as one final pair, tagged with the entry at its first
    position.
    """
    _prob, decoded = viterbi.viterbi(sentence, char_state_tab_P, start_P, trans_P, emit_P)
    word_start = 0
    consumed = 0

    for idx, char in enumerate(sentence):
        entry = decoded[idx]
        state = entry[0]
        if state == 'B':
            word_start = idx
        elif state == 'E':
            yield pair(sentence[word_start:idx + 1], entry[1])
            consumed = idx + 1
        elif state == 'S':
            yield pair(char, entry[1])
            consumed = idx + 1

    if consumed < len(sentence):
        yield pair(sentence[consumed:], decoded[consumed][1])
开发者ID:smerdy,项目名称:newsmosaic,代码行数:16,代码来源:__init__.py

示例13: viterbi_run

def viterbi_run(training, test_file): 
	"""Build HMM tables from ``training`` and decode each test sentence.

	All table construction is delegated to helpers in the ``viterbi``
	module; the per-step descriptions below come from the original
	comments and are not verifiable from this excerpt.

	NOTE(review): as shown, the function returns nothing and the
	``error_list``, ``error_list_i``, ``new_sentences`` and ``count``
	locals are never used -- the excerpt looks truncated, so the missing
	tail presumably consumes them.  Confirm against the full source.

	Args:
		training: training corpus handed to ``viterbi.corpus_list`` and
			``viterbi.corpus_dictionary``.
		test_file: test corpus handed to ``viterbi.corpus_list_2``.
	"""

	# Per the original comment: a list of sentence lists containing
	# (word, part of speech) tuples.
	

	corpus_list = viterbi.corpus_list(training) 

	# Per the original comment: a dictionary mapping each corpus
	# part-of-speech tag to its number of occurrences.
	corpus_dictionary = viterbi.corpus_dictionary(training) 

	# Part-of-speech keys.
	keys = viterbi.key_list(corpus_dictionary) 

	# Prior-probability transition table for the entire corpus.


	prior_probabilities_table = viterbi.transition_table(corpus_dictionary,corpus_list)


	# Word dictionary; per the original comment it maps each word to its
	# parts of speech with occurrence counts.
	word_dic = viterbi.word_dic(corpus_list,keys) 


	# Word keys.
	words = viterbi.key_list(word_dic)


	# Likelihood table.
	likelihood_table  = viterbi.word_freq(corpus_dictionary,word_dic)


	# Emissions and transitions: load the sentences to decode.
	sentences = viterbi.corpus_list_2(test_file) 

	# Accumulators that are not used in the visible part of the function.
	error_list = [] 
	error_list_i = [] 
	new_sentences = [] 
	count = 0 
	for sentence in sentences:
		# Build the per-sentence inputs, then run the decoder on them.
		trans = viterbi.sentence_tag(sentence,keys,words,likelihood_table)
		s_pos = viterbi.sentence_pos(trans)
		transition_table = viterbi.transition_probabilities(trans,s_pos,prior_probabilities_table,keys)

		observed_like = viterbi.observed_likelihoods(sentence,s_pos,trans,likelihood_table,words,keys)
		vit_sent = viterbi.viterbi(observed_like,sentence,s_pos,transition_table) 
开发者ID:lesliemanrique1,项目名称:HMM-Part-of-Speech-Tagger,代码行数:46,代码来源:viterbi_run.py

示例14: evaluate

def evaluate():
    """Tag each sentence of ``sys.argv[4]`` and write results to ``sys.argv[5]``.

    The model state is loaded into module globals by the ``get_*`` helpers.
    Sentences are read one at a time with ``get_sentence`` until it returns
    a falsy value.  For each sentence, one ``word tag`` line is written per
    token, followed by a blank line.

    Both files are opened with ``with`` so they are closed (and the output
    flushed) even if decoding raises.
    """
    global possible_tags
    global strings
    global strings_abr
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    with open(sys.argv[4], 'r') as data:
        with open(sys.argv[5], 'w') as output:
            sentence = get_sentence(data)
            while sentence:
                tags = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, Words, regExp)
                # tags is indexed (not zipped) so a too-short tag list
                # still fails loudly instead of truncating the output.
                for i, word in enumerate(sentence):
                    output.write('{} {}\n'.format(word, tags[i]))
                output.write('\n')
                sentence = get_sentence(data)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:18,代码来源:perceptron_test.py

示例15: gen_couplet

def gen_couplet(transition_prob_tree, output_prob_tree, unigram_freq, first_half):
    """Generate ranked second-half candidates for a couplet.

    For every character of ``first_half`` a column of ``top_k_word``
    candidate characters and their output probabilities is obtained from
    ``gen_candidates``.  ``init_model`` then builds the transition and
    initial probabilities, ``viterbi`` finds the best paths through the
    candidates, ``deal_repeat`` post-processes them, and each resulting
    second half is scored with ``ranking_function``.

    Args:
        transition_prob_tree: passed through to ``init_model``.
        output_prob_tree: looked up per character of ``first_half`` and
            passed to ``ranking_function``.
        unigram_freq: passed through to ``init_model``.
        first_half: the first line of the couplet; must be ``unicode``.

    Returns:
        A list of at most ``top_k_output`` ``(score, second_half)`` tuples
        sorted in descending order, or the empty string ``''`` when a
        character is missing from ``output_prob_tree`` or ``init_model``
        fails.  (The ``''`` failure value is kept for backward
        compatibility with existing callers.)
    """
    assert isinstance(first_half, unicode)
    couplet_length = len(first_half)
    visible_words = np.array([first_half[i] for i in range(couplet_length)])
    hidden_candidate_words = np.array([u' ' for _ in range(top_k_word*couplet_length)]).reshape(top_k_word, couplet_length)
    # Random placeholder values; every column is overwritten in the loop
    # below before the array is used.
    output_prob = np.random.rand(top_k_word, couplet_length)
    for i, key in enumerate(first_half):
        if not output_prob_tree.has_key(key):
            # Single-argument parenthesised print behaves the same on Python 2.
            print('%s, Cannot generate couplet' % key)
            return ''

        hash_leaf = output_prob_tree[key]
        hidden_candidate_words[:, i], output_prob[:, i] = gen_candidates(first_half, hash_leaf, top_k_word)

    try:
        transition_prob, init_prob = init_model(transition_prob_tree, unigram_freq, hidden_candidate_words, top_k_word)
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt /
        # SystemExit as the original bare ``except:`` did.
        return ''

    optimal_path, prob = viterbi(transition_prob, output_prob, init_prob, [], visible_words, top_k_word, top_k_candidate)
    optimal_path = deal_repeat(first_half, optimal_path)

    results = []
    for i in range(optimal_path.shape[0]):
        # Map each path step to the candidate character chosen at that position.
        second_half = ''.join(
            hidden_candidate_words[optimal_path[i, j], j]
            for j in range(optimal_path.shape[1])
        )
        score = ranking_function(output_prob_tree, first_half, second_half)
        results.append((score, second_half))

    results = sorted(results, reverse=True)[:top_k_output]
    return results
开发者ID:dnc1994,项目名称:ReKan,代码行数:39,代码来源:gen_couplets.py


注:本文中的viterbi.viterbi函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。