本文整理汇总了Python中hmm.HMM.viterbi方法的典型用法代码示例。如果您正苦于以下问题:Python HMM.viterbi方法的具体用法?Python HMM.viterbi怎么用?Python HMM.viterbi使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类hmm.HMM
的用法示例。
在下文中一共展示了HMM.viterbi方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import viterbi [as 别名]
def main():
    """Demo pipeline: embed words, cluster the embeddings, then chain two
    HMMs -- word->cluster and cluster->tag -- decoding each with Viterbi.

    NOTE(review): this is skeleton code, not a runnable demo -- every
    probability table below is a None placeholder and `words` is empty,
    so the HMM constructors/viterbi calls receive no real parameters.
    """
    tagged_words = brown.tagged_words()
    words_corpus = brown.words()
    # Train word embeddings on the raw (untagged) corpus.
    word2vec = Word2Vec()
    word2vec.train(words_corpus)
    word_vecs = [word2vec.word2vec(word) for word in words_corpus]
    n_clusters = 10 # random number for now
    kmeans = KMeans(n_clusters)
    kmeans.compute(word_vecs)
    # word-cluster HMM: clusters are the hidden states, words the emissions.
    p_word = {}
    p_cluster = {}
    p_cluster_given_word = None # softmax
    p_word_given_cluster = None # joint probability formula
    p_transition_cluster = None # count
    p_initial_cluster = None # count
    # cluster-tag HMM: POS tags are the hidden states, clusters the emissions.
    p_cluster_given_tag = None # softmax
    p_transition_tag = None # count from tagged data
    p_initial_tag = None # count from tagged data
    hmm_word_cluster = HMM(p_initial_cluster, p_transition_cluster, p_word_given_cluster)
    hmm_cluster_tag = HMM(p_initial_tag, p_transition_tag, p_cluster_given_tag)
    words = []
    # Stage 1: decode word sequence -> cluster sequence.
    clusters = hmm_word_cluster.viterbi(words)
    # Stage 2: decode cluster sequence -> tag sequence.
    tags = hmm_cluster_tag.viterbi(clusters)
示例2: TestHMM
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import viterbi [as 别名]
class TestHMM():
    """Sanity tests for the HMM class on a 3-state / 3-symbol model:
    sequence generation, forward-backward smoothing, Viterbi decoding."""

    def __init__(self):
        # State-transition matrix (row = from-state, column = to-state).
        # NOTE(review): rows sum to 0.9, not 1.0 -- confirm this is what
        # the HMM class expects.
        self.Z = numpy.array([
            [0.8, 0.09, 0.01],
            [0.09, 0.8, 0.01],
            [0.1, 0, 0.8]
        ])
        # Emission matrix (row = state, column = observed symbol).
        self.b = numpy.array([
            [0.1, 0.1, 0.8],
            [0.05, 0.9, 0.05],
            [0.8, 0.1, 0.1]
        ])
        # Initial state distribution.
        self.pi = numpy.array([0.9, 0.05, 0.05])
        # Length of the generated test sequences.
        self.T = 2000
        # we want the errors to be less than 20%
        self.error_threshold = 0.2

    def setup(self):
        # Fresh model instance before each test.
        self.model = HMM(self.Z, self.b, self.pi)

    def gen_states_obs(self):
        """Sample T (state, observation) pairs from the model and return
        them as two parallel lists."""
        pairs = list(self.model.gen(self.T))
        states = [s for s, _ in pairs]
        obsvns = [o for _, o in pairs]
        return states, obsvns

    def test_init(self):
        self.model = HMM(self.Z, self.b, self.pi)

    def test_gen(self):
        self.setup()
        pairs = list(self.model.gen(10))
        states = [s for s, _ in pairs]
        obsvns = [o for _, o in pairs]
        assert len(states) == 10
        assert len(obsvns) == 10

    def test_forward_backward(self):
        self.setup()
        states, obsvns = self.gen_states_obs()
        alpha, beta = self.model.forward_backward(obsvns)
        # Posterior state marginals, normalised per time step.
        gamma = [a * b / sum(a * b) for a, b in zip(alpha, beta)]
        # argmax picks the first index of the maximum, matching
        # numpy.where(g == max(g))[0][0].
        state_est = numpy.array([numpy.argmax(g) for g in gamma])
        err = numpy.mean(state_est != numpy.array(states))
        assert err < self.error_threshold

    def test_viterbi(self):
        self.setup()
        states, obsvns = self.gen_states_obs()
        state_est = self.model.viterbi(obsvns)
        err = numpy.mean(state_est != numpy.array(states))
        assert err < self.error_threshold
示例3: test_hmm
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import viterbi [as 别名]
def test_hmm():
    """Supervised-train a 2-state / 2-symbol HMM on labelled sequences and
    check that Viterbi decoding reproduces a known state path and its
    log-probability (within 0.1)."""
    m = HMM(2, 2)
    observations = [[0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,1,1,1,1],[0,0,0,0,1,0,1,1,0,1,1,0,0,1,0,0,1,1,1,1,0,0,1,0,0]]
    ground = [[0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,1],[0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,0,0]]
    # Supervised learning: hidden states are given, so probabilities are
    # estimated by counting (no smoothing).
    m.learn(observations, ground, smooth=None)
    # Expected (state_path, log_probability) for the second sequence.
    trueres = ([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0], -21.944)
    res = m.viterbi(observations[1])
    assert trueres[0] == res[0]
    # Parenthesized single-argument print works identically under
    # Python 2 and 3 (fixes the Python-2-only `print x` statements).
    print(trueres[1])
    print(res[1])
    assert abs(trueres[1] - res[1]) < 0.1
示例4: HMMSegmenter
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import viterbi [as 别名]
class HMMSegmenter(object):
    """Character-based Chinese word segmentation using an HMM.

    Traditional segmentation methods, whether rule-based or statistical,
    generally rely on a pre-compiled lexicon: segmentation is decided by
    dictionary lookup plus ambiguity resolution.  Character-based tagging
    instead treats segmentation as a per-character labelling problem.
    Each character occupies a definite position when it forms a word;
    allowing at most four positions -- B (word begin), M (word middle),
    E (word end) and S (single-character word) -- a segmented sentence (a)
    can be rewritten directly as the per-character tag sequence (b):

        (a) segmentation: 上海/ 计划/ N/ 本/ 世纪/ 末/ 实现/ 人均/ 国内/
                          生产/ 总值/ 五千美元/ 。
        (b) tagged form:  上/B 海/E 计/B 划/E N/S 本/S 世/B 纪/E 末/S
                          实/B 现/E 人/B 均/E 国/B 内/E 生/B 产/E 总/B 值/E
                          五/B 千/M 美/M 元/E 。/S

    Note that "character" here is not limited to Chinese characters.
    Real Chinese text inevitably contains non-Chinese symbols, so Latin
    letters, Arabic digits and punctuation are also basic word-forming
    units -- Chinese characters are simply the most numerous.

    A key advantage of treating segmentation as character tagging is that
    it handles in-vocabulary and out-of-vocabulary words uniformly: both
    are produced by the same labelling process, so no dedicated lexicon
    features and no special unknown-word (person/place/organisation name)
    recognition module are needed, which greatly simplifies the system.

    (1) First, characters' tag behaviour is learned from predefined
        features, yielding a probabilistic model (HMM, MaxEnt or CRF).
    (2) Then the input string is tagged according to how tightly
        adjacent characters bind together.
    (3) Finally the segmentation is read directly off the tag sequence.
    """
    def __init__(self):
        self.hmm = HMM()
        self.re_chinese = re.compile(ur"([\u4E00-\u9FA5]+)") # matches runs of Chinese characters
        self.re_skip = re.compile(ur"([\.0-9]+|[a-zA-Z0-9]+)") # matches runs of Latin letters / digits
    def load(self, model_dir):
        """
        Load the model files from model_dir.
        """
        self.hmm.load(model_dir)
    def segment(self, text):
        """
        Segment the input text into a word sequence (generator).
        NOTE: the text encoding is detected automatically (UTF-8 first,
        then GBK as a lossy fallback).
        """
        if not (type(text) is unicode):
            try:
                text = text.decode('utf-8')
            except:
                # NOTE(review): bare except silently falls back to lossy
                # GBK decoding for any failure.
                text = text.decode('gbk', 'ignore')
        # Split into alternating Chinese / non-Chinese blocks.
        blocks = self.re_chinese.split(text)
        for block in blocks:
            if self.re_chinese.match(block):
                # Chinese run: segment with the HMM tagger.
                for word in self._tagging(block):
                    yield word
            else:
                # Non-Chinese run: emit number/letter tokens as-is.
                words = self.re_skip.split(block)
                for word in words:
                    if len(word) > 0:
                        yield word
    def _tagging(self, text):
        """
        Segment one run of characters using the HMM (Viterbi decoding
        over B/M/E/S tags); yields the resulting words.
        """
        log_prob, tag_list = self.hmm.viterbi(text)
        begin = 0
        for i, ch in enumerate(text):
            tag = tag_list[i]
            if tag == 'B':
                begin = i
            elif tag == 'E':
                yield text[begin : i + 1]
            elif tag == 'S':
                yield ch
示例5: WordClassifier
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import viterbi [as 别名]
class WordClassifier(object):
'''
Classifies a possible misspelled word to a word
'''
def __init__(self, words=None):
    """Build an HMM whose hidden states are the letters a-z plus two
    sentinels: '@' (word start) and '$' (word end).

    Transitions (A) are bigram counts over the dictionary *words*,
    row-normalised.  Emissions (B) come from a hard-coded confusion
    matrix (normalised counts from 100 examples), floored at 1e-10;
    the sentinel states emit only themselves.

    NOTE(review): `zeros(...)` is called as zeros(rows, cols[, content])
    and zeros(n, content=...), so it is NOT numpy.zeros -- presumably a
    project helper; confirm its signature.
    """
    self.words = words
    self.alphabet =['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
    # Surround the alphabet with start/end sentinel states.
    self.alphabet = ["@"] + self.alphabet + ["$"]
    len_states = len(self.alphabet)
    # Initial distribution: always start in the '@' state.
    pi = zeros(len_states)
    pi[0] = 1.0
    # Count letter-bigram transitions over the dictionary words.
    A = zeros(len_states, len_states)
    for word in self.words:
        numbers = self.observationsToNumbers('@' + word + '$')
        for n1, n2 in zip(numbers[:-1], numbers[1:]):
            A[n1][n2] += 1
    # Row-normalise the counts into transition probabilities.
    # NOTE(review): divides by zero for letters never seen in *words*.
    for i in range(len_states):
        A[i] = [j * 1.0 / sum(A[i]) for j in A[i]]
    #words = ["pig","dog","cat","bee","ape","elk","hen","cow"]
    # NOTE(review): `content` is computed here but never used below.
    content = 0.3 / (len_states - 3)
    B = zeros(len_states, len_states, 1e-10)
    # 26x26 letter confusion matrix: row = intended letter,
    # column = observed letter (normalised counts, 100 examples).
    normalized_count_matrix_100_examples = \
    [[0.8, 0.0, 0.01, 0.0, 0.0, 0.07, 0.0, 0.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, 0.0, 0.05, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0],
    [0.0, 0.86, 0.0, 0.0, 0.04, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04],
    [0.0, 0.0, 0.86, 0.0, 0.0, 0.04, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.07, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.48, 0.0, 0.02, 0.0, 0.0, 0.0, 0.02, 0.0, 0.0, 0.0, 0.0, 0.08, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.05, 0.0, 0.0, 0.79, 0.0, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06],
    [0.0, 0.0, 0.11, 0.0, 0.0, 0.62, 0.0, 0.0, 0.01, 0.0, 0.23, 0.0, 0.0, 0.0, 0.0, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.79, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.0, 0.13, 0.01, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02],
    [0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.56, 0.0, 0.13, 0.0, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.2, 0.04, 0.0, 0.0, 0.01, 0.0],
    [0.05, 0.01, 0.0, 0.0, 0.0, 0.02, 0.0, 0.0, 0.34, 0.0, 0.0, 0.0, 0.15, 0.02, 0.0, 0.0, 0.0, 0.04, 0.03, 0.02, 0.0, 0.01, 0.08, 0.15, 0.08, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.81, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.01, 0.09, 0.02, 0.0, 0.27, 0.0, 0.0, 0.0, 0.0, 0.58, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.02, 0.0, 0.0],
    [0.0, 0.0, 0.03, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.96, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.01, 0.0, 0.0, 0.44, 0.13, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17, 0.08, 0.14, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06, 0.0, 0.0, 0.0, 0.0, 0.24, 0.21, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04, 0.18, 0.05, 0.22, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.03, 0.04, 0.01, 0.0, 0.08, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.79, 0.03, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.01, 0.0, 0.0, 0.29, 0.0, 0.01, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.13, 0.54, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.0, 0.89, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.14, 0.0, 0.0, 0.1, 0.0, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.62, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.1],
    [0.0, 0.12, 0.0, 0.01, 0.03, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.0, 0.0, 0.0, 0.73, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.93, 0.0, 0.0, 0.01, 0.0, 0.03, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05, 0.0, 0.09, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.79, 0.02, 0.04, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.0, 0.14, 0.13, 0.03, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21, 0.3, 0.07, 0.0, 0.09, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.17, 0.09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13, 0.05, 0.55, 0.0, 0.0, 0.0],
    [0.03, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06, 0.0, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, 0.83, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14, 0.01, 0.06, 0.0, 0.0, 0.62, 0.0],
    [0.0, 0.04, 0.0, 0.0, 0.12, 0.0, 0.07, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12, 0.12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.52]]
    # Fill the letter rows of B (sentinel rows/columns stay at the floor).
    for i in range(1, len_states - 1):
        B[i][1:-1] = normalized_count_matrix_100_examples[i-1]
    # Floor all emission probabilities at 1e-10 to avoid zeros.
    for i in range(len(B)):
        B[i] = [j if j > 1e-10 else 1e-10 for j in B[i]]
    # Sentinel states deterministically emit their own symbol.
    B[0] = zeros(len_states, content = 1e-10)
    B[0][0] = 1.0
    last_state = len_states - 1
    B[last_state] = zeros(len_states, content = 1e-10)
    B[last_state][last_state] = 1.0
    self.hmm = HMM(pi,A,B,self.alphabet)
def observationsToNumbers(self, observations):
return [self.alphabet.index(o) for o in observations]
def distanceBetweenTwoWords(self, word1, word2):
    """Hamming distance between equal-length words; for different lengths,
    return the longer length as a worst-case distance."""
    if len(word1) == len(word2):
        return sum(c1 != c2 for c1, c2 in zip(word1, word2))
    return max(len(word1), len(word2))
def classify(self,observations):
    """Decode *observations* through the HMM and return a pair:
    (raw decoded string, closest dictionary word by distance)."""
    encoded = self.observationsToNumbers('@' + observations + '$')
    state_path = self.hmm.viterbi(encoded)
    # Turn the state indices back into letters and drop the sentinels.
    decoded = ''.join(self.alphabet[i] for i in state_path)
    decoded = decoded[1:-1]
    # Nearest dictionary word (first one at minimum distance wins).
    dists = [self.distanceBetweenTwoWords(decoded, w) for w in self.words]
    corrected = self.words[dists.index(min(dists))]
    return decoded, corrected
def test(self,test_examples):
'''
Parameter:
test_examples - is a list of tuples were the first element in the tuples
is a string representing a word that the classifier should handle and the second
element is a list of test examples for that word.
Returns:
Fraction of correctly classified test examples
'''
correctly_classified_counter = 0.0
wrongly__classified_counter = 0.0
#.........这里部分代码省略.........
示例6: _callback
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import viterbi [as 别名]
#.........这里部分代码省略.........
# Sleep to allow for image to be published.
rospy.sleep(2)
if (fCount == 0) or (fCount == 2) or (fCount == 4):
fCount = fCount + 1
handPoint = [self._baxterCoordinates[0, 0], self._baxterCoordinates[1, 0], self._baxterCoordinates[2, 0], fCount]
print "Point Appended"
self._gPoints.append(handPoint)
print self._gPoints
print self._gOn
self._gPointCount += 1
elif(self._startGesture == '0'):
print "Else"
if(self._gOn == 1):
self._gPointCount = 0
self._gOn = 0
img = cv.LoadImage('GestureAnalyze.png')
msg = cv_bridge.CvBridge().cv_to_imgmsg(img, encoding="bgr8")
self._pub.publish(msg)
# Sleep to allow for image to be published.
rospy.sleep(3)
print "recorded"
# Process points
print "GPoINTS"
print self._gPoints
symbols = getsym(numpy.matrix(self._gPoints))
log_prob = [self._hmmObjG1.viterbi(symbols), self._hmmObjG2.viterbi(symbols), self._hmmObjG3.viterbi(symbols), self._hmmObjG4.viterbi(symbols)]
print log_prob
max_value = max(log_prob)
max_index = log_prob.index(max_value)
self._gPoints = []
# Do gesture recognised processing
self._flag = str(max_index + 1)
gflag = max_index + 1
if gflag == 1:
#while(not sf.Keyboard.is_key_pressed(sf.Keyboard.R_SHIFT)):
# print max_index + 1
# print log_prob
img = cv.LoadImage('Gesture1.png')
msg = cv_bridge.CvBridge().cv_to_imgmsg(img, encoding="bgr8")
self._pub.publish(msg)
# Sleep to allow for image to be published.
rospy.sleep(3)
elif gflag == 2:
#while(not sf.Keyboard.is_key_pressed(sf.Keyboard.R_SHIFT)):
# print max_index + 1
# print log_prob
img = cv.LoadImage('Gesture2.png')
msg = cv_bridge.CvBridge().cv_to_imgmsg(img, encoding="bgr8")
self._pub.publish(msg)
# Sleep to allow for image to be published.
rospy.sleep(3)
elif gflag == 3:
#while(not sf.Keyboard.is_key_pressed(sf.Keyboard.R_SHIFT)):
# print max_index + 1
示例7: CorpusFactory
# 需要导入模块: from hmm import HMM [as 别名]
# 或者: from hmm.HMM import viterbi [as 别名]
if __name__ == "__main__":
    '''
    A validation dataset can be passed as the test file.
    The script will ignore the correct test assignments.
    '''
    # CLI: tag a test file with a previously trained HMM model.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("TEST", help="Test file")
    parser.add_argument("OUT", help="Output path for tagged test data to be written")
    parser.add_argument("MODEL", help="Path for previously trained model")
    args = parser.parse_args()
    test_data = args.TEST
    model_path = args.MODEL
    output_path = args.OUT
    corpus, yseqs, sentences = CorpusFactory().load_test(model_path, test_data)
    # Invert the tag->id mapping so predicted ids can be written as tag names.
    tag_dictionary_t = dict([(corpus.tag_dictionary[t], t) for t in corpus.tag_dictionary])
    hmm = HMM(corpus.transitions, corpus.observations)
    bw = codecs.open(output_path, mode="w", encoding="utf-8")
    for sentence, words in zip(yseqs, sentences):
        tags_predicted = hmm.viterbi(sentence)
        # Drop the first and last predicted tags (sentence boundary markers).
        tags_predicted = tags_predicted[1:-1]
        # One "word|TAG" line per token, blank line between sentences.
        for w, x in zip(words, tags_predicted):
            bw.write(w+"|"+tag_dictionary_t[int(x)]+"\n")
        bw.write("\n")