本文整理汇总了 Python 中 nltk.probability.ConditionalFreqDist.iteritems 方法的典型用法代码示例。如果您正苦于以下问题：Python ConditionalFreqDist.iteritems 方法的具体用法？Python ConditionalFreqDist.iteritems 怎么用？Python ConditionalFreqDist.iteritems 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 nltk.probability.ConditionalFreqDist 的用法示例。
在下文中一共展示了ConditionalFreqDist.iteritems方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: MorphProbModel
# 需要导入模块: from nltk.probability import ConditionalFreqDist [as 别名]
# 或者: from nltk.probability.ConditionalFreqDist import iteritems [as 别名]
class MorphProbModel():
    """Trigram tag model over ``(tag, is_capitalized)`` events.

    Training collects unigram, bigram and trigram frequency counts of tag
    events, per-word tag counts, feeds rare (word, tag) pairs to a
    ``Guesser`` and finally derives three interpolation weights
    (``_l1``, ``_l2``, ``_l3``) from the counts.

    The code uses the NLTK 2 / Python 2 spellings ``FreqDist.inc``,
    ``FreqDist.samples`` and ``iteritems``.
    """

    # NOTE(review): not used in the visible part of the class; presumably the
    # score assigned to unknown events -- confirm against the callers.
    UNK_PROB = -99

    def __init__(self,
                 beam=1000,
                 max_guess=20,
                 rare_treshold=10,
                 capitalization=True):
        """Create an untrained model.

        :param beam: stored as ``_beam_size`` (consumer not visible here).
        :param max_guess: stored as ``_max_guess`` (consumer not visible
            here).
        :param rare_treshold: (word, tag) pairs seen fewer than this many
            times during training are passed to the guesser.
        :param capitalization: when true, a token whose first character is
            upper case gets the flag ``True`` in its tag event.
        """
        # Frequency tables filled by train().
        self._uni = FreqDist()             # (tag, cap) events
        self._bi = ConditionalFreqDist()   # previous event -> event
        self._tri = ConditionalFreqDist()  # (two previous events) -> event
        self._wd = ConditionalFreqDist()   # word -> tag

        # Interpolation weights; stay 0.0 until _compute_lambda() runs.
        self._l1 = 0.0
        self._l2 = 0.0
        self._l3 = 0.0

        self._beam_size = beam
        self._use_capitalization = capitalization
        self._max_guess = max_guess
        self._treshold = rare_treshold
        self._unk = Guesser(10)
        self._analyzer = None
        self.cache = {}

    def set_analyzer(self, obj):
        """Store ``obj`` as the analyzer used by this model."""
        self._analyzer = obj

    def train(self, data):
        """Collect counts from ``data`` and derive the model parameters.

        :param data: iterable of sentences; each sentence is an iterable of
            ``(w, l, t)`` triples. ``w`` is the word form and ``t`` its tag;
            ``l`` is not used by this method.

        After counting, every (word, tag) pair seen fewer than
        ``rare_treshold`` times is added (lower-cased) to the guesser, the
        guesser is finalized and the interpolation weights are recomputed.
        """
        for sent in data:
            # Every sentence starts with two begin-of-sentence pseudo events.
            history = [('BOS', False), ('BOS', False)]
            for w, l, t in sent:
                # NOTE: encoding the (w, l, t) triple with encode() was
                # dropped here because it consumed about 4 GB of memory.
                # w[:1] instead of w[0] so an empty token does not raise
                # IndexError; it simply counts as not capitalized.
                is_capitalized = bool(self._use_capitalization
                                      and w[:1].isupper())
                event = (t, is_capitalized)

                self._wd[w].inc(t)
                self._uni.inc(event)
                self._bi[history[1]].inc(event)
                self._tri[tuple(history)].inc(event)

                # Slide the two-event window forward.
                history = [history[1], event]

        for word, fd in self._wd.iteritems():
            for tag, count in fd.iteritems():
                if count < self._treshold:
                    self._unk.add_word(word.lower(), tag, count)
        self._unk.finalize()

        self._compute_lambda()

    def _compute_lambda(self):
        """Derive the interpolation weights ``_l1``, ``_l2`` and ``_l3``.

        For every trigram event, the unigram (c1), bigram (c2) and trigram
        (c3) relative frequencies are computed with one occurrence removed
        from both numerator and denominator. The trigram count is credited
        to the weight of the largest estimate, or split evenly on the
        handled two-way ties. The three totals are then normalised to sum
        to 1. The scheme appears to mirror ``nltk.tag.tnt.TnT``.

        If no event contributes (for example no training data, or every
        tag event occurred only once) all three weights are set to 0.0
        instead of raising ``ZeroDivisionError``.

        ``_safe_div`` is expected to be provided elsewhere on this class.
        """
        tl1 = 0.0
        tl2 = 0.0
        tl3 = 0.0

        for history in self._tri.conditions():
            _, h2 = history
            tri_fd = self._tri[history]

            for tag in tri_fd.samples():
                # Events seen exactly once overall carry no information
                # once that single occurrence is removed.
                if self._uni[tag] == 1:
                    continue

                count = tri_fd[tag]
                c3 = self._safe_div(count - 1, tri_fd.N() - 1)
                c2 = self._safe_div(self._bi[h2][tag] - 1,
                                    self._bi[h2].N() - 1)
                c1 = self._safe_div(self._uni[tag] - 1, self._uni.N() - 1)

                if (c1 > c3) and (c1 > c2):
                    tl1 += count
                elif (c2 > c3) and (c2 > c1):
                    tl2 += count
                elif (c3 > c2) and (c3 > c1):
                    tl3 += count
                elif (c3 == c2) and (c3 > c1):
                    tl2 += count / 2.0
                    tl3 += count / 2.0
                elif (c2 == c1) and (c1 > c3):
                    tl1 += count / 2.0
                    tl2 += count / 2.0
                # Any other tie contributes to no weight.

        total = tl1 + tl2 + tl3
        if total == 0:
            # Nothing to normalise: fall back to the untrained state.
            self._l1 = 0.0
            self._l2 = 0.0
            self._l3 = 0.0
            return

        self._l1 = tl1 / total
        self._l2 = tl2 / total
        self._l3 = tl3 / total
#.........这里部分代码省略.........