本文整理汇总了 Python 中 nltk.metrics.distance 方法的典型用法代码示例。如果您正苦于以下问题:Python metrics.distance 方法的具体用法是什么?Python metrics.distance 怎么用?Python metrics.distance 有哪些使用示例?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 nltk.metrics 的用法示例。
在下文中一共展示了metrics.distance方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: agr
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def agr(self, cA, cB, i, data=None):
    """Return the observed agreement of coders ``cA`` and ``cB`` on item ``i``.

    The result is ``1.0`` minus the value ``self.distance`` gives for the
    two coders' labels.  Records are taken from ``data`` when it is truthy
    and from ``self.data`` otherwise; each record is indexed with the keys
    ``'coder'``, ``'item'`` and ``'labels'``.

    Raises StopIteration (from ``next``) when a matching record is missing.
    """
    records = data or self.data
    # Either coder's record may come first in the data, so accept whichever
    # one shows up and then search specifically for the partner coder.
    # Assuming a fixed cA, cB order could run a search dry (StopIteration).
    coder_pair = (cA, cB)
    first = next(rec for rec in records
                 if rec['coder'] in coder_pair and rec['item'] == i)
    partner = cB if first['coder'] == cA else cA
    second = next(rec for rec in records
                  if rec['coder'] == partner and rec['item'] == i)
    ret = 1.0 - float(self.distance(first['labels'], second['labels']))
    log.debug("Observed agreement between %s and %s on %s: %f",
              cA, cB, i, ret)
    log.debug("Distance between \"%r\" and \"%r\": %f",
              first['labels'], second['labels'], 1.0 - ret)
    return ret
示例2: Do_alpha
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def Do_alpha(self):
    """Compute the observed disagreement used by the alpha coefficient.

    Alpha, unlike the other metrics, is built on observed disagreement
    rather than on observed agreement.

    For every group returned by ``self._grouped_data('item')`` the label
    frequencies are counted, and each ordered pair of labels contributes
    the product of its two counts times ``self.distance`` of the labels.
    The sum is scaled by ``1 / (len(I) * len(C) * (len(C) - 1))``.
    """
    weighted_sum = 0.0
    for _item, records in self._grouped_data('item'):
        counts = FreqDist(rec['labels'] for rec in records)
        for label_a, count_a in iteritems(counts):
            for label_b, count_b in iteritems(counts):
                # Argument order (inner label, outer label) is kept as is.
                pair_distance = self.distance(label_b, label_a)
                weighted_sum += float(count_a * count_b) * pair_distance
    n_coders = len(self.C)
    scale = 1.0 / (len(self.I) * n_coders * (n_coders - 1))
    ret = scale * weighted_sum
    log.debug("Observed disagreement: %f", ret)
    return ret
示例3: Do_alpha
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def Do_alpha(self):
    """Return the observed disagreement that the alpha coefficient needs.

    The alpha coefficient works from this value instead of the observed
    agreement used by the other metrics.

    Per item group, every ordered pair of label frequencies adds
    ``count * count * self.distance(labels)`` to a running total, which is
    finally divided by ``float(len(I) * len(C) * (len(C) - 1))``.
    """
    running = 0.0
    for _key, group in self._grouped_data('item'):
        freq = FreqDist(entry['labels'] for entry in group)
        for outer_label, outer_count in iteritems(freq):
            for inner_label, inner_count in iteritems(freq):
                weight = float(outer_count * inner_count)
                running += weight * self.distance(inner_label, outer_label)
    # The denominator is converted to float explicitly before dividing.
    denominator = float(len(self.I) * len(self.C) * (len(self.C) - 1))
    ret = (1.0 / denominator) * running
    log.debug("Observed disagreement: %f", ret)
    return ret
示例4: __init__
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def __init__(self, data=None, distance=binary_distance):
    """Set up an annotation task, optionally loading initial data.

    ``data`` may be None, which leaves the task empty, or a sequence of
    3-tuples, each describing one coder's labeling of one item:

        (coder, item, label)

    When ``data`` is given it is handed to ``self.load_array``.

    ``distance`` is a function of two labels that produces a numerical
    distance.  A label's distance to itself should be zero:

        distance(l, l) = 0
    """
    self.distance = distance
    # Begin with empty containers; any supplied data is loaded afterwards.
    self.I, self.K, self.C = set(), set(), set()
    self.data = []
    if data is None:
        return
    self.load_array(data)
示例5: __init__
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def __init__(self, data=None, distance=binary_distance):
    """Create an annotation task that starts out empty.

    ``distance`` is stored on the instance as given.  When ``data`` is not
    None it is passed to ``self.load_array`` once the empty containers
    have been created.
    """
    self.distance = distance
    # I, K and C each start as a separate empty set.
    for attr_name in ('I', 'K', 'C'):
        setattr(self, attr_name, set())
    self.data = []
    has_initial_data = data is not None
    if has_initial_data:
        self.load_array(data)
示例6: Do_Kw_pairwise
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def Do_Kw_pairwise(self,cA,cB,max_distance=1.0):
    """Return the observed disagreement for the weighted kappa coefficient.

    Only records whose ``'coder'`` is ``cA`` or ``cB`` are considered.
    They are grouped through ``self._grouped_data('item', ...)``, the
    distance between the first two records' labels of every group is
    summed, and the sum is divided by ``len(self.I) * max_distance``.
    """
    pair_records = (rec for rec in self.data if rec['coder'] in (cA, cB))
    accumulated = 0.0
    for _item, group in self._grouped_data('item', pair_records):
        # Each group is expected to hold two records (one per coder); the
        # distance function does not care which of them comes first.
        first_labels = next(group)['labels']
        second_labels = next(group)['labels']
        accumulated += self.distance(first_labels, second_labels)
    normaliser = len(self.I) * max_distance
    ret = accumulated / normaliser
    log.debug("Observed disagreement between %s and %s: %f", cA, cB, ret)
    return ret
示例7: alpha
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def alpha(self):
    """Return the alpha coefficient (Krippendorff 1980).

    The expected disagreement sums, over every ordered pair of labels in
    ``self.K``, the product of the two labels' frequencies in
    ``self.data`` times their ``self.distance``, scaled by
    ``1 / (n * (n - 1))`` with ``n = len(self.I) * len(self.C)``.
    The result is ``1.0 - self.Do_alpha() / expected_disagreement``.
    """
    label_counts = FreqDist(rec['labels'] for rec in self.data)
    expected = 0.0
    for first_label in self.K:
        first_count = label_counts[first_label]
        for second_label in self.K:
            pair_weight = float(first_count * label_counts[second_label])
            expected += pair_weight * self.distance(first_label, second_label)
    n_ratings = len(self.I) * len(self.C)
    expected = (1.0 / (n_ratings * (n_ratings - 1))) * expected
    log.debug("Expected disagreement: %f", expected)
    observed = self.Do_alpha()
    return 1.0 - (observed / expected)
示例8: alpha
# 需要导入模块: from nltk import metrics [as 别名]
# 或者: from nltk.metrics import distance [as 别名]
def alpha(self):
    """Return the alpha coefficient (Krippendorff 1980).

    The expected disagreement ``De`` sums, over every ordered pair of
    labels in ``self.K``, the product of the two labels' frequencies in
    ``self.data`` times their ``self.distance``, scaled by
    ``1 / (n * (n - 1))`` with ``n = len(self.I) * len(self.C)``.
    The result is ``1.0 - self.Do_alpha() / De``.

    Returns:
        A float.  When ``self.K`` holds exactly one label the result is
        ``1.0`` (previously the int ``1``, which compares equal).

    Raises:
        ValueError: if ``self.K`` is empty, if there is exactly one coder
            and one item, or if a ZeroDivisionError occurs while scaling
            ``De`` or dividing the observed disagreement by it.
    """
    # check for degenerate cases
    if len(self.K) == 0:
        raise ValueError("Cannot calculate alpha, no data present!")
    if len(self.K) == 1:
        log.debug("Only one annotation value, alpha returning 1.")
        # Return a float so every code path yields the same type.
        return 1.0
    if len(self.C) == 1 and len(self.I) == 1:
        raise ValueError("Cannot calculate alpha, only one coder and item present!")
    De = 0.0
    label_freqs = FreqDist(x['labels'] for x in self.data)
    for j in self.K:
        nj = label_freqs[j]
        for l in self.K:
            De += float(nj * label_freqs[l]) * self.distance(j, l)
    try:
        De = (1.0 / (len(self.I) * len(self.C) * (len(self.I) * len(self.C) - 1))) * De
        log.debug("Expected disagreement: %f", De)
        ret = 1.0 - (self.Do_alpha() / De)
    except ZeroDivisionError:
        # Reported as ValueError so callers see one exception type for all
        # "cannot calculate" situations.
        raise ValueError("Cannot calculate alpha, expected disagreement zero, check the distance function!")
    return ret