This article collects typical code examples showing how Python's nltk.compat.Fraction is used. If you have been wondering what compat.Fraction is for, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples from the containing module, nltk.compat.
Five compat.Fraction code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: method2
# Required import: from nltk import compat [as alias]
# Or: from nltk.compat import Fraction [as alias]
def method2(self, p_n, *args, **kwargs):
    """
    Smoothing method 2: Add 1 to both numerator and denominator from
    Chin-Yew Lin and Franz Josef Och (2004) Automatic evaluation of
    machine translation quality using longest common subsequence and
    skip-bigram statistics. In ACL04.
    """
    return [Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False)
            for p_i in p_n]
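A hedged illustration of what method2 does to the raw precision fractions; the SmoothingFunction class name, the import paths, and the sample values are assumptions about the NLTK version these snippets come from, not part of the snippet above. Each p_i = a/b simply becomes (a+1)/(b+1), so a zero count such as 0/5 turns into the non-zero 1/6:

from nltk.compat import Fraction
from nltk.translate.bleu_score import SmoothingFunction  # assumed location of method2

# Raw modified precisions for 1-grams and 2-grams, kept unnormalized.
p_n = [Fraction(3, 5, _normalize=False), Fraction(0, 5, _normalize=False)]

smoothed = SmoothingFunction().method2(p_n)
# Each a/b becomes (a+1)/(b+1): 3/5 -> 4/6 and 0/5 -> 1/6.
print([(p.numerator, p.denominator) for p in smoothed])  # [(4, 6), (1, 6)]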
Example 2: method2
# Required import: from nltk import compat [as alias]
# Or: from nltk.compat import Fraction [as alias]
def method2(self, p_n, *args, **kwargs):
    """
    Smoothing method 2: Add 1 to both numerator and denominator from
    Chin-Yew Lin and Franz Josef Och (2004) Automatic evaluation of
    machine translation quality using longest common subsequence and
    skip-bigram statistics. In ACL04.
    """
    return [Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False)
            for p_i in p_n]
Developer: SignalMedia, Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, Lines of code: 10, Source file: bleu_score.py
Example 3: test_unnoramlize_fraction
# Required import: from nltk import compat [as alias]
# Or: from nltk.compat import Fraction [as alias]
def test_unnoramlize_fraction(self):
    from fractions import Fraction as NativePythonFraction
    from nltk.compat import Fraction as NLTKFraction

    # The native fraction should throw a TypeError in Python < 3.5.
    with self.assertRaises(TypeError):
        NativePythonFraction(0, 1000, _normalize=False)

    # Using nltk.compat.Fraction in Python < 3.5.
    compat_frac = NLTKFraction(0, 1000, _normalize=False)
    # The numerator and denominator do not change.
    assert compat_frac.numerator == 0
    assert compat_frac.denominator == 1000
    # The floating point value remains normalized.
    assert float(compat_frac) == 0.0

    # Checks that the fraction is not reduced by the
    # greatest common divisor (gcd).
    six_twelve = NLTKFraction(6, 12, _normalize=False)
    assert six_twelve.numerator == 6
    assert six_twelve.denominator == 12

    one_two = NLTKFraction(1, 2, _normalize=False)
    assert one_two.numerator == 1
    assert one_two.denominator == 2

    # Checks against the native fraction.
    six_twelve_original = NativePythonFraction(6, 12)
    # Checks that the rational values of one_two and six_twelve are the same.
    assert float(one_two) == float(six_twelve) == float(six_twelve_original)

    # Checks that the fraction does get normalized, even when
    # _normalize == False, when the numerator is created with the native
    # fractions.Fraction.from_float.
    assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Developer: SignalMedia, Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, Lines of code: 37, Source file: test_2x_compat.py
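A hedged sketch of why the unnormalized numerator and denominator matter downstream; the variable names here are illustrative only. corpus_bleu in Example 4 sums numerators and denominators across sentences before dividing, which a fraction already reduced to lowest terms (e.g. 6/12 collapsed to 1/2) could no longer support:

from nltk.compat import Fraction  # assumes the NLTK version these snippets come from

# Per-sentence modified precisions, kept unnormalized so their parts can be summed.
sentence_precisions = [Fraction(6, 12, _normalize=False),
                       Fraction(3, 9, _normalize=False)]

numerator = sum(p.numerator for p in sentence_precisions)      # 6 + 3 = 9
denominator = sum(p.denominator for p in sentence_precisions)  # 12 + 9 = 21
corpus_precision = Fraction(numerator, denominator, _normalize=False)
print(float(corpus_precision))  # 9/21 ~= 0.4286, not the mean of the per-sentence ratios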
Example 4: corpus_bleu
# Required import: from nltk import compat [as alias]
# Or: from nltk.compat import Fraction [as alias]
def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25),
                smoothing_function=None, auto_reweigh=False,
                emulate_multibleu=False):
    # Before proceeding to compute BLEU, perform sanity checks.
    p_numerators = Counter()    # Key = ngram order, value = no. of ngram matches.
    p_denominators = Counter()  # Key = ngram order, value = no. of ngrams in ref.
    hyp_lengths, ref_lengths = 0, 0

    assert len(list_of_references) == len(hypotheses), \
        "The number of hypotheses and their reference(s) should be the same"

    # Iterate through each hypothesis and its corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i, _ in enumerate(weights, start=1):
            p_i = modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator

        # Calculate the hypothesis length and the closest reference length.
        # Add them to the corpus-level hypothesis and reference counts.
        hyp_len = len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += closest_ref_length(references, hyp_len)

    # Calculate the corpus-level brevity penalty.
    bp = brevity_penalty(ref_lengths, hyp_lengths)

    # Uniformly re-weight based on the maximum hypothesis length if the largest
    # order of n-grams is < 4 and the weights are left at the default.
    if auto_reweigh:
        if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25):
            weights = (1 / hyp_lengths,) * hyp_lengths

    # Collect the precision values for the different ngram orders.
    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False)
           for i, _ in enumerate(weights, start=1)]

    # Return 0 if there are no matching n-grams.
    # We only need to check p_numerators[1] == 0, since if there are
    # no unigrams, there won't be any higher-order ngrams.
    if p_numerators[1] == 0:
        return 0

    # If there is no smoothing, use method0 from the SmoothingFunction class.
    if not smoothing_function:
        smoothing_function = SmoothingFunction().method0
    # Smooth the modified precision.
    # Note: smoothing_function() may convert values into floats;
    # it tries to retain the Fraction object as much as the
    # smoothing method allows.
    p_n = smoothing_function(p_n, references=references, hypothesis=hypothesis,
                             hyp_len=hyp_len, emulate_multibleu=emulate_multibleu)
    s = (w * math.log(p_i) for i, (w, p_i) in enumerate(zip(weights, p_n)))
    s = bp * math.exp(math.fsum(s))
    return round(s, 4) if emulate_multibleu else s
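A hedged usage sketch for corpus_bleu; the import path is assumed and the sentences are invented for illustration. Note that corpus_bleu expects one list of tokenized references per hypothesis, aligned by position:

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction  # assumed module path

references_1 = [['the', 'cat', 'is', 'on', 'the', 'mat'],
                ['there', 'is', 'a', 'cat', 'on', 'the', 'mat']]
references_2 = [['he', 'reads', 'a', 'book']]
hypotheses = [['the', 'cat', 'sat', 'on', 'the', 'mat'],
              ['he', 'reads', 'the', 'book']]

score = corpus_bleu([references_1, references_2], hypotheses,
                    smoothing_function=SmoothingFunction().method2)
print(score)  # a float between 0 and 1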
Example 5: modified_precision
# Required import: from nltk import compat [as alias]
# Or: from nltk.compat import Fraction [as alias]
def modified_precision(references, hypothesis, n):
    """
    Calculate modified ngram precision.

    The normal precision method may allow wrong translations to receive
    high precision, e.g., a translation that repeats a word from the
    reference many times gets a very high precision.

    This function only returns the Fraction object that contains the numerator
    and denominator necessary to calculate the corpus-level precision.
    To calculate the modified precision for a single pair of hypothesis and
    references, cast the Fraction object into a float.

    The famous "the the the ..." example shows that plain precision can be
    inflated simply by duplicating high-frequency words.

    :param references: A list of reference translations.
    :type references: list(list(str))
    :param hypothesis: A hypothesis translation.
    :type hypothesis: list(str)
    :param n: The ngram order.
    :type n: int
    :return: BLEU's modified precision for the nth order ngram.
    :rtype: Fraction
    """
    # Extract all ngrams in the hypothesis.
    # Set an empty Counter if the hypothesis is empty.
    counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()

    # Extract a union of the references' counts.
    ## max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
    max_counts = {}
    for reference in references:
        reference_counts = Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
        for ngram in counts:
            max_counts[ngram] = max(max_counts.get(ngram, 0),
                                    reference_counts[ngram])

    # Assign the intersection between hypothesis and references' counts.
    clipped_counts = {ngram: min(count, max_counts[ngram])
                      for ngram, count in counts.items()}

    numerator = sum(clipped_counts.values())
    # Ensure that the denominator is at least 1 to avoid ZeroDivisionError.
    # Usually this happens when the ngram order is > len(reference).
    denominator = max(1, sum(counts.values()))

    return Fraction(numerator, denominator, _normalize=False)
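A hedged worked example of the count clipping, using the classic case from the BLEU paper; the import path is assumed and the sentences are tokenized by hand:

from nltk.translate.bleu_score import modified_precision  # assumed module path

references = [['the', 'cat', 'is', 'on', 'the', 'mat'],
              ['there', 'is', 'a', 'cat', 'on', 'the', 'mat']]
hypothesis = ['the'] * 7  # "the the the the the the the"

p1 = modified_precision(references, hypothesis, n=1)
# The hypothesis contains 7 unigrams, but "the" is clipped to its maximum
# count in any single reference (2), so the result is 2/7 rather than 7/7.
print(p1.numerator, p1.denominator)  # 2 7
print(float(p1))                     # 0.2857...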