本文整理汇总了 Python 中 nltk.translate.bleu_score.sentence_bleu 函数的典型用法代码示例。如果您正苦于以下问题：Python sentence_bleu 函数的具体用法？Python sentence_bleu 怎么用？Python sentence_bleu 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 sentence_bleu 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_case_where_n_is_bigger_than_hypothesis_length
def test_case_where_n_is_bigger_than_hypothesis_length(self):
    """Check sentence_bleu when the n-gram order exceeds the hypothesis length.

    Two cases are covered, both with uniform weights over
    ``len(hypothesis) + 1`` n-gram orders:

    * the reference has one token more than the hypothesis, where the
      score is expected to be 0.7165 (to four decimal places);
    * the reference equals the hypothesis, where the score must be
      exactly 1.0.
    """
    # Test BLEU to nth order of n-grams, where n > len(hypothesis).
    references = ['John loves Mary ?'.split()]
    hypothesis = 'John loves Mary'.split()
    n = len(hypothesis) + 1
    weights = [1.0 / n] * n  # Uniform weights.
    self.assertAlmostEqual(sentence_bleu(references, hypothesis, weights), 0.7165, places=4)

    # Test case where n > len(hypothesis) but so is n > len(reference), and
    # it's a special case where reference == hypothesis.
    references = ['John loves Mary'.split()]
    hypothesis = 'John loves Mary'.split()
    # Use the TestCase assertion (as above) rather than a bare ``assert``:
    # it is not stripped under ``python -O`` and reports the actual score.
    self.assertEqual(sentence_bleu(references, hypothesis, weights), 1.0)
示例2: bleu_advanced
def bleu_advanced(y_true: List[Any], y_predicted: List[Any],
                  weights: Tuple=(1,), smoothing_function=SMOOTH.method1,
                  auto_reweigh=False, penalty=True) -> float:
    """Compute a sentence-level BLEU score, optionally without brevity penalty.

    Parameters:
        y_true: list of reference tokens
        y_predicted: list of query (hypothesis) tokens
        weights: n-gram weights forwarded to ``sentence_bleu``
        smoothing_function: smoothing callable forwarded to ``sentence_bleu``
        auto_reweigh: forwarded to ``sentence_bleu`` (re-normalize the
            weights uniformly)
        penalty: when exactly ``True`` the score from ``sentence_bleu`` is
            returned unchanged; otherwise the score is divided by the
            brevity penalty (unless that penalty is 0)

    Return:
        BLEU score
    """
    score = sentence_bleu([y_true], y_predicted, weights, smoothing_function, auto_reweigh)

    candidate_len = len(y_predicted)
    nearest_ref_len = closest_ref_length([y_true], candidate_len)
    bp = brevity_penalty(nearest_ref_len, candidate_len)

    # Divide the penalty back out only when the caller did not pass
    # ``penalty=True`` and the division is well defined.
    remove_penalty = penalty is not True and bp != 0
    if remove_penalty:
        return score / bp
    return score
示例3: calc_test_bleu_and_loss
def calc_test_bleu_and_loss(sess, epoch):
    """Run the model over the test batches and log mean BLEU and mean loss.

    For every test batch the decoder prediction and the loss are fetched
    with ``sess.run``. Each predicted sequence is scored against its
    expected sequence with smoothed sentence-level BLEU. The mean of all
    sentence scores and the batch-averaged loss are written to
    ``train_writer`` under the tags ``test_bleu`` and ``test_loss``.

    Parameters:
        sess: session object whose ``run`` method evaluates the graph
        epoch: value passed as the second argument of ``add_summary``
            (presumably the global step of the summary -- confirm)
    """
    test_feed_generator = get_batch(test_vect_eng_sentences, test_decoder_input_data,
                                    test_decoder_target_data, batch_size)
    number_of_batches_in_test = int(len(test_vect_eng_sentences) / batch_size)

    # Calculate the BLEU of the translations of the test data
    bleu_scores = []
    average_loss = 0
    for _ in tqdm(range(number_of_batches_in_test), desc="test metrics"):
        fd = next(test_feed_generator)
        predict_, loss_ = sess.run([decoder_prediction, loss], fd)
        # The arrays are transposed before zipping so that each iteration
        # yields one (prediction, expected) sequence pair.
        for pred, exp in zip(predict_.T, fd[decoder_targets].T):
            reference = decode_sequence(exp, rev_heb_vocab, False)
            hypothesis = decode_sequence(pred, rev_heb_vocab, False)
            # sentence_bleu takes (references, hypothesis): the expected
            # sequence is the reference and the model output is the
            # hypothesis. BLEU is not symmetric, so the order matters.
            score = sentence_bleu([reference], hypothesis,
                                  smoothing_function=chencherry.method1)
            bleu_scores.append(score)
        average_loss += (loss_ / number_of_batches_in_test)

    train_writer.add_summary(
        tf.Summary(value=[tf.Summary.Value(tag="test_loss", simple_value=average_loss), ]), epoch)
    train_writer.add_summary(
        tf.Summary(value=[tf.Summary.Value(tag="test_bleu", simple_value=np.mean(bleu_scores)), ]), epoch)
示例4: main
def main():
    """Print the BLEU score of a candidate text file against a reference file.

    Expects exactly two command line arguments: the path of the candidate
    file followed by the path of the reference file. Both files are read
    completely, tokenized with ``word_tokenize`` and compared with
    ``bleu_score.sentence_bleu``, which is called as
    ``sentence_bleu(references, hypothesis)`` with the reference tokens
    wrapped in a one-element list. The resulting score is printed.

    Exits through ``sys.exit`` with an error message when the number of
    arguments is wrong.
    """
    # Command line argument checking
    if len(sys.argv) != 3:
        sys.exit("ERROR: Invalid number of arguments, expecting 2")

    # Read the candidate and the reference; ``with`` guarantees that the
    # files are closed even if reading fails.
    with open(sys.argv[1]) as cFile:
        cRaw = cFile.read()
    with open(sys.argv[2]) as rFile:
        rRaw = rFile.read()

    # Then tokenize them both
    cToken = word_tokenize(cRaw)
    rToken = word_tokenize(rRaw)

    # Finally compute the BLEU score
    bleuSc = bleu_score.sentence_bleu([rToken], cToken)
    print(bleuSc)
示例5: main
def main():
    """Print the BLEU score for a hard-coded reference/candidate text pair.

    Both texts are tokenized with ``word_tokenize`` and scored with
    ``bleu_score.sentence_bleu``, which is called as
    ``sentence_bleu(references, hypothesis)`` with the reference tokens
    wrapped in a one-element list.
    """
    # First define some test strings to work with
    refTextRaw = "This is the story of a man who fell from the fiftieth story of a building. While he fell, he reassured himself by repeating, 'So far, so good. So far, so good. So far, so good'. But, the important thing is not the fall - only the landing."
    candidateTextRaw = "This is the story of a man who fell from the 50th floor of a block. To reassure himself while he fell, he repeated, 'So far, so good. So far, so good. So far, so good'. However, the important thing is not the fall. Only the landing."

    refTextTokens = word_tokenize(refTextRaw)
    candidateTextTokens = word_tokenize(candidateTextRaw)

    # Work out the BLEU score
    bleuSc = bleu_score.sentence_bleu([refTextTokens], candidateTextTokens)
    print(bleuSc)
示例6: computeSimple
def computeSimple(sentence1, sentence2):
    """Compute seven simple similarity features for a pair of sentences.

    Feature layout of the returned list:

    * 0, 1 -- number of equal (word1, word2) pairs divided by the number
      of words in ``sentence1`` / ``sentence2`` ("precision" / "recall")
    * 2, 3 -- ``sentence_bleu`` of ``sentence2`` against ``sentence1`` and
      the other way around
    * 4, 5 -- number of equal adjacent-word pairs divided by
      ``combinations(n, count)`` / ``combinations(m, count)``
    * 6 -- length of the shorter sentence string divided by the length of
      the longer one

    Parameters:
        sentence1: first sentence as a string
        sentence2: second sentence as a string

    Returns:
        list with the seven feature values

    Raises:
        ZeroDivisionError: if a sentence contains no ``\\w+`` token
    """
    features = [0] * 7
    tokenizer = RegexpTokenizer(r'\w+')
    words1 = tokenizer.tokenize(sentence1)
    words2 = tokenizer.tokenize(sentence2)
    n = len(words1)
    m = len(words2)

    # word overlap features: every equal (word1, word2) pair is counted
    count = sum(1 for word1 in words1 for word2 in words2 if word1 == word2)
    features[0] = count / n  # "precision"
    features[1] = count / m  # "recall"

    # NOTE(review): the raw strings (not the token lists) are passed here,
    # so the n-grams are built over characters -- confirm this is intended.
    features[2] = sentence_bleu([sentence1], sentence2)
    features[3] = sentence_bleu([sentence2], sentence1)

    # Obtain pairs of adjacent words. The results are materialized as lists
    # because the second sequence is iterated once per element of the
    # first one; a one-shot generator would be empty after the first pass.
    bigrams1 = list(skipgrams(words1, 2, 0))
    bigrams2 = list(skipgrams(words2, 2, 0))
    count = sum(1 for gram1 in bigrams1 for gram2 in bigrams2 if gram1 == gram2)
    # NOTE(review): ``combinations`` is defined outside this block; whether
    # it can return 0 for these arguments is not visible here -- verify.
    features[4] = count / combinations(n, count)
    features[5] = count / combinations(m, count)

    # Length ratio (shorter / longer), measured on the sentence strings.
    # The list has seven slots, so the last valid index is 6.
    if len(sentence1) > len(sentence2):
        features[6] = len(sentence2) / len(sentence1)
    else:
        features[6] = len(sentence1) / len(sentence2)
    return features
示例7: test_case_where_n_is_bigger_than_hypothesis_length
def test_case_where_n_is_bigger_than_hypothesis_length(self):
    """BLEU of a sentence against itself must be 1.0 even when the number
    of n-gram orders is larger than the sentence length."""
    # TODO: Currently this test breaks the BLEU implementation (13.03.2016)
    reference_tokens = 'John loves Mary'.split()
    hypothesis_tokens = 'John loves Mary'.split()

    # One order more than there are tokens, weighted uniformly.
    order = len(hypothesis_tokens) + 1
    uniform_weights = [1.0 / order for _ in range(order)]

    score = sentence_bleu([reference_tokens], hypothesis_tokens, uniform_weights)
    assert score == 1.0
示例8: evaluate_score
def evaluate_score(translation, score, smoothing_func):
    """Score a translation with the requested metric.

    Only the ``'BLEU'`` metric is supported. For any other value a message
    is printed and ``None`` is returned.

    Parameters:
        translation: object with the attributes ``translation``
            (hypothesis) and ``reference``; both are passed to
            ``bleu.sentence_bleu`` and must support ``len``
        score: name of the metric to compute
        smoothing_func: value forwarded as ``smoothing_function`` to
            ``bleu.sentence_bleu``

    Returns:
        The sentence-level BLEU score, ``0.0`` when the fallback is
        reached with an empty reference or translation, or ``None`` for
        an unrecognized metric.
    """
    if score != 'BLEU':
        # Function-call form of print works on both Python 2 and Python 3.
        print('evaluate_score: unrecognized score \'{0}\''.format(score))
        return None

    translation_split = translation.translation
    reference_split = translation.reference
    try:
        return bleu.sentence_bleu([reference_split], translation_split,
                                  smoothing_function=smoothing_func)
    except Exception:
        # Best-effort fallback kept from the original code: retry with
        # uniform weights over at most as many n-gram orders as the
        # shorter sentence has words. ``Exception`` (instead of a bare
        # ``except``) lets KeyboardInterrupt and SystemExit propagate.
        word_count = min(len(reference_split), len(translation_split))
        if word_count == 0:
            # No words to match: avoid dividing by zero below.
            return 0.0
        order = min(4, word_count)
        weights = [1 / float(order)] * order
        return bleu.sentence_bleu([reference_split], translation_split,
                                  weights=weights, smoothing_function=smoothing_func)
示例9: test
def test():
    """Test the translation model and report its average sentence BLEU.

    The source-language file is translated line by line with a greedy
    decoder. Each translation is tokenized and scored against the line
    read in parallel from the destination-language file, using uniform
    4-gram weights. A running average is printed after every ten scored
    sentences and the final average at the end.

    Source sentences that fit into no bucket are skipped, and so are
    sentences for which ``sentence_bleu`` raises ``ZeroDivisionError``.
    """
    nltk.download('punkt')
    with tf.Session() as sess:
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        src_lang_vocab_path = PATH_TO_DATA_FILES + FLAGS.src_lang + "_mapping%d.txt" % FLAGS.src_lang_vocab_size
        dst_lang_vocab_path = PATH_TO_DATA_FILES + FLAGS.dst_lang + "_mapping%d.txt" % FLAGS.dst_lang_vocab_size
        src_lang_vocab, _ = data_utils.initialize_vocabulary(src_lang_vocab_path)
        _, rev_dst_lang_vocab = data_utils.initialize_vocabulary(dst_lang_vocab_path)

        weights = [0.25, 0.25, 0.25, 0.25]
        total_bleu_value = 0.0
        computing_bleu_iterations = 0

        # ``with`` closes both sentence files even if decoding fails.
        with open(generate_src_lang_sentences_file_name(FLAGS.src_lang)) as first_lang_file, \
                open(generate_src_lang_sentences_file_name(FLAGS.dst_lang)) as second_lang_file:
            for first_lang_raw in first_lang_file:
                # Read the gold line before any ``continue`` so that both
                # files stay aligned.
                second_lang_gold_raw = second_lang_file.readline()

                # Get token-ids for the input sentence.
                token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(first_lang_raw), src_lang_vocab)

                # Which bucket does it belong to? Take the first bucket
                # whose input size exceeds the sentence length.
                bucket_id = next((b for b, bucket in enumerate(_buckets)
                                  if bucket[0] > len(token_ids)), None)
                if bucket_id is None:
                    continue

                # Get a 1-element batch to feed the sentence to the model.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: [(token_ids, [])]}, bucket_id)
                # Get output logits for the sentence.
                _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                # This is a greedy decoder - outputs are just argmaxes of output_logits.
                outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
                # If there is an EOS symbol in outputs, cut them at that point.
                if data_utils.EOS_ID in outputs:
                    outputs = outputs[:outputs.index(data_utils.EOS_ID)]

                # Build the sentence corresponding to outputs.
                model_tran_res = " ".join([tf.compat.as_str(rev_dst_lang_vocab[output]) for output in outputs])
                second_lang_gold_tokens = word_tokenize(second_lang_gold_raw)
                model_tran_res_tokens = word_tokenize(model_tran_res)

                try:
                    # sentence_bleu takes (references, hypothesis): the
                    # gold sentence is the reference, the model output is
                    # the hypothesis. BLEU is not symmetric.
                    current_bleu_value = sentence_bleu([second_lang_gold_tokens],
                                                       model_tran_res_tokens, weights)
                except ZeroDivisionError:
                    continue

                total_bleu_value += current_bleu_value
                computing_bleu_iterations += 1
                # Report only after a sentence was scored, so the counter
                # is never zero in the division below.
                if computing_bleu_iterations % 10 == 0:
                    print("BLEU value after %d iterations: %.2f"
                          % (computing_bleu_iterations, total_bleu_value / computing_bleu_iterations))

        if computing_bleu_iterations == 0:
            # Nothing could be scored; an average is undefined.
            print("Final BLEU value is undefined: no sentence could be scored")
            return
        final_bleu_value = total_bleu_value / computing_bleu_iterations
        print("Final BLEU value after %d iterations: %.2f" % (computing_bleu_iterations, final_bleu_value))
    return
示例10: test_zero_matches
def test_zero_matches(self):
    """A hypothesis sharing no token with the reference must score 0."""
    references = ['The candidate has no alignment to any of the references'.split()]
    hypothesis = 'John loves Mary'.split()

    # Check every n-gram order from 1 up to (but not including)
    # len(hypothesis), each time with uniform weights.
    order = 1
    while order < len(hypothesis):
        uniform_weights = [1.0 / order] * order
        score = sentence_bleu(references, hypothesis, uniform_weights)
        assert score == 0
        order += 1
示例11: test_case_where_n_is_bigger_than_hypothesis_length
def test_case_where_n_is_bigger_than_hypothesis_length(self):
    """Check sentence_bleu when the n-gram order exceeds the hypothesis length.

    Covers a near match (reference has one extra token, expected score
    0.7165 to four places), the ``UserWarning`` expected for that pair
    with default weights, and an exact match (expected score 1.0).
    """
    # Test BLEU to nth order of n-grams, where n > len(hypothesis).
    references = ['John loves Mary ?'.split()]
    hypothesis = 'John loves Mary'.split()
    n = len(hypothesis) + 1
    weights = [1.0 / n] * n  # Uniform weights.
    self.assertAlmostEqual(sentence_bleu(references, hypothesis, weights), 0.7165, places=4)

    # Checks that the warning has been raised because len(hypothesis) < 4.
    # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
    # Test for it explicitly instead of catching AttributeError, which
    # would also hide an AttributeError raised inside sentence_bleu.
    if hasattr(self, 'assertWarns'):
        self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)

    # Test case where n > len(hypothesis) but so is n > len(reference), and
    # it's a special case where reference == hypothesis.
    references = ['John loves Mary'.split()]
    hypothesis = 'John loves Mary'.split()
    # TestCase assertion instead of a bare ``assert``: not stripped under
    # ``python -O`` and reports the actual score on failure.
    self.assertEqual(sentence_bleu(references, hypothesis, weights), 1.0)
示例12: test_partial_matches_hypothesis_longer_than_reference
def test_partial_matches_hypothesis_longer_than_reference(self):
    """Check the score of a hypothesis that extends the reference.

    The hypothesis starts with the full reference and continues with
    three more tokens; the expected score is 0.4729 to four decimal
    places, and a ``UserWarning`` is expected for this pair.
    """
    references = ['John loves Mary'.split()]
    hypothesis = 'John loves Mary who loves Mike'.split()
    self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.4729, places=4)

    # Checks that the warning has been raised because len(reference) < 4.
    # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
    # Test for it explicitly instead of catching AttributeError, which
    # would also hide an AttributeError raised inside sentence_bleu.
    if hasattr(self, 'assertWarns'):
        self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
示例13: test_full_matches
def test_full_matches(self):
    """A hypothesis identical to the reference must score exactly 1.0."""
    references = ['John loves Mary'.split()]
    hypothesis = 'John loves Mary'.split()

    # Uniform weight vectors for every n-gram order from 1 up to (but not
    # including) len(hypothesis).
    weight_vectors = [[1.0 / order] * order for order in range(1, len(hypothesis))]
    for weights in weight_vectors:
        score = sentence_bleu(references, hypothesis, weights)
        assert score == 1.0
示例14: main
def main():
    """Write a per-sentence BLEU report and log corpus-level BLEU.

    Reads the source, target and gold files named by the command line
    arguments, together with their ``.nounk`` companions, and writes one
    CSV row per gold line to ``report-<model>.csv``. Each row holds the
    six stripped text columns and the unigram sentence BLEU of the target
    against the gold line. Targets with fewer than four tokens get a
    score of 0.0 and are replaced by four ``UNK`` tokens in the corpus
    level computation. Finally the corpus BLEU with default weights and
    with unigram weights is logged.
    """
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    args = setup_args()
    logging.info(args)

    # Read all inputs up front; the helper closes every file it opens.
    src_lines = _read_utf8_lines(args.src)
    src_lines_nounk = _read_utf8_lines(args.src + '.nounk')
    target_lines = _read_utf8_lines(args.target)
    target_lines_nounk = _read_utf8_lines(args.target + '.nounk')
    gold_lines = _read_utf8_lines(args.gold)
    gold_lines_nounk = _read_utf8_lines(args.gold + '.nounk')

    num_lines = len(gold_lines)
    logging.info('Num Lines: %d'% num_lines)

    references = []
    hypotheses = []
    # Placeholder hypothesis used when the target has fewer than 4 tokens.
    default = 'UNK UNK UNK UNK'.split()

    # ``with`` makes sure the report is flushed and closed on every path.
    with codecs.open('report-%s.csv'% args.model, 'w') as f:
        # NOTE(review): the ``encoding`` keyword is kept from the original
        # call; it is not accepted by the standard library csv.writer --
        # confirm which ``csv`` module this file imports.
        csv_f = csv.writer(f, delimiter=',', encoding='utf-8')
        csv_f.writerow(['Src', 'Src_UNK', 'Target_UNK', 'Target', 'Gold_UNK', 'Gold', 'BLEU1'])

        for index in range(num_lines):
            data = [
                src_lines_nounk[index].strip(),
                src_lines[index].strip(),
                target_lines[index].strip(),
                target_lines_nounk[index].strip(),
                gold_lines[index].strip(),
                gold_lines_nounk[index].strip(),
            ]

            gold = gold_lines[index].strip().split()
            output = target_lines[index].strip().split()
            if len(output) < 4:
                sentence_score = 0.0
                hypotheses.append(default)
            else:
                sentence_score = sentence_bleu([gold], output, weights=(1.0,))
                hypotheses.append(output)
            references.append([gold])

            logging.info('sentence:%d bleu:%f'%(index, sentence_score))
            data.append(str(sentence_score))
            csv_f.writerow(data)

    final_bleu = corpus_bleu(references, hypotheses)
    unigram_bleu = corpus_bleu(references, hypotheses, weights=(1.0,))
    logging.info('Final BLEU: %f Unigram_BLEU: %f '% (final_bleu, unigram_bleu))


def _read_utf8_lines(path):
    """Return all lines of the UTF-8 text file at *path*, then close it."""
    with codecs.open(path, 'r', 'utf-8') as handle:
        return handle.readlines()
示例15: test_reference_or_hypothesis_shorter_than_fourgrams
def test_reference_or_hypothesis_shorter_than_fourgrams(self):
    """Check scoring when reference and hypothesis have fewer than 4 tokens.

    With default weights the three-token pair below is expected to score
    1.0 and to trigger a ``UserWarning``.
    """
    # Test case where the length of reference or hypothesis
    # is shorter than 4.
    references = ['let it go'.split()]
    hypothesis = 'let go it'.split()
    # Checks that the value the hypothesis and reference returns is 1.0.
    # TestCase assertion instead of a bare ``assert``: not stripped under
    # ``python -O`` and reports the actual score on failure.
    self.assertEqual(sentence_bleu(references, hypothesis), 1.0)

    # Checks that the warning has been raised.
    # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
    # Test for it explicitly instead of catching AttributeError, which
    # would also hide an AttributeError raised inside sentence_bleu.
    if hasattr(self, 'assertWarns'):
        self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)