本文整理汇总了Python中nltk.metrics.distance.edit_distance方法的典型用法代码示例。如果您正苦于以下问题：Python distance.edit_distance方法的具体用法？Python distance.edit_distance怎么用？Python distance.edit_distance使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块nltk.metrics.distance的用法示例。
在下文中一共展示了distance.edit_distance方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: seq_cases
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def seq_cases():
    """Build the list of edit-distance test cases.

    Every case is a tuple ``(reference, hypothesis, substitution_cost, dist)``.
    The list starts with a handful of hand-written cases and is followed by
    randomly generated ones: for each reference length in ``range(25)``,
    ten cases are produced.
    """
    fixed_cases = [
        ("", "", 1, 0),
        ("ab", "ad", 1, 1),
        ("abde", "abcde", 1, 1),
        ([1, 3, 5], [], 1, 3),
        ([1, 3, 5], [3], 1, 2),
    ]

    def random_case(ref_len):
        # The draw order (hypothesis length, reference, hypothesis, cost)
        # is kept fixed so that a seeded run yields the same cases.
        hyp_len = random.randint(0, int(ref_len * 1.5))
        ref = rand_str(ref_len)
        hyp = rand_str(hyp_len)
        cost = random.randint(0, 3)
        # The nltk.metrics.distance implementation is assumed correct and
        # supplies the expected distance.
        expected = distance.edit_distance(ref, hyp, substitution_cost=cost)
        return (ref, hyp, cost, expected)

    random_cases = [
        random_case(ref_len)
        for ref_len in range(25)
        for _ in range(10)
    ]
    return fixed_cases + random_cases
示例2: wer
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def wer(self, decode, target):
    """Computes the word-level edit distance between two sentences.

    Both sentences are tokenized on whitespace with ``split()`` and the
    resulting word sequences are compared with ``distance.edit_distance``.

    Args:
        decode: string of the decoded output.
        target: a string for the ground truth label.

    Returns:
        The edit distance between the two word sequences, exactly as
        returned by ``distance.edit_distance``. No normalization by the
        target length is applied here.
    """
    # edit_distance only needs len(), indexing and equality on its inputs,
    # so the word lists can be compared directly. This removes the former
    # word -> chr() encoding, which added work and raised ValueError once
    # the number of distinct words exceeded the range accepted by chr().
    return distance.edit_distance(decode.split(), target.split())
示例3: process_rel_candidate_for_drop_led
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def process_rel_candidate_for_drop_led(relnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_rel):
    """Decide whether to drop a relation node using word-level edit distance.

    The main sentence is extracted from ``boxer_graph`` before and after
    dropping ``relnode_candidate``; each version is compared, word by word,
    against the space-joined ``simple_sentences``. The two distances are
    handed to ``compare_edit_distance`` together with ``opr_drop_rel``.

    Returns:
        Whatever ``compare_edit_distance`` returns for
        ``(opr_drop_rel, distance_after_drop, distance_before_drop)``.
    """
    reference_words = " ".join(simple_sentences).split()

    def distance_to_reference(nodes, mod_positions):
        # Word-level edit distance between the extracted main sentence and
        # the reference simple sentence.
        sentence = boxer_graph.extract_main_sentence(nodes, main_sent_dict, mod_positions)
        return edit_distance(sentence.split(), reference_words)

    dist_before = distance_to_reference(nodeset, filtered_mod_pos)

    # drop_relation returns the node set and modifier positions to use
    # once the relation has been removed.
    pruned_nodeset, pruned_mod_pos = boxer_graph.drop_relation(nodeset, relnode_candidate, filtered_mod_pos)
    dist_after = distance_to_reference(pruned_nodeset, pruned_mod_pos)

    return compare_edit_distance(opr_drop_rel, dist_after, dist_before)
# functions : Drop-MOD Candidate
示例4: process_mod_candidate_for_drop_led
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def process_mod_candidate_for_drop_led(modcand_to_process, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_mod):
    """Decide whether to drop a modifier using word-level edit distance.

    The main sentence is extracted from ``boxer_graph`` twice: once with the
    current ``filtered_mod_pos`` and once with the candidate's position
    (``modcand_to_process[0]``) appended to a copy of it. Each version is
    compared, word by word, against the space-joined ``simple_sentences``.
    The two distances are handed to ``compare_edit_distance`` together with
    ``opr_drop_mod``.

    Returns:
        Whatever ``compare_edit_distance`` returns for
        ``(opr_drop_mod, distance_after_drop, distance_before_drop)``.
    """
    reference_words = " ".join(simple_sentences).split()

    def distance_to_reference(mod_positions):
        # Word-level edit distance between the extracted main sentence and
        # the reference simple sentence.
        sentence = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, mod_positions)
        return edit_distance(sentence.split(), reference_words)

    dist_before = distance_to_reference(filtered_mod_pos)

    # Work on a copy so the caller's filtered_mod_pos is left untouched.
    candidate_position = modcand_to_process[0]
    extended_mod_pos = filtered_mod_pos[:] + [candidate_position]
    dist_after = distance_to_reference(extended_mod_pos)

    return compare_edit_distance(opr_drop_mod, dist_after, dist_before)
# functions : Drop-OOD Candidate
示例5: process_ood_candidate_for_drop_led
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def process_ood_candidate_for_drop_led(oodnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_ood):
    """Decide whether to drop an OOD node using word-level edit distance.

    The main sentence is extracted from ``boxer_graph`` twice: once with the
    full ``nodeset`` and once with ``oodnode_candidate`` removed from a copy
    of it. Each version is compared, word by word, against the space-joined
    ``simple_sentences``. The two distances are handed to
    ``compare_edit_distance`` together with ``opr_drop_ood``.

    Returns:
        Whatever ``compare_edit_distance`` returns for
        ``(opr_drop_ood, distance_after_drop, distance_before_drop)``.
    """
    reference_words = " ".join(simple_sentences).split()

    def distance_to_reference(nodes):
        # Word-level edit distance between the extracted main sentence and
        # the reference simple sentence.
        sentence = boxer_graph.extract_main_sentence(nodes, main_sent_dict, filtered_mod_pos)
        return edit_distance(sentence.split(), reference_words)

    dist_before = distance_to_reference(nodeset)

    # Remove the candidate from a copy so the caller's nodeset is untouched.
    remaining_nodes = nodeset[:]
    remaining_nodes.remove(oodnode_candidate)
    dist_after = distance_to_reference(remaining_nodes)

    return compare_edit_distance(opr_drop_ood, dist_after, dist_before)
示例6: batch_per
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def batch_per(hyps: Sequence[Sequence[T]],
              refs: Sequence[Sequence[T]]) -> float:
    """Return the mean per-item phoneme error rate of a batch.

    For each index in ``hyps``, zero-valued entries are removed from both
    the reference and the hypothesis (presumably 0 is the padding symbol;
    confirm against the caller). The edit distance between the two is then
    divided by the length of the filtered reference. The per-item rates are
    averaged over ``len(hyps)``.

    Raises:
        ZeroDivisionError: if ``hyps`` is empty or a reference is empty
            after its zeros are removed.
        IndexError: if ``refs`` has fewer items than ``hyps``.
    """
    def without_zeros(seq):
        return [phn for phn in seq if phn != 0]

    rate_total = 0.0
    for idx, hyp_seq in enumerate(hyps):
        ref_phones = without_zeros(refs[idx])
        hyp_phones = without_zeros(hyp_seq)
        rate_total += distance.edit_distance(ref_phones, hyp_phones) / len(ref_phones)
    return rate_total / len(hyps)
示例7: cer
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def cer(self, decode, target):
    """Computes the character-level edit distance between two strings.

    Args:
        decode: a string of the decoded output.
        target: a string for the ground truth label.

    Returns:
        The result of ``distance.edit_distance(decode, target)`` for the
        current sentence pair, returned unchanged (no normalization is
        applied here).
    """
    char_edits = distance.edit_distance(decode, target)
    return char_edits
示例8: edit_ratio
# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def edit_ratio(self, wordA, wordB):
    """Return the edit distance between two words, scaled by their size.

    Computes the number of edits required to transform one (stemmed
    already, probably) word into another word, and divides it by the
    average number of letters in the two words.

    Examples:
        color, colour: 0.1818181818
        theater, theatre: 0.2857
        day, today: 0.5
        foobar, foo56bar: 0.2857

    Args:
        wordA: the first word.
        wordB: the second word.

    Returns:
        The edit distance divided by the average length of the two words,
        or 0.0 when both words are empty.
    """
    total_length = len(wordA) + len(wordB)
    if total_length == 0:
        # Two empty words are identical; returning 0.0 avoids the 0/0
        # division the unguarded formula would attempt.
        return 0.0
    # Named `edits` rather than `distance` so the local does not shadow a
    # module-level `distance` import.
    edits = editDistance(wordA, wordB)
    average_length = total_length / 2
    return edits / average_length