当前位置: 首页>>代码示例>>Python>>正文


Python distance.edit_distance方法代码示例

本文整理汇总了 Python 中 nltk.metrics.distance.edit_distance 方法的典型用法代码示例。如果您正苦于以下问题:Python distance.edit_distance 方法的具体用法?Python distance.edit_distance 怎么用?Python distance.edit_distance 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 nltk.metrics.distance 的用法示例。


在下文中一共展示了distance.edit_distance方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: seq_cases

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def seq_cases():
    """Build the list of edit-distance test cases.

    Every case is a tuple ``(reference, hypothesis, substitution_cost, dist)``.
    A few fixed cases come first; they are followed by randomly generated
    pairs whose expected distance is taken from
    ``distance.edit_distance``, which is assumed to be correct.
    """
    fixed_cases = [
        ("", "", 1, 0),
        ("ab", "ad", 1, 1),
        ("abde", "abcde", 1, 1),
        ([1, 3, 5], [], 1, 3),
        ([1, 3, 5], [3], 1, 2),
    ]

    def _random_case(ref_len):
        # The hypothesis may be up to 1.5x as long as the reference.
        hyp_len = random.randint(0, int(ref_len * 1.5))
        ref = rand_str(ref_len)
        hyp = rand_str(hyp_len)
        cost = random.randint(0, 3)
        expected = distance.edit_distance(ref, hyp, substitution_cost=cost)
        return (ref, hyp, cost, expected)

    # Ten random pairs for each reference length from 0 to 24.
    random_cases = [
        _random_case(ref_len)
        for ref_len in range(25)
        for _ in range(10)
    ]

    return fixed_cases + random_cases
开发者ID:persephone-tools,项目名称:persephone,代码行数:24,代码来源:test_distance.py

示例2: wer

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def wer(self, decode, target):
    """Return the word-level edit distance between two sentences.

    Both sentences are split on whitespace, every distinct word is replaced
    by its own single character, and the edit distance between the two
    encoded strings is computed.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      The value returned by ``distance.edit_distance`` for the two encoded
      sentences. It is not divided by the number of words.
    """
    decoded_words = decode.split()
    target_words = target.split()

    # Give each distinct word its own code point so that one character
    # stands for exactly one word.
    vocabulary = set(decoded_words + target_words)
    code_of = {word: index for index, word in enumerate(vocabulary)}

    encoded_decode = ''.join(chr(code_of[word]) for word in decoded_words)
    encoded_target = ''.join(chr(code_of[word]) for word in target_words)

    return distance.edit_distance(encoded_decode, encoded_target)
开发者ID:generalized-iou,项目名称:g-tensorflow-models,代码行数:23,代码来源:decoder.py

示例3: process_rel_candidate_for_drop_led

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def process_rel_candidate_for_drop_led(relnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_rel):
    """Evaluate dropping a relation node by comparing edit distances.

    The main sentence is extracted from ``boxer_graph`` before and after the
    relation candidate is dropped. Each version is compared, word by word,
    with the space-joined ``simple_sentences``. Both distances are passed to
    ``compare_edit_distance`` together with ``opr_drop_rel`` and its result
    is returned.
    """
    target_words = " ".join(simple_sentences).split()

    # Distance with the relation still in place.
    kept_sentence = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    dist_keep = edit_distance(kept_sentence.split(), target_words)

    # Distance once the relation candidate has been dropped.
    dropped_nodeset, dropped_mod_pos = boxer_graph.drop_relation(nodeset, relnode_candidate, filtered_mod_pos)
    dropped_sentence = boxer_graph.extract_main_sentence(dropped_nodeset, main_sent_dict, dropped_mod_pos)
    dist_drop = edit_distance(dropped_sentence.split(), target_words)

    return compare_edit_distance(opr_drop_rel, dist_drop, dist_keep)

# functions : Drop-MOD Candidate 
开发者ID:shashiongithub,项目名称:Sentence-Simplification-ACL14,代码行数:16,代码来源:methods_training_graph.py

示例4: process_mod_candidate_for_drop_led

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def process_mod_candidate_for_drop_led(modcand_to_process, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_mod):
    """Evaluate dropping a modifier by comparing edit distances.

    The main sentence is extracted from ``boxer_graph`` twice: once with the
    current ``filtered_mod_pos`` and once with the candidate's position
    (``modcand_to_process[0]``) appended to a copy of it. Each version is
    compared, word by word, with the space-joined ``simple_sentences``. Both
    distances are passed to ``compare_edit_distance`` together with
    ``opr_drop_mod`` and its result is returned.
    """
    target_words = " ".join(simple_sentences).split()

    # Distance with the modifier still in place.
    kept_sentence = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    dist_keep = edit_distance(kept_sentence.split(), target_words)

    # Extend a copy of the positions list so the caller's list is untouched.
    extended_mod_pos = filtered_mod_pos[:] + [modcand_to_process[0]]
    dropped_sentence = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, extended_mod_pos)
    dist_drop = edit_distance(dropped_sentence.split(), target_words)

    return compare_edit_distance(opr_drop_mod, dist_drop, dist_keep)

# functions : Drop-OOD Candidate 
开发者ID:shashiongithub,项目名称:Sentence-Simplification-ACL14,代码行数:17,代码来源:methods_training_graph.py

示例5: process_ood_candidate_for_drop_led

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def process_ood_candidate_for_drop_led(oodnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_ood):
    """Evaluate dropping an OOD node by comparing edit distances.

    The main sentence is extracted from ``boxer_graph`` twice: once with the
    full ``nodeset`` and once with ``oodnode_candidate`` removed from a copy
    of it. Each version is compared, word by word, with the space-joined
    ``simple_sentences``. Both distances are passed to
    ``compare_edit_distance`` together with ``opr_drop_ood`` and its result
    is returned.
    """
    target_words = " ".join(simple_sentences).split()

    # Distance with the node still present.
    kept_sentence = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    dist_keep = edit_distance(kept_sentence.split(), target_words)

    # Remove the candidate from a copy so the caller's nodeset is untouched.
    reduced_nodeset = nodeset[:]
    reduced_nodeset.remove(oodnode_candidate)
    dropped_sentence = boxer_graph.extract_main_sentence(reduced_nodeset, main_sent_dict, filtered_mod_pos)
    dist_drop = edit_distance(dropped_sentence.split(), target_words)

    return compare_edit_distance(opr_drop_ood, dist_drop, dist_keep)
开发者ID:shashiongithub,项目名称:Sentence-Simplification-ACL14,代码行数:15,代码来源:methods_training_graph.py

示例6: batch_per

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def batch_per(hyps: Sequence[Sequence[T]],
              refs: Sequence[Sequence[T]]) -> float:
    """Return the mean per-utterance phoneme error rate of a batch.

    For each index, elements equal to 0 are removed from both the reference
    and the hypothesis (presumably padding -- confirm with the caller). The
    edit distance between the two is divided by the filtered reference
    length, and the per-utterance rates are averaged over ``len(hyps)``.

    Raises:
        ZeroDivisionError: if ``hyps`` is empty or a reference has no
            non-zero elements.
    """

    def _without_zeros(seq):
        return [phn for phn in seq if phn != 0]

    rate_sum = 0.0
    for idx, raw_hyp in enumerate(hyps):
        ref = _without_zeros(refs[idx])
        hyp = _without_zeros(raw_hyp)
        rate_sum += distance.edit_distance(ref, hyp) / len(ref)
    return rate_sum / len(hyps)
开发者ID:persephone-tools,项目名称:persephone,代码行数:12,代码来源:utils.py

示例7: cer

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def cer(self, decode, target):
    """Return the character-level edit distance between two strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      The value returned by ``distance.edit_distance(decode, target)``. It is
      not divided by the length of either string.
    """
    char_edits = distance.edit_distance(decode, target)
    return char_edits
开发者ID:generalized-iou,项目名称:g-tensorflow-models,代码行数:15,代码来源:decoder.py

示例8: edit_ratio

# 需要导入模块: from nltk.metrics import distance [as 别名]
# 或者: from nltk.metrics.distance import edit_distance [as 别名]
def edit_ratio(self, wordA, wordB):
        """ Computes the number of edits required to transform one
        (stemmed already, probably) word into another word, and
        adjusts for the average number of letters in each.

        Two empty words are treated as identical and give a ratio of 0.0
        instead of dividing by zero.

        Args:
            wordA: the first word.
            wordB: the second word.

        Returns:
            The edit distance divided by the average length of the two words.

        Examples:
        color, colour: 0.1818181818
        theater, theatre: 0.2857
        day, today: 0.5
        foobar, foo56bar: 0.2857
        """
        totalLength = len(wordA) + len(wordB)
        if totalLength == 0:
            # Nothing to edit and no length to normalise by.
            return 0.0
        # Named `edits` rather than `distance` so it cannot be confused with
        # the nltk `distance` module.
        edits = editDistance(wordA, wordB)
        averageLength = totalLength / 2
        return edits / averageLength
开发者ID:JonathanReeve,项目名称:text-matcher,代码行数:16,代码来源:matcher.py


注:本文中的nltk.metrics.distance.edit_distance方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。