当前位置: 首页>>代码示例>>Python>>正文


Python Levenshtein.ratio方法代码示例

本文整理汇总了Python中Levenshtein.ratio方法的典型用法代码示例。如果您正苦于以下问题:Python Levenshtein.ratio方法的具体用法?Python Levenshtein.ratio怎么用?Python Levenshtein.ratio使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Levenshtein的用法示例。


在下文中一共展示了Levenshtein.ratio方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: similar_string_fast

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def similar_string_fast(first_string, second_string):
    """Report whether two strings count as similar under the two fast scorers.

    Params:
    - first_string: (type: string) first string.
    - second_string: (type: string) second string.

    Returns:
    - result: (type: bool) True when the higher of ``fuzz.ratio`` and
      ``fuzz.token_set_ratio`` reaches SCORE_THRESHOLD_FAST, else False.
    """
    scores = (
        fuzz.ratio(first_string, second_string),
        fuzz.token_set_ratio(first_string, second_string),
    )
    best_score = max(scores)
    return bool(best_score >= SCORE_THRESHOLD_FAST)
开发者ID:phage-nz,项目名称:ph0neutria,代码行数:19,代码来源:string_utils.py

示例2: get_sub_cost

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def get_sub_cost(self, o, c):
        """Return the substitution cost between two tokens.

        The cost is the sum of a lemma component (0 or 0.499), a
        part-of-speech component (0, 0.25 or 0.5) and a character component
        (1 minus the Levenshtein ratio of the two token texts).

        Params:
        - o, c: token-like objects exposing ``lower``, ``lemma``, ``pos`` and
          ``text`` (presumably the original and corrected token - confirm
          against the caller).

        Returns:
        - 0 when the ``lower`` attributes match, otherwise the summed cost.
        """
        # Tokens that differ only by case are free to substitute.
        if o.lower == c.lower:
            return 0

        # Lemma component: kept just under 0.5.
        lemma_cost = 0 if o.lemma == c.lemma else 0.499

        # POS component: identical tags are free, two open-class tags are
        # cheaper than any other mismatch.
        if o.pos == c.pos:
            pos_cost = 0
        elif o.pos in self._open_pos and c.pos in self._open_pos:
            pos_cost = 0.25
        else:
            pos_cost = 0.5

        # Character component: dissimilarity of the surface forms.
        char_cost = 1 - Levenshtein.ratio(o.text, c.text)

        return lemma_cost + pos_cost + char_cost

    # Get the cheapest alignment sequence and indices from the op matrix
    # align_seq = [(op, o_start, o_end, c_start, c_end), ...] 
开发者ID:chrisjbryant,项目名称:errant,代码行数:19,代码来源:alignment.py

示例3: _get_compare_data

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def _get_compare_data(tif_txt_pair):
    """Run OCR on one page image and compare it with its reference text.

    Params:
    - tif_txt_pair: two-item sequence ``(tif_path, txt_path)``. Both paths
      must be equal once their last four characters (the extension) are
      stripped.

    Returns:
    - dict with keys 'edit_distance' and 'edit_ratio' (from ``L.distance`` /
      ``L.ratio`` between the normalized reference and the OCR output),
      'filename' (basename of the image) and 'html' (result of
      ``_make_html_diff``).

    Raises:
    - ValueError: when the two paths do not share the same stem. Previously
      this case fell through to ``return data`` with ``data`` unbound and
      surfaced as an opaque UnboundLocalError.
    """
    tif, txt = tif_txt_pair[0], tif_txt_pair[1]

    # The pair is expected to always match; fail loudly if it does not.
    if tif[:-4] != txt[:-4]:
        raise ValueError(
            'image and reference text do not belong to the same page: '
            '%r vs %r' % (tif, txt))

    # OCR is run with run_main's default configuration.
    ocr = run_main(tif, text=True).strip()

    # Read the reference through a context manager so the handle is closed.
    with open(txt, 'r') as reference_file:
        reference = reference_file.read()
    reference = _normalize_input(reference)

    edit_dist = L.distance(reference, ocr)
    edit_ratio = L.ratio(reference, ocr)
    html = _make_html_diff(reference, ocr)

    return {
        'edit_distance': edit_dist,
        'edit_ratio': edit_ratio,
        'filename': os.path.basename(tif),
        'html': html,
    }
开发者ID:zmr,项目名称:namsel,代码行数:23,代码来源:generate_accuracy_report.py

示例4: get_message_change_ratio

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def get_message_change_ratio(status_update):
    """Measure how much a status update's message was edited before sending.

    Expects a status update instance. The notification intro text (plus a
    blank line) is removed from both the base message and the sent message,
    and the result is ``1.0 - Levenshtein.ratio(base, sent)``: 1.0 means
    completely changed, 0.0 means unchanged.

    Returns None when the status update has no ``notification`` attribute.
    https://github.com/ztane/python-Levenshtein
    """
    if not hasattr(status_update, 'notification'):
        return None

    intro_text = get_notification_intro(status_update.author.profile) + '\n\n'
    base_message = status_update.notification.base_message
    sent_message = status_update.notification.sent_message

    # Compare only the bodies; the shared intro would inflate the similarity.
    base_body = base_message.replace(intro_text, '')
    sent_body = sent_message.replace(intro_text, '')
    return 1.0 - Levenshtein.ratio(base_body, sent_body)
开发者ID:codeforamerica,项目名称:intake,代码行数:19,代码来源:status_notifications.py

示例5: __call__

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def __call__(self, input):
        """Keep or drop all subtitles based on a spot check against ASR output.

        A random sample of the subtitles is transcribed with
        ``_get_transcript_google_web_asr``. If no sample yields a transcript,
        or the mean ``ratio`` between each lower-cased subtitle phrase and
        its lower-cased transcript does not exceed
        ``self.mean_wer_threshold``, the subtitle list is replaced with an
        empty list.

        Params:
        - input: dict with a "subtitles" list; each subtitle is indexed with
          "phrase". (The parameter name shadows the builtin but is kept for
          interface compatibility.)

        Returns:
        - the same ``input`` dict, with "subtitles" either untouched or [].

        NOTE(review): despite its name, ``mean_wer_threshold`` is compared
        against a mean of ``ratio`` values (presumably Levenshtein
        similarity, higher is better) - confirm the intended semantics.
        """
        subtitles = input["subtitles"]

        # random.sample raises ValueError when asked for more items than
        # exist, so never request more samples than there are subtitles.
        sample_size = min(self.num_samples_to_test, len(subtitles))
        subset = random.sample(subtitles, sample_size)

        candidates = [(sub, _get_transcript_google_web_asr(sub)) for sub in subset]
        transcribed = [(sub, asr) for (sub, asr) in candidates if asr is not None]
        if not transcribed:
            # filter removes all the subtitles, as potentially unreliable sample
            subtitles = []
        else:
            overlap_ratio = [ratio(sub["phrase"].lower(), asr.lower())
                             for (sub, asr) in transcribed]
            passed_threshold = sum(overlap_ratio) / len(overlap_ratio) > self.mean_wer_threshold
            if not passed_threshold:
                # removing all subtitles, as potentially unreliable
                subtitles = []
        input["subtitles"] = subtitles
        return input
开发者ID:EgorLakomkin,项目名称:KTSpeechCrawler,代码行数:19,代码来源:filters.py

示例6: init_predicate_alignment

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def init_predicate_alignment(predicate_local_name_dict_1, predicate_local_name_dict_2, predicate_init_sim):
    """Find mutually-best predicate pairs by Levenshtein ratio of local names.

    Params:
    - predicate_local_name_dict_1: mapping predicate -> local name (side 1).
    - predicate_local_name_dict_2: mapping predicate -> local name (side 2).
    - predicate_init_sim: similarity a mutual pair must exceed to be
      reported as an initial match.

    Returns:
    - (match_pairs, latent_pairs): ``match_pairs`` is a set of
      ``(p1, p2, sim)`` tuples for mutual best matches with
      ``sim > predicate_init_sim``; ``latent_pairs`` maps every mutual best
      pair ``(p1, p2)`` to its similarity regardless of the threshold.

    A predicate whose best similarity is 0, or that faces an empty opposite
    dict, has no match and is skipped. Previously such a predicate was
    recorded with the placeholder '' which was then used as a key into the
    reverse mapping and raised KeyError.
    """
    no_match = object()  # sentinel that cannot collide with a real predicate

    def get_predicate_match_dict(p_ln_dict_1, p_ln_dict_2):
        """For each predicate in dict 1, pick the most similar one in dict 2."""
        predicate_match_dict, sim_dict = {}, {}
        for p1, ln1 in p_ln_dict_1.items():
            match_p2 = no_match
            max_sim = 0
            for p2, ln2 in p_ln_dict_2.items():
                sim_p2 = Levenshtein.ratio(ln1, ln2)
                # Strict '>' keeps the first candidate on ties and requires a
                # similarity above zero.
                if sim_p2 > max_sim:
                    match_p2, max_sim = p2, sim_p2
            predicate_match_dict[p1] = match_p2
            sim_dict[p1] = max_sim
        return predicate_match_dict, sim_dict

    match_dict_1_2, sim_dict_1 = get_predicate_match_dict(predicate_local_name_dict_1, predicate_local_name_dict_2)
    match_dict_2_1, _ = get_predicate_match_dict(predicate_local_name_dict_2, predicate_local_name_dict_1)

    predicate_match_pairs_set = set()
    predicate_latent_match_pairs_similarity_dict = {}
    for p1, p2 in match_dict_1_2.items():
        # Keep only pairs where each side is the other's best match.
        if p2 is no_match or match_dict_2_1[p2] != p1:
            continue
        sim = sim_dict_1[p1]
        predicate_latent_match_pairs_similarity_dict[(p1, p2)] = sim
        if sim > predicate_init_sim:
            predicate_match_pairs_set.add((p1, p2, sim))
    return predicate_match_pairs_set, predicate_latent_match_pairs_similarity_dict
开发者ID:nju-websoft,项目名称:MultiKE,代码行数:29,代码来源:predicate_alignment.py

示例7: _edit_dist

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def _edit_dist(str1, str2):
    """Return a normalized dissimilarity between two strings.

    The primary path is the Levenshtein distance divided by the length of
    the longer string. If that fails for any ordinary reason (for example
    the division by zero when both strings are empty), the result falls
    back to ``1 - difflib.SequenceMatcher(...).ratio()`` with spaces treated
    as junk. The two paths are different metrics and may not agree exactly.

    Params:
    - str1, str2: the strings to compare.

    Returns:
    - float; 0.0 for identical strings.
    """
    try:
        # very fast
        # http://stackoverflow.com/questions/14260126/how-python-levenshtein-ratio-is-computed
        # d = Levenshtein.ratio(str1, str2)
        d = Levenshtein.distance(str1, str2)/float(max(len(str1),len(str2)))
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; ordinary failures still fall back.
        # https://docs.python.org/2/library/difflib.html
        d = 1. - SequenceMatcher(lambda x: x==" ", str1, str2).ratio()
    return d
开发者ID:yyht,项目名称:BERT,代码行数:12,代码来源:dist_utils.py

示例8: _count_stats

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def _count_stats(s1, s2):
    """Build a 16-element float32 vector of length/overlap statistics.

    Feature order: both lengths and their relative difference; both
    unique-item counts and their relative difference; the unique/total
    ratio of each input; intersection size, union size, Jaccard coefficient
    and the intersection size over the sum of unique counts (labelled
    "dice coef" in the original); the ``_common_num`` count and that count
    divided by the mean, the minimum and the maximum of the two lengths.

    All divisions go through ``np_utils._try_divide``.
    """
    divide = np_utils._try_divide

    # Raw lengths.
    n1, n2 = len(s1), len(s2)
    rel_len_gap = divide(np.abs(n1 - n2), (n1 + n2) / 2.)

    # Distinct items.
    items1, items2 = set(s1), set(s2)
    u1, u2 = len(items1), len(items2)
    rel_uniq_gap = divide(np.abs(u1 - u2), (u1 + u2) / 2.)

    # Share of distinct items in each input.
    uniq_share1 = divide(u1, n1)
    uniq_share2 = divide(u2, n2)

    # Set-overlap measures.
    n_shared = len(items1 & items2)
    n_either = len(items1 | items2)
    jaccard = divide(n_shared, n_either)
    dice = divide(n_shared, u1 + u2)

    # Common-item count, normalized three ways.
    n_common = _common_num(s1, s2)
    common_avg = divide(n_common, (n1 + n2) / 2.)
    common_max = divide(n_common, min(n1, n2))
    common_min = divide(n_common, max(n1, n2))

    features = (
        n1, n2, rel_len_gap,
        u1, u2, rel_uniq_gap,
        uniq_share1, uniq_share2,
        n_shared, n_either, jaccard, dice,
        n_common, common_avg, common_max, common_min,
    )
    return np.array(features, dtype=np.float32)
开发者ID:yyht,项目名称:BERT,代码行数:43,代码来源:dist_utils.py

示例9: test_compare_implementations

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def test_compare_implementations():
    """Check the pure-Python string distances against python-Levenshtein.

    Skipped when ``Levenshtein`` is ``False``. Jaro, Jaro-Winkler and the
    Levenshtein ratio are compared on 50 pairs from
    ``_random_common_char_pairs`` and then 50 pairs from
    ``_random_string_pairs``.
    """
    if Levenshtein is False:
        raise unittest.SkipTest

    # First the pairs with a randomly placed common char, then random strings.
    pair_factories = (_random_common_char_pairs, _random_string_pairs)
    for make_pairs in pair_factories:
        for string1, string2 in make_pairs(n_pairs=50):
            jaro = string_distances._jaro_winkler(
                string1, string2, winkler=False)
            assert jaro == Levenshtein.jaro(string1, string2)

            jaro_winkler = string_distances._jaro_winkler(
                string1, string2, winkler=True)
            assert jaro_winkler == Levenshtein.jaro_winkler(string1, string2)

            lev_ratio = string_distances.levenshtein_ratio(string1, string2)
            assert lev_ratio == Levenshtein.ratio(string1, string2)
开发者ID:dirty-cat,项目名称:dirty_cat,代码行数:28,代码来源:test_string_distances.py

示例10: lev_ratio

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def lev_ratio(s1, s2):
    """Return ``ratio(s1, s2)`` for the two given strings.

    Thin wrapper; ``ratio`` is presumably ``Levenshtein.ratio`` imported at
    module level - confirm against the file's imports.
    """
    return ratio(s1, s2) 
开发者ID:AmitMY,项目名称:chimera,代码行数:4,代码来源:delex.py

示例11: on_message

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def on_message(self, source, message, whisper, **rest):
        """Check a chat message against the active trivia question.

        Empty messages and whispers are ignored, as is any message received
        while no question is active. Answers of five characters or fewer
        must match exactly (case-insensitively); longer answers are accepted
        when their Levenshtein ratio to the right answer is at least 0.94.

        On a correct answer the bot announces it, ``self.point_bounty``
        points are added to ``source`` when the bounty is positive, and the
        question state (``question``, ``step``, ``last_question``) is reset.
        """
        if whisper or not message:
            return

        if not self.question:
            return

        expected = self.question["answer"].lower()
        given = message.lower()
        if len(expected) > 5:
            # Longer answers tolerate small typos.
            correct = Levenshtein.ratio(expected, given) >= 0.94
        else:
            correct = expected == given

        if not correct:
            return

        if self.point_bounty > 0:
            self.bot.safe_me(
                f"{source} got the answer right! The answer was {self.question['answer']} FeelsGoodMan They get {self.point_bounty} points! PogChamp"
            )
            source.points += self.point_bounty
        else:
            self.bot.safe_me(
                f"{source} got the answer right! The answer was {self.question['answer']} FeelsGoodMan"
            )

        # Clear the question so the next one can be asked.
        self.question = None
        self.step = 0
        self.last_question = utils.now()
开发者ID:pajbot,项目名称:pajbot,代码行数:29,代码来源:trivia.py

示例12: gen_feat_tensor

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def gen_feat_tensor(input, classes, total_attrs):
    """Build a similarity feature tensor for one cell's candidate values.

    Params:
    - input: indexable record; item 0 is converted with ``int`` (the result
      is unused), item 1 is the attribute index, item 2 the initial value
      and item 3 a '|||'-separated string of candidate values.
    - classes: size of the candidate dimension of the tensor.
    - total_attrs: size of the attribute dimension of the tensor.

    Returns:
    - tensor of shape (1, classes, total_attrs), zero everywhere except
      ``[0][i][attr_idx]`` for each candidate ``i``: -1.0 when the candidate
      equals the initial value, otherwise ``2 * Levenshtein.ratio - 1``.
    """
    vid = int(input[0])  # unused below; kept so a malformed id still raises
    attr_idx, init_value = input[1], input[2]
    # TODO: To add more similarity metrics increase the last dimension of tensor.
    tensor = torch.zeros(1, classes, total_attrs)
    candidates = input[3].split('|||')
    for row, candidate in enumerate(candidates):
        if candidate != init_value:
            # Rescale the ratio with 2*r - 1.
            tensor[0][row][attr_idx] = (2 * Levenshtein.ratio(candidate, init_value)) - 1
        else:
            tensor[0][row][attr_idx] = -1.0
    return tensor
开发者ID:HoloClean,项目名称:holoclean,代码行数:16,代码来源:initsimfeat.py

示例13: worker

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def worker(num,total,foodStrings):
  """Thread worker: match food strings against this worker's share of keys.

  Every key of ``foodList`` whose enumeration index ``i`` satisfies
  ``i % total == num`` is compared with each entry of ``foodStrings``.
  Pairs with a Levenshtein ratio above 0.5 are collected as
  ``(key, foodList[key], token_set_ratio, levenshtein_ratio)`` tuples and
  the list is pickled to ``'<num>.p'`` in the current directory.

  Params:
  - num: index of this worker (0 <= num < total).
  - total: number of workers sharing the key space.
  - foodStrings: iterable of strings to match.

  Returns:
  - None; the result is the pickle file.
  """
  stringMatches = []
  for foodString in foodStrings:
    for (i,key) in enumerate(foodList.keys()):
      if i%total!=num:
        continue  # key belongs to another worker
      leven2 = Levenshtein.ratio(foodString,key)
      if leven2>0.5:
        # Token-set score is only stored, never used for filtering, so it
        # is computed for the kept pairs only.
        leven1 = fuzz.token_set_ratio(key,foodString)
        stringMatches.append((key,foodList[key],leven1,leven2))
  # Context manager guarantees the file is flushed and closed.
  with open(str(num)+'.p','wb') as outFile:
    pickle.dump(stringMatches,outFile)
  return
开发者ID:schollz,项目名称:extract_recipe,代码行数:15,代码来源:food_string_matching.py

示例14: char_cost

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def char_cost(a, b):
    """Return the Levenshtein ratio between the ``text`` of ``a`` and ``b``.

    NOTE(review): despite the name, ``Levenshtein.ratio`` is a similarity
    (higher means more alike), not a cost - callers should treat the result
    accordingly.
    """
    return Levenshtein.ratio(a.text, b.text)
    
# Merge the input alignment sequence to a single edit span 
开发者ID:chrisjbryant,项目名称:errant,代码行数:6,代码来源:merger.py

示例15: question_answer_similarity_by_ratio

# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def question_answer_similarity_by_ratio(index, question, answer):
    """Score modifier for answers that closely repeat the question.

    The answer is split into subsentences with the configured dividers; the
    full answer is also considered. The highest Levenshtein ratio between
    the question and any of those pieces is compared with the configured
    threshold.

    Returns 0 when the check is disabled (modifier value is None), the
    answer is shorter than the configured length, ``valid_emoticon`` is
    truthy, the best ratio is below the threshold, or the modifier mode is
    neither 'value' nor 'multiplier'. In 'value' mode the configured
    modifier value is returned; in 'multiplier' mode it is scaled by how far
    the ratio lies between the threshold and 1. ``index`` is not used.
    """
    settings = score_settings

    # Disabled or short or char emoticon
    if (settings['question_answer_similarity_modifier_value'] is None
            or len(answer) < settings['question_answer_similarity_sentence_len']
            or valid_emoticon):
        return 0

    # Non-empty subsentences, plus the whole answer as a final candidate.
    pieces = [part for part in re.split(settings['subsentence_dividers'], answer) if part]
    pieces.append(answer)

    best_ratio = max(Levenshtein.ratio(question, piece) for piece in pieces)

    threshold = settings['question_answer_similarity_threshold']
    if best_ratio < threshold:
        return 0

    mode = settings['question_answer_similarity_modifier']
    if mode == 'value':
        return settings['question_answer_similarity_modifier_value']
    if mode == 'multiplier':
        return (best_ratio - threshold) / (1 - threshold) * settings['question_answer_similarity_modifier_value']

    return 0
开发者ID:daniel-kukiela,项目名称:nmt-chatbot,代码行数:28,代码来源:scorer.py


注:本文中的Levenshtein.ratio方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。