本文整理汇总了 Python 中 Levenshtein.ratio 方法的典型用法代码示例。如果您正苦于以下问题：Python Levenshtein.ratio 方法的具体用法？Python Levenshtein.ratio 怎么用？Python Levenshtein.ratio 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 Levenshtein 的用法示例。
在下文中一共展示了Levenshtein.ratio方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: similar_string_fast
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def similar_string_fast(first_string, second_string):
    """Decide whether two strings are similar using two fuzzy scores.

    Scores the pair with ``fuzz.ratio`` and ``fuzz.token_set_ratio`` and
    compares the higher of the two against ``SCORE_THRESHOLD_FAST``.

    Params:
    - first_string: (type: string) first string.
    - second_string: (type: string) second string.
    Returns:
    - result: (type: bool) True when the best score reaches the threshold.
    """
    scores = (
        fuzz.ratio(first_string, second_string),
        fuzz.token_set_ratio(first_string, second_string),
    )
    best_score = max(scores)
    return bool(best_score >= SCORE_THRESHOLD_FAST)
示例2: get_sub_cost
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def get_sub_cost(self, o, c):
    """Return the cost of substituting token ``o`` with token ``c``.

    The cost is the sum of three parts:
    - lemma cost: 0 when the lemmas are equal, otherwise 0.499;
    - POS cost: 0 when the POS tags are equal, 0.25 when both tags are in
      ``self._open_pos``, otherwise 0.5;
    - character cost: 1 minus the Levenshtein ratio of the two texts.

    Tokens whose ``lower`` attributes compare equal cost 0 outright.
    """
    # Short circuit if the only difference is case.
    # NOTE(review): ``lower`` is read as an attribute, not called --
    # presumably a precomputed lowercase form; confirm against the token type.
    if o.lower == c.lower:
        return 0

    lemma_cost = 0 if o.lemma == c.lemma else 0.499

    if o.pos == c.pos:
        pos_cost = 0
    elif o.pos in self._open_pos and c.pos in self._open_pos:
        pos_cost = 0.25
    else:
        pos_cost = 0.5

    char_cost = 1 - Levenshtein.ratio(o.text, c.text)
    return lemma_cost + pos_cost + char_cost
# Get the cheapest alignment sequence and indices from the op matrix
# align_seq = [(op, o_start, o_end, c_start, c_end), ...]
示例3: _get_compare_data
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def _get_compare_data(tif_txt_pair):
    """Run OCR on a page image and compare the output with its ground truth.

    Args:
        tif_txt_pair: two-item sequence ``(tif_path, txt_path)``. Both paths
            are expected to be identical once their last four characters
            (the extension) are dropped.

    Returns:
        A dict with the keys ``'edit_distance'``, ``'edit_ratio'``,
        ``'filename'`` (base name of the image) and ``'html'`` (diff markup
        from ``_make_html_diff``), or ``None`` when the two paths do not
        belong together.
    """
    tif = tif_txt_pair[0]
    txt_path = tif_txt_pair[1]

    if tif[:-4] != txt_path[:-4]:
        # This should never happen; return explicitly instead of falling
        # through to a result that was never assigned.
        return None

    ocr = run_main(tif, text=True).strip()

    # Use a context manager so the ground-truth file is always closed.
    with open(txt_path, 'r') as truth_file:
        txt = truth_file.read()
    txt = _normalize_input(txt)

    edit_dist = L.distance(txt, ocr)
    edit_ratio = L.ratio(txt, ocr)
    html = _make_html_diff(txt, ocr)

    return {
        'edit_distance': edit_dist,
        'edit_ratio': edit_ratio,
        'filename': os.path.basename(tif),
        'html': html,
    }
示例4: get_message_change_ratio
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def get_message_change_ratio(status_update):
    """Measure how much a notification message was edited before sending.

    Expects a status update instance. The author's intro text (followed by
    a blank line) is removed from both the base message and the sent
    message, and the result is one minus their Levenshtein ratio:
    1.0 means completely changed, 0.0 means unchanged.

    Returns None when the status update has no ``notification`` attribute.

    https://github.com/ztane/python-Levenshtein
    """
    if not hasattr(status_update, 'notification'):
        return None

    intro_text = get_notification_intro(status_update.author.profile) + '\n\n'
    notification = status_update.notification
    base_message, sent_message = (
        notification.base_message,
        notification.sent_message,
    )
    base_body = base_message.replace(intro_text, '')
    sent_body = sent_message.replace(intro_text, '')
    return 1.0 - Levenshtein.ratio(base_body, sent_body)
示例5: __call__
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def __call__(self, input):
    """Discard all subtitles when a random sample disagrees with ASR output.

    A random sample of ``input["subtitles"]`` is transcribed with
    ``_get_transcript_google_web_asr``. Samples without a transcript are
    ignored. The remaining subtitle phrases are compared, lower-cased, with
    their transcripts via ``ratio``; unless the mean of those ratios is
    strictly greater than ``self.mean_wer_threshold``, every subtitle is
    dropped. Subtitles are also dropped when no sample got a transcript.

    Args:
        input: dict with a ``"subtitles"`` sequence; each subtitle is
            indexed with ``"phrase"``.

    Returns:
        The same ``input`` dict, with ``"subtitles"`` either untouched or
        replaced by an empty list.
    """
    subtitles = input["subtitles"]

    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more than there are subtitles.
    sample_size = min(self.num_samples_to_test, len(subtitles))
    subset = random.sample(subtitles, sample_size)

    candidates = [(sub, _get_transcript_google_web_asr(sub)) for sub in subset]
    transcribed = [(sub, text) for (sub, text) in candidates if text is not None]

    if not transcribed:
        # Nothing could be verified: treat the whole sample as unreliable.
        subtitles = []
    else:
        overlap_ratio = [
            ratio(sub["phrase"].lower(), text.lower())
            for (sub, text) in transcribed
        ]
        passed_threshold = (
            sum(overlap_ratio) / len(overlap_ratio) > self.mean_wer_threshold
        )
        if not passed_threshold:
            # Removing all subtitles, as potentially unreliable.
            subtitles = []

    input["subtitles"] = subtitles
    return input
示例6: init_predicate_alignment
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def init_predicate_alignment(predicate_local_name_dict_1, predicate_local_name_dict_2, predicate_init_sim):
    """Find predicates of two dicts that are each other's best name match.

    Every predicate is paired with the predicate of the other dict whose
    local name has the highest Levenshtein ratio. A pair is kept only when
    the choice is mutual.

    Args:
        predicate_local_name_dict_1: mapping predicate -> local name.
        predicate_local_name_dict_2: mapping predicate -> local name.
        predicate_init_sim: similarity a mutual pair must strictly exceed
            to be placed in the returned set.

    Returns:
        A tuple ``(pairs, latent)`` where ``pairs`` is a set of
        ``(p1, p2, similarity)`` triples above the threshold and ``latent``
        maps every mutual ``(p1, p2)`` pair to its similarity.
    """
    def get_predicate_match_dict(p_ln_dict_1, p_ln_dict_2):
        """Map each predicate of the first dict to its best match in the second."""
        predicate_match_dict, sim_dict = {}, {}
        for p1, ln1 in p_ln_dict_1.items():
            # '' / 0 are kept when no candidate has a similarity above zero.
            match_p2 = ''
            max_sim = 0
            for p2, ln2 in p_ln_dict_2.items():
                sim_p2 = Levenshtein.ratio(ln1, ln2)
                if sim_p2 > max_sim:
                    match_p2 = p2
                    max_sim = sim_p2
            predicate_match_dict[p1] = match_p2
            sim_dict[p1] = max_sim
        return predicate_match_dict, sim_dict

    match_dict_1_2, sim_dict_1 = get_predicate_match_dict(
        predicate_local_name_dict_1, predicate_local_name_dict_2)
    match_dict_2_1, _ = get_predicate_match_dict(
        predicate_local_name_dict_2, predicate_local_name_dict_1)

    predicate_match_pairs_set = set()
    predicate_latent_match_pairs_similarity_dict = {}
    for p1, p2 in match_dict_1_2.items():
        # p2 may be the '' placeholder, which has no reverse entry; check
        # membership first instead of raising KeyError.
        if p2 in match_dict_2_1 and match_dict_2_1[p2] == p1:
            similarity = sim_dict_1[p1]
            predicate_latent_match_pairs_similarity_dict[(p1, p2)] = similarity
            if similarity > predicate_init_sim:
                predicate_match_pairs_set.add((p1, p2, similarity))
    return predicate_match_pairs_set, predicate_latent_match_pairs_similarity_dict
示例7: _edit_dist
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def _edit_dist(str1, str2):
try:
# very fast
# http://stackoverflow.com/questions/14260126/how-python-levenshtein-ratio-is-computed
# d = Levenshtein.ratio(str1, str2)
d = Levenshtein.distance(str1, str2)/float(max(len(str1),len(str2)))
except:
# https://docs.python.org/2/library/difflib.html
d = 1. - SequenceMatcher(lambda x: x==" ", str1, str2).ratio()
return d
示例8: _count_stats
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def _count_stats(s1, s2):
    """Build a float32 vector of length and overlap statistics for two sequences.

    The 16 features, in order: both lengths and their relative difference;
    both unique-element counts and their relative difference; the
    unique/total ratio of each input; intersection size, union size,
    Jaccard coefficient and a Dice-style coefficient (intersection over the
    sum of unique counts, with no factor of two); and the ``_common_num``
    count together with its ratio to the mean, minimum and maximum length.
    All divisions go through ``np_utils._try_divide``.
    """
    safe_div = np_utils._try_divide

    # Raw lengths
    n1, n2 = len(s1), len(s2)
    length_gap = safe_div(np.abs(n1 - n2), (n1 + n2) / 2.)

    # Distinct elements
    uniq1, uniq2 = set(s1), set(s2)
    n1_uniq, n2_uniq = len(uniq1), len(uniq2)
    uniq_gap = safe_div(np.abs(n1_uniq - n2_uniq), (n1_uniq + n2_uniq) / 2.)

    # Share of distinct elements in each input
    uniq_share1 = safe_div(n1_uniq, n1)
    uniq_share2 = safe_div(n2_uniq, n2)

    # Set overlap
    n_inter = len(uniq1 & uniq2)
    n_union = len(uniq1 | uniq2)
    jaccard = safe_div(n_inter, n_union)
    dice = safe_div(n_inter, n1_uniq + n2_uniq)

    # Common-element count relative to the input lengths
    n_common = _common_num(s1, s2)
    common_vs_mean = safe_div(n_common, (n1 + n2) / 2.)
    common_vs_shorter = safe_div(n_common, min(n1, n2))
    common_vs_longer = safe_div(n_common, max(n1, n2))

    features = [
        n1, n2, length_gap,
        n1_uniq, n2_uniq, uniq_gap,
        uniq_share1, uniq_share2,
        n_inter, n_union, jaccard, dice,
        n_common, common_vs_mean, common_vs_shorter, common_vs_longer,
    ]
    return np.array(features, dtype=np.float32)
示例9: test_compare_implementations
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def test_compare_implementations():
    """Check the pure-Python string distances against python-Levenshtein.

    Skipped when ``Levenshtein`` is ``False``. For 50 pairs from each pair
    generator, the Jaro, Jaro-Winkler and Levenshtein-ratio results of
    ``string_distances`` must equal those of ``Levenshtein`` exactly.
    """
    if Levenshtein is False:
        raise unittest.SkipTest

    # First strings with a randomly placed common char, then random strings.
    pair_sources = (_random_common_char_pairs, _random_string_pairs)
    for make_pairs in pair_sources:
        for string1, string2 in make_pairs(n_pairs=50):
            jaro_ours = string_distances._jaro_winkler(
                string1, string2, winkler=False)
            assert jaro_ours == Levenshtein.jaro(string1, string2)

            jaro_winkler_ours = string_distances._jaro_winkler(
                string1, string2, winkler=True)
            assert jaro_winkler_ours == Levenshtein.jaro_winkler(string1, string2)

            ratio_ours = string_distances.levenshtein_ratio(string1, string2)
            assert ratio_ours == Levenshtein.ratio(string1, string2)
示例10: lev_ratio
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def lev_ratio(s1, s2):
    """Return ``ratio(s1, s2)`` unchanged; a thin named wrapper around ``ratio``."""
    similarity = ratio(s1, s2)
    return similarity
示例11: on_message
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def on_message(self, source, message, whisper, **rest):
    """Check a chat message against the active question's answer.

    Empty messages and whispers are ignored, as is everything while no
    question is active. Answers of at most five characters must match
    exactly (case-insensitively); longer answers are accepted when the
    Levenshtein ratio is at least 0.94. On a correct answer the result is
    announced, ``self.point_bounty`` points are awarded when it is
    positive, and the question state is reset.
    """
    if not message or whisper:
        return
    if not self.question:
        return

    expected = self.question["answer"].lower()
    given = message.lower()
    if len(expected) > 5:
        # Longer answers tolerate small typos.
        correct = Levenshtein.ratio(expected, given) >= 0.94
    else:
        correct = expected == given

    if not correct:
        return

    if self.point_bounty > 0:
        self.bot.safe_me(
            f"{source} got the answer right! The answer was {self.question['answer']} FeelsGoodMan They get {self.point_bounty} points! PogChamp"
        )
        source.points += self.point_bounty
    else:
        self.bot.safe_me(
            f"{source} got the answer right! The answer was {self.question['answer']} FeelsGoodMan"
        )

    # Close the question and record when it ended.
    self.question = None
    self.step = 0
    self.last_question = utils.now()
示例12: gen_feat_tensor
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def gen_feat_tensor(input, classes, total_attrs):
    """Build a similarity feature tensor for one cell's candidate values.

    ``input`` is indexed as: [0] an id convertible to int, [1] the attribute
    index, [2] the initial value, [3] the candidate domain joined by
    ``'|||'``. The result is a zero tensor of shape
    ``(1, classes, total_attrs)`` in which position
    ``[0][i][attribute index]`` holds, for the i-th candidate, -1.0 when it
    equals the initial value and ``2 * Levenshtein.ratio - 1`` otherwise.
    """
    # The id is not used below; the conversion is kept because it still
    # rejects ids that cannot be turned into an int.
    vid = int(input[0])
    attr_idx, init_value = input[1], input[2]
    # TODO: To add more similarity metrics increase the last dimension of tensor.
    tensor = torch.zeros(1, classes, total_attrs)
    candidates = input[3].split('|||')
    for idx, val in enumerate(candidates):
        sim = -1.0 if val == init_value else (2 * Levenshtein.ratio(val, init_value)) - 1
        tensor[0][idx][attr_idx] = sim
    return tensor
示例13: worker
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def worker(num, total, foodStrings):
    """thread worker function

    Compares every string in ``foodStrings`` with this worker's share of
    the ``foodList`` keys (those whose position ``i`` satisfies
    ``i % total == num``). Keys whose Levenshtein ratio with the string is
    above 0.5 are collected as
    ``(key, foodList[key], token_set_ratio, levenshtein_ratio)`` tuples, and
    the list is pickled to ``'<num>.p'`` in the current directory.

    Args:
        num: index of this worker, in ``range(total)``.
        total: number of workers sharing the keys.
        foodStrings: iterable of strings to look up.

    Returns:
        None; the matches are only written to disk.
    """
    stringMatches = []
    for foodString in foodStrings:
        for i, key in enumerate(foodList):
            if i % total != num:
                continue
            leven2 = Levenshtein.ratio(foodString, key)
            if leven2 > 0.5:
                # The token-set score is only stored for accepted matches,
                # so compute it after the ratio test.
                leven1 = fuzz.token_set_ratio(key, foodString)
                stringMatches.append((key, foodList[key], leven1, leven2))

    # The context manager guarantees the pickle is flushed and closed.
    with open(str(num) + '.p', 'wb') as out_file:
        pickle.dump(stringMatches, out_file)
    return
示例14: char_cost
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def char_cost(a, b):
    """Return the Levenshtein ratio of ``a.text`` and ``b.text``.

    NOTE(review): despite the name, the value returned is the ratio itself,
    not one minus the ratio -- confirm callers expect that.
    """
    text_a, text_b = a.text, b.text
    return Levenshtein.ratio(text_a, text_b)
# Merge the input alignment sequence to a single edit span
示例15: question_answer_similarity_by_ratio
# 需要导入模块: import Levenshtein [as 别名]
# 或者: from Levenshtein import ratio [as 别名]
def question_answer_similarity_by_ratio(index, question, answer):
    """Return a score modifier based on how closely the answer echoes the question.

    Returns 0 when the modifier value is None, when the answer is shorter
    than the configured sentence length, or when ``valid_emoticon`` is
    truthy. Otherwise the answer is split on the configured subsentence
    dividers, and the highest Levenshtein ratio between the question and
    any non-empty piece (or the whole answer) is taken. Below the
    configured threshold the result is 0. At or above it, the ``'value'``
    modifier returns the modifier value as is and the ``'multiplier'``
    modifier scales it by how far the ratio lies between the threshold and
    1. Any other modifier returns 0.

    ``index`` is not used.
    """
    global valid_emoticon

    # Disabled, too short, or a char emoticon
    if score_settings['question_answer_similarity_modifier_value'] is None:
        return 0
    if len(answer) < score_settings['question_answer_similarity_sentence_len']:
        return 0
    if valid_emoticon:
        return 0

    # Split the response into subsentences, and keep the whole answer too
    pieces = [
        part
        for part in re.split(score_settings['subsentence_dividers'], answer)
        if part
    ]
    pieces.append(answer)

    # Best similarity over all pieces
    ratio = max(Levenshtein.ratio(question, piece) for piece in pieces)

    # Not similar
    if ratio < score_settings['question_answer_similarity_threshold']:
        return 0

    modifier = score_settings['question_answer_similarity_modifier']
    if modifier == 'value':
        return score_settings['question_answer_similarity_modifier_value']
    if modifier == 'multiplier':
        return (ratio - score_settings['question_answer_similarity_threshold']) / (1 - score_settings['question_answer_similarity_threshold']) * score_settings['question_answer_similarity_modifier_value']
    return 0