本文整理汇总了Python中fuzzywuzzy.fuzz.WRatio方法的典型用法代码示例。如果您正苦于以下问题:Python fuzz.WRatio方法的具体用法?Python fuzz.WRatio怎么用?Python fuzz.WRatio使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块fuzzywuzzy.fuzz的用法示例。
在下文中一共展示了fuzz.WRatio方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _create_fuzzy_wuzzy_features
# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def _create_fuzzy_wuzzy_features(self, df):
    """Add fuzzy string-similarity feature columns to ``df`` in place.

    Every new column is computed row by row from the ``spn_1`` and ``spn_2``
    columns of ``df``. Nothing is returned; ``df`` itself receives the columns.

    :param df: table with ``spn_1`` and ``spn_2`` columns (presumably a pandas
        DataFrame, given the ``apply(..., axis=1)`` usage -- confirm with caller).
    """
    def _get_longest_substr_ratio(a, b):
        # Length of the first common substring reported by
        # ``distance.lcsubstrings`` divided by (shorter input length + 1);
        # the +1 keeps the denominator non-zero for empty inputs.
        common = list(distance.lcsubstrings(a, b))
        if not common:
            return 0
        return len(common[0]) / (min(len(a), len(b)) + 1)

    # (output column, pairwise scorer), listed in the order the columns are added.
    pair_features = (
        ('fuzzy_ratio', fuzz.ratio),
        ('fuzzy_set_ratio', fuzz.token_set_ratio),
        ('fuzzy_partial_ratio', fuzz.partial_ratio),
        ('fuzzy_token_sort_ratio', fuzz.token_sort_ratio),
        ('fuzzy_qratio', fuzz.QRatio),
        ('fuzzy_WRatio', fuzz.WRatio),
        ('longest_substr_ratio', _get_longest_substr_ratio),
    )

    for column, scorer in pair_features:
        # Bind ``scorer`` as a default so the lambda does not depend on the
        # loop variable's later values.
        df[column] = df[['spn_1', 'spn_2']].apply(
            lambda row, scorer=scorer: scorer(row['spn_1'], row['spn_2']),
            axis=1,
        )
示例2: hooked_scorer
# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def hooked_scorer(*args, **kwargs):
    """Score with ``fuzz.WRatio`` unless termination has been requested.

    The ``terminate_event`` keyword argument is required. It is removed from
    ``kwargs`` before the remaining arguments are forwarded to ``fuzz.WRatio``.

    :raises KeyError: if ``terminate_event`` is not supplied.
    :raises TerminateException: if ``terminate_event.is_set()`` is true.
    """
    terminate_event = kwargs.pop('terminate_event')
    if not terminate_event.is_set():
        return fuzz.WRatio(*args, **kwargs)
    raise TerminateException
示例3: get_best_fuzzy
# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def get_best_fuzzy(value, choices, min_score=75, scorer=fuzz_fuzz.WRatio, return_score=False):
    """Return the single best fuzzy match for ``value`` among ``choices``.

    :param value: string to look up; must be at least three characters long
        and must not contain ``_ivar`` or ``_mask``.
    :param choices: non-empty collection of candidates.
    :param min_score: forwarded to ``extractBests`` as ``score_cutoff``.
    :param scorer: scoring function forwarded to ``extractBests``.
    :param return_score: when true, return the whole entry produced by
        ``extractBests`` instead of only its first element.
    :raises ValueError: if the input is invalid, nothing is returned by
        ``extractBests``, or the two leading results have equal scores.
    """
    if not isinstance(value, six.string_types):
        raise ValueError('invalid value. Must be a string.')
    if len(value) < 3:
        raise ValueError('your fuzzy search value must be at least three characters long.')
    if len(choices) == 0:
        raise ValueError('choices cannot be an empty list.')

    # A value containing _ivar or _mask is probably an incorrect use of the
    # fuzzy feature, so it is rejected outright.
    if '_ivar' in value:
        raise ValueError('_ivar not allowd in search value.')
    if '_mask' in value:
        raise ValueError('_mask not allowd in search value.')

    matches = fuzz_proc.extractBests(value, choices, scorer=scorer, score_cutoff=min_score)

    nothing_found = len(matches) == 0
    # Only the two leading entries are compared when looking for a tie.
    tied_for_first = len(matches) > 1 and matches[0][1] == matches[1][1]
    if nothing_found or tied_for_first:
        raise ValueError('cannot find a good match for {0!r}. '
                         'Your input value is too ambiguous.'.format(value))

    winner = matches[0]
    if return_score:
        return winner
    return winner[0]
示例4: get_best_fuzzy
# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def get_best_fuzzy(name, choices, cutoff=60, return_score=False):
    """Return the best ``fuzz.WRatio`` match for ``name`` among ``choices``.

    When several results share the highest score, a case-insensitive exact
    name match (searched across all results) is preferred; without one the
    lookup is considered ambiguous.

    :param name: text to look up.
    :param choices: candidates; entries may be ``QueryParameter`` objects, in
        which case their ``name`` attribute is used for comparison/reporting.
    :param cutoff: forwarded to ``extractBests`` as ``score_cutoff``.
    :param return_score: when true, return the whole entry produced by
        ``extractBests`` instead of only its first element.
    :raises ValueError: if ``extractBests`` returns nothing.
    :raises KeyError: if the top score is shared and no exact match exists.
    """
    def _label(candidate):
        # QueryParameter objects are identified by their ``name``.
        return candidate.name if isinstance(candidate, QueryParameter) else candidate

    hits = process.extractBests(name, choices, score_cutoff=cutoff, scorer=fuzz.WRatio)
    if not hits:
        raise ValueError('Could not find a match for {0}. Please refine your text.'.format(name))

    best = hits[0]
    if len(hits) > 1:
        top_score = np.max([hit[1] for hit in hits])
        tied = [hit for hit in hits if hit[1] == top_score]
        if len(tied) > 1:
            # Several results share the top score: look for an exact
            # (case-insensitive) name among every returned result.
            exact = [hit for hit in hits if _label(hit[0]).lower() == name.lower()]
            if not exact:
                options = [_label(hit[0]) for hit in tied]
                raise KeyError('{0} is too ambiguous. '
                               'Did you mean one of {1}?'.format(name, options))
            best = exact[0]

    return best if return_score else best[0]
示例5: extract_string_similarity_vector
# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def extract_string_similarity_vector(instance: dict):
    """
    Encode a set of lexical comparison scores for a sentence pair as a tensor.

    :param instance: dictionary providing the keys ``sentence_1`` and ``sentence_2``
    :return: ``torch.tensor`` built from sixteen scores, in the order of the
        ``measures`` tuple below (note that the LCS and q-gram entries are
        obtained through ``.distance`` rather than ``.similarity``)
    """
    first, second = instance['sentence_1'], instance['sentence_2']

    # Order matters: it fixes the position of each score in the output.
    measures = (
        normalized_levenshtein.similarity,
        jarowinkler.similarity,
        metric_lcs.distance,
        qgram2.distance,
        qgram3.distance,
        qgram4.distance,
        jaccard.similarity,
        cosine.similarity,
        fuzz.partial_token_set_ratio,
        fuzz.partial_token_sort_ratio,
        fuzz.token_set_ratio,
        fuzz.token_sort_ratio,
        fuzz.QRatio,
        fuzz.UQRatio,
        fuzz.UWRatio,
        fuzz.WRatio,
    )
    return torch.tensor([measure(first, second) for measure in measures])
示例6: find
# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def find(self, name_approx: str, threshold: int = 75) -> Optional['CardTypesInstanced']:
    """
    Finds a card by name, doesn't have to be exact name, thanks to highly sophisticated AI - a.k.a.
    simple algorithm, that will try and guess what was meant.
    This algorithm can change over time so don't expect the same results across different versions.
    :param name_approx: Name to look up
    :param threshold: How strict to be, higher number -> less likely to
    find a result if the name is off, higher chance the result will be correct
    :return: the exact-name result of ``self.get`` when ``name_approx`` is a known
    name; otherwise the first ``NotAbility`` instance registered under a
    best-scoring name, or ``None`` when no name reaches ``threshold`` or none
    of the best-scoring names has a ``NotAbility`` instance
    """
    # Exact hit: no fuzzy matching needed.
    if name_approx in ctx.cards_by_name:
        return self.get(name_approx)

    # Keep only the names whose WRatio score reaches the threshold.
    name_scores = {}
    for name in ctx.cards_by_name:
        score = fuzz.WRatio(name_approx, name)
        if score >= threshold:
            name_scores[name] = score

    # max() raises ValueError on an empty iterable, so bail out explicitly
    # when nothing was similar enough.
    if not name_scores:
        return None

    max_score = max(name_scores.values())
    result = None
    for name, score in name_scores.items():
        if score != max_score:
            continue
        candidates = [inst for inst in ctx.cards_by_name[name] if isinstance(inst, NotAbility)]
        # When several names tie for the best score, the last one that has a
        # NotAbility instance wins (same as the previous behaviour).
        if candidates:
            result = candidates[0]
    return result
示例7: fuzzy_fuzzywuzzy_list
# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def fuzzy_fuzzywuzzy_list(fuzz, user_input, qa_list, collection, topn=50, max_candidates=5000):
    """Rank pre-filtered QA entries against ``user_input`` with ``token_set_ratio``.

    Edit-distance based, so it is comparatively slow, but unlike exact
    matching it can cope with inputs whose characters differ.

    The search runs in two stages: entries of ``collection`` are first
    filtered by how many elements of ``user_input`` they contain, and only the
    surviving candidates are scored with ``process.extract``.

    :param fuzz: object providing the ``token_set_ratio`` scorer.
    :param user_input: query; iterated element by element (character by
        character for a string).
    :param qa_list: entries to return; indexed in parallel with ``collection``.
    :param collection: entries checked for overlap with ``user_input``.
    :param topn: forwarded to ``process.extract`` as ``limit``.
    :param max_candidates: maximum number of pre-filtered candidates handed to
        ``process.extract`` (previously a hard-coded 5000).
    :return: whatever ``process.extract`` returns for the candidate list.
    """
    query_chars = list(user_input)

    # Stage 1: count, for every collection entry, how many query elements it
    # contains. Indices are grouped by that count, and every new running
    # maximum is recorded as a "tier".
    indices_by_overlap = {}
    overlap_tiers = []
    best_overlap = 0
    for index, entry in enumerate(collection):
        overlap = sum(1 for char in query_chars if char in entry)
        if overlap > 0:
            indices_by_overlap.setdefault(overlap, []).append(index)
        if overlap > best_overlap:
            overlap_tiers.append(overlap)
            best_overlap = overlap

    # Stage 2: collect candidates tier by tier, highest overlap first, and
    # stop once the cap is reached. Only counts that were a running maximum
    # form a tier; entries with any other count are not considered.
    candidates = []
    for tier in reversed(overlap_tiers):
        # The answers are taken from qa_list at the matching positions.
        candidates.extend(qa_list[index] for index in indices_by_overlap[tier])
        if len(candidates) >= max_candidates:
            candidates = candidates[:max_candidates]
            break

    return process.extract(user_input, candidates, scorer=fuzz.token_set_ratio, limit=topn)
# [fuzz.WRatio, fuzz.QRatio,
# fuzz.token_set_ratio, fuzz.token_sort_ratio,
# fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio,
# fuzz.UWRatio, fuzz.UQRatio]