当前位置: 首页>>代码示例>>Python>>正文


Python fuzz.WRatio方法代码示例

本文整理汇总了Python中fuzzywuzzy.fuzz.WRatio方法的典型用法代码示例。如果您正苦于以下问题:Python fuzz.WRatio方法的具体用法?Python fuzz.WRatio怎么用?Python fuzz.WRatio使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在fuzzywuzzy.fuzz的用法示例。


在下文中一共展示了fuzz.WRatio方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _create_fuzzy_wuzzy_features

# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def _create_fuzzy_wuzzy_features(self, df):
    """Add string-similarity feature columns to ``df`` in place.

    Every feature is computed row by row from the ``spn_1`` and ``spn_2``
    columns. Six columns come from ``fuzzywuzzy.fuzz`` scorers and one
    (``longest_substr_ratio``) from the common substrings reported by
    ``distance.lcsubstrings``.

    :param df: DataFrame holding ``spn_1`` and ``spn_2`` columns; it is
        mutated and nothing is returned.
    """

    def _longest_common_substr_ratio(left, right):
        # Length of the first reported common substring, relative to the
        # shorter input. The +1 keeps the denominator non-zero.
        common = list(distance.lcsubstrings(left, right))
        if not common:
            return 0
        return len(common[0]) / (min(len(left), len(right)) + 1)

    def _score_rows(scorer):
        # Apply a two-argument scorer to each (spn_1, spn_2) pair.
        return df[['spn_1', 'spn_2']].apply(
            lambda row: scorer(row['spn_1'], row['spn_2']), axis=1)

    # (output column, scorer) pairs, in the order the columns are created.
    feature_scorers = (
        ('fuzzy_ratio', fuzz.ratio),
        ('fuzzy_set_ratio', fuzz.token_set_ratio),
        ('fuzzy_partial_ratio', fuzz.partial_ratio),
        ('fuzzy_token_sort_ratio', fuzz.token_sort_ratio),
        ('fuzzy_qratio', fuzz.QRatio),
        ('fuzzy_WRatio', fuzz.WRatio),
        ('longest_substr_ratio', _longest_common_substr_ratio),
    )
    for column, scorer in feature_scorers:
        df[column] = _score_rows(scorer)
开发者ID:zake7749,项目名称:CIKM-AnalytiCup-2018,代码行数:18,代码来源:feature_engineering.py

示例2: hooked_scorer

# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def hooked_scorer(*args, **kwargs):
    """Score with ``fuzz.WRatio`` unless the caller has requested termination.

    The mandatory ``terminate_event`` keyword is removed from ``kwargs``
    before the remaining arguments are forwarded to ``fuzz.WRatio``.

    :raises KeyError: if ``terminate_event`` is not supplied.
    :raises TerminateException: if ``terminate_event.is_set()`` is true.
    """
    terminate_event = kwargs.pop('terminate_event')
    if not terminate_event.is_set():
        return fuzz.WRatio(*args, **kwargs)
    raise TerminateException
开发者ID:Ga-ryo,项目名称:IDAFuzzy,代码行数:6,代码来源:ida_fuzzy.py

示例3: get_best_fuzzy

# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def get_best_fuzzy(value, choices, min_score=75, scorer=fuzz_fuzz.WRatio, return_score=False):
    """Return the single best fuzzy match for ``value`` among ``choices``.

    :param value: search string; must be a string of at least three
        characters and must not contain ``_ivar`` or ``_mask``.
    :param choices: non-empty collection of candidates.
    :param min_score: passed to ``extractBests`` as ``score_cutoff``.
    :param scorer: scoring function handed to ``extractBests``.
    :param return_score: when true, return the whole entry produced by
        ``extractBests`` instead of only its first element.
    :raises ValueError: on invalid input, when nothing reaches the cutoff,
        or when the two leading candidates have the same score.
    """
    if not isinstance(value, six.string_types):
        raise ValueError('invalid value. Must be a string.')
    if len(value) < 3:
        raise ValueError('your fuzzy search value must be at least three characters long.')
    if len(choices) == 0:
        raise ValueError('choices cannot be an empty list.')

    # A value containing _ivar or _mask is probably an incorrect use of the
    # fuzzy feature, so it is rejected outright.
    if '_ivar' in value:
        raise ValueError('_ivar not allowd in search value.')
    if '_mask' in value:
        raise ValueError('_mask not allowd in search value.')

    candidates = fuzz_proc.extractBests(value, choices, scorer=scorer, score_cutoff=min_score)

    # The winner must be unambiguous: either it is the only candidate or its
    # score differs from the runner-up's.
    tied_at_top = len(candidates) > 1 and candidates[0][1] == candidates[1][1]
    if not candidates or tied_at_top:
        raise ValueError('cannot find a good match for {0!r}. '
                         'Your input value is too ambiguous.'.format(value))

    winner = candidates[0]
    if return_score:
        return winner
    return winner[0]
开发者ID:sdss,项目名称:marvin,代码行数:38,代码来源:structs.py

示例4: get_best_fuzzy

# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def get_best_fuzzy(name, choices, cutoff=60, return_score=False):
    """Return the best ``fuzz.WRatio`` match for ``name`` among ``choices``.

    When several candidates share the top score, a case-insensitive exact
    match on the candidate's name breaks the tie; without one the lookup is
    reported as ambiguous.

    :param name: text to look up.
    :param choices: candidates; each is either a ``QueryParameter`` (compared
        through its ``name`` attribute) or a plain string.
    :param cutoff: passed to ``extractBests`` as ``score_cutoff``.
    :param return_score: when true, return the whole entry produced by
        ``extractBests`` instead of only its first element.
    :raises ValueError: if no candidate reaches ``cutoff``.
    :raises KeyError: if the top score is shared and no exact match exists.
    """

    def _label(candidate):
        # Candidates may be QueryParameter objects or bare strings.
        return candidate.name if isinstance(candidate, QueryParameter) else candidate

    matches = process.extractBests(name, choices, score_cutoff=cutoff, scorer=fuzz.WRatio)
    if not matches:
        raise ValueError('Could not find a match for {0}.  Please refine your text.'.format(name))

    winner = matches[0]
    if len(matches) > 1:
        scores = [match[1] for match in matches]
        top_score = np.max(scores)
        # More than one candidate holds the top score -> try an exact match.
        if sum(top_score == scores) > 1:
            exact = [match for match in matches
                     if _label(match[0]).lower() == name.lower()]
            if not exact:
                options = [_label(match[0]) for match in matches if match[1] == top_score]
                raise KeyError('{0} is too ambiguous.  '
                               'Did you mean one of {1}?'.format(name, options))
            winner = exact[0]

    return winner if return_score else winner[0]
开发者ID:sdss,项目名称:marvin,代码行数:35,代码来源:base.py

示例5: extract_string_similarity_vector

# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def extract_string_similarity_vector(instance: dict):
    """
    Encode a sentence pair as a tensor of lexical similarity/distance scores.

    :param instance: dictionary with the keys ``sentence_1`` and ``sentence_2``
    :return: a ``torch.tensor`` holding one score per metric, in the order the
        metrics are listed below (16 values)
    """
    first, second = instance['sentence_1'], instance['sentence_2']

    # Order matters: it fixes the position of each score in the output vector.
    metrics = (
        normalized_levenshtein.similarity,
        jarowinkler.similarity,
        metric_lcs.distance,
        qgram2.distance,
        qgram3.distance,
        qgram4.distance,
        jaccard.similarity,
        cosine.similarity,
        fuzz.partial_token_set_ratio,
        fuzz.partial_token_sort_ratio,
        fuzz.token_set_ratio,
        fuzz.token_sort_ratio,
        fuzz.QRatio,
        fuzz.UQRatio,
        fuzz.UWRatio,
        fuzz.WRatio,
    )
    return torch.tensor([metric(first, second) for metric in metrics])
开发者ID:AndriyMulyar,项目名称:semantic-text-similarity,代码行数:30,代码来源:lexical_similarity_metrics.py

示例6: find

# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def find(self, name_approx: str, threshold: int = 75) -> Optional['CardTypesInstanced']:
    """
    Finds a card by name; the name does not have to be exact. Candidates are
    ranked with ``fuzz.WRatio`` and the best-scoring one is returned.

    This algorithm can change over time so don't expect the same results across different versions.

    :param name_approx:             Name to look up
    :param threshold:               How strict to be, higher number -> less likely to
                                    find a result if the name is off, higher chance the result will be correct
    :return:                        ``self.get(name_approx)`` on an exact key match; otherwise the first
                                    ``NotAbility`` instance registered under a top-scoring name, or ``None``
                                    when no name reaches ``threshold`` or no such instance exists
    """
    cards_by_name = ctx.cards_by_name

    # An exact key match short-circuits the fuzzy search.
    if name_approx in cards_by_name:
        return self.get(name_approx)

    name_scores = {}
    for name in cards_by_name:
        score = fuzz.WRatio(name_approx, name)
        if score >= threshold:
            name_scores[name] = score

    # Nothing reached the threshold: max() on an empty sequence would raise
    # ValueError, so report "not found" as the Optional return type promises.
    if not name_scores:
        return None

    max_score = max(name_scores.values())
    for name, score in name_scores.items():
        if score != max_score:
            continue
        # Several names may tie for the top score; take the first one that
        # has a NotAbility instance.
        for inst in cards_by_name[name]:
            if isinstance(inst, NotAbility):
                return inst
    return None
开发者ID:iScrE4m,项目名称:pyArtifact,代码行数:32,代码来源:api_sync.py

示例7: fuzzy_fuzzywuzzy_list

# 需要导入模块: from fuzzywuzzy import fuzz [as 别名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 别名]
def fuzzy_fuzzywuzzy_list(fuzz, user_input, qa_list, collection, topn=50, max_candidates=5000):
    """Rank question/answer entries against ``user_input`` by edit distance.

    Edit-distance scoring is relatively slow, but unlike plain matching it
    copes with inputs whose characters differ. To limit the cost, entries are
    first shortlisted by how many elements of ``user_input`` occur in the
    corresponding ``collection`` item, and only the shortlist is scored.

    :param fuzz: object providing the ``token_set_ratio`` scorer (the
        ``fuzzywuzzy.fuzz`` module in the original usage).
    :param user_input: query; iterated element by element (character by
        character for a string).
    :param qa_list: entries to return, indexed in parallel with ``collection``.
    :param collection: items tested with ``in`` for each query element.
    :param topn: ``limit`` passed to ``process.extract``.
    :param max_candidates: upper bound on the shortlist size (previously the
        hard-coded value 5000).
    :return: whatever ``process.extract`` returns for the shortlist.
    """
    query_items = list(user_input)

    overlaps = []          # (index, overlap) for every item sharing at least one element
    running_maxima = []    # each overlap value that set a new maximum, ascending
    best_overlap = 0
    for index, candidate in enumerate(collection):  # find the items sharing the most elements
        overlap = len([item for item in query_items if item in candidate])
        if overlap > 0:
            overlaps.append((index, overlap))
        if overlap > best_overlap:
            running_maxima.append(overlap)
            best_overlap = overlap

    # Collect entries level by level, highest overlap first. Only overlap
    # values that were a running maximum during the scan are considered.
    shortlist = []
    for level in reversed(running_maxima):
        # The QA entries are pulled out here.
        shortlist.extend(qa_list[index] for index, overlap in overlaps if overlap == level)
        if len(shortlist) >= max_candidates:
            shortlist = shortlist[:max_candidates]
            break

    # Other scorers noted by the original author as alternatives:
    # fuzz.WRatio, fuzz.QRatio, fuzz.token_sort_ratio,
    # fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio,
    # fuzz.UWRatio, fuzz.UQRatio
    return process.extract(user_input, shortlist, scorer=fuzz.token_set_ratio, limit=topn)
开发者ID:yongzhuo,项目名称:nlp_xiaojiang,代码行数:59,代码来源:chatbot_fuzzy.py


注:本文中的fuzzywuzzy.fuzz.WRatio方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。