當前位置: 首頁>>代碼示例>>Python>>正文


Python fuzz.WRatio方法代碼示例

本文整理匯總了Python中fuzzywuzzy.fuzz.WRatio方法的典型用法代碼示例。如果您正苦於以下問題:Python fuzz.WRatio方法的具體用法?Python fuzz.WRatio怎麼用?Python fuzz.WRatio使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在fuzzywuzzy.fuzz的用法示例。


在下文中一共展示了fuzz.WRatio方法的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: _create_fuzzy_wuzzy_features

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 別名]
def _create_fuzzy_wuzzy_features(self, df):
    """Add fuzzy string-similarity feature columns to ``df`` in place.

    For every row, the ``spn_1`` / ``spn_2`` pair is scored with six
    ``fuzz`` scorers and with a longest-common-substring ratio. Each score
    is written to its own new column of ``df``; nothing is returned.

    :param df: table with ``spn_1`` and ``spn_2`` columns (presumably a
               pandas DataFrame of strings -- confirm against the caller).
    """
    pair_columns = ['spn_1', 'spn_2']

    def _longest_substr_ratio(a, b):
        # Length of the first entry produced by distance.lcsubstrings,
        # relative to the shorter input. The +1 keeps the denominator
        # non-zero when one of the inputs is empty.
        common = list(distance.lcsubstrings(a, b))
        if not common:
            return 0
        return len(common[0]) / (min(len(a), len(b)) + 1)

    # (target column, pairwise scorer) in the order the columns are added.
    scorers = (
        ('fuzzy_ratio', fuzz.ratio),
        ('fuzzy_set_ratio', fuzz.token_set_ratio),
        ('fuzzy_partial_ratio', fuzz.partial_ratio),
        ('fuzzy_token_sort_ratio', fuzz.token_sort_ratio),
        ('fuzzy_qratio', fuzz.QRatio),
        ('fuzzy_WRatio', fuzz.WRatio),
        ('longest_substr_ratio', _longest_substr_ratio),
    )

    for column, scorer in scorers:
        # The scorer is bound as a default so each lambda keeps its own one.
        df[column] = df[pair_columns].apply(
            lambda row, score=scorer: score(row['spn_1'], row['spn_2']),
            axis=1,
        )
開發者ID:zake7749,項目名稱:CIKM-AnalytiCup-2018,代碼行數:18,代碼來源:feature_engineering.py

示例2: hooked_scorer

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 別名]
def hooked_scorer(*args, **kwargs):
    """Score with ``fuzz.WRatio`` unless termination has been requested.

    The required ``terminate_event`` keyword is removed from ``kwargs``
    before the remaining arguments are forwarded to ``fuzz.WRatio``.

    :raises KeyError: if ``terminate_event`` is not supplied.
    :raises TerminateException: if ``terminate_event.is_set()`` is true.
    """
    stop_requested = kwargs.pop('terminate_event')
    if stop_requested.is_set():
        raise TerminateException
    return fuzz.WRatio(*args, **kwargs)
開發者ID:Ga-ryo,項目名稱:IDAFuzzy,代碼行數:6,代碼來源:ida_fuzzy.py

示例3: get_best_fuzzy

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 別名]
def get_best_fuzzy(value, choices, min_score=75, scorer=fuzz_fuzz.WRatio, return_score=False):
    """Return the single best fuzzy match for ``value`` among ``choices``.

    :param value: search string; must be at least three characters long and
                  must not contain ``_ivar`` or ``_mask``.
    :param choices: non-empty collection of candidates.
    :param min_score: passed to ``extractBests`` as ``score_cutoff``.
    :param scorer: scoring function passed to ``extractBests``.
    :param return_score: if true, return the whole entry produced by
                         ``extractBests`` (its second element is compared
                         as the score); otherwise only its first element.
    :raises ValueError: on invalid input, when nothing is returned by
                        ``extractBests``, or when the two top entries have
                        the same score.
    """
    if not isinstance(value, six.string_types):
        raise ValueError('invalid value. Must be a string.')

    if len(value) < 3:
        raise ValueError('your fuzzy search value must be at least three characters long.')

    if len(choices) == 0:
        raise ValueError('choices cannot be an empty list.')

    # A value containing _ivar or _mask is probably an incorrect use of the
    # fuzzy feature, so it is rejected outright.
    if '_ivar' in value:
        raise ValueError('_ivar not allowd in search value.')
    if '_mask' in value:
        raise ValueError('_mask not allowd in search value.')

    candidates = fuzz_proc.extractBests(value, choices, scorer=scorer, score_cutoff=min_score)

    # A tie between the two leading candidates is treated as "no good match".
    top_is_tied = len(candidates) > 1 and candidates[0][1] == candidates[1][1]
    if len(candidates) == 0 or top_is_tied:
        raise ValueError('cannot find a good match for {0!r}. '
                         'Your input value is too ambiguous.'.format(value))

    winner = candidates[0]
    if return_score:
        return winner
    return winner[0]
開發者ID:sdss,項目名稱:marvin,代碼行數:38,代碼來源:structs.py

示例4: get_best_fuzzy

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 別名]
def get_best_fuzzy(name, choices, cutoff=60, return_score=False):
    """Return the best ``fuzz.WRatio`` match for ``name`` among ``choices``.

    When several entries share the highest score, a case-insensitive exact
    name match is used to break the tie; ``QueryParameter`` choices are
    compared by their ``name`` attribute.

    :param name: text to look up.
    :param choices: candidates passed to ``process.extractBests``.
    :param cutoff: passed to ``extractBests`` as ``score_cutoff``.
    :param return_score: if true, return the whole entry produced by
                         ``extractBests``; otherwise only its first element.
    :raises ValueError: if ``extractBests`` returns nothing.
    :raises KeyError: if the top score is shared and no entry matches
                      ``name`` exactly.
    """
    def _label(choice):
        # Text used for comparison and for the error message.
        return choice.name if isinstance(choice, QueryParameter) else choice

    matches = process.extractBests(name, choices, score_cutoff=cutoff, scorer=fuzz.WRatio)

    if not matches:
        raise ValueError('Could not find a match for {0}.  Please refine your text.'.format(name))

    winner = matches[0]
    if len(matches) > 1:
        scores = [entry[1] for entry in matches]
        top_score = np.max(scores)
        # Element-wise comparison against the list counts the tied entries.
        if sum(top_score == scores) > 1:
            exact = [entry for entry in matches
                     if _label(entry[0]).lower() == name.lower()]
            if not exact:
                options = [_label(entry[0]) for entry in matches if entry[1] == top_score]
                raise KeyError('{0} is too ambiguous.  '
                               'Did you mean one of {1}?'.format(name, options))
            winner = exact[0]

    if return_score:
        return winner
    return winner[0]
開發者ID:sdss,項目名稱:marvin,代碼行數:35,代碼來源:base.py

示例5: extract_string_similarity_vector

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 別名]
def extract_string_similarity_vector(instance: dict):
    """
    Build a tensor of lexical comparison scores for a sentence pair.

    The values come from a mix of ``.similarity`` calls, ``.distance`` calls
    and ``fuzz`` ratio functions, each applied to the same two sentences.

    :param instance: mapping with the keys ``sentence_1`` and ``sentence_2``
    :return: ``torch.tensor`` holding one score per metric, in the order the
             metrics are listed below
    """
    first = instance['sentence_1']
    second = instance['sentence_2']

    # The order here fixes the position of each score in the output vector.
    metrics = (
        normalized_levenshtein.similarity,
        jarowinkler.similarity,
        metric_lcs.distance,
        qgram2.distance,
        qgram3.distance,
        qgram4.distance,
        jaccard.similarity,
        cosine.similarity,
        fuzz.partial_token_set_ratio,
        fuzz.partial_token_sort_ratio,
        fuzz.token_set_ratio,
        fuzz.token_sort_ratio,
        fuzz.QRatio,
        fuzz.UQRatio,
        fuzz.UWRatio,
        fuzz.WRatio,
    )

    return torch.tensor([metric(first, second) for metric in metrics])
開發者ID:AndriyMulyar,項目名稱:semantic-text-similarity,代碼行數:30,代碼來源:lexical_similarity_metrics.py

示例6: find

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 別名]
def find(self, name_approx: str, threshold: int = 75) -> Optional['CardTypesInstanced']:
    """
    Finds a card by name, doesn't have to be exact name, thanks to highly sophisticated AI - a.k.a.
    simple algorithm, that will try and guess what was meant.

    This algorithm can change over time so don't expect the same results across different versions.

    An exact key of ``ctx.cards_by_name`` is delegated to ``self.get``. Otherwise every known
    name is scored with ``fuzz.WRatio`` and, among the names sharing the highest score at or
    above ``threshold``, the first instance that is a ``NotAbility`` is returned.

    :param name_approx:             Name to look up
    :param threshold:               How strict to be, higher number -> less likely to
                                    find a result if the name is off, higher chance the result will be correct
    :return:                        The matching card, or None when no name reaches the threshold
                                    or no top-scoring name has a ``NotAbility`` instance
    """
    if name_approx in ctx.cards_by_name.keys():
        return self.get(name_approx)

    name_scores = {}
    for name in ctx.cards_by_name.keys():
        score = fuzz.WRatio(name_approx, name)
        if score >= threshold:
            name_scores[name] = score

    # max() raises ValueError on an empty iterable; "nothing close enough"
    # is a normal outcome here and is reported as None instead.
    if not name_scores:
        return None

    max_score = max(name_scores.values())
    for name, score in name_scores.items():
        if score != max_score:
            continue
        # Skip names whose instances are all abilities and try the next tie.
        for inst in ctx.cards_by_name[name]:
            if issubclass(inst.__class__, NotAbility):
                return inst
    return None
開發者ID:iScrE4m,項目名稱:pyArtifact,代碼行數:32,代碼來源:api_sync.py

示例7: fuzzy_fuzzywuzzy_list

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import WRatio [as 別名]
def fuzzy_fuzzywuzzy_list(fuzz, user_input, qa_list, collection, topn=50, max_candidates=5000):
    """Rank question/answer candidates against ``user_input`` by fuzzy score.

    Edit-distance based matching: comparatively slow, but unlike plain
    matching it can cope with inputs whose characters differ.

    The candidates are first narrowed by character overlap: for every entry
    of ``collection`` the number of characters of ``user_input`` contained
    in it is counted. Entries are then collected in descending order of the
    running-maximum counts seen while scanning ``collection`` (only counts
    that set a new maximum at some point are considered). The survivors are
    scored with ``fuzz.token_set_ratio``.

    :param fuzz: object providing the ``token_set_ratio`` scorer. Other
                 scorers named in the original notes: WRatio, QRatio,
                 token_sort_ratio, partial_token_set_ratio,
                 partial_token_sort_ratio, UWRatio, UQRatio.
    :param user_input: query text; iterated character by character.
    :param qa_list: candidates handed to ``process.extract``; indexed with
                    the positions of ``collection``, so both must be aligned.
    :param collection: iterable of containers supporting ``in`` for single
                       characters (presumably strings -- confirm with caller).
    :param topn: passed to ``process.extract`` as ``limit``.
    :param max_candidates: upper bound on the number of pre-selected
                           candidates (expected to be non-negative); once
                           reached, collection stops.
    :return: whatever ``process.extract`` returns for the pre-selected
             candidates.
    """
    chars = list(user_input)

    overlaps = []        # (index in collection, overlap count) for counts > 0
    record_counts = []   # strictly increasing running maxima of the counts
    best_so_far = 0
    for index, entry in enumerate(collection):
        shared = sum(1 for ch in chars if ch in entry)
        if shared > 0:
            overlaps.append((index, shared))
        if shared > best_so_far:
            record_counts.append(shared)
            best_so_far = shared

    candidates = []
    # Highest overlap first; stop as soon as enough candidates are gathered.
    for wanted in reversed(record_counts):
        candidates.extend(qa_list[index] for index, shared in overlaps if shared == wanted)
        if len(candidates) >= max_candidates:
            candidates = candidates[:max_candidates]
            break

    return process.extract(user_input, candidates, scorer=fuzz.token_set_ratio, limit=topn)
開發者ID:yongzhuo,項目名稱:nlp_xiaojiang,代碼行數:59,代碼來源:chatbot_fuzzy.py


注:本文中的fuzzywuzzy.fuzz.WRatio方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。