當前位置: 首頁>>代碼示例>>Python>>正文


Python fuzz.QRatio方法代碼示例

本文整理匯總了Python中fuzzywuzzy.fuzz.QRatio方法的典型用法代碼示例。如果您正苦於以下問題:Python fuzz.QRatio方法的具體用法?Python fuzz.QRatio怎麽用?Python fuzz.QRatio使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在fuzzywuzzy.fuzz的用法示例。


在下文中一共展示了fuzz.QRatio方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: extract_features

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import QRatio [as 別名]
def extract_features(df):
    df["question1"] = df["question1"].fillna("").apply(preprocess)
    df["question2"] = df["question2"].fillna("").apply(preprocess)

    print("token features...")
    token_features = df.apply(lambda x: get_token_features(x["question1"], x["question2"]), axis=1)
    df["cwc_min"]       = list(map(lambda x: x[0], token_features))
    df["cwc_max"]       = list(map(lambda x: x[1], token_features))
    df["csc_min"]       = list(map(lambda x: x[2], token_features))
    df["csc_max"]       = list(map(lambda x: x[3], token_features))
    df["ctc_min"]       = list(map(lambda x: x[4], token_features))
    df["ctc_max"]       = list(map(lambda x: x[5], token_features))
    df["last_word_eq"]  = list(map(lambda x: x[6], token_features))
    df["first_word_eq"] = list(map(lambda x: x[7], token_features))
    df["abs_len_diff"]  = list(map(lambda x: x[8], token_features))
    df["mean_len"]      = list(map(lambda x: x[9], token_features))

    print("fuzzy features..")
    df["token_set_ratio"]       = df.apply(lambda x: fuzz.token_set_ratio(x["question1"], x["question2"]), axis=1)
    df["token_sort_ratio"]      = df.apply(lambda x: fuzz.token_sort_ratio(x["question1"], x["question2"]), axis=1)
    df["fuzz_ratio"]            = df.apply(lambda x: fuzz.QRatio(x["question1"], x["question2"]), axis=1)
    df["fuzz_partial_ratio"]    = df.apply(lambda x: fuzz.partial_ratio(x["question1"], x["question2"]), axis=1)
    df["longest_substr_ratio"]  = df.apply(lambda x: get_longest_substr_ratio(x["question1"], x["question2"]), axis=1)
    return df 
開發者ID:aerdem4,項目名稱:kaggle-quora-dup,代碼行數:26,代碼來源:nlp_feature_extraction.py

示例2: _create_fuzzy_wuzzy_features

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import QRatio [as 別名]
def _create_fuzzy_wuzzy_features(self, df):
        df['fuzzy_ratio'] = df[['spn_1', 'spn_2']].apply(lambda row: fuzz.ratio(row['spn_1'], row['spn_2']), axis=1)
        df['fuzzy_set_ratio'] = df[['spn_1', 'spn_2']].apply(lambda row: fuzz.token_set_ratio(row['spn_1'], row['spn_2']), axis=1)
        df['fuzzy_partial_ratio'] = df[['spn_1', 'spn_2']].apply(lambda row: fuzz.partial_ratio(row['spn_1'], row['spn_2']), axis=1)
        df['fuzzy_token_sort_ratio'] = df[['spn_1', 'spn_2']].apply(lambda row: fuzz.token_sort_ratio(row['spn_1'], row['spn_2']), axis=1)
        df['fuzzy_qratio'] = df[['spn_1', 'spn_2']].apply(lambda row: fuzz.QRatio(row['spn_1'], row['spn_2']), axis=1)
        df['fuzzy_WRatio'] = df[['spn_1', 'spn_2']].apply(lambda row: fuzz.WRatio(row['spn_1'], row['spn_2']), axis=1)
   
        def _get_longest_substr_ratio(a, b):
            strs = list(distance.lcsubstrings(a, b))
            if len(strs) == 0:
                return 0
            else:
                return len(strs[0]) / (min(len(a), len(b)) + 1)

        df['longest_substr_ratio'] = df[['spn_1', 'spn_2']].apply(lambda row: _get_longest_substr_ratio(row['spn_1'], row['spn_2']), axis=1) 
開發者ID:zake7749,項目名稱:CIKM-AnalytiCup-2018,代碼行數:18,代碼來源:feature_engineering.py

示例3: extract_stat_features

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import QRatio [as 別名]
def extract_stat_features(self,df):
        df["title1_zh"] = df["title1_zh"].fillna("").apply(self.__preprocess__)
        df["title2_zh"] = df["title2_zh"].fillna("").apply(self.__preprocess__)

        print("token features...")
        token_features = df.apply(lambda x: self.__get_token_features__(x["title1_zh"], x["title2_zh"]), axis=1)
        df["cwc_min"]       = list(map(lambda x: x[0], token_features))
        df["cwc_max"]       = list(map(lambda x: x[1], token_features))
        df["csc_min"]       = list(map(lambda x: x[2], token_features))
        df["csc_max"]       = list(map(lambda x: x[3], token_features))
        df["ctc_min"]       = list(map(lambda x: x[4], token_features))
        df["ctc_max"]       = list(map(lambda x: x[5], token_features))
        df["last_word_eq"]  = list(map(lambda x: x[6], token_features))
        df["first_word_eq"] = list(map(lambda x: x[7], token_features))
        df["abs_len_diff"]  = list(map(lambda x: x[8], token_features))
        df["mean_len"]      = list(map(lambda x: x[9], token_features))

        print("fuzzy features..")
        df["token_set_ratio"]       = df.apply(lambda x: fuzz.token_set_ratio(x["title1_zh"], x["title2_zh"]), axis=1)
        df["token_sort_ratio"]      = df.apply(lambda x: fuzz.token_sort_ratio(x["title1_zh"], x["title2_zh"]), axis=1)
        df["fuzz_ratio"]            = df.apply(lambda x: fuzz.QRatio(x["title1_zh"], x["title2_zh"]), axis=1)
        df["fuzz_partial_ratio"]    = df.apply(lambda x: fuzz.partial_ratio(x["title1_zh"], x["title2_zh"]), axis=1)
        df["longest_substr_ratio"]  = df.apply(lambda x: self.__get_longest_substr_ratio__(x["title1_zh"], x["title2_zh"]), axis=1)
        
        if 'label' in df.columns.tolist():
            return df.drop(["title1_zh", "title2_zh", "label"], axis=1).values
        else:
            return df.drop(["title1_zh", "title2_zh"], axis=1).values 
開發者ID:lampts,項目名稱:wsdm19cup,代碼行數:30,代碼來源:make_handcrafted_33_features.py

示例4: extract_string_similarity_vector

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import QRatio [as 別名]
def extract_string_similarity_vector(instance: dict):
    """
    Returns a vector encoding a variety of lexical similarity metrics given a dictionary containing keys
    sentence_1,sentence_2
    :return: a vector containing similarity scores
    """

    s1 = instance['sentence_1']
    s2 = instance['sentence_2']

    return torch.tensor([
        normalized_levenshtein.similarity(s1,s2),
        jarowinkler.similarity(s1,s2),
        metric_lcs.distance(s1,s2),
        qgram2.distance(s1,s2),
        qgram3.distance(s1,s2),
        qgram4.distance(s1,s2),
        jaccard.similarity(s1,s2),
        cosine.similarity(s1,s2),
        fuzz.partial_token_set_ratio(s1,s2),
        fuzz.partial_token_sort_ratio(s1,s2),
        fuzz.token_set_ratio(s1,s2),
        fuzz.token_sort_ratio(s1,s2),
        fuzz.QRatio(s1,s2),
        fuzz.UQRatio(s1,s2),
        fuzz.UWRatio(s1,s2),
        fuzz.WRatio(s1,s2)
    ]) 
開發者ID:AndriyMulyar,項目名稱:semantic-text-similarity,代碼行數:30,代碼來源:lexical_similarity_metrics.py

示例5: fuzzy_fuzzywuzzy_list

# 需要導入模塊: from fuzzywuzzy import fuzz [as 別名]
# 或者: from fuzzywuzzy.fuzz import QRatio [as 別名]
def fuzzy_fuzzywuzzy_list(fuzz, user_input, qa_list, collection, topn=50):
    '''編輯距離,速度比較慢,比起匹配方法,能夠處理字符不一樣的問題'''

    start_time = time.time()
    # user_input_set = set([user_input_one for user_input_one in user_input])
    user_input_set = [user_input_one for user_input_one in user_input]


    same_char_list = []
    max_data = 0
    max_data_list = []
    count_collection_new_one = 0
    for collection_new_one in collection: # 獲取相同字符串多的問題
        count_same_char_one = len([x for x in user_input_set if x in collection_new_one])

        if count_same_char_one > 0:
            same_char_list.append((count_collection_new_one, count_same_char_one))
        if count_same_char_one > max_data:
            max_data_list.append(count_same_char_one)
            max_data = count_same_char_one
        count_collection_new_one += 1

    end_time1 = time.time()
    list_max_count = []
    len_max_data_list = len(max_data_list)
    for x in range(len_max_data_list):  # 獲取前20排名
        for k,l in same_char_list:
            if l == max_data_list[len_max_data_list -1 - x]:
                list_max_count.append(qa_list[k]) #問答重這裏取出來
        if len(list_max_count) >= 5000:
            list_max_count = list_max_count[0:5000]
            break

    end_time2 = time.time()

    # end_time1: 0.34090662002563477
    # end_time2: 0.4080846309661865

    # end_time1: 0.06417036056518555
    # end_time2: 0.08422374725341797

    # same_char_list.sort(key=lambda x: x[1], reverse=True)
    # if len(same_char_list) >= 20:
    #     same_char_list = same_char_list[0: 20]

    result =  process.extract(user_input, list_max_count, scorer=fuzz.token_set_ratio, limit=topn)
    end_time3 = time.time()

    # print('end_time1: ' + str(end_time1 - start_time))
    # print('end_time2: ' + str(end_time2 - start_time))
    # print('end_time3: ' + str(end_time3 - start_time))

    return result
    # [fuzz.WRatio, fuzz.QRatio,
    #  fuzz.token_set_ratio, fuzz.token_sort_ratio,
    #  fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio,
    #  fuzz.UWRatio, fuzz.UQRatio] 
開發者ID:yongzhuo,項目名稱:nlp_xiaojiang,代碼行數:59,代碼來源:chatbot_fuzzy.py


注:本文中的fuzzywuzzy.fuzz.QRatio方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。