当前位置: 首页>>代码示例>>Python>>正文


Python pairwise.cosine_similarity方法代码示例

本文整理汇总了 Python 中 sklearn.metrics.pairwise.cosine_similarity 方法的典型用法代码示例。如果您正苦于以下问题：Python pairwise.cosine_similarity 方法的具体用法是什么？Python pairwise.cosine_similarity 怎么用？Python pairwise.cosine_similarity 有哪些使用示例？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 sklearn.metrics.pairwise 的用法示例。


在下文中一共展示了pairwise.cosine_similarity方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: cos_sim

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def cos_sim(ind1,ind2=1999):
    """Print a rank-weighted retrieval score of view-1 rows against view-2 rows.

    Loads ``test_v1.npy`` and ``test_v2.npy``, keeps the first ``ind1`` entries
    of the former and the first ``ind2`` entries of the latter, and ranks every
    view-2 entry by its cosine similarity to each view-1 entry.  For query
    ``i`` a retrieved index counts as a hit when it lies in ``[i, i + 5)``;
    each hit among the 7 best-ranked indices adds ``1 / rank`` to the query's
    score.  The top-7 indices and the score are printed per query, followed by
    the sum of all scores divided by ``ind1``.

    Parameters
    ----------
    ind1 : int
        Number of leading entries of ``test_v1.npy`` used as queries.
    ind2 : int, optional
        Number of leading entries of ``test_v2.npy`` to rank against.

    Returns
    -------
    None
        Results are only printed.
    """
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    n_queries = len(view1)
    n_items = len(view2)

    if n_queries and n_items:
        # cosine_similarity needs 2-D input; flattening each entry to one row
        # also copes with arrays stored as (n, 1, dim).  A single call computes
        # the full similarity matrix instead of one sklearn call per pair.
        sims = cosine_similarity(view1.reshape(n_queries, -1),
                                 view2.reshape(n_items, -1))
    else:
        # Nothing to compare: keep one (empty) ranking per query.
        sims = np.empty((n_queries, 0))

    MAP = 0
    for i, row in enumerate(sims):
        # Best match first; ties are broken by the larger index, which is what
        # sorting (similarity, index) pairs and reversing them produces.
        ranked = sorted(range(len(row)), key=lambda k: (row[k], k), reverse=True)
        t = ranked[0:7]
        AP = 0
        for x, y in enumerate(t):
            if i <= y < i + 5:
                # 1.0 forces true division on Python 2 as well.
                AP += 1.0 / (x + 1)
        print(t)
        print(AP)
        MAP += AP
    # Avoid ZeroDivisionError when no queries were requested.
    print('MAP is : ', MAP / ind1 if ind1 else 0.0)
开发者ID:GauravBh1010tt,项目名称:DeepLearn,代码行数:26,代码来源:utility.py

示例2: test_cosine_similarity

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def test_cosine_similarity():
    # The cosine kernel must equal the linear kernel once the inputs have
    # been L2-normalized, for dense and sparse data, with and without Y.

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    cases = [(X, None), (X, Y), (Xcsr, None), (Xcsr, Ycsr)]
    for left, right in cases:
        cosine_kernel = pairwise_kernels(left, Y=right, metric="cosine")
        left_unit = normalize(left)
        right_unit = None if right is None else normalize(right)
        linear_kernel = pairwise_kernels(left_unit, Y=right_unit,
                                         metric="linear")
        assert_array_almost_equal(cosine_kernel, linear_kernel)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:21,代码来源:test_pairwise.py

示例3: transform

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def transform(self, X: dt.Frame):
    """Score each row by the cosine similarity of its two text columns.

    The first two columns of ``X`` are treated as text.  Each value is
    lower-cased, wrapped in a flair ``Sentence`` and embedded with a
    ``DocumentPoolEmbeddings`` built on the embedding selected by
    ``self.embedding_name`` ("glove"/"en" -> ``WordEmbeddings``,
    "bert" -> ``BertEmbeddings``).

    Parameters
    ----------
    X : dt.Frame
        Frame whose first two columns hold the texts to compare.  ``None`` and
        infinite values are replaced by ``self._repl_val`` first.

    Returns
    -------
    numpy.ndarray
        One score per row; ``-99`` marks rows whose embedding or similarity
        computation raised an exception.
    """
    X.replace([None, math.inf, -math.inf], self._repl_val)
    # Imported lazily so flair is only required when the transformer runs.
    from flair.embeddings import WordEmbeddings, BertEmbeddings, DocumentPoolEmbeddings, Sentence
    # NOTE(review): for any other embedding_name, self.embedding is left
    # untouched (or undefined on first use) - confirm the allowed values.
    if self.embedding_name in ["glove", "en"]:
        self.embedding = WordEmbeddings(self.embedding_name)
    elif self.embedding_name in ["bert"]:
        self.embedding = BertEmbeddings()
    self.doc_embedding = DocumentPoolEmbeddings([self.embedding])
    output = []
    X = X.to_pandas()
    text1_arr = X.iloc[:, 0].values
    text2_arr = X.iloc[:, 1].values
    for raw1, raw2 in zip(text1_arr, text2_arr):
        try:
            text1 = Sentence(str(raw1).lower())
            self.doc_embedding.embed(text1)
            text2 = Sentence(str(raw2).lower())
            self.doc_embedding.embed(text2)
            score = cosine_similarity(text1.get_embedding().reshape(1, -1),
                                      text2.get_embedding().reshape(1, -1))[0, 0]
            output.append(score)
        except Exception:
            # Best-effort per row: keep the sentinel, but let
            # KeyboardInterrupt / SystemExit propagate.
            output.append(-99)
    return np.array(output)
开发者ID:h2oai,项目名称:driverlessai-recipes,代码行数:27,代码来源:text_embedding_similarity_transformers.py

示例4: compared

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def compared(request):
    """Compare two uploaded face images and return their cosine similarity.

    Expects a POST request carrying exactly two uploaded files named
    ``face1`` and ``face2``.  Both files are stored temporarily, a feature
    vector is extracted from each with ``get_feature``, and the cosine
    similarity of the two vectors is returned in a JSON-formatted response
    together with the elapsed time.

    Parameters
    ----------
    request : HttpRequest
        Incoming request.

    Returns
    -------
    HttpResponse
        JSON text with ``status``, ``data``, ``msg`` and, on success,
        ``runtime`` (seconds).
    """
    if request.method != 'POST':
        return HttpResponse('{"status":false,"data":"","msg":"请求不合法"}')

    # Wrong field names used to surface as an unhandled key error; report them
    # like any other bad upload instead.
    if (len(request.FILES) != 2
            or 'face1' not in request.FILES
            or 'face2' not in request.FILES):
        return HttpResponse('{"status":false,"data":"","msg":"图片参数错误!"}')

    starttime = time.time()
    name1 = str(random.randint(10000, 99999)) + str(time.time())  # random file name
    name2 = str(random.randint(10000, 99999)) + str(time.time())
    # presumably handle_uploaded_file stores the upload under this directory,
    # since the features are read back from it - verify against the helper.
    path1 = root + "RestServer/upload/" + name1
    path2 = root + "RestServer/upload/" + name2

    try:
        handle_uploaded_file(request.FILES['face1'], name1)
        handle_uploaded_file(request.FILES['face2'], name2)

        tz1 = get_feature(path1)
        tz2 = get_feature(path2)

        comparedValue = pw.cosine_similarity(tz1, tz2)[0][0]
    finally:
        # Always remove the temporary uploads, even when saving or feature
        # extraction fails, so they do not pile up on disk.
        for path in (path1, path2):
            if os.path.exists(path):
                os.remove(path)

    endtime = time.time()
    Runtime = endtime - starttime
    return HttpResponse('{"status":true,"data":"' + str(comparedValue) + '","msg":"成功","runtime": ' + str(Runtime) + '  }')
开发者ID:Jinnrry,项目名称:FaceRecognition-RestApi,代码行数:27,代码来源:faceApi.py

示例5: get_closest_docs

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def get_closest_docs(uri):
    """Return the five stored documents most similar to the one at ``uri``.

    The document is downloaded, its code part is normalized and turned into a
    vector with ``model.infer_vector``, and that vector is compared by cosine
    similarity against every entry of the module-level ``vectors`` mapping.

    Parameters
    ----------
    uri : str
        Address of the document to fetch.

    Returns
    -------
    list of (url, float)
        Up to five ``(stored url, similarity rounded to 2 decimals)`` pairs,
        most similar first.

    Raises
    ------
    ValueError
        If the request does not return status code 200.
    """
    response = requests.get(uri)
    if response.status_code != 200:
        print("URL returned status code", response.status_code)
        raise ValueError('URL error')

    user_doc = response.text
    print("URI content length",len(user_doc))
    code, _ = separate_code_and_comments(user_doc,"user doc")
    normalized_code = normalize_text(code, remove_stop_words=False, only_letters=False, return_list=True)
    # Re-seed before inference so repeated calls infer the same vector.
    model.random.seed(0)
    user_vector = model.infer_vector(normalized_code)
    print("finding similar...")
    sys.stdout.flush()

    stored_urls = list(vectors)
    stored_vectors = [vectors[url] for url in stored_urls]
    similarities = cosine_similarity(user_vector.reshape(1, -1), stored_vectors)[0]
    # Negate so argsort yields the highest similarities first.
    best = (-similarities).argsort()[:5]
    return [(stored_urls[pos], round(float(similarities[pos]), 2)) for pos in best]
开发者ID:Lab41,项目名称:altair,代码行数:25,代码来源:app.py

示例6: clustering

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def clustering(self, threshold):
    """Cluster the known words separately for each entity type.

    For every entry of ``self.type_entity_dict`` the embeddings of its words
    are compared pairwise by cosine similarity, pairs above ``threshold`` are
    linked in a graph, and the graph is partitioned with
    ``community.best_partition``.  Cluster ids are offset per type so they
    never collide across types.

    :param threshold: similarity above which two words are linked
    :return: partition: dict {word_id: cluster_id}
    """
    print("Louvain clustering")
    partition = {}
    next_cluster_id = 0
    for ners in self.type_entity_dict.values():
        word_ids = [self.word2id[name] for name in ners if name in self.word2id]
        if not word_ids:
            continue
        sub_embeddings = self.emb_mat[word_ids, :]
        similarities = cosine_similarity(sub_embeddings)
        # Subtract the identity so a word is never linked to itself.
        similarities -= np.eye(len(sub_embeddings))
        adjacency = (similarities > threshold).astype(int)
        graph = nx.from_numpy_array(adjacency)
        local_partition = community.best_partition(graph)
        for local_id, word_id in enumerate(word_ids):
            partition[word_id] = local_partition[local_id] + next_cluster_id
        # Shift the id range so the next type starts after this one's ids.
        next_cluster_id += max(local_partition.values()) + 1
    return partition
开发者ID:blmoistawinde,项目名称:HarvestText,代码行数:25,代码来源:entity_discoverer.py

示例7: cosine_sim

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def cosine_sim(x, y):
    """Return the cosine similarity of ``x`` and ``y``, or 0.0 on failure.

    Plain ``numpy.ndarray`` inputs are reshaped to a single row before being
    handed to ``cosine_similarity``; other inputs are passed through as-is.
    If the computation raises, both inputs are printed and ``0.`` is returned.
    """
    try:
        # Exact type check kept on purpose: ndarray subclasses and sparse
        # matrices are passed through unchanged, as before.
        if type(x) is np.ndarray:
            x = x.reshape(1, -1)  # get rid of the warning
        if type(y) is np.ndarray:
            y = y.reshape(1, -1)
        d = cosine_similarity(x, y)[0][0]
    except Exception:
        # print() with one argument behaves the same on Python 2 and 3.
        print(x)
        print(y)
        d = 0.
    return d

 #   Copyright 2017 Cisco Systems, Inc.
 #  
 #   Licensed under the Apache License, Version 2.0 (the "License");
 #   you may not use this file except in compliance with the License.
 #   You may obtain a copy of the License at
 #  
 #     http://www.apache.org/licenses/LICENSE-2.0
 #  
 #   Unless required by applicable law or agreed to in writing, software
 #   distributed under the License is distributed on an "AS IS" BASIS,
 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #   See the License for the specific language governing permissions and
 #   limitations under the License. 
开发者ID:Cisco-Talos,项目名称:fnc-1,代码行数:27,代码来源:helpers.py

示例8: _get_similarity_values

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def _get_similarity_values(self, q1_csc, q2_csc):
    """Compute five pairwise measures for aligned rows of two matrices.

    Rows of ``q1_csc`` and ``q2_csc`` are paired in order.  Each pair is
    scored with the module-level helpers ``cs``, ``md``, ``ed``, ``jsc`` and
    ``minkowski_dis`` (presumably cosine similarity, Manhattan, Euclidean,
    Jaccard and Minkowski - verify against the imports).

    Parameters
    ----------
    q1_csc, q2_csc : iterable of sparse rows
        Each yielded row must support ``toarray()``.

    Returns
    -------
    tuple of five lists
        ``(cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis,
        minkowsk_dis)``, one value per row pair.  A Jaccard value of ``0``
        marks a pair for which ``jsc`` raised.
    """
    cosine_sim = []
    manhattan_dis = []
    eucledian_dis = []
    jaccard_dis = []
    minkowsk_dis = []

    for i, j in zip(q1_csc, q2_csc):
        cosine_sim.append(cs(i, j)[0][0])
        manhattan_dis.append(md(i, j)[0][0])
        eucledian_dis.append(ed(i, j)[0][0])

        # The remaining measures work on dense arrays.
        i_ = i.toarray()
        j_ = j.toarray()
        try:
            # jsc's result is stored as returned, unlike the other measures.
            jaccard_dis.append(jsc(i_, j_))
        except Exception:
            # Best-effort fallback; KeyboardInterrupt / SystemExit propagate.
            jaccard_dis.append(0)

        minkowsk_dis.append(minkowski_dis.pairwise(i_, j_)[0][0])
    return cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis
开发者ID:zake7749,项目名称:CIKM-AnalytiCup-2018,代码行数:27,代码来源:feature_engineering.py

示例9: _compute_sim

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def _compute_sim(self, R, k):
    """Return the item-item cosine similarity matrix, sparsified per row.

    ``R`` is transposed before the similarity is computed, so its columns are
    the things being compared.  In every row only the ``k`` largest entries
    are kept; all other entries are set to zero.
    """
    sim = cosine_similarity(R.T)

    # Rank each row in descending order (hence the negation) and take every
    # column index after the first k: these are the entries to discard.
    discard = np.argsort(-sim, axis=1)[:, k:]
    n_rows, n_discard = discard.shape

    if n_discard:  # nothing to zero out when k covers every column
        # A column vector of row numbers broadcasts against the discard
        # indices, addressing each (row, column) pair to clear.
        rows = np.arange(n_rows)[:, None]
        sim[rows, discard] = 0.

    return sim
开发者ID:PacktPublishing,项目名称:Hands-on-Supervised-Machine-Learning-with-Python,代码行数:18,代码来源:itemitem.py

示例10: decision_function

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def decision_function(self, X):
    """Evaluate the cosine similarity between document-term matrix and X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_timestamps)
        Test samples.

    Returns
    -------
    X : array-like, shape (n_samples, n_classes)
        Cosine similarity between the document-term matrix and X.

    """
    required = ['vocabulary_', 'tfidf_', 'idf_', '_tfidf', 'classes_']
    check_is_fitted(self, required)
    validated = check_array(X)
    bags_of_words = self._bow.transform(validated)
    # Count words using the vocabulary held by the fitted tf-idf transformer.
    counter = CountVectorizer(vocabulary=self._tfidf.vocabulary_)
    word_counts = counter.transform(bags_of_words).toarray()
    return cosine_similarity(word_counts, self.tfidf_)
开发者ID:johannfaouzi,项目名称:pyts,代码行数:23,代码来源:saxvsm.py

示例11: test_init

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def test_init():
    # Default construction.
    default = Spanning_Forest()
    for attribute, expected in (("metric", skm.manhattan_distances),
                                ("center", np.mean),
                                ("reduction", np.sum)):
        assert getattr(default, attribute) == expected

    # Explicit dissimilarity, center and reduction are stored as given.
    change = Spanning_Forest(dissimilarity=skm.euclidean_distances,
                             center=np.median, reduction=np.max)
    for attribute, expected in (("metric", skm.euclidean_distances),
                                ("center", np.median),
                                ("reduction", np.max)):
        assert getattr(change, attribute) == expected

    # An affinity is wrapped in a lambda returning -log(affinity).
    sym = Spanning_Forest(affinity=skm.cosine_similarity)
    assert isinstance(sym.metric, types.LambdaType)
    expected_distance = -np.log(skm.cosine_similarity(data[:2,]))
    actual_distance = sym.metric(data[:2,])
    np.testing.assert_allclose(expected_distance, actual_distance)
开发者ID:pysal,项目名称:region,代码行数:18,代码来源:test_skater.py

示例12: save_model

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def save_model(model: Model, tokenizer: Tokenizer):
    """
    Saves the important parts of the model

    Writes the first weight array of every layer whose name contains
    ``_biases`` or ``_embeddings``, both tokenizer lookup tables, the sum of
    the central and context embeddings, and a similarity matrix derived from
    that sum.  Everything is written under ``OUTPUT_FOLDER``.

    :param model: Keras model to save
    :param tokenizer: Keras Tokenizer to save
    """
    for layer in model.layers:
        if '_biases' in layer.name or '_embeddings' in layer.name:
            np.save(file=f'{OUTPUT_FOLDER}{layer.name}', arr=layer.get_weights()[0])

    # save tokenizer; context managers make sure the files are flushed and closed
    with open(f'{OUTPUT_FOLDER}{INDEX2WORD}', 'wb') as index_file:
        pickle.dump(obj=tokenizer.index_word, file=index_file)
    with open(f'{OUTPUT_FOLDER}{WORD2INDEX}', 'wb') as word_file:
        pickle.dump(obj=tokenizer.word_index, file=word_file)

    # save combined embeddings & correlation matrix
    # (the two embedding files are read back from disk, so they must have been
    # written already, e.g. by the layer loop above)
    agg_embeddings = np.load(f'{OUTPUT_FOLDER}{CENTRAL_EMBEDDINGS}.npy') + \
                     np.load(f'{OUTPUT_FOLDER}{CONTEXT_EMBEDDINGS}.npy')

    np.save(file=f'{OUTPUT_FOLDER}{AGGREGATED_EMBEDDINGS}', arr=agg_embeddings)
    # NOTE(review): cosine_similarity is applied twice (similarity of the
    # similarity rows) - kept as-is; confirm this is intended.
    np.save(file=f'{OUTPUT_FOLDER}{CORRELATION_MATRIX}', arr=cosine_similarity(cosine_similarity(agg_embeddings)))
开发者ID:erwtokritos,项目名称:keras-glove,代码行数:22,代码来源:save_utils.py

示例13: evaluate

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def evaluate(self, category_projection):
    """Return the average within-topic term similarity of a projection.

    For every topic returned by ``category_projection.get_nearest_terms()``
    the terms are mapped to vectors with ``self.get_vector`` and the mean
    cosine similarity over all distinct term pairs is computed.  The result
    is the average of those means over all topics.

    Parameters
    ----------
    category_projection : CategoryProjectionBase
        Projection whose nearest-term topics are scored.

    Returns
    -------
    float
        Average mean pairwise similarity.  Topics with fewer than two terms
        have no pairs and contribute ``0``; ``0.0`` is returned when there
        are no topics at all.
    """
    assert issubclass(type(category_projection), CategoryProjectionBase)
    topics = category_projection.get_nearest_terms()
    if not topics:
        return 0.0
    total_similarity = 0
    for topic in topics.values():
        topic_vectors = np.array([self.get_vector(term) for term in topic])
        n_terms = topic_vectors.shape[0]
        if n_terms < 2:
            # No term pairs to compare; also avoids a division by zero.
            continue
        sim_matrix = cosine_similarity(topic_vectors)
        # k=-1 keeps only the strict lower triangle: each unordered pair once,
        # without the diagonal of self-similarities.
        pair_similarity_sum = np.tril(sim_matrix, k=-1).sum()
        # Number of distinct pairs.  The previous `x / (n**2 - n) / 2` divided
        # by twice the entry count instead of by the pair count.
        n_pairs = (n_terms ** 2 - n_terms) / 2
        total_similarity += pair_similarity_sum / n_pairs
    return total_similarity/len(topics)
开发者ID:JasonKessler,项目名称:scattertext,代码行数:14,代码来源:CategoryProjectorEvaluator.py

示例14: process

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def process(self,data):
    """Build the feature matrix for the claim/body pairs in ``data``.

    Claims and body texts are turned into term-frequency and tf-idf
    representations with the fitted vectorizers on ``self``.  The result
    stacks the body term frequencies, the claim term frequencies and the
    per-pair cosine similarity of the tf-idf vectors.
    """
    claim_counts = self.bow_vectorizer.transform(self.claims(data))
    claim_tfs = self.tfreq_vectorizer.transform(claim_counts)
    claim_tfidf = self.tfidf_vectorizer.transform(self.claims(data))

    body_texts = self.texts(data)
    body_counts = self.bow_vectorizer.transform(body_texts)
    body_tfs = self.tfreq_vectorizer.transform(body_counts)
    body_tfidf = self.tfidf_vectorizer.transform(body_texts)

    # One similarity row per (claim, body) pair, taken in order.
    pair_similarities = []
    for claim_row, body_row in zip(claim_tfidf, body_tfidf):
        pair_similarities.append(cosine_similarity(claim_row, body_row)[0])
    cosines = np.array(pair_similarities)

    return hstack([body_tfs,claim_tfs,cosines])
开发者ID:sheffieldnlp,项目名称:fever-naacl-2018,代码行数:15,代码来源:fever_features.py

示例15: process

# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def process(self, data):
    """Return the tf-idf cosine similarity of every claim/body pair in ``data``.

    Claims and body texts are vectorized with the fitted vectorizers on
    ``self``; only the per-pair cosine similarities of the tf-idf vectors are
    returned.
    """
    claim_counts = self.bow_vectorizer.transform(self.claims(data))
    # Term frequencies are computed but not part of the returned value.
    claim_tfs = self.tfreq_vectorizer.transform(claim_counts)
    claim_tfidf = self.tfidf_vectorizer.transform(self.claims(data))

    body_texts = self.texts(data)
    body_counts = self.bow_vectorizer.transform(body_texts)
    body_tfs = self.tfreq_vectorizer.transform(body_counts)
    body_tfidf = self.tfidf_vectorizer.transform(body_texts)

    # One similarity row per (claim, body) pair, taken in order.
    pair_similarities = []
    for claim_row, body_row in zip(claim_tfidf, body_tfidf):
        pair_similarities.append(cosine_similarity(claim_row, body_row)[0])

    return np.array(pair_similarities)
开发者ID:sheffieldnlp,项目名称:fever-naacl-2018,代码行数:15,代码来源:process_tfidf_grid.py


注:本文中的sklearn.metrics.pairwise.cosine_similarity方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。