本文整理汇总了Python中sklearn.metrics.pairwise.cosine_similarity方法的典型用法代码示例。如果您正苦于以下问题:Python pairwise.cosine_similarity方法的具体用法?Python pairwise.cosine_similarity怎么用?Python pairwise.cosine_similarity使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.metrics.pairwise的用法示例。
在下文中一共展示了pairwise.cosine_similarity方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: cos_sim
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def cos_sim(ind1,ind2=1999):
    """Print a mean-average-precision style retrieval score between two views.

    Loads ``test_v1.npy`` and ``test_v2.npy`` from the working directory and
    keeps the first ``ind1`` / ``ind2`` rows.  For every row of view 1 the rows
    of view 2 are ranked by cosine similarity and the seven best indices are
    scored: a retrieved index ``y`` is a hit for query ``i`` when
    ``i <= y < i + 5``, and a hit at zero-based rank ``r`` adds ``1 / (r + 1)``.

    The top-7 list and the score of each query are printed, followed by the
    average over the queries that were actually loaded.  Nothing is returned.

    NOTE(review): assumes both files hold 2-D ``(n_samples, n_features)``
    arrays, which is what ``cosine_similarity`` requires -- confirm against
    the data that produced them.

    :param ind1: number of leading rows of view 1 used as queries.
    :param ind2: number of leading rows of view 2 used as candidates.
    """
    top_k = 7          # how many best matches are scored per query
    relevant_span = 5  # indices i .. i+4 count as relevant for query i

    queries = np.load("test_v1.npy")[0:ind1]
    candidates = np.load("test_v2.npy")[0:ind2]
    n_queries = len(queries)

    # One vectorised call instead of one call per (query, candidate) pair;
    # it also feeds cosine_similarity the 2-D input it requires.
    if n_queries and len(candidates):
        similarities = cosine_similarity(queries, candidates)
    else:
        similarities = np.empty((n_queries, 0))

    MAP = 0
    for i, row in enumerate(similarities):
        # Descending similarity; ties resolved by the larger index first,
        # matching a reversed sort of (similarity, index) pairs.
        ranked = sorted(range(len(row)), key=lambda col: (row[col], col),
                        reverse=True)
        t = ranked[:top_k]
        AP = 0
        for rank, y in enumerate(t):
            if i <= y < i + relevant_span:
                # 1.0 forces true division on Python 2 as well.
                AP += 1.0 / (rank + 1)
        print(t)
        print(AP)
        MAP += AP

    # Average over the queries actually evaluated: the slice above may yield
    # fewer than ``ind1`` rows, and an empty query set must not divide by zero.
    print('MAP is : ', MAP / n_queries if n_queries else 0.0)
示例2: test_cosine_similarity
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def test_cosine_similarity():
    """Check the cosine kernel against a linear kernel on normalised data.

    For dense and CSR inputs, with and without an explicit ``Y``, the cosine
    kernel of the raw data must match the linear kernel of the same data
    after ``normalize`` has been applied to it.
    """
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    cases = [(X, None), (X, Y), (Xcsr, None), (Xcsr, Ycsr)]
    for left, right in cases:
        # Test that the cosine kernel is equal to a linear kernel when the
        # data has been previously normalized by L2-norm.
        cosine_kernel = pairwise_kernels(left, Y=right, metric="cosine")

        left_unit = normalize(left)
        right_unit = None if right is None else normalize(right)
        linear_kernel = pairwise_kernels(left_unit, Y=right_unit,
                                         metric="linear")

        assert_array_almost_equal(cosine_kernel, linear_kernel)
示例3: transform
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def transform(self, X: dt.Frame):
    """Return the cosine similarity of the two text columns of ``X``, row by row.

    The embedding named by ``self.embedding_name`` is (re)created on every
    call, wrapped in a ``DocumentPoolEmbeddings`` and used to embed the
    lower-cased text of column 0 and column 1 of each row.

    :param X: frame whose first two columns hold the texts to compare.
    :return: 1-D ``numpy`` array with one score per row; ``-99`` marks rows
        for which embedding or scoring failed.
    """
    X.replace([None, math.inf, -math.inf], self._repl_val)
    # Imported lazily so flair is only required when the transformer runs.
    from flair.embeddings import WordEmbeddings, BertEmbeddings, DocumentPoolEmbeddings, Sentence
    if self.embedding_name in ["glove", "en"]:
        self.embedding = WordEmbeddings(self.embedding_name)
    elif self.embedding_name in ["bert"]:
        self.embedding = BertEmbeddings()
    # NOTE(review): for any other embedding_name, self.embedding keeps whatever
    # value it already had (or is missing) -- confirm this is intended.
    self.doc_embedding = DocumentPoolEmbeddings([self.embedding])

    output = []
    X = X.to_pandas()
    text1_arr = X.iloc[:, 0].values
    text2_arr = X.iloc[:, 1].values
    for raw_text1, raw_text2 in zip(text1_arr, text2_arr):
        try:
            text1 = Sentence(str(raw_text1).lower())
            self.doc_embedding.embed(text1)
            text2 = Sentence(str(raw_text2).lower())
            self.doc_embedding.embed(text2)
            score = cosine_similarity(text1.get_embedding().reshape(1, -1),
                                      text2.get_embedding().reshape(1, -1))[0, 0]
            output.append(score)
        except Exception:
            # Best-effort scoring: a row that cannot be embedded gets the
            # sentinel.  ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            output.append(-99)
    return np.array(output)
示例4: compared
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def compared(request):
    """Compare two uploaded face images and report their cosine similarity.

    Expects a POST request carrying exactly two files, under the keys
    ``face1`` and ``face2``.  Both files are stored under
    ``root + "RestServer/upload/"`` with generated names, features are
    extracted with ``get_feature`` and compared with
    ``pw.cosine_similarity``.  The stored files are removed afterwards, also
    when feature extraction or the comparison raises.

    :param request: the incoming HTTP request.
    :return: ``HttpResponse`` whose body is a JSON-formatted string with the
        keys ``status``, ``data`` and ``msg`` (plus ``runtime`` on success).
    """
    if request.method != 'POST':
        return HttpResponse('{"status":false,"data":"","msg":"请求不合法"}')
    if len(request.FILES) != 2:
        return HttpResponse('{"status":false,"data":"","msg":"图片参数错误!"}')

    starttime = time.time()
    upload_dir = root + "RestServer/upload/"
    # Generated file names: random five-digit number followed by a timestamp.
    name1 = str(random.randint(10000, 99999)) + str(time.time())
    name2 = str(random.randint(10000, 99999)) + str(time.time())
    path1 = upload_dir + name1
    path2 = upload_dir + name2

    try:
        handle_uploaded_file(request.FILES['face1'], name1)
        handle_uploaded_file(request.FILES['face2'], name2)
        tz1 = get_feature(path1)
        tz2 = get_feature(path2)
        comparedValue = pw.cosine_similarity(tz1, tz2)[0][0]
    finally:
        # Always clean up the stored uploads, even on failure, so that failed
        # requests do not leave files behind.
        for path in (path1, path2):
            if os.path.exists(path):
                os.remove(path)

    Runtime = time.time() - starttime
    return HttpResponse('{"status":true,"data":"' + str(comparedValue) + '","msg":"成功","runtime": ' + str(Runtime) + ' }')
示例5: get_closest_docs
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def get_closest_docs(uri):
    """Return the five stored documents most similar to the one at ``uri``.

    The document is downloaded, its code part is separated from comments,
    normalised into a token list and turned into a vector with
    ``model.infer_vector`` (after reseeding ``model.random`` with 0).  That
    vector is compared by cosine similarity against every entry of the
    module-level ``vectors`` mapping.

    :param uri: address of the document to fetch.
    :return: list of ``(url, similarity)`` tuples, best match first, with the
        similarity rounded to two decimals.
    :raises ValueError: if the HTTP status code is not 200.
    """
    response = requests.get(uri)
    if response.status_code != 200:
        print("URL returned status code", response.status_code)
        raise ValueError('URL error')

    user_doc = response.text
    print("URI content length",len(user_doc))
    code, _ = separate_code_and_comments(user_doc,"user doc")
    tokens = normalize_text(code, remove_stop_words=False, only_letters=False, return_list=True)
    model.random.seed(0)
    query_vector = model.infer_vector(tokens)
    print("finding similar...")
    sys.stdout.flush()

    # Keep URLs and vectors in two parallel lists so that a column index of
    # the similarity row maps straight back to its URL.
    known_urls = list(vectors)
    known_vectors = [vectors[url] for url in known_urls]

    scores = cosine_similarity(query_vector.reshape(1, -1), known_vectors)
    # argsort of the negated row orders the scores from highest to lowest.
    best = (-scores[0]).argsort()[:5]
    return [(known_urls[pos], round(float(scores[0][pos]), 2)) for pos in best]
示例6: clustering
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def clustering(self, threshold):
    """Run Louvain clustering separately within each entity-type group.

    For every group in ``self.type_entity_dict`` the embeddings of the known
    words are compared pairwise; two words are linked when their cosine
    similarity (with 1 subtracted on the diagonal) exceeds ``threshold``, and
    the resulting graph is partitioned with ``community.best_partition``.
    Cluster ids are offset per group so they do not collide across groups.

    :param threshold: similarity above which two words are connected.
    :return: partition: dict {word_id: cluster_id}
    """
    print("Louvain clustering")
    partition = {}
    next_cluster_id = 0
    for entities in self.type_entity_dict.values():
        word_ids = [self.word2id[entity] for entity in entities
                    if entity in self.word2id]
        if not word_ids:
            continue

        embeddings = self.emb_mat[word_ids, :]
        # Subtract the identity so a word's similarity to itself is removed.
        similarity = cosine_similarity(embeddings) - np.eye(len(embeddings))
        graph = nx.from_numpy_array((similarity > threshold).astype(int))
        local_partition = community.best_partition(graph)

        # Graph nodes are positions in ``word_ids``; map them back to the
        # global word ids and shift the cluster ids past earlier groups.
        for node, word_id in enumerate(word_ids):
            partition[word_id] = local_partition[node] + next_cluster_id
        next_cluster_id += max(local_partition.values()) + 1
    return partition
示例7: cosine_sim
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def cosine_sim(x, y):
    """Return the cosine similarity of two vectors, or ``0.`` on failure.

    ``numpy`` arrays are reshaped to a single row so ``cosine_similarity``
    receives 2-D input; other inputs (e.g. sparse rows) are passed through
    unchanged.  If the computation raises, both inputs are printed for
    debugging and ``0.`` is returned.

    :param x: first vector.
    :param y: second vector.
    :return: the scalar similarity, or ``0.`` if it could not be computed.
    """
    try:
        if isinstance(x, np.ndarray):
            x = x.reshape(1, -1)  # get rid of the warning
        if isinstance(y, np.ndarray):
            y = y.reshape(1, -1)
        d = cosine_similarity(x, y)[0][0]
    except Exception:
        # Best-effort: report the offending inputs and fall back to zero.
        # ``Exception`` keeps KeyboardInterrupt / SystemExit propagating.
        print(x)
        print(y)
        d = 0.
    return d
# Copyright 2017 Cisco Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
示例8: _get_similarity_values
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def _get_similarity_values(self, q1_csc, q2_csc):
    """Compute five pairwise measures for each aligned row of two matrices.

    Rows of ``q1_csc`` and ``q2_csc`` are paired by position.  ``cs``, ``md``,
    ``ed``, ``jsc`` and ``minkowski_dis`` are defined outside this block.
    NOTE(review): from the result names they are presumably the cosine
    similarity, Manhattan, Euclidean, Jaccard and Minkowski measures --
    confirm against the file's imports.

    :param q1_csc: row-iterable sparse matrix for the first texts.
    :param q2_csc: row-iterable sparse matrix for the second texts.
    :return: five lists, one value per row pair, in the order cosine,
        Manhattan, Euclidean, Jaccard, Minkowski.
    """
    cosine_sim = []
    manhattan_dis = []
    eucledian_dis = []
    jaccard_dis = []
    minkowsk_dis = []
    for i, j in zip(q1_csc, q2_csc):
        cosine_sim.append(cs(i, j)[0][0])
        manhattan_dis.append(md(i, j)[0][0])
        eucledian_dis.append(ed(i, j)[0][0])

        # The remaining two measures are fed dense arrays.
        i_ = i.toarray()
        j_ = j.toarray()
        try:
            jaccard_dis.append(jsc(i_, j_))
        except Exception:
            # Best-effort: a failing Jaccard computation is recorded as 0.
            # ``Exception`` keeps KeyboardInterrupt / SystemExit propagating.
            jaccard_dis.append(0)
        minkowsk_dis.append(minkowski_dis.pairwise(i_, j_)[0][0])
    return cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis
示例9: _compute_sim
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def _compute_sim(self, R, k):
    """Return the similarity matrix of ``R.T`` with only the top ``k`` kept per row.

    :param R: matrix whose columns are compared with each other (the
        transpose is passed to ``cosine_similarity``).
    :param k: number of highest similarities to keep in each row.
    :return: square similarity matrix in which every entry outside a row's
        ``k`` largest values has been set to zero.
    """
    # Similarity between all the columns of R, i.e. between each ITEM.
    item_sim = cosine_similarity(R.T)

    # Sorting the negated matrix ranks each row in descending order; the
    # columns from position k onward are the ones that are NOT in the top k.
    dropped = np.argsort(-item_sim, axis=1)[:, k:]
    n_rows, n_dropped = dropped.shape

    if n_dropped:  # only if there are cols (k < n_items)
        # Pair every row index with its dropped column indices by
        # broadcasting, then zero those entries in place.
        rows = np.arange(n_rows)[:, np.newaxis]
        item_sim[rows, dropped] = 0.
    return item_sim
开发者ID:PacktPublishing,项目名称:Hands-on-Supervised-Machine-Learning-with-Python,代码行数:18,代码来源:itemitem.py
示例10: decision_function
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def decision_function(self, X):
    """Evaluate the cosine similarity between document-term matrix and X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_timestamps)
        Test samples.

    Returns
    -------
    X : array-like, shape (n_samples, n_classes)
        Cosine similarity between the document-term matrix and X.

    """
    required = ['vocabulary_', 'tfidf_', 'idf_', '_tfidf', 'classes_']
    check_is_fitted(self, required)

    validated = check_array(X)
    bags_of_words = self._bow.transform(validated)

    # Count the words with the vocabulary taken from the fitted ``_tfidf``
    # so the columns line up with those of ``self.tfidf_``.
    counter = CountVectorizer(vocabulary=self._tfidf.vocabulary_)
    term_counts = counter.transform(bags_of_words).toarray()
    return cosine_similarity(term_counts, self.tfidf_)
示例11: test_init
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def test_init():
    """Check how ``Spanning_Forest`` stores its metric, center and reduction."""
    # Default construction.
    default = Spanning_Forest()
    default_expected = (("metric", skm.manhattan_distances),
                        ("center", np.mean),
                        ("reduction", np.sum))
    for attribute, expected in default_expected:
        assert getattr(default, attribute) == expected

    # Explicit overrides are stored as given.
    change = Spanning_Forest(dissimilarity=skm.euclidean_distances,
                             center=np.median, reduction=np.max)
    changed_expected = (("metric", skm.euclidean_distances),
                        ("center", np.median),
                        ("reduction", np.max))
    for attribute, expected in changed_expected:
        assert getattr(change, attribute) == expected

    # An affinity is wrapped in a lambda; on the first two rows of ``data``
    # it must agree with the negative log of the affinity.
    sym = Spanning_Forest(affinity=skm.cosine_similarity)
    assert isinstance(sym.metric, types.LambdaType)
    expected_distance = -np.log(skm.cosine_similarity(data[:2,]))
    observed_distance = sym.metric(data[:2,])
    np.testing.assert_allclose(expected_distance, observed_distance)
示例12: save_model
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def save_model(model: Model, tokenizer: Tokenizer):
    """
    Saves the important parts of the model

    Writes, under ``OUTPUT_FOLDER``: the first weight array of every layer
    whose name contains ``_biases`` or ``_embeddings``, the tokenizer's two
    lookup tables (pickled), the sum of the central and context embeddings,
    and a similarity matrix derived from that sum.

    :param model: Keras model to save
    :param tokenizer: Keras Tokenizer to save
    """
    for layer in model.layers:
        if '_biases' in layer.name or '_embeddings' in layer.name:
            np.save(file=f'{OUTPUT_FOLDER}{layer.name}', arr=layer.get_weights()[0])

    # save tokenizer -- ``with`` guarantees the files are flushed and closed
    with open(f'{OUTPUT_FOLDER}{INDEX2WORD}', 'wb') as index2word_file:
        pickle.dump(obj=tokenizer.index_word, file=index2word_file)
    with open(f'{OUTPUT_FOLDER}{WORD2INDEX}', 'wb') as word2index_file:
        pickle.dump(obj=tokenizer.word_index, file=word2index_file)

    # save combined embeddings & correlation matrix
    # (reads back the two embedding arrays written by the loop above)
    agg_embeddings = np.load(f'{OUTPUT_FOLDER}{CENTRAL_EMBEDDINGS}.npy') + \
        np.load(f'{OUTPUT_FOLDER}{CONTEXT_EMBEDDINGS}.npy')
    np.save(file=f'{OUTPUT_FOLDER}{AGGREGATED_EMBEDDINGS}', arr=agg_embeddings)
    # NOTE(review): cosine_similarity is applied twice, so what is stored is
    # the similarity between rows of the similarity matrix rather than between
    # the embeddings themselves.  Kept as-is because consumers of the saved
    # file may depend on it -- confirm whether a single call was intended.
    np.save(file=f'{OUTPUT_FOLDER}{CORRELATION_MATRIX}', arr=cosine_similarity(cosine_similarity(agg_embeddings)))
示例13: evaluate
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def evaluate(self, category_projection):
    """Return the average within-topic cosine similarity of a projection.

    For every topic returned by ``category_projection.get_nearest_terms()``
    the vectors of its terms are compared pairwise and the mean similarity
    over all distinct term pairs is taken; the result is the mean of those
    per-topic values.

    A topic with fewer than two terms has no pairs and contributes ``0``.
    An empty topic mapping yields ``0.0``.

    :param category_projection: instance of a ``CategoryProjectionBase``
        subclass.
    :return: mean pairwise similarity averaged over all topics.
    :raises AssertionError: if ``category_projection`` has the wrong type.
    """
    assert isinstance(category_projection, CategoryProjectionBase)
    topics = category_projection.get_nearest_terms()
    if len(topics) == 0:
        return 0.0

    total_similarity = 0
    for topic in topics.values():
        topic_vectors = np.array([self.get_vector(term) for term in topic])
        n_terms = topic_vectors.shape[0]
        if n_terms < 2:
            # No term pairs to compare; avoids a division by zero below.
            continue
        sim_matrix = cosine_similarity(topic_vectors)
        # k=-1 takes the strict lower triangle: each unordered pair is counted
        # once and the diagonal of self-similarities (always 1) is left out.
        pair_similarity_sum = np.tril(sim_matrix, k=-1).sum()
        # Number of distinct pairs, n * (n - 1) / 2.  The parentheses matter:
        # ``s / (n**2 - n) / 2`` would divide by twice the off-diagonal count.
        n_pairs = (n_terms ** 2 - n_terms) / 2
        total_similarity += pair_similarity_sum / n_pairs
    return total_similarity / len(topics)
示例14: process
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def process(self,data):
    """Build the feature matrix for the claim/body pairs in ``data``.

    Claims and body texts are each turned into term-frequency features
    (bag-of-words followed by ``tfreq_vectorizer``) and into TF-IDF vectors.
    The cosine similarity of every claim's TF-IDF vector with the matching
    body's vector is stacked next to the two term-frequency blocks.

    :param data: collection understood by ``self.claims`` and ``self.texts``.
    :return: ``hstack`` of body term frequencies, claim term frequencies and
        the per-pair cosine similarities, in that order.
    """
    claim_term_freqs = self.tfreq_vectorizer.transform(
        self.bow_vectorizer.transform(self.claims(data)))
    claim_vectors = self.tfidf_vectorizer.transform(self.claims(data))

    bodies = self.texts(data)
    body_term_freqs = self.tfreq_vectorizer.transform(
        self.bow_vectorizer.transform(bodies))
    body_vectors = self.tfidf_vectorizer.transform(bodies)

    # One similarity row per (claim, body) pair, matched by position.
    pair_scores = []
    for claim_row, body_row in zip(claim_vectors, body_vectors):
        pair_scores.append(cosine_similarity(claim_row, body_row)[0])
    cosines = np.array(pair_scores)

    return hstack([body_term_freqs, claim_term_freqs, cosines])
示例15: process
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_similarity [as 别名]
def process(self, data):
    """Return the claim/body TF-IDF cosine similarity for each pair in ``data``.

    The term-frequency transforms of claims and bodies are still computed,
    but only the cosine similarities are returned.

    :param data: collection understood by ``self.claims`` and ``self.texts``.
    :return: ``numpy`` array with one similarity row per (claim, body) pair,
        matched by position.
    """
    # Term-frequency features: computed but not part of the return value.
    claim_counts = self.bow_vectorizer.transform(self.claims(data))
    self.tfreq_vectorizer.transform(claim_counts)
    claim_vectors = self.tfidf_vectorizer.transform(self.claims(data))

    bodies = self.texts(data)
    body_counts = self.bow_vectorizer.transform(bodies)
    self.tfreq_vectorizer.transform(body_counts)
    body_vectors = self.tfidf_vectorizer.transform(bodies)

    pair_scores = []
    for claim_row, body_row in zip(claim_vectors, body_vectors):
        pair_scores.append(cosine_similarity(claim_row, body_row)[0])
    return np.array(pair_scores)