This article collects typical usage examples of Python's scipy.spatial.distance.cosine method. If you are wondering what distance.cosine does, how to call it, or want to see it used in practice, the curated code examples below should help. You can also explore the module the method belongs to, scipy.spatial.distance.

Fifteen code examples of distance.cosine are shown below, ordered by popularity by default.
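For reference, scipy.spatial.distance.cosine(u, v) returns the cosine distance, defined as 1 minus the cosine similarity of u and v, which is why many of the examples below wrap the call as 1 - cosine(u, v) to recover a similarity. A minimal standalone sketch:

import numpy as np
from scipy.spatial.distance import cosine

u = np.array([1.0, 0.0, 1.0])
v = np.array([1.0, 1.0, 0.0])

dist = cosine(u, v)   # cosine distance, here 0.5
sim = 1 - dist        # cosine similarity, here 0.5

# the same similarity computed directly from the definition
print(sim, np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))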
Example 1: get_batch_cos_similarities
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def get_batch_cos_similarities(self, shorttext):
    """ Calculate the scores of the short text against each class label,
    where each score is the cosine similarity between the text's topic vector
    and the class's topic vector in the model.
    If neither :func:`~train` nor :func:`~loadmodel` was run, it will raise `ModelNotTrainedException`.
    :param shorttext: short text
    :return: dictionary of scores of the text against all classes
    :raise: ModelNotTrainedException
    :type shorttext: str
    :rtype: dict
    """
    if not self.trained:
        raise ModelNotTrainedException()
    simdict = {}
    for label in self.classtopicvecs:
        simdict[label] = 1 - cosine(self.classtopicvecs[label], self.retrieve_topicvec(shorttext))
    return simdict
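The method above depends on a trained model's classtopicvecs and retrieve_topicvec. A self-contained sketch of the same scoring idea, with hypothetical hand-made topic vectors standing in for the model:

import numpy as np
from scipy.spatial.distance import cosine

# hypothetical class topic vectors (stand-ins for self.classtopicvecs)
classtopicvecs = {
    'sports':   np.array([0.9, 0.1, 0.0]),
    'politics': np.array([0.1, 0.8, 0.1]),
}
# stand-in for retrieve_topicvec(shorttext)
query_topicvec = np.array([0.7, 0.2, 0.1])

simdict = {label: 1 - cosine(vec, query_topicvec)
           for label, vec in classtopicvecs.items()}
print(simdict)   # 'sports' scores highest for this query vector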
Example 2: get_embedding_similarities
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def get_embedding_similarities(embed, embed2=None, sim_measure="euclidean", num_top=None):
    n_nodes, dim = embed.shape
    if embed2 is None:
        embed2 = embed
    if num_top is not None:  # KD tree with only top similarities computed
        kd_sim = kd_align(embed, embed2, distance_metric=sim_measure, num_top=num_top)
        return kd_sim

    # All pairwise similarity computation
    if sim_measure == "cosine":
        similarity_matrix = sklearn.metrics.pairwise.cosine_similarity(embed, embed2)
    else:
        similarity_matrix = sklearn.metrics.pairwise.euclidean_distances(embed, embed2)
        similarity_matrix = np.exp(-similarity_matrix)
    return similarity_matrix

# Split embeddings in half (TODO: generalize to different numbers and sizes of networks)
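In the full pairwise branch, the sklearn call is interchangeable with scipy: sklearn.metrics.pairwise.cosine_similarity(X, Y) equals 1 - scipy.spatial.distance.cdist(X, Y, 'cosine'). A small sketch comparing the two on random embeddings:

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import cosine_similarity

rng = np.random.RandomState(0)
embed = rng.rand(10, 16)     # 10 node embeddings of dimension 16
embed2 = rng.rand(8, 16)

sim_sklearn = cosine_similarity(embed, embed2)    # shape (10, 8)
sim_scipy = 1 - cdist(embed, embed2, 'cosine')    # same values

print(np.allclose(sim_sklearn, sim_scipy))        # True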
Example 3: test_cosine_similarity
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def test_cosine_similarity():
    # Test the cosine_similarity.
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    for X_, Y_ in ((X, None), (X, Y),
                   (Xcsr, None), (Xcsr, Ycsr)):
        # Test that the cosine kernel is equal to a linear kernel when the data
        # has been previously normalized by its L2-norm.
        K1 = pairwise_kernels(X_, Y=Y_, metric="cosine")
        X_ = normalize(X_)
        if Y_ is not None:
            Y_ = normalize(Y_)
        K2 = pairwise_kernels(X_, Y=Y_, metric="linear")
        assert_array_almost_equal(K1, K2)
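The same invariant can be checked with scipy's cosine alone: after L2-normalizing two vectors, their dot product equals their cosine similarity. A minimal numpy-only sketch:

import numpy as np
from scipy.spatial.distance import cosine

rng = np.random.RandomState(0)
x = rng.random_sample(4)
y = rng.random_sample(4)

x_n = x / np.linalg.norm(x)    # L2-normalized copies
y_n = y / np.linalg.norm(y)

cos_sim = 1 - cosine(x, y)     # cosine similarity of the raw vectors
linear = np.dot(x_n, y_n)      # linear kernel of the normalized vectors

print(np.isclose(cos_sim, linear))   # True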
Example 4: run_synonym_finding
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def run_synonym_finding(self, embs, data):
    result = defaultdict(lambda: {})
    for word, suspicious_words in data.items():
        distances = []
        for susp_word, _ in suspicious_words:
            # 1 - cosine distance is the cosine similarity, so larger values mean more similar
            distances.append(1 - distance.cosine(embs.get_vector(susp_word), embs.get_vector(word)))
        guessed_word_index = distances.index(np.min(distances))
        results_for_word = []
        for dist_id, cosine_distance in enumerate(distances):
            d = {}
            d['suspicious_word'] = suspicious_words[dist_id][0]
            d['is_synonym'] = suspicious_words[dist_id][1]
            if dist_id == guessed_word_index:
                d['hit'] = True
            else:
                d['hit'] = False
            d['distance'] = cosine_distance
            results_for_word.append(d)
        result[word] = results_for_word
    return dict(result)
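Stripped of the result bookkeeping, the core step is scoring each candidate word against the target word by cosine similarity. A self-contained sketch with a hypothetical dict-based embedding standing in for embs.get_vector, where the most similar candidate is picked with argmax:

import numpy as np
from scipy.spatial.distance import cosine

# hypothetical toy embedding table
vectors = {
    'happy': np.array([0.9, 0.1, 0.2]),
    'glad':  np.array([0.8, 0.2, 0.1]),
    'table': np.array([0.0, 0.9, 0.7]),
}

target = 'happy'
candidates = ['glad', 'table']

similarities = [1 - cosine(vectors[w], vectors[target]) for w in candidates]
best = candidates[int(np.argmax(similarities))]
print(best)   # 'glad'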
Example 5: compare_words
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def compare_words(self, values):
    result = defaultdict(lambda: {})
    distances_to_other_words = defaultdict(lambda: [])
    for word, compared_word in product(values['words'], repeat=2):
        if word == compared_word:
            continue
        distance_between_words = round(1 - distance.cosine(word[1], compared_word[1]), 2)
        distances_to_other_words[word[0]].append([compared_word[0], distance_between_words])
    for word_id, key in enumerate(distances_to_other_words.keys()):
        result_dict = {}
        result_dict['distances'] = distances_to_other_words[key]
        result_dict['is_outlier'] = values['is_outlier'][word_id]
        average = self.compute_average(distances_to_other_words[key])
        result_dict['average'] = round(average, 2)
        if average <= self.threshold:
            result_dict['hit'] = False
        else:
            result_dict['hit'] = True
        result[key] = result_dict
    return dict(result)
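The essential computation, without the result dict: the average pairwise cosine similarity of each word to the others, with the lowest average flagged as the likely outlier. A rough sketch with made-up word vectors:

import numpy as np
from scipy.spatial.distance import cosine

words = [('cat', np.array([0.9, 0.1, 0.0])),
         ('dog', np.array([0.8, 0.2, 0.1])),
         ('car', np.array([0.1, 0.1, 0.9]))]

averages = {}
for name, vec in words:
    sims = [1 - cosine(vec, other_vec)
            for other_name, other_vec in words if other_name != name]
    averages[name] = round(float(np.mean(sims)), 2)

outlier = min(averages, key=averages.get)
print(averages, '-> likely outlier:', outlier)   # 'car'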
Example 6: single_query
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def single_query(self, query_id, query_feat, gallery_embeds, query_idx):
    query_dist = []
    for j, feat in enumerate(gallery_embeds):
        cosine_dist = cosine(
            feat.reshape(1, -1), query_feat.reshape(1, -1))
        query_dist.append(cosine_dist)
    query_dist = np.array(query_dist)
    order = np.argsort(query_dist)
    single_recall = dict()
    print(self.query_id2idx[query_id])
    for k in self.topks:
        retrieved_idxes = order[:k]
        tp = 0
        relevant_num = len(self.gallery_id2idx[query_id])
        for idx in retrieved_idxes:
            retrieved_id = self.gallery_dict[idx]
            if query_id == retrieved_id:
                tp += 1
        single_recall[k] = float(tp) / relevant_num
    return single_recall
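The ranking logic in isolation: sort gallery features by cosine distance to the query and compute recall@k against a known set of relevant indices. A minimal sketch with random features and hypothetical ground truth (recent SciPy versions are stricter about cosine() taking 1-D inputs, so the vectors are passed without reshaping):

import numpy as np
from scipy.spatial.distance import cosine

rng = np.random.RandomState(0)
gallery_embeds = rng.rand(20, 8)                       # 20 gallery features
query_feat = gallery_embeds[3] + 0.01 * rng.rand(8)    # query close to gallery item 3
relevant_idxes = {3}                                   # hypothetical ground truth

query_dist = np.array([cosine(feat, query_feat) for feat in gallery_embeds])
order = np.argsort(query_dist)                         # most similar first

for k in (1, 5, 10):
    retrieved = set(order[:k].tolist())
    recall = len(retrieved & relevant_idxes) / len(relevant_idxes)
    print(f'recall@{k} = {recall}')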
Example 7: show_retrieved_images
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def show_retrieved_images(self, query_feat, gallery_embeds):
    query_dist = []
    for i, feat in enumerate(gallery_embeds):
        cosine_dist = cosine(
            feat.reshape(1, -1), query_feat.reshape(1, -1))
        query_dist.append(cosine_dist)
    query_dist = np.array(query_dist)
    order = np.argsort(query_dist)
    for k in self.topks:
        retrieved_idxes = order[:k]
        for idx in retrieved_idxes:
            retrieved_id = self.gallery_dict[idx]
            print('retrieved id', retrieved_id)
Example 8: get_best_label
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def get_best_label(label_list, num):
    topic_ls = get_topic_lg(topic_list[num])
    val_dict = {}
    for item in label_list:
        # Extract letter trigrams for the label and normalize their counts to frequencies
        trigrams = [item[i:i + 3] for i in range(0, len(item) - 2)]
        label_cnt = Counter(trigrams)
        total = sum(label_cnt.values(), 0.0)
        for key in label_cnt:
            label_cnt[key] /= total
        tot_keys = list(set(topic_ls) | set(label_cnt))  # union of trigram keys (Python 3 compatible)
        listtopic = []
        listlabel = []
        for elem in tot_keys:
            if elem in topic_ls:
                listtopic.append(topic_ls[elem])
            else:
                listtopic.append(0.0)
            if elem in label_cnt:
                listlabel.append(label_cnt[elem])
            else:
                listlabel.append(0.0)
        val = 1 - cosine(np.array(listtopic), np.array(listlabel))  # cosine similarity
        val_dict[item] = val
    # Sort the labels by similarity, highest first
    list_sorted = sorted(val_dict.items(), key=lambda x: x[1], reverse=True)
    return [i[0] for i in list_sorted[:int(args.num_unsup_labels)]]
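The trigram-profile comparison on its own: build normalized letter-trigram frequency vectors for two strings over their shared key set and take the cosine similarity. A small sketch:

import numpy as np
from collections import Counter
from scipy.spatial.distance import cosine

def trigram_profile(text):
    counts = Counter(text[i:i + 3] for i in range(len(text) - 2))
    total = sum(counts.values(), 0.0)
    return {k: v / total for k, v in counts.items()}

def trigram_similarity(a, b):
    pa, pb = trigram_profile(a), trigram_profile(b)
    keys = sorted(set(pa) | set(pb))
    va = np.array([pa.get(k, 0.0) for k in keys])
    vb = np.array([pb.get(k, 0.0) for k in keys])
    return 1 - cosine(va, vb)

print(trigram_similarity('machine learning', 'deep learning'))   # higher: shared trigrams
print(trigram_similarity('machine learning', 'stock market'))    # lower: few shared trigrams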
Example 9: pw_score_cosine
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def pw_score_cosine(self, s1: ClassId, s2: ClassId) -> SimScore:
    """
    Cosine similarity of two subjects.

    Arguments
    ---------
    s1 : str
        class id of the first subject
    s2 : str
        class id of the second subject

    Return
    ------
    number
        A number between 0 and 1
    """
    df = self.assoc_df
    slice1 = df.loc[s1].values
    slice2 = df.loc[s2].values
    return 1 - cosine(slice1, slice2)
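Here assoc_df appears to be a pandas DataFrame with one row of association scores per class id, so the method simply takes the cosine similarity of two rows. A self-contained version with a made-up frame (the ids and values are hypothetical):

import pandas as pd
from scipy.spatial.distance import cosine

# hypothetical association matrix: rows are class ids, columns are features
assoc_df = pd.DataFrame(
    [[1.0, 0.0, 0.5],
     [0.9, 0.1, 0.4],
     [0.0, 1.0, 0.0]],
    index=['C:0001', 'C:0002', 'C:0003'],
    columns=['f1', 'f2', 'f3'])

def pw_score_cosine(df, s1, s2):
    return 1 - cosine(df.loc[s1].values, df.loc[s2].values)

print(pw_score_cosine(assoc_df, 'C:0001', 'C:0002'))   # close to 1
print(pw_score_cosine(assoc_df, 'C:0001', 'C:0003'))   # 0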
Example 10: score_cosine
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def score_cosine(self, term1, term2, **kwargs):
    """
    Compute a weighting score based on the cosine distance between the
    kernel density estimates of two terms.

    Args:
        term1 (str)
        term2 (str)

    Returns: float
    """
    t1_kde = self.kde(term1, **kwargs)
    t2_kde = self.kde(term2, **kwargs)
    return 1 - distance.cosine(t1_kde, t2_kde)
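A sketch of one plausible reading of this: self.kde presumably returns a density curve sampled on a fixed grid, so two such curves can be compared with cosine. Below, scipy.stats.gaussian_kde stands in for that step, and the term offsets are random stand-ins:

import numpy as np
from scipy.stats import gaussian_kde
from scipy.spatial import distance

rng = np.random.RandomState(0)
offsets1 = rng.normal(0.30, 0.05, size=200)   # hypothetical positions of term1 in a text
offsets2 = rng.normal(0.35, 0.05, size=200)   # term2 occurs in roughly the same region

grid = np.linspace(0, 1, 500)
t1_kde = gaussian_kde(offsets1)(grid)   # density curves sampled on a common grid
t2_kde = gaussian_kde(offsets2)(grid)

print(1 - distance.cosine(t1_kde, t2_kde))   # high: the two densities overlap heavily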
Example 11: test_compute_distance_matrix_loo_cosine
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def test_compute_distance_matrix_loo_cosine(self):
    if not tf.executing_eagerly():
        self.skipTest("Test requires eager mode.")
    np.random.seed(seed=self.random_seed)
    x_train = np.random.rand(self.train_samples, self.dim)
    d = utils.compute_distance_matrix_loo(x_train, measure="cosine")
    self.assertEqual(d.shape, (self.train_samples, self.train_samples))
    for i in range(self.train_samples):
        for j in range(self.train_samples):
            if i == j:
                self.assertEqual(float("inf"), d[i, j])
            else:
                d_ij = spdist.cosine(x_train[i, :], x_train[j, :])
                self.assertAlmostEqual(d_ij, d[i, j], places=5)
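The reference values this test checks can be written with scipy directly: a full pairwise cosine-distance matrix whose diagonal is set to infinity so a sample is never its own neighbour (leave-one-out). A sketch of that expected matrix:

import numpy as np
from scipy.spatial.distance import cdist

x_train = np.random.rand(6, 4)

# pairwise cosine distances, with +inf on the diagonal (leave-one-out)
d = cdist(x_train, x_train, 'cosine')
np.fill_diagonal(d, np.inf)

print(d.shape)    # (6, 6)
print(d[0, 0])    # inf
print(d[0, 1])    # equals scipy.spatial.distance.cosine(x_train[0], x_train[1])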
Example 12: is_word_embed_match
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def is_word_embed_match(self, mention_x: MentionDataLight, mention_y: MentionDataLight):
    """
    Check whether the cosine similarity between the two mentions' word
    embeddings is at or above the accepted threshold (0.65).

    Args:
        mention_x: MentionDataLight
        mention_y: MentionDataLight

    Returns:
        bool
    """
    match_result = False
    x_embed = self.embedding.get_head_feature_vector(mention_x)
    y_embed = self.embedding.get_head_feature_vector(mention_y)
    # make sure words are not 'unk/None/0'
    if x_embed is not None and y_embed is not None:
        dist = cos(x_embed, y_embed)
        if not math.isnan(dist):
            sim = 1 - dist
            if sim >= self.accepted_dist:
                match_result = True
    return match_result
Example 13: update
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def update(self, feature_vec):
    if len(self.clusters) < self.feature_len:
        self.clusters.append(feature_vec)
        self.clusters_sizes.append(1)
    elif sum(self.clusters_sizes) < 2 * self.feature_len:
        idx = random.randint(0, self.feature_len - 1)
        self.clusters_sizes[idx] += 1
        self.clusters[idx] += (feature_vec - self.clusters[idx]) / \
            self.clusters_sizes[idx]
    else:
        distances = cdist(feature_vec.reshape(1, -1),
                          np.array(self.clusters).reshape(len(self.clusters), -1), 'cosine')
        nearest_idx = np.argmin(distances)
        self.clusters_sizes[nearest_idx] += 1
        self.clusters[nearest_idx] += (feature_vec - self.clusters[nearest_idx]) / \
            self.clusters_sizes[nearest_idx]
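The final branch is an online nearest-centroid update: find the closest existing cluster by cosine distance, grow its count, and shift its mean toward the new vector with the incremental-mean step c += (x - c) / n. A standalone sketch of that step alone:

import numpy as np
from scipy.spatial.distance import cdist

clusters = [np.array([1.0, 0.0]), np.array([0.0, 1.0])]
clusters_sizes = [3, 3]

feature_vec = np.array([0.9, 0.2])   # new observation

distances = cdist(feature_vec.reshape(1, -1), np.array(clusters), 'cosine')
nearest_idx = int(np.argmin(distances))

clusters_sizes[nearest_idx] += 1
clusters[nearest_idx] += (feature_vec - clusters[nearest_idx]) / clusters_sizes[nearest_idx]

print(nearest_idx, clusters[nearest_idx])   # cluster 0 drifts slightly toward the new vector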
Example 14: _compute_mct_distance_matrix
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def _compute_mct_distance_matrix(self, all_tracks):
    distance_matrix = THE_BIGGEST_DISTANCE * np.eye(len(all_tracks), dtype=np.float32)
    for i, track1 in enumerate(all_tracks):
        for j, track2 in enumerate(all_tracks):
            if j >= i:
                break
            if track1.id != track2.id and track1.cam_id != track2.cam_id and \
                    len(track1) > self.time_window and len(track2) > self.time_window and \
                    track1.f_avg.is_valid() and track2.f_avg.is_valid():
                if not track1.f_orient.is_valid():
                    f_complex_dist = clusters_distance(track1.f_clust, track2.f_clust)
                else:
                    f_complex_dist = track1.f_orient.dist_to_other(track2.f_orient)
                f_avg_dist = 0.5 * cosine(track1.f_avg.get(), track2.f_avg.get())
                distance_matrix[i, j] = min(f_avg_dist, f_complex_dist)
            else:
                distance_matrix[i, j] = THE_BIGGEST_DISTANCE
    return distance_matrix + np.transpose(distance_matrix)
Example 15: test_transform
# Required import: from scipy.spatial import distance [as alias]
# Alternatively: from scipy.spatial.distance import cosine [as alias]
def test_transform(self):
    """ Test that training the model brings the document vector
    closer to the vectors for words in the sentence. """
    model = Document2Vec(w2v_file)
    model.workers = 1
    corpus = _generate_corpus(model)
    # vectors = model.fit_transform(corpus)
    vectors = model.transform(corpus)
    # Get the first word in the corpus
    word = next(corpus.__iter__()).words[0]
    sent0_vector = vectors[0, :]
    # note: scipy's cosine() returns the cosine distance, i.e. 1 - similarity
    sim = cosine(sent0_vector, model[word])
    self.assertGreater(sim, 0.15)