本文整理汇总了Python中sklearn.metrics.pairwise.cosine_distances方法的典型用法代码示例。如果您正苦于以下问题：Python pairwise.cosine_distances方法的具体用法？Python pairwise.cosine_distances怎么用？Python pairwise.cosine_distances使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.metrics.pairwise的用法示例。
在下文中一共展示了pairwise.cosine_distances方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_tsne_with_different_distance_metrics
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def test_tsne_with_different_distance_metrics():
    """Check that TSNE yields the same embedding for a named metric and for
    the matching precomputed distance matrix."""
    rng = check_random_state(0)
    n_features = 3
    n_embedding_dims = 2
    X = rng.randn(50, n_features).astype(np.float32)

    # Each metric name is paired with the function used to build the
    # precomputed distance matrix for the comparison run.
    cases = (
        ('manhattan', manhattan_distances),
        ('cosine', cosine_distances),
    )
    for metric_name, distance_fn in cases:
        embedding_from_metric = TSNE(
            metric=metric_name,
            n_components=n_embedding_dims,
            random_state=0,
        ).fit_transform(X)

        embedding_from_precomputed = TSNE(
            metric='precomputed',
            n_components=n_embedding_dims,
            random_state=0,
        ).fit_transform(distance_fn(X))

        assert_array_equal(embedding_from_metric, embedding_from_precomputed)
示例2: construct_H_with_KNN
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def construct_H_with_KNN(X, K_neigs=None, is_probH=False, m_prob=1):
    """
    Init multi-scale hypergraph Vertex-Edge matrix from original node feature matrix.

    :param X: N_object x feature_number; an input that is not 2-D is
        reshaped to ``(-1, X.shape[-1])`` first
    :param K_neigs: the number of neighbor expansion, either a single int or
        an iterable of ints (one per scale); defaults to ``[10]`` when omitted
    :param is_probH: prob Vertex-Edge matrix or binary
    :param m_prob: prob
    :return: N_object x N_hyperedge (``None`` when ``K_neigs`` is empty,
        because no scale is ever concatenated)
    """
    # A None sentinel stands in for the list default so that no mutable
    # object is shared between calls.
    if K_neigs is None:
        K_neigs = [10]
    elif isinstance(K_neigs, int):
        K_neigs = [K_neigs]

    if len(X.shape) != 2:
        X = X.reshape(-1, X.shape[-1])

    # The distance matrix does not depend on the scale, so it is computed
    # once and reused for every neighbour count.
    dis_mat = cos_dis(X)

    H = None
    for k_neig in K_neigs:
        H_tmp = construct_H_with_KNN_from_distance(dis_mat, k_neig, is_probH, m_prob)
        H = hyperedge_concat(H, H_tmp)
    return H
示例3: testCosineDistancesExecution
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def testCosineDistancesExecution(self):
    """Compare the tensor ``cosine_distances`` against ``sk_cosine_distances``
    for dense and sparse inputs, each with two chunk sizes."""
    dense_x = np.random.rand(25, 10)
    dense_y = np.random.rand(17, 10)
    sparse_x = sps.random(25, 10, density=0.5, format='csr', random_state=0)
    sparse_y = sps.random(17, 10, density=0.4, format='csr', random_state=1)

    def run_and_compare(tensor_expr, reference):
        # Execute the tensor expression and compare it with the reference.
        computed = self.executor.execute_tensor(tensor_expr, concat=True)[0]
        np.testing.assert_almost_equal(np.asarray(computed), reference)

    input_pairs = ((dense_x, dense_y), (sparse_x, sparse_y))
    for raw_x, raw_y in input_pairs:
        for chunk_size in (25, 6):
            x = mt.tensor(raw_x, chunk_size=chunk_size)
            y = mt.tensor(raw_y, chunk_size=chunk_size)

            # two-argument form: distances between rows of x and rows of y
            run_and_compare(cosine_distances(x, y),
                            sk_cosine_distances(raw_x, raw_y))
            # one-argument form: distances of x against itself
            run_and_compare(cosine_distances(x),
                            sk_cosine_distances(raw_x))
示例4: vec_cos_dist
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def vec_cos_dist(token_input, operation_input):
    """
    Evaluate a cosine-distance condition between a token vector and a reference vector.

    ``operation_input`` is scanned for the first matching operator among
    ``==``, ``>=``, ``<=``, ``!=``, ``<>``, ``<``, ``>``, ``=`` (in that
    order). The text before the operator is parsed as the reference vector
    and the text after it as the float threshold.

    :param token_input: string form of the token vector
    :param operation_input: string of the form ``<ref_vector><operator><value>``
    :return: the result of comparing ``cosine_distances(token, ref)`` with the
        threshold, or ``False`` when the input cannot be parsed, a
        ``ValueError`` is raised while evaluating, or the vector lengths differ
    """
    import operator

    # Multi-character operators are listed first so that e.g. ">=" is not
    # mistaken for ">" or "=".
    comparators = (
        ('==', operator.eq),
        ('>=', operator.ge),
        ('<=', operator.le),
        ('!=', operator.ne),
        ('<>', operator.ne),
        ('<', operator.lt),
        ('>', operator.gt),
        ('=', operator.eq),
    )

    ref_vector_string = None
    cond_value_string = None
    compare = None
    for opr_sign, func in comparators:
        if opr_sign in operation_input:
            parts = operation_input.split(opr_sign)
            ref_vector_string, cond_value_string = parts[0], parts[1]
            compare = func
            break

    if not (ref_vector_string and cond_value_string and compare):
        # TODO raise tokenregex error
        print('Problem with the operation input')
        # Return False explicitly so every failure path yields the same value.
        return False

    try:
        cond_value = float(cond_value_string)
        ref_vector = change_string_to_vector(ref_vector_string)
        token_vector = change_string_to_vector(token_input)
        if len(ref_vector) != len(token_vector):
            print('len of vectors does not match')
            return False
        return compare(cosine_distances(token_vector, ref_vector), cond_value)
    except ValueError:
        # TODO raise tokenregex error
        return False
示例5: test_cosine_distances
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def test_cosine_distances():
    """Check the pairwise cosine distances computation."""
    rng = np.random.RandomState(1337)
    vec = np.abs(rng.rand(910))

    def check_bounds(dist):
        # every entry must lie in [0, 2]
        assert np.all(dist >= 0.)
        assert np.all(dist <= 2.)

    # Two identical rows: the whole matrix, diagonal included, must be 0.
    dist_same = cosine_distances(np.vstack([vec, vec]))
    assert_array_almost_equal(dist_same, [[0., 0.], [0., 0.]])
    check_bounds(dist_same)
    assert_array_almost_equal(dist_same[np.diag_indices_from(dist_same)],
                              [0., 0.])

    # A row and its negation: diagonal 0, off-diagonal 2.
    dist_opposite = cosine_distances(np.vstack([vec, -vec]))
    check_bounds(dist_opposite)
    assert_array_almost_equal(dist_opposite, [[0., 2.], [2., 0.]])

    # Large random matrix: diagonal 0 and every entry within bounds.
    big = np.abs(rng.rand(1000, 5000))
    dist_big = cosine_distances(big)
    assert_array_almost_equal(dist_big[np.diag_indices_from(dist_big)],
                              [0.] * dist_big.shape[0])
    check_bounds(dist_big)
示例6: get_cosine_dist
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def get_cosine_dist(all_functions):
    """
    Return the pairwise cosine distances between the given functions.

    The functions are converted with ``funcs_to_sparse``, passed through a
    ``Normalizer``, and then compared with themselves.

    :param all_functions: input forwarded unchanged to ``funcs_to_sparse``
    :return: result of ``cosine_distances`` on the normalized matrix against itself
    """
    # Only the second value returned by funcs_to_sparse is used here.
    _, func_sparse = funcs_to_sparse(all_functions)
    func_sparse = Normalizer().fit(func_sparse).transform(func_sparse)
    return cosine_distances(func_sparse, func_sparse)
示例7: cosine_distance
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def cosine_distance(v1, v2):
    """Return ``pairwise.cosine_distances(v1, v2)`` divided by 2.

    Cosine similarity lies in [-1.0, 1.0], so the cosine distance lies in
    [0.0, 2.0]; halving it rescales the result to [0.0, 1.0].
    """
    raw_distance = pairwise.cosine_distances(v1, v2)
    return raw_distance / 2.0
#For ranks index starting from 0
示例8: _construct_edge_list_from_distance
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def _construct_edge_list_from_distance(X, k_neigh):
    """
    Construct edge_list (numpy array) from kNN distance for single modality.

    :param X -> numpy array: feature
    :param k_neigh -> int: # of neighbors
    :return: N * k_neigh numpy array
    """
    distance_matrix = torch.Tensor(cos_dis(X))
    # largest=False selects the k_neigh smallest distances along the last
    # axis; topk returns (values, indices) and only the indices are kept.
    nearest = distance_matrix.topk(k_neigh, dim=-1, largest=False)
    return nearest[1].numpy()
示例9: _distance
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def _distance(x1, x2):
    """Return ``cosine_distances(x1, x2)`` unchanged."""
    pairwise_distance = cosine_distances(x1, x2)
    return pairwise_distance
示例10: kNN_classify
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def kNN_classify(*, x, y):
    """
    For each row of ``x``, select the entry of ``y`` with the smallest cosine distance.

    :param x: n*d matrix
    :param y: m*d matrix
    :return: ``y`` indexed by the per-row argmin of the distance matrix,
        i.e. the selected entries of ``y`` themselves, not their positions
    """
    # NOTE(review): the function returns y[...] rather than the argmin
    # positions; confirm which of the two callers actually expect.
    pairwise_dist = cosine_distances(x, y)
    nearest = np.argmin(pairwise_dist, axis=1)
    return y[nearest]
示例11: main
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def main():
    """
    Print, for every question, the nearest explanations by TF-IDF cosine distance.

    Command line: ``[-n NEAREST] tables questions`` where ``tables`` is a
    directory walked recursively for explanation files and ``questions`` is a
    tab-separated file with at least the columns ``Question`` and
    ``questionID``. One ``questionID<TAB>uid`` line is printed per selected
    explanation, nearest first.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--nearest', type=int, default=10)
    parser.add_argument('tables')
    parser.add_argument('questions', type=argparse.FileType('r', encoding='UTF-8'))
    args = parser.parse_args()

    explanations = []
    for path, _, files in os.walk(args.tables):
        for file_name in files:
            explanations += read_explanations(os.path.join(path, file_name))
    if not explanations:
        warnings.warn('Empty explanations')

    # argparse opened the questions file; close it once it has been read.
    with args.questions as questions_file:
        df_q = pd.read_csv(questions_file, sep='\t', dtype=str)
    df_e = pd.DataFrame(explanations, columns=('uid', 'text'))

    # fit() discards any previous fit, so chaining two fit() calls would keep
    # only the second corpus. Fit once on both corpora instead.
    corpus = pd.concat([df_q['Question'], df_e['text']], ignore_index=True)
    vectorizer = TfidfVectorizer().fit(corpus)
    X_q = vectorizer.transform(df_q['Question'])
    X_e = vectorizer.transform(df_e['text'])
    X_dist = cosine_distances(X_q, X_e)

    # Row i of X_dist belongs to the i-th question and column j to the j-th
    # explanation, so plain positional arrays are enough for the lookup.
    question_ids = df_q['questionID'].values
    explanation_uids = df_e['uid'].values
    for i_question, distances in enumerate(X_dist):
        for i_explanation in np.argsort(distances)[:args.nearest]:
            print('{}\t{}'.format(question_ids[i_question], explanation_uids[i_explanation]))
示例12: cosine_distances_xy
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def cosine_distances_xy(x, y, to_similar=False):
    """
    Compute the cosine distance between two sequences.

    Whether ``scale_start`` should be applied depends on what the distance
    is meant to measure: results with and without ``scale_start`` are
    completely different, so decide based on the functional requirement and
    an understanding of the data.

    :param x: iterable sequence
    :param y: iterable sequence
    :param to_similar: whether to convert the output to a similarity value afterwards
    :return: float value
    """
    cos_dist = _distance_xy(cosine_distances, x, y)
    # cosine similarity is simply one minus the cosine distance
    return 1.0 - cos_dist if to_similar else cos_dist
示例13: test_cosine_distances
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def test_cosine_distances():
    """Check the pairwise cosine distances computation.

    Plain ``assert`` statements are used for the range checks, so the test
    does not depend on an ``assert_true`` helper being importable.
    """
    rng = np.random.RandomState(1337)
    x = np.abs(rng.rand(910))

    # Identical rows: every distance, including the diagonal, is 0.
    XA = np.vstack([x, x])
    D = cosine_distances(XA)
    assert_array_almost_equal(D, [[0., 0.], [0., 0.]])
    # check that all elements are in [0, 2]
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0., 0.])

    # A row and its negation: diagonal 0, off-diagonal 2.
    XB = np.vstack([x, -x])
    D2 = cosine_distances(XB)
    # check that all elements are in [0, 2]
    assert np.all(D2 >= 0.)
    assert np.all(D2 <= 2.)
    # check that diagonal elements are equal to 0 and non diagonal to 2
    assert_array_almost_equal(D2, [[0., 2.], [2., 0.]])

    # check large random matrix
    X = np.abs(rng.rand(1000, 5000))
    D = cosine_distances(X)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0.] * D.shape[0])
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
# Paired distances
示例14: produce_category_focused_pairplot
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def produce_category_focused_pairplot(corpus,
                                      category,
                                      category_projector=CategoryProjector(projector=TruncatedSVD(20)),
                                      category_projection=None,
                                      **kwargs):
    '''
    Produces a pair-plot which is focused on a single category.

    One plotted axis is the rank-based similarity of every category to
    `category`; the other is the projected component least correlated
    (smallest absolute Pearson r) with those similarity scores.

    NOTE(review): the default `category_projector` instance is created once,
    when the function is defined, and is shared by every call that omits it.

    :param corpus: TermDocMatrix
    :param category: str, name of a category in the corpus
    :param category_projector: CategoryProjector, a factor analysis of the category/feature vector
    :param category_projection: CategoryProjection, None by default. If present, overrides category projector
    :param kwargs: remaining kwargs for produce_pairplot
    :return: str, HTML
    '''
    category_index = corpus.get_categories().index(category)

    # Only project the corpus when no ready-made projection was supplied.
    if category_projection is None:
        projection = category_projector.project(corpus)
    else:
        projection = category_projection

    category_distances = cosine_distances(projection.get_category_embeddings().T)
    similarity_scores = -2 * (rankdata(category_distances[category_index]) - 0.5)

    components = projection.get_projection()
    # (|r|, index) pairs: min() picks the smallest absolute correlation and
    # breaks ties in favour of the lower component index.
    abs_correlations = [
        (np.abs(pearsonr(similarity_scores, components.T[i])[0]), i)
        for i in range(components.shape[1])
    ]
    least_correlated = min(abs_correlations)[1]

    projection_to_plot = np.array([components.T[least_correlated],
                                   similarity_scores]).T
    focused_projection = projection.use_alternate_projection(projection_to_plot)
    return produce_pairplot(
        corpus,
        initial_category=category,
        category_projection=focused_projection,
        category_focused=True,
        **kwargs
    )
示例15: latent_dirichlet_allocation
# 需要导入模块: from sklearn.metrics import pairwise [as 别名]
# 或者: from sklearn.metrics.pairwise import cosine_distances [as 别名]
def latent_dirichlet_allocation(headlines, bodies):
    """
    Compute the cosine distance between LDA topic vectors of each headline/body pair.

    An LDA model is fitted on term-frequency vectors of the bodies and then
    applied to the headlines, so headlines and bodies that share topics
    should end up with similar vectors.

    :param headlines: headline texts, fed to a ``TfidfVectorizer``
    :param bodies: body texts, fed to a ``TfidfVectorizer``; indexed in
        parallel with ``headlines``
    :return: list with one entry per headline, each entry being the list
        produced by flattening ``cosine_distances(head_vector, body_vector)``
    """
    # https://pypi.python.org/pypi/lda on bottom see suggestions like MALLET, hca
    # https://medium.com/@aneesha/topic-modeling-with-scikit-learn-e80d33668730
    # https://www.quora.com/What-are-the-best-features-to-put-into-Latent-Dirichlet-Allocation-LDA-for-topic-modeling-of-short-text

    def print_top_words(model, feature_names, n_top_words):
        # Debug aid: print the n_top_words highest-weighted words per topic.
        for topic_idx, topic in enumerate(model.components_):
            print("Topic #%d:" % topic_idx)
            print(", ".join([feature_names[i]
                             for i in topic.argsort()[:-n_top_words - 1:-1]]))
        print()

    def get_features(vocab):
        vectorizer_head = TfidfVectorizer(vocabulary=vocab, use_idf=False, norm='l2')
        X_train_head = vectorizer_head.fit_transform(headlines)

        vectorizer_body = TfidfVectorizer(vocabulary=vocab, use_idf=False, norm='l2')
        X_train_body = vectorizer_body.fit_transform(bodies)

        # calculates n most important topics of the bodies. Each topic contains all words but ordered by importance. The
        # more important topic words a body contains of a certain topic, the higher its value for this topic
        # NOTE(review): newer scikit-learn releases name this argument
        # `n_components` instead of `n_topics` - confirm the pinned version.
        lda_body = LatentDirichletAllocation(n_topics=25, learning_method='online', random_state=0, n_jobs=3)

        print("latent_dirichlet_allocation: fit and transform body")
        t0 = time()
        lda_body_matrix = lda_body.fit_transform(X_train_body)
        print("done in %0.3fs." % (time() - t0))

        print("latent_dirichlet_allocation: transform head")
        # use the lda trained for body topics on the headlines => if the headlines and bodies share topics
        # their vectors should be similar
        lda_head_matrix = lda_body.transform(X_train_head)

        # To inspect the topics while debugging:
        # print_top_words(lda_body, vectorizer_body.get_feature_names(), 100)

        print('latent_dirichlet_allocation: calculating cosine distance between head and body')
        # calculate cosine distance between the body and head
        distances = []
        for i, head_row in enumerate(lda_head_matrix):
            # cosine_distances is given 2-D input, so each row is reshaped to (1, n_topics)
            X_head_vector = np.array(head_row).reshape((1, -1))
            X_body_vector = np.array(lda_body_matrix[i]).reshape((1, -1))
            cos_dist = cosine_distances(X_head_vector, X_body_vector).flatten()
            distances.append(cos_dist.tolist())
        return distances

    vocab = create_word_ngram_vocabulary(ngram_range=(1, 1), max_features=5000, lemmatize=False, term_freq=True,
                                         norm='l2')
    return get_features(vocab)