This article collects typical usage examples of the Python method sklearn.metrics.pairwise.manhattan_distances. If you are wondering what pairwise.manhattan_distances does, how to call it, or want worked examples, the curated snippets below should help; you can also read further about the containing module, sklearn.metrics.pairwise.
The following shows 12 code examples of pairwise.manhattan_distances, sorted by popularity by default.
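Before the examples, a minimal sketch of the basic call may be useful: manhattan_distances takes one or two 2-D arrays of shape (n_samples, n_features) and returns the matrix of pairwise L1 distances.

import numpy as np
from sklearn.metrics.pairwise import manhattan_distances

X = np.array([[1.0, 2.0],
              [3.0, 4.0]])

# Pairwise L1 distances of X against itself -> shape (2, 2)
print(manhattan_distances(X))
# [[0. 4.]
#  [4. 0.]]

# Against a second matrix Y -> shape (n_samples_X, n_samples_Y)
Y = np.array([[0.0, 0.0]])
print(manhattan_distances(X, Y))
# [[3.]
#  [7.]]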
Example 1: test_tsne_with_different_distance_metrics
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def test_tsne_with_different_distance_metrics():
    """Make sure that TSNE works for different distance metrics"""
    random_state = check_random_state(0)
    n_components_original = 3
    n_components_embedding = 2
    X = random_state.randn(50, n_components_original).astype(np.float32)
    metrics = ['manhattan', 'cosine']
    dist_funcs = [manhattan_distances, cosine_distances]
    for metric, dist_func in zip(metrics, dist_funcs):
        X_transformed_tsne = TSNE(
            metric=metric, n_components=n_components_embedding,
            random_state=0).fit_transform(X)
        X_transformed_tsne_precomputed = TSNE(
            metric='precomputed', n_components=n_components_embedding,
            random_state=0).fit_transform(dist_func(X))
        assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
Example 2: _get_similarity_values
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def _get_similarity_values(self, q1_csc, q2_csc):
    # Presumed aliases from the surrounding module: cs = cosine_similarity,
    # md = manhattan_distances, ed = euclidean_distances,
    # jsc = jaccard_similarity_score, minkowski_dis = a DistanceMetric
    # instance for the Minkowski metric
    cosine_sim = []
    manhattan_dis = []
    eucledian_dis = []
    jaccard_dis = []
    minkowsk_dis = []
    for i, j in zip(q1_csc, q2_csc):
        sim = cs(i, j)
        cosine_sim.append(sim[0][0])
        sim = md(i, j)
        manhattan_dis.append(sim[0][0])
        sim = ed(i, j)
        eucledian_dis.append(sim[0][0])
        i_ = i.toarray()
        j_ = j.toarray()
        try:
            sim = jsc(i_, j_)
            jaccard_dis.append(sim)
        except Exception:
            jaccard_dis.append(0)
        sim = minkowski_dis.pairwise(i_, j_)
        minkowsk_dis.append(sim[0][0])
    return cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis
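A note on the sim[0][0] indexing above: scikit-learn's pairwise functions always return a 2-D matrix, so comparing one row vector against another yields a 1x1 array whose single entry is the distance. A minimal sketch (the single-row vectors here stand in for one row each of q1_csc and q2_csc):

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import manhattan_distances

# Two single-row sparse vectors, like one row each of q1_csc and q2_csc
a = csr_matrix(np.array([[1.0, 0.0, 2.0]]))
b = csr_matrix(np.array([[0.0, 1.0, 2.0]]))

d = manhattan_distances(a, b)  # 2-D result with shape (1, 1)
print(d[0][0])                 # 2.0 = |1-0| + |0-1| + |2-2|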
Example 3: test_init
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def test_init():
    default = Spanning_Forest()
    assert default.metric == skm.manhattan_distances
    assert default.center == np.mean
    assert default.reduction == np.sum
    change = Spanning_Forest(dissimilarity=skm.euclidean_distances,
                             center=np.median, reduction=np.max)
    assert change.metric == skm.euclidean_distances
    assert change.center == np.median
    assert change.reduction == np.max
    sym = Spanning_Forest(affinity=skm.cosine_similarity)
    assert isinstance(sym.metric, types.LambdaType)
    test_distance = -np.log(skm.cosine_similarity(data[:2,]))
    comparator = sym.metric(data[:2,])
    np.testing.assert_allclose(test_distance, comparator)
Example 4: recall_at_kappa_leave_one_out
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def recall_at_kappa_leave_one_out(test_emb, test_id, kappa, dist):
    unique_ids, unique_counts = np.unique(test_id, return_counts=True)
    unique_ids = unique_ids[unique_counts >= 2]
    good_test_indices = np.in1d(test_id, unique_ids)
    valid_test_embs = test_emb[good_test_indices]
    valid_test_ids = test_id[good_test_indices]
    n_correct_at_k = np.zeros(kappa)
    if dist == 'cos':
        distances = find_cos_distances(valid_test_embs, test_emb)
    elif dist == 'l2':
        distances = find_l2_distances(valid_test_embs, test_emb)
    elif dist == 'l1':
        distances = manhattan_distances(valid_test_embs, test_emb)
    elif dist == 'max_l1' or dist == 'max_l2':
        distances = max_distances(valid_test_embs, test_emb, dist)
    for idx, valid_test_id in enumerate(valid_test_ids):
        # skip index 0: the nearest neighbor is the query itself
        k_sorted_indices = np.argsort(distances[idx])[1:]
        first_correct_position = np.where(test_id[k_sorted_indices] == valid_test_id)[0][0]
        if first_correct_position < kappa:
            n_correct_at_k[first_correct_position:] += 1
    return 1. * n_correct_at_k / len(valid_test_ids)
Example 5: recall_at_kappa_support_query
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def recall_at_kappa_support_query(x_support, y_support, x_query, y_query, kappa, dist):
    n_correct_at_k = np.zeros(kappa)
    if dist == 'cos':
        distances = find_cos_distances(x_query, x_support)
    elif dist == 'l2':
        distances = find_l2_distances(x_query, x_support)
    elif dist == 'l1':
        distances = manhattan_distances(x_query, x_support)
    elif dist == 'max_l1' or dist == 'max_l2':
        distances = max_distances(x_query, x_support, dist)
    for idx, valid_test_id in enumerate(y_query):
        k_sorted_indices = np.argsort(distances[idx])
        first_correct_position = np.where(y_support[k_sorted_indices] == valid_test_id)[0][0]
        if first_correct_position < kappa:
            n_correct_at_k[first_correct_position:] += 1
    return 1. * n_correct_at_k / len(y_query)
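For illustration, a hypothetical toy call of the function above using the 'l1' branch, which only requires manhattan_distances (the array shapes and labels here are made up):

import numpy as np

# Hypothetical toy data: 3-D embeddings for a 4-item support set
# and a 2-item query set, with integer class labels
x_support = np.random.rand(4, 3)
y_support = np.array([0, 1, 0, 1])
x_query = np.random.rand(2, 3)
y_query = np.array([0, 1])

# recall@k for k = 1..3 using the L1 distance branch defined above
print(recall_at_kappa_support_query(x_support, y_support,
                                    x_query, y_query, kappa=3, dist='l1'))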
Example 6: execute
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def execute(cls, ctx, op):
    (x, y), device_id, xp = as_same_device(
        [ctx[inp.key] for inp in op.inputs], device=op.device, ret_extra=True)
    out = op.outputs[0]

    with device(device_id):
        if sklearn_manhattan_distances is not None:
            ctx[out.key] = sklearn_manhattan_distances(
                x, y, sum_over_features=op.sum_over_features)
        else:  # pragma: no cover
            # we cannot support sparse
            raise NotImplementedError('cannot support calculate manhattan '
                                      'distances on GPU')
Example 7: vec_man_dist
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def vec_man_dist(token_input, operation_input):
    operation_string = None
    ref_vector_string = None
    cond_value_string = None
    for opr_sign in ['==', '>=', '<=', '!=', '<>', '<', '>', '=']:
        if opr_sign in operation_input:
            ref_vector_string = operation_input.split(opr_sign)[0]
            operation_string = opr_sign
            cond_value_string = operation_input.split(opr_sign)[1]
            break
    if ref_vector_string and cond_value_string and operation_string:
        try:
            cond_value = float(cond_value_string)
            ref_vector = change_string_to_vector(ref_vector_string)
            token_vector = change_string_to_vector(token_input)
            print(manhattan_distances(token_vector, ref_vector))
            if len(ref_vector) != len(token_vector):
                print('len of vectors does not match')
                return False
            # note: manhattan_distances returns a (1, 1) array, so each
            # comparison below yields a (1, 1) boolean array, not a plain bool
            if operation_string == "=" or operation_string == "==":
                return manhattan_distances(token_vector, ref_vector) == cond_value
            elif operation_string == "<":
                return manhattan_distances(token_vector, ref_vector) < cond_value
            elif operation_string == ">":
                return manhattan_distances(token_vector, ref_vector) > cond_value
            elif operation_string == ">=":
                return manhattan_distances(token_vector, ref_vector) >= cond_value
            elif operation_string == "<=":
                return manhattan_distances(token_vector, ref_vector) <= cond_value
            elif operation_string == "!=" or operation_string == "<>":
                return manhattan_distances(token_vector, ref_vector) != cond_value
            else:
                return False
        except ValueError:
            # TODO raise tokenregex error
            return False
    else:
        # TODO raise tokenregex error
        print('Problem with the operation input')
Example 8: similarity
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def similarity(self, query, type):
    assert self.corpus is not None, "self.corpus can't be None"
    ret = []
    if type == 'cosine':
        query = self.get_vector(query)
        for item in self.corpus_vec:
            sim = cosine_similarity(item, query)
            ret.append(sim[0][0])
    elif type == 'manhattan':
        query = self.get_vector(query)
        for item in self.corpus_vec:
            sim = manhattan_distances(item, query)
            ret.append(sim[0][0])
    elif type == 'euclidean':
        query = self.get_vector(query)
        for item in self.corpus_vec:
            sim = euclidean_distances(item, query)
            ret.append(sim[0][0])
    # elif type == 'jaccard':
    #     query = self.get_vector(query)
    #     for item in self.corpus_vec:
    #         sim = jaccard_similarity_score(item, query)
    #         ret.append(sim)
    elif type == 'bm25':
        query = query.split()
        ret = self.bm25_model.get_scores(query)
    else:
        raise ValueError('similarity type error: %s' % type)
    return ret
Example 9: manhattan_distances_xy
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def manhattan_distances_xy(x, y, to_similar=False):
    """
    Compute the distance between two sequences using the Manhattan distance
    (L1 norm). Note that you need to understand the purpose of the measurement
    to decide whether to apply scale_start: the results with and without
    scale_start are completely different, so choose based on the functional
    requirements and your understanding of the data.
    :param x: an iterable sequence
    :param y: an iterable sequence
    :param to_similar: whether to convert the output distance to a similarity value
    :return: a float value
    """
    distance = _distance_xy(manhattan_distances, x, y)
    if to_similar:
        # The similarity values converted from l1 and l2 distances are not
        # intuitive on their own; they are only useful for comparison
        distance = 1.0 / (1.0 + distance)
    return distance
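The 1.0 / (1.0 + distance) conversion maps a distance in [0, inf) to a similarity in (0, 1], with distance 0 mapping to similarity 1. A minimal sketch of the same conversion using manhattan_distances directly (_distance_xy is internal to the source library, so it is bypassed here):

import numpy as np
from sklearn.metrics.pairwise import manhattan_distances

x = np.array([[1.0, 2.0, 3.0]])
y = np.array([[2.0, 2.0, 5.0]])

distance = manhattan_distances(x, y)[0][0]  # 3.0 = |1-2| + |2-2| + |3-5|
similarity = 1.0 / (1.0 + distance)         # 0.25; identical inputs give 1.0
print(distance, similarity)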
Example 10: __init__
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def __init__(self,
             dissimilarity=skm.manhattan_distances,
             affinity=None,
             reduction=np.sum,
             center=np.mean):
    """
    Initialize the SKATER algorithm.

    dissimilarity : a callable distance metric
    affinity      : a callable affinity metric between 0 and 1.
                    Will be inverted to provide a dissimilarity metric.
    reduction     : the reduction applied over all clusters
                    to provide the map score.
    center        : way to compute the center of each region in attribute space

    NOTE: Optimization occurs with respect to a *dissimilarity* metric, so the
    reduction should yield some kind of score where larger values are *less
    desirable* than smaller values. Typically, this means we use addition.
    """
    if affinity is not None:
        # invert the (0, 1] affinity to an unbounded positive dissimilarity
        metric = lambda x: -np.log(affinity(x))
    else:
        metric = dissimilarity
    self.metric = metric
    self.reduction = reduction
    self.center = center
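The -np.log inversion maps an affinity of 1.0 (maximally similar) to a dissimilarity of 0.0 and pushes affinities near 0 toward large dissimilarities; Example 3's test_init asserts exactly this identity. A small sketch of that behavior:

import numpy as np
from sklearn.metrics import pairwise as skm

data = np.array([[1.0, 0.0], [0.8, 0.6], [0.6, 0.8]])

affinity = skm.cosine_similarity
metric = lambda x: -np.log(affinity(x))

# Self-similarity 1.0 maps to dissimilarity 0.0 on the diagonal;
# less similar rows map to larger positive values
print(metric(data))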
Example 11: testManhattanDistancesExecution
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def testManhattanDistancesExecution(self):
    raw_x = np.random.rand(20, 5)
    raw_y = np.random.rand(21, 5)

    x1 = mt.tensor(raw_x, chunk_size=30)
    y1 = mt.tensor(raw_y, chunk_size=30)
    x2 = mt.tensor(raw_x, chunk_size=11)
    y2 = mt.tensor(raw_y, chunk_size=12)

    raw_sparse_x = sps.random(20, 5, density=0.4, format='csr', random_state=0)
    raw_sparse_y = sps.random(21, 5, density=0.3, format='csr', random_state=0)

    x3 = mt.tensor(raw_sparse_x, chunk_size=30)
    y3 = mt.tensor(raw_sparse_y, chunk_size=30)
    x4 = mt.tensor(raw_sparse_x, chunk_size=11)
    y4 = mt.tensor(raw_sparse_y, chunk_size=12)

    for x, y, is_sparse in [(x1, y1, False),
                            (x2, y2, False),
                            (x3, y3, True),
                            (x4, y4, True)]:
        if is_sparse:
            rx, ry = raw_sparse_x, raw_sparse_y
        else:
            rx, ry = raw_x, raw_y

        sv = [True, False] if not is_sparse else [True]
        for sum_over_features in sv:
            d = manhattan_distances(x, y, sum_over_features)
            result = self.executor.execute_tensor(d, concat=True)[0]
            expected = sk_manhattan_distances(rx, ry, sum_over_features)
            np.testing.assert_almost_equal(result, expected)

            d = manhattan_distances(x, sum_over_features=sum_over_features)
            result = self.executor.execute_tensor(d, concat=True)[0]
            expected = sk_manhattan_distances(rx, sum_over_features=sum_over_features)
            np.testing.assert_almost_equal(result, expected)
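For reference, sum_over_features controls whether scikit-learn's manhattan_distances returns one summed L1 distance per pair (the default) or the per-feature absolute differences; note that this parameter has been deprecated in recent scikit-learn releases, so the sketch below assumes an older version:

import numpy as np
from sklearn.metrics.pairwise import manhattan_distances

X = np.array([[1.0, 2.0]])
Y = np.array([[4.0, 6.0]])

# Default: one summed L1 distance per (x, y) pair -> shape (1, 1)
print(manhattan_distances(X, Y))                           # [[7.]]

# Component-wise: |x - y| per feature -> one row per (x, y) pair
print(manhattan_distances(X, Y, sum_over_features=False))  # [[3. 4.]]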
Example 12: manhattan_distance_matrix
# Required import: from sklearn.metrics import pairwise [as alias]
# Or: from sklearn.metrics.pairwise import manhattan_distances [as alias]
def manhattan_distance_matrix(df, scale_end=True, to_similar=False):
    """
    Manhattan distance (L1 norm): the main difference from manhattan_distances_xy
    is that this does not compute a pairwise distance between two sequences; it
    takes a single matrix as input, which must be a pd.DataFrame, an np.array,
    or a nested iterable sequence [[], []]. Note that you need to understand the
    purpose of the measurement to decide whether to apply scale_start: the
    results with and without scale_start are completely different, so choose
    based on the functional requirements and your understanding of the data.

    eg:
        input:
                      tsla    bidu   noah    sfun    goog    vips   aapl
        2014-07-25  223.57  226.50  15.32  12.110  589.02  21.349  97.67
        2014-07-28  224.82  225.80  16.13  12.450  590.60  21.548  99.02
        2014-07-29  225.01  220.00  16.75  12.220  585.61  21.190  98.38
        ...            ...     ...    ...     ...     ...     ...    ...
        2016-07-22  222.27  160.88  25.50   4.850  742.74  13.510  98.66
        2016-07-25  230.01  160.25  25.57   4.790  739.77  13.390  97.34
        2016-07-26  225.93  163.09  24.75   4.945  740.92  13.655  97.76

        ABuStatsUtil.manhattan_distance_matrix(cc, scale_start=True)

        output:
                tsla    bidu    noah    sfun    goog    vips    aapl
        tsla  0.0000  0.3698  0.6452  0.7917  0.4670  0.7426  0.3198
        bidu  0.3698  0.0000  0.5969  0.7056  0.6495  0.5822  0.4000
        noah  0.6452  0.5969  0.0000  0.7422  0.7441  0.6913  0.6896
        sfun  0.7917  0.7056  0.7422  0.0000  0.9236  0.4489  1.0000
        goog  0.4670  0.6495  0.7441  0.9236  0.0000  0.8925  0.5134
        vips  0.7426  0.5822  0.6913  0.4489  0.8925  0.0000  0.7038
        aapl  0.3198  0.4000  0.6896  1.0000  0.5134  0.7038  0.0000

        ABuStatsUtil.manhattan_distance_matrix(cc, scale_start=False)

        output:
                tsla    bidu    noah    sfun    goog    vips    aapl
        tsla  0.0000  0.0640  0.3318  0.3585  0.6415  0.3395  0.1906
        bidu  0.0640  0.0000  0.2750  0.3018  0.6982  0.2827  0.1338
        noah  0.3318  0.2750  0.0000  0.0267  0.9733  0.0124  0.1412
        sfun  0.3585  0.3018  0.0267  0.0000  1.0000  0.0191  0.1680
        goog  0.6415  0.6982  0.9733  1.0000  0.0000  0.9809  0.8320
        vips  0.3395  0.2827  0.0124  0.0191  0.9809  0.0000  0.1489
        aapl  0.1906  0.1338  0.1412  0.1680  0.8320  0.1489  0.0000

    :param df: pd.DataFrame or np.array or nested iterable sequence [[], []];
               it is called df because it is converted to a pd.DataFrame internally
    :param scale_end: whether to standardize the resulting matrix
    :param to_similar: whether to convert the output distance to a similarity value
    :return: distance_df, a pd.DataFrame object
    """
    return _distance_matrix(manhattan_distances, df, scale_end, to_similar)
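_distance_matrix is internal to the source library, so the following is only a rough sketch of the apparent behavior, under the assumptions that each column is treated as one sample (hence the ticker-by-ticker output above) and that scale_end min-max scales the resulting matrix into [0, 1]:

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import manhattan_distances

def manhattan_distance_matrix_sketch(df, scale_end=True, to_similar=False):
    # Assumption: each column (e.g. one stock's price series) is one sample,
    # so transpose before computing the pairwise L1 distances
    df = pd.DataFrame(df)
    dist = manhattan_distances(df.T.values)
    distance_df = pd.DataFrame(dist, index=df.columns, columns=df.columns)
    if scale_end:
        # Min-max scale the whole matrix into [0, 1]; the diagonal stays 0
        # and the largest distance becomes 1, matching the example output
        distance_df = (distance_df - distance_df.values.min()) / \
                      (distance_df.values.max() - distance_df.values.min())
    if to_similar:
        distance_df = 1.0 / (1.0 + distance_df)
    return distance_df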