本文整理汇总了Python中sklearn.neighbors.BallTree类的典型用法代码示例。如果您正苦于以下问题:Python BallTree类的具体用法?Python BallTree怎么用?Python BallTree使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了BallTree类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _hdbscan_prims_balltree
def _hdbscan_prims_balltree(X, min_samples=5, alpha=1.0,
                            metric='minkowski', p=2, leaf_size=40,
                            gen_min_span_tree=False):
    """Build a single-linkage tree from BallTree core distances and an MST.

    Parameters
    ----------
    X : array
        Input data; ``X.shape[0]`` is taken as the number of samples.
    min_samples : int
        Neighbour rank used for the core distance. Capped at
        ``X.shape[0] - 1``.
    alpha : float
        Passed unchanged to ``mst_linkage_core_cdist``.
    metric : str
        Metric name given to ``BallTree`` and ``DistanceMetric.get_metric``.
    p : number or None
        Minkowski exponent. Only consulted when ``metric == 'minkowski'``,
        in which case it is forwarded to both the tree and the metric.
    leaf_size : int
        Passed to ``BallTree``.
    gen_min_span_tree : bool
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    tuple
        ``(single_linkage_tree, None)``.

    Raises
    ------
    TypeError
        If the metric is Minkowski and ``p`` is None.
    ValueError
        If the metric is Minkowski and ``p`` is negative.
    """
    metric_kwargs = {}
    if metric == 'minkowski':
        if p is None:
            raise TypeError('Minkowski metric given but no p value supplied!')
        if p < 0:
            raise ValueError('Minkowski metric with negative p value is not defined!')
        # Previously p was validated but never handed to the tree or the
        # metric, so every Minkowski request silently used the default
        # exponent. NOTE(review): scikit-learn may itself reject exponents
        # below 1 for tree-based searches -- confirm against the installed
        # version.
        metric_kwargs['p'] = p

    size = X.shape[0]
    min_samples = min(size - 1, min_samples)

    tree = BallTree(X, metric=metric, leaf_size=leaf_size, **metric_kwargs)
    dist_metric = DistanceMetric.get_metric(metric, **metric_kwargs)

    # Get distance to kth nearest neighbour (last column of the distances).
    core_distances = tree.query(X, k=min_samples,
                                dualtree=True,
                                breadth_first=True)[0][:, -1]

    # Mutual reachability distance is implicit in mst_linkage_core_cdist
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric, alpha)

    # Sort edges of the min_spanning_tree by weight (third column)
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]

    # Convert edge list into standard hierarchical clustering format
    single_linkage_tree = label(min_spanning_tree)
    return single_linkage_tree, None
示例2: _hdbscan_prims_balltree
def _hdbscan_prims_balltree(X, min_samples=5, alpha=1.0,
                            metric='minkowski', p=2, leaf_size=40,
                            gen_min_span_tree=False, **kwargs):
    """Build a single-linkage tree from BallTree core distances and an MST.

    Parameters
    ----------
    X : array
        Input data; ``X.shape[0]`` is taken as the number of samples. It is
        copied to a C-contiguous double array when not already C-contiguous.
    min_samples : int
        Neighbour rank used for the core distance. Capped at
        ``X.shape[0] - 1``.
    alpha : float
        Passed unchanged to ``mst_linkage_core_vector``.
    metric : str
        Metric name given to ``BallTree`` and ``DistanceMetric.get_metric``.
    p : number or None
        Minkowski exponent. Only consulted when ``metric == 'minkowski'``,
        in which case it is forwarded to both the tree and the metric.
    leaf_size : int
        Passed to ``BallTree``.
    gen_min_span_tree : bool
        Accepted for interface compatibility; not used in this function.
    **kwargs
        Extra metric arguments forwarded to ``BallTree`` and
        ``DistanceMetric.get_metric``.

    Returns
    -------
    tuple
        ``(single_linkage_tree, None)``.

    Raises
    ------
    TypeError
        If the metric is Minkowski and ``p`` is None.
    ValueError
        If the metric is Minkowski and ``p`` is negative.
    """
    # Copy so the caller's kwargs dict is never mutated.
    metric_kwargs = dict(kwargs)
    if metric == 'minkowski':
        if p is None:
            raise TypeError('Minkowski metric given but no p value supplied!')
        if p < 0:
            raise ValueError('Minkowski metric with negative p value is not defined!')
        # p is a named parameter, so it can never arrive through **kwargs;
        # without this line the requested exponent was silently ignored.
        # NOTE(review): scikit-learn may itself reject exponents below 1 for
        # tree-based searches -- confirm against the installed version.
        metric_kwargs['p'] = p

    # The Cython routines used require contiguous arrays
    if not X.flags['C_CONTIGUOUS']:
        X = np.array(X, dtype=np.double, order='C')

    size = X.shape[0]
    min_samples = min(size - 1, min_samples)

    tree = BallTree(X, metric=metric, leaf_size=leaf_size, **metric_kwargs)
    dist_metric = DistanceMetric.get_metric(metric, **metric_kwargs)

    # Get distance to kth nearest neighbour; copy so the column is contiguous.
    core_distances = tree.query(X, k=min_samples,
                                dualtree=True,
                                breadth_first=True)[0][:, -1].copy(order='C')

    # Mutual reachability distance is implicit in mst_linkage_core_vector
    min_spanning_tree = mst_linkage_core_vector(X, core_distances, dist_metric, alpha)

    # Sort edges of the min_spanning_tree by weight (third column)
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]

    # Convert edge list into standard hierarchical clustering format
    single_linkage_tree = label(min_spanning_tree)
    return single_linkage_tree, None
示例3: __init__
class BallTreeANN:
    """Small wrapper around a ``BallTree`` index with pickle persistence."""

    def __init__(self):
        """Create a wrapper with no index built or loaded yet."""
        self.nbrs = None

    def build_index(self, dataset, leaf_size):
        """Build a Euclidean ``BallTree`` over ``dataset`` and return it.

        The tree is also stored on ``self.nbrs``.
        """
        self.nbrs = BallTree(dataset, leaf_size=leaf_size, metric="euclidean")
        return self.nbrs

    def build_store_index(self, dataset, path, leaf_size):
        """Build the index over ``dataset`` and immediately pickle it to ``path``."""
        self.build_index(dataset, leaf_size)
        self.store_index(path)

    def store_index(self, path):
        """Pickle the current index (``self.nbrs``) to the file at ``path``."""
        with open(path, "wb") as output1:
            pickle.dump(self.nbrs, output1, pickle.HIGHEST_PROTOCOL)

    def load_index(self, path):
        """Replace ``self.nbrs`` with the object unpickled from ``path``.

        SECURITY: unpickling can execute arbitrary code. Only load files
        that come from a trusted source.
        """
        with open(path, "rb") as input1:
            self.nbrs = pickle.load(input1)

    def search_in_radious(self, vector, radious=2):
        """Return ``(distances, indices)`` of points within ``radious`` of ``vector``.

        ``query_radius(..., return_distance=True)`` yields the indices first
        and the distances second, so the pair is unpacked in that order and
        then returned in the ``(distances, indices)`` order this class uses
        for ``search_neighbors`` as well.
        """
        indices, distances = self.nbrs.query_radius(vector, r=radious, return_distance=True)
        return distances, indices

    def search_neighbors(self, vector, num_neighbors):
        """Return ``(distances, indices)`` of the ``num_neighbors`` nearest points."""
        distances, indices = self.nbrs.query(vector, k=num_neighbors)
        return distances, indices
示例4: DualTree
def DualTree(dataFlux, dDataFlux, modelFlux, modelParams, mcIts, columnsToScale=()):
    '''
    Monte Carlo nearest-model fit in colour space using a BallTree.

    Inputs:
    dataFlux = observed fluxes, array of size (#objects,#filters)
    dDataFlux = flux uncertainties, array of size (#objects,#filters)
    modelFlux = fluxes of models, array of size (#models,#filters)
    modelParams = parameters of each model to be recorded, array of size (#models,#parameters)
    mcIts = number of times to perturb fluxes for each object, int
    columnsToScale = column indices in modelParams of parameters that need to be
        multiplied by the scale factor (default: none). The default is an
        immutable empty tuple so no mutable object is shared between calls.

    Output:
    NumPy array with one entry per object. Each entry is a 2d array with one
    row per monte carlo iteration: the value returned by fit_tools.Scale comes
    first, followed by one column per model parameter.
    '''
    # Colours are ratios of adjacent filters; the tree is built once over the models.
    modelColors = modelFlux[:, 1:] / modelFlux[:, :-1]
    tree = BallTree(modelColors)
    fitParams = []
    for i in range(len(dataFlux)):
        # Perturb the observed fluxes mcIts times with Gaussian noise.
        newFlux = dataFlux[i] + dDataFlux[i] * np.random.randn(mcIts, len(dataFlux[i]))
        newColors = newFlux[:, 1:] / newFlux[:, :-1]
        # Index of the nearest model for every perturbed realisation.
        nearest = tree.query(newColors, k=1, dualtree=True)[1][:, 0]
        s = fit_tools.Scale(modelFlux[nearest], newFlux, np.ones(np.shape(newFlux)))
        # Select the matched parameter rows once instead of once per column.
        matched = modelParams[nearest]
        myParams = s
        for j in range(len(modelParams[0])):
            column = matched[:, j]
            if j in columnsToScale:
                column = np.multiply(s, column)
            myParams = np.c_[myParams, column]
        fitParams.append(myParams)
    return np.array(fitParams)
示例5: _rsl_prims_balltree
def _rsl_prims_balltree(X, cut, k=5, alpha=1.4142135623730951, gamma=5, metric='minkowski', p=2):
    """Cluster ``X`` by cutting a single-linkage tree built via a BallTree.

    Parameters
    ----------
    X : array
        Input data; ``X.shape[0]`` is taken as the number of samples.
    cut : number
        Passed to ``SingleLinkageTree.get_clusters``.
    k : int
        Neighbour rank used for the core distance. Capped at
        ``X.shape[0] - 1``.
    alpha : float
        Accepted for interface compatibility; not used in this function.
    gamma : number
        Passed to ``SingleLinkageTree.get_clusters``.
    metric : str
        Metric name given to ``BallTree`` and ``DistanceMetric.get_metric``.
    p : number or None
        Minkowski exponent. Only consulted when ``metric == 'minkowski'``,
        in which case it is forwarded to both the tree and the metric.

    Returns
    -------
    tuple
        ``(labels, single_linkage_tree)``.

    Raises
    ------
    TypeError
        If the metric is Minkowski and ``p`` is None.
    ValueError
        If the metric is Minkowski and ``p`` is negative.
    """
    metric_kwargs = {}
    if metric == 'minkowski':
        if p is None:
            raise TypeError('Minkowski metric given but no p value supplied!')
        if p < 0:
            raise ValueError('Minkowski metric with negative p value is not defined!')
        # Previously p was validated but never used, so the default exponent
        # was applied regardless of what the caller asked for.
        # NOTE(review): scikit-learn may itself reject exponents below 1 for
        # tree-based searches -- confirm against the installed version.
        metric_kwargs['p'] = p

    dim = X.shape[0]
    k = min(dim - 1, k)

    tree = BallTree(X, metric=metric, **metric_kwargs)
    dist_metric = DistanceMetric.get_metric(metric, **metric_kwargs)

    # Core distance = distance to the k-th neighbour (last returned column).
    core_distances = tree.query(X, k=k)[0][:, -1]
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric)

    single_linkage_tree = label(min_spanning_tree)
    single_linkage_tree = SingleLinkageTree(single_linkage_tree)

    labels = single_linkage_tree.get_clusters(cut, gamma)
    return labels, single_linkage_tree
示例6: knn
def knn(a, b):
    """Return the k nearest rows of ``b`` to ``a``.

    The last element of ``a`` and of every row of ``b`` is dropped before
    the search. ``k`` is not a parameter: it is resolved from the enclosing
    (module) scope at call time.

    When ``a[:-1]`` is one-dimensional (a single query point) the result is
    a list of the ``k`` neighbouring rows of the trimmed ``b``. For a
    two-dimensional query the result is a list with one array of
    neighbouring rows per query point.
    """
    b = np.array([bb[:-1] for bb in b])
    tree = BallTree(b)
    query = np.asarray(a[:-1])
    single_point = query.ndim == 1
    # BallTree.query wants a 2-D array of query points; a bare 1-D vector is
    # rejected by current scikit-learn, so promote it to a single row.
    __, indx = tree.query(np.atleast_2d(query), k)
    if single_point:
        # Flatten the (1, k) result so iteration yields individual indices.
        indx = indx.reshape(-1)
    return [b[i] for i in indx]
示例7: compute_labels
def compute_labels(X, C):
    """Compute the cluster labels for dataset X given centers C.

    Each row of ``X`` is assigned the index of its nearest row in ``C``,
    found with a BallTree built over ``C``.

    Returns a one-dimensional array with one label per row of ``X``.
    """
    # A full pairwise-distance matrix (argmin over pairwise_distances(C, X))
    # requires too much memory for large X, hence the tree query.
    tree = BallTree(C)
    # Take the single neighbour column explicitly: unlike squeeze(), this
    # stays 1-D even when X holds a single sample.
    labels = tree.query(X, k=1, return_distance=False)[:, 0]
    return labels
示例8: correrPruebaLocal
# Run a local k-nearest-neighbour evaluation: load a train/test split, build a
# BallTree over the training rows, predict each test row from its 4 nearest
# neighbours and print accuracy figures.
# NOTE(review): this block uses Python 2 print statements and its indentation
# was lost in extraction; whether the two summary prints after the if/else
# belong inside the first loop cannot be determined from here -- confirm
# against the original file before re-indenting.
def correrPruebaLocal(set_ampliado):
print "corriendo prueba local"
# presumably 0.66 is the train fraction of the split -- TODO confirm in cargarDatosPruebaLocal
train,targetTrain,test,targetTest = cargarDatosPruebaLocal(set_ampliado,0.66)
tree = BallTree(train,leaf_size=30)
predictions=[]
correctas=0
incorrectas=0
for x in range(len(test)):
# Query the 4 nearest training rows for this test sample.
dist, ind = tree.query(test[x], k=4)
# The flattened neighbour indices are handed to the project helper that produces the prediction.
resultado = obtenerPrediccionknnEB(train,targetTrain,test[x],ind.ravel())
predictions.append(resultado)
print progreso(x,len(test))
# Running tally of correct / incorrect predictions.
if resultado==targetTest[x]:
correctas+=1
else:
incorrectas+=1
print "Predicciones --> Correctas: " + str(correctas) + "Incorrectas: " + str(incorrectas)+ "Total: "+ str(len(test))
print('> predicted=' + repr(resultado) + ', actual=' + repr(targetTest[x]) + ' ' + progreso(x,len(test)) )
print "precision total"
# Second pass: recount the matches from the stored predictions and print the percentage.
correct = 0
for x in range(len(test)):
if targetTest[x] == predictions[x]:
correct += 1
print (float(correct)/float(len(test))) * 100.0
示例9: test_barnes_hut_angle
def test_barnes_hut_angle():
    """Check that Barnes-Hut with a zero angle matches the exact computation."""
    # With angle=0 the Barnes-Hut approximation corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    n_features = 5
    for n_components in (2, 3):
        degrees_of_freedom = float(n_components - 1.0)

        # A fresh, identically seeded generator for every embedding size.
        rng = check_random_state(0)
        samples = rng.randn(n_samples, n_features).astype(np.float32)
        distances = samples.dot(samples.T)
        np.fill_diagonal(distances, 0.0)
        params = rng.randn(n_samples, n_components)

        # Exact joint probabilities and gradient.
        P = _joint_probabilities(distances, perplexity, False)
        _, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                       n_samples, n_components)

        # Neighbour-based version: ask for every point, then drop the first
        # returned column.
        n_neighbors = n_samples - 1
        ball_tree = BallTree(distances)
        _, neighbors_nn = ball_tree.query(distances, k=n_neighbors + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        Pbh = _joint_probabilities_nn(distances, neighbors_nn,
                                      perplexity, False)
        _, grad_bh = _kl_divergence_bh(params, Pbh, neighbors_nn,
                                       degrees_of_freedom, n_samples,
                                       n_components, angle=angle,
                                       skip_num_points=0, verbose=False)

        assert_array_almost_equal(Pbh, P, decimal=5)
        assert_array_almost_equal(grad_exact, grad_bh, decimal=5)
示例10: DualTree
def DualTree(dataFlux, dDataFlux, modelFlux, modelParams, mcIts):
    """
    Monte Carlo nearest-model fit in colour space using a BallTree.

    Inputs:
        dataFlux = observed fluxes, array of size (#objects,#filters)
        dDataFlux = flux uncertainties, array of size (#objects,#filters)
        modelFlux = fluxes of models, array of size (#models,#filters)
        modelParams = parameters of each model to be recorded, array of size (#models,#parameters)
        mcIts = number of times to perturb fluxes for each object, int

    Output:
        NumPy array with one entry per object. Each entry is a 2d array with
        one row per monte carlo iteration: the value returned by Scale comes
        first, followed by one column per model parameter.
    """
    # Colours are ratios of adjacent filters; index the models once.
    colorTree = BallTree(modelFlux[:, 1:] / modelFlux[:, :-1])
    perObject = []
    for i, flux in enumerate(dataFlux):
        # mcIts noisy realisations of this object's fluxes.
        noise = np.random.randn(mcIts, len(flux))
        newFlux = flux + dDataFlux[i] * noise
        newColors = newFlux[:, 1:] / newFlux[:, :-1]

        # Nearest model index for every realisation.
        _, neighbours = colorTree.query(newColors, k=1, dualtree=True)
        nearest = neighbours[:, 0]

        weights = np.ones(np.shape(newFlux))
        row = Scale(modelFlux[nearest], newFlux, weights)
        # Append each parameter column of the matched models after the scale.
        for j in range(len(modelParams[0])):
            row = np.c_[row, modelParams[nearest][:, j]]
        perObject.append(row)
    return np.array(perObject)
示例11: run_single_trial
def run_single_trial(self, train_pairs, test_pairs, train_tune_data, test_tune_data):
    """Run one metric-learning trial and score nearest-neighbour matching.

    Steps: project the pairs with ``self.fit_pca``, hand the training pairs
    to the external cvx solver through the project helpers, load the
    resulting matrix as a Mahalanobis metric, build one BallTree each for
    the train and test "to match" sections, and count how often item ``i``
    appears among the top neighbours of query ``i``.

    Parameters
    ----------
    train_pairs, test_pairs
        Pair collections passed to ``self.fit_pca``; after projection each
        element is indexed with ``[0]`` and ``[1]``.
    train_tune_data
        Not used in this method.
    test_tune_data
        Indexable; entries are written to the ``successful_tunes_<id>`` file.

    Returns
    -------
    tuple
        ``([[train_correct, n_train], [test_correct, n_test]], test_indices)``
        where ``test_indices`` holds, per test query, the position of the
        matching item in the full neighbour ranking.
    """
    # Single-string print(...) calls behave the same under Python 2 and 3.
    print("Running PCA...")
    train_pairs_pca, test_pairs_pca = self.fit_pca(train_pairs, test_pairs)
    ys = ys_from_pairs(train_pairs_pca)

    # Random id used to name the solver's exchange files and the output file.
    file_id = str(random.random())[2:]
    save_cvx_params(ys, file_id)
    run_cvx(file_id)
    M = load_cvx_result(file_id)
    dist = DistanceMetric.get_metric('mahalanobis', VI=M)

    train_a_sections = [x[0] for x in train_pairs_pca]
    train_b_sections = [x[1] for x in train_pairs_pca]
    test_a_sections = [x[0] for x in test_pairs_pca]
    test_b_sections = [x[1] for x in test_pairs_pca]

    # By default the A sections are the queries and the B sections are
    # indexed; the flag swaps the two roles.
    if self.match_a_to_b:
        train_given_sections = train_b_sections
        train_to_match_sections = train_a_sections
        test_given_sections = test_b_sections
        test_to_match_sections = test_a_sections
    else:
        train_given_sections = train_a_sections
        train_to_match_sections = train_b_sections
        test_given_sections = test_a_sections
        test_to_match_sections = test_b_sections

    print("Constructing BallTrees...")
    train_bt = BallTree(train_to_match_sections, metric=dist)
    test_bt = BallTree(test_to_match_sections, metric=dist)

    train_top_fraction = int(len(train_given_sections) * self.correct_within_top_fraction)
    test_top_fraction = int(len(test_given_sections) * self.correct_within_top_fraction)

    print("Querying the BallTrees...")
    train_result = train_bt.query(train_given_sections, train_top_fraction)
    test_result = test_bt.query(test_given_sections, test_top_fraction)

    print("Looking at correctness of results...")
    # A query counts as correct when its own index is among its neighbours.
    train_correct = sum(int(i in train_result[1][i])
                        for i in range(len(train_given_sections)))
    test_correct = sum(int(i in test_result[1][i])
                       for i in range(len(test_given_sections)))

    print("Finding indices of correct matches...")
    test_result_full = test_bt.query(test_given_sections, len(test_given_sections))

    def default_index(lst, i):
        # Position of i in lst, or -1 when absent. Only the "not found"
        # error is swallowed; anything else propagates.
        try:
            return lst.index(i)
        except ValueError:
            return -1

    test_indices = [default_index(list(test_result_full[1][i]), i)
                    for i in range(len(test_given_sections))]
    test_indices = [x for x in test_indices if x != -1]

    with open("successful_tunes_{}".format(file_id), 'w') as successful_tunes_f:
        # NOTE(review): i indexes the filtered list; if any -1 entry was
        # dropped above, i no longer lines up with test_tune_data -- confirm.
        for i, index in enumerate(test_indices):
            if index == 0:
                successful_tunes_f.write(str(test_tune_data[i]) + '\n\n')

    return [[train_correct, len(train_given_sections)],
            [test_correct, len(test_given_sections)]], test_indices
示例12: _compute_nearest
def _compute_nearest(xhs, rr, use_balltree=True, return_dists=False):
"""Find nearest neighbors
Note: The rows in xhs and rr must all be unit-length vectors, otherwise
the result will be incorrect.
Parameters
----------
xhs : array, shape=(n_samples, n_dim)
Points of data set.
rr : array, shape=(n_query, n_dim)
Points to find nearest neighbors for.
use_balltree : bool
Use fast BallTree based search from scikit-learn. If scikit-learn
is not installed it will fall back to the slow brute force search.
return_dists : bool
If True, return associated distances.
Returns
-------
nearest : array, shape=(n_query,)
Index of nearest neighbor in xhs for every point in rr.
distances : array, shape=(n_query,)
The distances. Only returned if return_dists is True.
"""
if use_balltree:
try:
from sklearn.neighbors import BallTree
except ImportError:
logger.info('Nearest-neighbor searches will be significantly '
'faster if scikit-learn is installed.')
use_balltree = False
if xhs.size == 0 or rr.size == 0:
if return_dists:
return np.array([], int), np.array([])
return np.array([], int)
if use_balltree is True:
ball_tree = BallTree(xhs)
if return_dists:
out = ball_tree.query(rr, k=1, return_distance=True)
return out[1][:, 0], out[0][:, 0]
else:
nearest = ball_tree.query(rr, k=1, return_distance=False)[:, 0]
return nearest
else:
from scipy.spatial.distance import cdist
if return_dists:
nearest = list()
dists = list()
for r in rr:
d = cdist(r[np.newaxis, :], xhs)
idx = np.argmin(d)
nearest.append(idx)
dists.append(d[0, idx])
return (np.array(nearest), np.array(dists))
else:
nearest = np.array([np.argmin(cdist(r[np.newaxis, :], xhs))
for r in rr])
return nearest
示例13: _hdbscan_prims_balltree
def _hdbscan_prims_balltree(X, min_samples=5, alpha=1.0,
                            metric='minkowski', p=2, leaf_size=40,
                            gen_min_span_tree=False):
    """Build a single-linkage tree from BallTree core distances and an MST.

    Parameters
    ----------
    X : array
        Input data; ``X.shape[0]`` is taken as the number of samples.
    min_samples : int
        Neighbour rank used for the core distance. Capped at
        ``X.shape[0] - 1``.
    alpha : float
        Passed unchanged to ``mst_linkage_core_cdist``.
    metric : str
        Metric name given to ``BallTree`` and ``DistanceMetric.get_metric``.
    p : number or None
        Minkowski exponent. Only consulted when ``metric == 'minkowski'``,
        in which case it is forwarded to both the tree and the metric.
    leaf_size : int
        Passed to ``BallTree``.
    gen_min_span_tree : bool
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    tuple
        ``(single_linkage_tree, None)``.

    Raises
    ------
    TypeError
        If the metric is Minkowski and ``p`` is None.
    ValueError
        If the metric is Minkowski and ``p`` is negative.
    """
    metric_kwargs = {}
    if metric == 'minkowski':
        if p is None:
            raise TypeError('Minkowski metric given but no p value supplied!')
        if p < 0:
            raise ValueError('Minkowski metric with negative p value is not defined!')
        # The exponent used to be checked and then dropped, so the tree and
        # the metric always fell back to their default.
        # NOTE(review): scikit-learn may itself reject exponents below 1 for
        # tree-based searches -- confirm against the installed version.
        metric_kwargs['p'] = p

    dim = X.shape[0]
    min_samples = min(dim - 1, min_samples)

    tree = BallTree(X, metric=metric, leaf_size=leaf_size, **metric_kwargs)
    dist_metric = DistanceMetric.get_metric(metric, **metric_kwargs)

    # Core distance = distance to the min_samples-th neighbour (last column).
    core_distances = tree.query(X, k=min_samples,
                                dualtree=True,
                                breadth_first=True)[0][:, -1]

    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric, alpha)
    # Order the edges by their weight (third column) before labelling.
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]

    single_linkage_tree = label(min_spanning_tree)
    return single_linkage_tree, None
示例14: md_nearest_from_centroids
def md_nearest_from_centroids(seeding, centroids):
    """Return the mean distance from each centroid to its nearest seeding point.

    A BallTree is built over ``seeding`` and queried with ``centroids``
    using the query's default neighbour count; the first distance of every
    result row is summed and divided by ``len(centroids)``.
    """
    nearest_dist, _ = BallTree(seeding).query(centroids)
    total = 0
    for row in nearest_dist:
        total += row[0]
    return total / len(centroids)
示例15: _calc_tree
def _calc_tree(xx, yy, radius):
    """Return neighbour indices within two radii for the points ``(xx, yy)``.

    The coordinates are packed into a two-column float array, indexed with a
    Euclidean BallTree, and every point is queried against the same tree
    twice: once with ``radius`` and once with the module-level
    ``VARIANCE_RADIUS_SW``.

    Returns
    -------
    tuple
        ``(ind, ind_sw)``: the ``query_radius`` results for ``radius`` and
        ``VARIANCE_RADIUS_SW`` respectively.
    """
    coords = np.zeros((len(xx), 2), dtype='float')
    for col, values in enumerate((xx, yy)):
        coords[:, col] = values[:]
    ball_tree = BallTree(coords, metric='euclidean')
    within_radius = ball_tree.query_radius(coords, r=radius)
    within_sw = ball_tree.query_radius(coords, r=VARIANCE_RADIUS_SW)
    return within_radius, within_sw