This article collects typical usage examples of the sklearn.neighbors module in Python. If you have been wondering what sklearn.neighbors does, how to call it, or what real usage looks like, the curated code examples below should help. You can also explore further usage examples of the parent package, sklearn.
The following presents 14 code examples of sklearn.neighbors, sorted by popularity by default.
Example 1: run_sklearn
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

import time

import sklearn.neighbors
from sklearn.ensemble import AdaBoostClassifier, ExtraTreesClassifier, RandomForestClassifier

import jhkaggle.train_sklearn

def run_sklearn():
    n_trees = 100
    n_folds = 3
    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['rforest', RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=1, max_depth=3)],
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=3, n_jobs=-1)],
        ['adaboost', AdaBoostClassifier(base_estimator=None, n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, n_jobs=-1)]
    ]
    start_time = time.time()
    for name, alg in alg_list:
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        train = None
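TrainSKLearn is part of the jhkaggle project, not scikit-learn itself. As a self-contained sketch of the same pattern — looping over a list of named estimators — the following scores each one with cross-validation on synthetic stand-in data:

import sklearn.neighbors
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=500, random_state=0)  # stand-in dataset
alg_list = [
    ['rforest', RandomForestClassifier(n_estimators=100, max_depth=3, n_jobs=-1)],
    ['knn', sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, n_jobs=-1)],
]
for name, alg in alg_list:
    scores = cross_val_score(alg, X, y, cv=3)
    print('{}: mean accuracy {:.3f}'.format(name, scores.mean()))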
Example 2: k_nearest_approx
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

def k_nearest_approx(self, vec, k):
    """Get the k nearest neighbors of a vector (in terms of cosine similarity).

    :param (np.array) vec: query vector
    :param (int) k: number of top neighbors to return
    :return (list[tuple[str, float]]): a list of (word, cosine similarity) pairs, in descending order
    """
    if not hasattr(self, 'lshf'):
        self.lshf = self._init_lsh_forest()
    # TODO(kelvin): make this inner product score, to be consistent with k_nearest
    distances, neighbors = self.lshf.kneighbors([vec], n_neighbors=k, return_distance=True)
    scores = np.subtract(1, distances)  # cosine similarity = 1 - cosine distance
    nbr_score_pairs = self._word_to_score(np.squeeze(neighbors), np.squeeze(scores))
    return sorted(nbr_score_pairs.items(), key=lambda x: x[1], reverse=True)
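The lshf attribute here is an sklearn.neighbors.LSHForest, which was deprecated in scikit-learn 0.19 and removed in 0.21, so this snippet only runs on old versions. On modern scikit-learn, a rough stand-in for the lookup (exact rather than approximate) is NearestNeighbors with a cosine metric; the vocabulary and embedding matrix below are made up for illustration:

import numpy as np
from sklearn.neighbors import NearestNeighbors

words = ['king', 'queen', 'man', 'woman']         # hypothetical vocabulary
vectors = np.random.RandomState(0).rand(4, 50)    # hypothetical embeddings

nn = NearestNeighbors(metric='cosine').fit(vectors)
distances, neighbors = nn.kneighbors([vectors[0]], n_neighbors=2)
scores = 1 - distances                            # cosine similarity
print([(words[i], s) for i, s in zip(neighbors[0], scores[0])])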
Example 3: _pre_calculate
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

def _pre_calculate(self, force=False):
    if self.final_storage.check_exists(self.final_storage.instance_path) and not force:
        self.NNS = self.final_storage.load_instance(self.final_storage.instance_path)
    else:
        self.ssfeature_loader.setup()
        self.Xtrain = self.ssfeature_loader.load_train()
        self.Xtest = self.ssfeature_loader.load_test()
        if self.normalize:
            self.Xtrain = utils.l2_feat_norm(self.Xtrain)
            self.Xtest = utils.l2_feat_norm(self.Xtest)
        self.nn_model = sklearn.neighbors.NearestNeighbors(
            n_neighbors=self.n_neighbors, algorithm='ball_tree', metric='minkowski', p=2)
        self.nn_model.fit(self.Xtrain)
        self.NNS = self.nn_model.kneighbors(self.Xtest, self.n_neighbors, return_distance=False)
        self.final_storage.save_instance(self.final_storage.instance_path, self.NNS)
    # this needs changing for larger n_neighbors
    if self.n_neighbors == 1:
        self.NNS = self.NNS.T[0]
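Stripped of the project-specific storage and loader plumbing, the core of this method is just fitting NearestNeighbors on training features and querying with test features. A minimal sketch on random data, with row-wise L2 normalisation standing in for utils.l2_feat_norm:

import numpy as np
import sklearn.neighbors

rng = np.random.RandomState(0)
Xtrain = rng.rand(100, 16)
Xtest = rng.rand(10, 16)
Xtrain /= np.linalg.norm(Xtrain, axis=1, keepdims=True)  # L2 feature norm
Xtest /= np.linalg.norm(Xtest, axis=1, keepdims=True)

nn_model = sklearn.neighbors.NearestNeighbors(
    n_neighbors=1, algorithm='ball_tree', metric='minkowski', p=2)
nn_model.fit(Xtrain)
NNS = nn_model.kneighbors(Xtest, 1, return_distance=False)
print(NNS.T[0])  # index of the nearest training row for each test row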
Example 4: _get_embedded
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

def _get_embedded(signal, delay=1, dimension=2, r="default", distance="chebyshev", approximate=True, fuzzy=False):
    """Examples
    ----------
    >>> import neurokit2 as nk
    >>>
    >>> signal = nk.signal_simulate(duration=2, frequency=5)
    >>> delay = nk.complexity_delay(signal)
    >>>
    >>> embedded, count = _get_embedded(signal, delay, r=0.2 * np.std(signal, ddof=1), dimension=2,
    ...                                 distance='chebyshev', approximate=False)
    """
    # Sanity checks
    if distance not in sklearn.neighbors.KDTree.valid_metrics:
        raise ValueError(
            "NeuroKit error: _get_embedded(): The given metric (%s) is not valid. "
            "The valid metric names are: %s" % (distance, sklearn.neighbors.KDTree.valid_metrics)
        )

    # Get embedded
    embedded = complexity_embedding(signal, delay=delay, dimension=dimension)
    if approximate is False:
        embedded = embedded[:-1]  # Removes the last line

    if fuzzy is False:
        # Get neighbors count
        count = _get_count(embedded, r=r, distance=distance)
    else:
        # FuzzyEn: Remove the local baselines of vectors
        embedded -= np.mean(embedded, axis=1, keepdims=True)
        count = _get_count_fuzzy(embedded, r=r, distance=distance, n=1)
    return embedded, count
# =============================================================================
# Get Count
# =============================================================================
Example 5: _get_count
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

import numpy as np
import sklearn.neighbors

def _get_count(embedded, r, distance="chebyshev"):
    kdtree = sklearn.neighbors.KDTree(embedded, metric=distance)
    # Return, for each vector, the number of vectors within radius r (self included)
    return kdtree.query_radius(embedded, r, count_only=True).astype(np.float64)
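For instance, with the default Chebyshev metric and r=0.5, the two nearby points below count each other (and themselves), while the far point only counts itself:

import numpy as np

embedded = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])
print(_get_count(embedded, r=0.5))  # [2. 2. 1.]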
Example 6: _get_count_fuzzy
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

import numpy as np
import sklearn.neighbors

def _get_count_fuzzy(embedded, r, distance="chebyshev", n=1):
    dist = sklearn.neighbors.DistanceMetric.get_metric(distance)
    dist = dist.pairwise(embedded)
    if n > 1:
        sim = np.exp(-(dist ** n) / r)
    else:
        sim = np.exp(-dist / r)
    # Return the soft (exponentially weighted) neighbor count for each vector
    return np.sum(sim, axis=0)
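Note that in recent scikit-learn releases (1.0 and later) this class lives at sklearn.metrics.DistanceMetric, and the sklearn.neighbors alias has since been removed. A small usage sketch of the function as written, on three points whose pairwise Chebyshev distances are easy to verify by hand:

import numpy as np

embedded = np.array([[0.0, 0.0], [1.0, 0.0], [4.0, 0.0]])
# each entry is a soft neighbor count: the sum over exp(-distance / r)
print(_get_count_fuzzy(embedded, r=1.0, distance='chebyshev', n=1))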
# =============================================================================
# Get R
# =============================================================================
Example 7: _nearest_distances
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

import sklearn.neighbors

def _nearest_distances(X, k=1):
    """From https://gist.github.com/GaelVaroquaux/ead9898bd3c973c40429

    X = array(N, M)
    N = number of points
    M = number of dimensions

    Returns the distance to the kth nearest neighbor for every point in X.
    """
    knn = sklearn.neighbors.NearestNeighbors(n_neighbors=k + 1)
    knn.fit(X)
    d, _ = knn.kneighbors(X)  # the first "neighbor" of each point is the point itself
    return d[:, -1]  # distance to the kth nearest neighbor
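For example, with three collinear points the 1st-nearest-neighbor distances are straightforward to verify by hand:

import numpy as np

X = np.array([[0.0], [1.0], [3.0]])
print(_nearest_distances(X, k=1))  # [1. 1. 2.]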
Example 8: _entropy
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

import numpy as np
import scipy.special

def _entropy(X, k=1):
    """Returns the entropy of X. From https://gist.github.com/GaelVaroquaux/ead9898bd3c973c40429.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed.
    k : int (optional)
        Number of nearest neighbors for density estimation.

    Returns
    -------
    float
        Entropy of X.

    Notes
    -----
    - Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy of a random vector. Probl. Inf.
      Transm. 23, 95-101.
    - Evans, D. 2008 A computationally efficient estimator for mutual information, Proc. R. Soc. A 464 (2093),
      1203-1215.
    - Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual information. Phys Rev E 69(6 Pt 2):066138.
    """
    # Distance to the kth nearest neighbor of each point
    r = _nearest_distances(X, k)
    n, d = X.shape
    volume_unit_ball = (np.pi ** (0.5 * d)) / scipy.special.gamma(0.5 * d + 1)
    # Perez-Cruz et al. (2008), Estimation of Information Theoretic Measures for
    # Continuous Random Variables, suggests returning:
    # return d * mean(log(r)) + log(volume_unit_ball) + log(n - 1) - log(k)
    return (
        d * np.mean(np.log(r + np.finfo(X.dtype).eps))
        + np.log(volume_unit_ball)
        + scipy.special.psi(n)
        - scipy.special.psi(k)
    )
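A quick sanity check, assuming _nearest_distances from the previous example is in scope: the differential entropy of a 1-D standard normal is 0.5 * log(2 * pi * e), about 1.42 nats, and for a large sample the estimator should land near that value.

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(10000, 1)
print(_entropy(X, k=3))  # should be close to 1.42 for a standard normal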
Example 9: knn
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

def knn(self,
        scoring_metric='roc_auc',
        hyperparameter_grid=None,
        randomized_search=True,
        number_iteration_samples=10):
    """
    A light wrapper for sklearn's KNN classifier that performs randomized
    search over an overridable default hyperparameter grid.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for classification
        hyperparameter_grid (dict): hyperparameters by name
        randomized_search (bool): True for randomized search (default)
        number_iteration_samples (int): Number of models to train during the
            randomized search for exploring the hyperparameter space.
            More may lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel:
    """
    self.validate_classification('KNN')
    if hyperparameter_grid is None:
        neighbors = list(range(5, 26))
        hyperparameter_grid = {'n_neighbors': neighbors, 'weights': ['uniform', 'distance']}
        number_iteration_samples = 10

    print('KNN Grid: {}'.format(hyperparameter_grid))
    algorithm = get_algorithm(KNeighborsClassifier,
                              scoring_metric,
                              hyperparameter_grid,
                              randomized_search,
                              number_iteration_samples=number_iteration_samples)
    trained_supervised_model = self._create_trained_supervised_model(algorithm)
    return trained_supervised_model
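get_algorithm and _create_trained_supervised_model are internals of the surrounding project. The underlying search, though, is plain scikit-learn; a sketch of the same default grid with RandomizedSearchCV on stand-in data:

from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=300, random_state=0)  # stand-in dataset
grid = {'n_neighbors': list(range(5, 26)), 'weights': ['uniform', 'distance']}
search = RandomizedSearchCV(KNeighborsClassifier(), grid, n_iter=10,
                            scoring='roc_auc', random_state=0)
search.fit(X, y)
print(search.best_params_)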
Example 10: knn_matte
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

import warnings

import numpy as np
import scipy.sparse
import scipy.sparse.linalg
import sklearn.neighbors

def knn_matte(img, trimap, mylambda=100):
    [m, n, c] = img.shape
    img, trimap = img / 255.0, trimap / 255.0
    foreground = (trimap > 0.99).astype(int)
    background = (trimap < 0.01).astype(int)
    all_constraints = foreground + background

    print('Finding nearest neighbors')
    a, b = np.unravel_index(np.arange(m * n), (m, n))
    feature_vec = np.append(np.transpose(img.reshape(m * n, c)), [a, b] / np.sqrt(m * m + n * n), axis=0).T
    nbrs = sklearn.neighbors.NearestNeighbors(n_neighbors=10, n_jobs=4).fit(feature_vec)
    knns = nbrs.kneighbors(feature_vec)[1]

    # Compute sparse A
    print('Computing sparse A')
    row_inds = np.repeat(np.arange(m * n), 10)
    col_inds = knns.reshape(m * n * 10)
    vals = 1 - np.linalg.norm(feature_vec[row_inds] - feature_vec[col_inds], axis=1) / (c + 2)
    A = scipy.sparse.coo_matrix((vals, (row_inds, col_inds)), shape=(m * n, m * n))

    D_script = scipy.sparse.diags(np.ravel(A.sum(axis=1)))
    L = D_script - A  # graph Laplacian of the KNN affinity matrix
    D = scipy.sparse.diags(np.ravel(all_constraints[:, :, 0]))
    v = np.ravel(foreground[:, :, 0])
    c = 2 * mylambda * np.transpose(v)  # note: reuses `c` (channel count above) as the linear-system RHS
    H = 2 * (L + mylambda * D)

    print('Solving linear system for alpha')
    warnings.filterwarnings('error')
    alpha = []
    try:
        alpha = np.minimum(np.maximum(scipy.sparse.linalg.spsolve(H, c), 0), 1).reshape(m, n)
    except Warning:
        x = scipy.sparse.linalg.lsqr(H, c)
        alpha = np.minimum(np.maximum(x[0], 0), 1).reshape(m, n)
    return alpha
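A tiny synthetic run, with a flat gray image and a trimap whose top rows are marked foreground (255) and bottom rows background (0); real use would load img and trimap from image files:

import numpy as np

img = np.full((20, 20, 3), 128.0)
trimap = np.full((20, 20, 3), 128.0)  # unknown region
trimap[:5] = 255.0                    # known foreground
trimap[-5:] = 0.0                     # known background
alpha = knn_matte(img, trimap)
print(alpha.shape)  # (20, 20)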
Example 11: get_kmeans_prototypes
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

import warnings

import numpy as np
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.neighbors import NearestNeighbors

def get_kmeans_prototypes(X, n_prototypes, hashing_dim=128,
                          ngram_range=(3, 3), sparse=False, sample_weight=None,
                          random_state=None):
    """
    Computes prototypes based on:
      - dimensionality reduction (via hashing n-grams)
      - k-means clustering
      - nearest neighbors
    """
    vectorizer = HashingVectorizer(analyzer='char', norm=None,
                                   alternate_sign=False,
                                   ngram_range=ngram_range,
                                   n_features=hashing_dim)
    projected = vectorizer.transform(X)
    if not sparse:
        projected = projected.toarray()
    kmeans = KMeans(n_clusters=n_prototypes, random_state=random_state)
    kmeans.fit(projected, sample_weight=sample_weight)
    centers = kmeans.cluster_centers_
    neighbors = NearestNeighbors()
    neighbors.fit(projected)
    indexes_prototypes = np.unique(neighbors.kneighbors(centers, 1)[-1])
    if indexes_prototypes.shape[0] < n_prototypes:
        warnings.warn('Final number of unique prototypes is lower than '
                      'n_prototypes (expected)')
    return np.sort(X[indexes_prototypes])
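For example, given a handful of noisy strings, the function picks one representative spelling per cluster (this assumes the imports added above):

import numpy as np

X = np.array(['london', 'london.', 'paris', 'parris', 'rome', 'roma'])
print(get_kmeans_prototypes(X, n_prototypes=3, random_state=0))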
Example 12: distance_lshforest
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

def distance_lshforest(z, k=4, metric='cosine'):
    """Return an approximation of the k-nearest cosine distances."""
    assert metric == 'cosine'  # comparing strings with `is` is unreliable
    lshf = sklearn.neighbors.LSHForest()
    lshf.fit(z)
    dist, idx = lshf.kneighbors(z, n_neighbors=k + 1)
    assert dist.min() < 1e-10  # each point should be its own nearest neighbor
    dist[dist < 0] = 0
    return dist, idx

# TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN
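As noted under Example 2, LSHForest was removed in scikit-learn 0.21, so this snippet only runs on old versions. A sketch of a replacement with the same signature that computes exact (rather than approximate) cosine distances:

import sklearn.neighbors

def distance_knn_cosine(z, k=4, metric='cosine'):
    """Exact k-nearest cosine distances (same interface as distance_lshforest)."""
    assert metric == 'cosine'
    nn = sklearn.neighbors.NearestNeighbors(metric='cosine')
    nn.fit(z)
    dist, idx = nn.kneighbors(z, n_neighbors=k + 1)
    assert dist.min() < 1e-10  # each point is its own nearest neighbor
    dist[dist < 0] = 0
    return dist, idx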
Example 13: init_classifier_impl
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]

def init_classifier_impl(field_code: str, init_script: str):
    if init_script is not None:
        init_script = init_script.strip()

    if not init_script:
        from sklearn import tree as sklearn_tree
        return sklearn_tree.DecisionTreeClassifier()

    from sklearn import tree as sklearn_tree
    from sklearn import neural_network as sklearn_neural_network
    from sklearn import neighbors as sklearn_neighbors
    from sklearn import svm as sklearn_svm
    from sklearn import gaussian_process as sklearn_gaussian_process
    from sklearn.gaussian_process import kernels as sklearn_gaussian_process_kernels
    from sklearn import ensemble as sklearn_ensemble
    from sklearn import naive_bayes as sklearn_naive_bayes
    from sklearn import discriminant_analysis as sklearn_discriminant_analysis
    from sklearn import linear_model as sklearn_linear_model

    eval_locals = {
        'sklearn_linear_model': sklearn_linear_model,
        'sklearn_tree': sklearn_tree,
        'sklearn_neural_network': sklearn_neural_network,
        'sklearn_neighbors': sklearn_neighbors,
        'sklearn_svm': sklearn_svm,
        'sklearn_gaussian_process': sklearn_gaussian_process,
        'sklearn_gaussian_process_kernels': sklearn_gaussian_process_kernels,
        'sklearn_ensemble': sklearn_ensemble,
        'sklearn_naive_bayes': sklearn_naive_bayes,
        'sklearn_discriminant_analysis': sklearn_discriminant_analysis
    }
    return eval_script('classifier init script of field {0}'.format(field_code), init_script, eval_locals)
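eval_script is a helper from the surrounding project that evaluates the script string against eval_locals. Assuming it behaves like a restricted eval (the stand-in below is hypothetical), an init_script selecting a neighbors model would look like:

# hypothetical stand-in for the project's eval_script helper
def eval_script(name, script, eval_locals):
    return eval(script, {'__builtins__': {}}, eval_locals)

clf = init_classifier_impl('my_field',
                           'sklearn_neighbors.KNeighborsClassifier(n_neighbors=3)')
print(clf)  # KNeighborsClassifier(n_neighbors=3)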
Example 14: __init__
# Required module: import sklearn [as alias]
# Or: from sklearn import neighbors [as alias]
def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.neighbors.KNeighborsClassifier(**self._hyperparams)
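Only __init__ is shown; the enclosing class is a thin estimator wrapper that presumably delegates fit and predict to _wrapped_model. A hypothetical completion (the class name is made up):

import sklearn.neighbors

class KNeighborsClassifierImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = sklearn.neighbors.KNeighborsClassifier(**self._hyperparams)

    def fit(self, X, y=None):
        self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)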