本文整理汇总了Python中sklearn.neighbors.LSHForest方法的典型用法代码示例。如果您正苦于以下问题:Python neighbors.LSHForest方法的具体用法?Python neighbors.LSHForest怎么用?Python neighbors.LSHForest使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.neighbors
的用法示例。
在下文中一共展示了neighbors.LSHForest方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def __init__(self, use_lsh_forest=False, n_neighbors=20, max_iterations = 300, count_concepts = False, number_of_concepts = 0,
count_terms = False, training_validation_split = 0.8, algorithm_id = '7', l2r_metric = "ERR@k", n_jobs = 1, translation_probability = False, **kwargs ):
self.n_neighbors = n_neighbors
nn = LSHForest(n_neighbors=n_neighbors, **kwargs) if use_lsh_forest else NearestNeighbors(
n_neighbors=n_neighbors, **kwargs)
self.knn = BatchKNeighbors(nn)
self.y = None
self.max_iterations = max_iterations
self.count_concepts = count_concepts
self.count_terms = count_terms
self.number_of_concepts = number_of_concepts
self.training_validation_split = training_validation_split
self.algorithm_id = algorithm_id
self.l2r_metric = l2r_metric
self.n_jobs = n_jobs
self.translation_probability = translation_probability
示例2: test_graphs
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_graphs():
# Smoke tests for graph methods.
n_samples_sizes = [5, 10, 20]
n_features = 3
rng = np.random.RandomState(42)
for n_samples in n_samples_sizes:
X = rng.rand(n_samples, n_features)
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
min_hash_match=0)
ignore_warnings(lshf.fit)(X)
kneighbors_graph = lshf.kneighbors_graph(X)
radius_neighbors_graph = lshf.radius_neighbors_graph(X)
assert_equal(kneighbors_graph.shape[0], n_samples)
assert_equal(kneighbors_graph.shape[1], n_samples)
assert_equal(radius_neighbors_graph.shape[0], n_samples)
assert_equal(radius_neighbors_graph.shape[1], n_samples)
示例3: __init__
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def __init__(self, k=3, **kwargs):
"""
Note: tried LSHForest, still too slow
:param k:
:param kwargs:
"""
self.model = NearestNeighbors(n_neighbors=k, **kwargs)
示例4: __init__
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def __init__(self, use_lsh_forest=False, metric='cosine', algorithm='brute'):
self.lsh = use_lsh_forest
self.y = None
nn = LSHForest(n_neighbors=1, n_candidates=400, n_estimators=35) if use_lsh_forest else NearestNeighbors(
n_neighbors=1, metric=metric, algorithm=algorithm)
self.knn = BatchKNeighbors(nn)
示例5: __init__
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def __init__(self, threshold=0.2, use_lsh_forest=False, mode='b',
n_neighbors=50, scoring='f1_samples', auto_optimize_k=False,
n_neighbor_candidates=(3, 5, 8, 13, 21, 34, 55, 84, 139, 223, 362),
algorithm='brute', metric='cosine'):
self.auto_optimize_k = auto_optimize_k
self.scoring = scoring
self.n_neighbor_candidates = n_neighbor_candidates
self.mode = mode
self.n_neighbors = n_neighbors
self.threshold = threshold
nn = LSHForest(n_neighbors=n_neighbors, n_candidates=400,
n_estimators=35) if use_lsh_forest else NearestNeighbors(
n_neighbors=n_neighbors, algorithm=algorithm, metric=metric)
self.knn = BatchKNeighbors(nn)
self.y = None
示例6: knn_indices_func_approx
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def knn_indices_func_approx(rep_pts : FloatTensor, # (N, pts, dim)
pts : FloatTensor, # (N, x, dim)
K : int, D : int
) -> LongTensor: # (N, pts, K)
"""
Approximate CPU-based Indexing function based on K-Nearest Neighbors search.
:param rep_pts: Representative points.
:param pts: Point cloud to get indices from.
:param K: Number of nearest neighbors to collect.
:param D: "Spread" of neighboring points.
:return: Array of indices, P_idx, into pts such that pts[n][P_idx[n],:]
is the set k-nearest neighbors for the representative points in pts[n].
"""
if rep_pts.is_cuda:
rep_pts = rep_pts.cpu()
if pts.is_cuda:
pts = pts.cpu()
rep_pts = rep_pts.data.numpy()
pts = pts.data.numpy()
region_idx = []
for n, p in enumerate(rep_pts):
P_particular = pts[n]
lshf = LSHForest(n_estimators = 20, n_candidates = 100, n_neighbors = D*K + 1)
lshf.fit(P_particular)
indices = lshf.kneighbors(p, return_distance = False)
region_idx.append(indices[:,1::D])
示例7: test_lsh_forest_deprecation
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_lsh_forest_deprecation():
assert_warns_message(DeprecationWarning,
"LSHForest has poor performance and has been "
"deprecated in 0.19. It will be removed "
"in version 0.21.", LSHForest)
示例8: test_neighbors_accuracy_with_n_candidates
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_neighbors_accuracy_with_n_candidates():
# Checks whether accuracy increases as `n_candidates` increases.
n_candidates_values = np.array([.1, 50, 500])
n_samples = 100
n_features = 10
n_iter = 10
n_points = 5
rng = np.random.RandomState(42)
accuracies = np.zeros(n_candidates_values.shape[0], dtype=float)
X = rng.rand(n_samples, n_features)
for i, n_candidates in enumerate(n_candidates_values):
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
n_candidates=n_candidates)
ignore_warnings(lshf.fit)(X)
for j in range(n_iter):
query = X[rng.randint(0, n_samples)].reshape(1, -1)
neighbors = lshf.kneighbors(query, n_neighbors=n_points,
return_distance=False)
distances = pairwise_distances(query, X, metric='cosine')
ranks = np.argsort(distances)[0, :n_points]
intersection = np.intersect1d(ranks, neighbors).shape[0]
ratio = intersection / float(n_points)
accuracies[i] = accuracies[i] + ratio
accuracies[i] = accuracies[i] / float(n_iter)
# Sorted accuracies should be equal to original accuracies
print('accuracies:', accuracies)
assert_true(np.all(np.diff(accuracies) >= 0),
msg="Accuracies are not non-decreasing.")
# Highest accuracy should be strictly greater than the lowest
assert_true(np.ptp(accuracies) > 0,
msg="Highest accuracy is not strictly greater than lowest.")
示例9: test_neighbors_accuracy_with_n_estimators
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_neighbors_accuracy_with_n_estimators():
# Checks whether accuracy increases as `n_estimators` increases.
n_estimators = np.array([1, 10, 100])
n_samples = 100
n_features = 10
n_iter = 10
n_points = 5
rng = np.random.RandomState(42)
accuracies = np.zeros(n_estimators.shape[0], dtype=float)
X = rng.rand(n_samples, n_features)
for i, t in enumerate(n_estimators):
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
n_candidates=500, n_estimators=t)
ignore_warnings(lshf.fit)(X)
for j in range(n_iter):
query = X[rng.randint(0, n_samples)].reshape(1, -1)
neighbors = lshf.kneighbors(query, n_neighbors=n_points,
return_distance=False)
distances = pairwise_distances(query, X, metric='cosine')
ranks = np.argsort(distances)[0, :n_points]
intersection = np.intersect1d(ranks, neighbors).shape[0]
ratio = intersection / float(n_points)
accuracies[i] = accuracies[i] + ratio
accuracies[i] = accuracies[i] / float(n_iter)
# Sorted accuracies should be equal to original accuracies
assert_true(np.all(np.diff(accuracies) >= 0),
msg="Accuracies are not non-decreasing.")
# Highest accuracy should be strictly greater than the lowest
assert_true(np.ptp(accuracies) > 0,
msg="Highest accuracy is not strictly greater than lowest.")
示例10: test_fit
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_fit():
# Checks whether `fit` method sets all attribute values correctly.
n_samples = 12
n_features = 2
n_estimators = 5
rng = np.random.RandomState(42)
X = rng.rand(n_samples, n_features)
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
n_estimators=n_estimators)
ignore_warnings(lshf.fit)(X)
# _input_array = X
assert_array_equal(X, lshf._fit_X)
# A hash function g(p) for each tree
assert_equal(n_estimators, len(lshf.hash_functions_))
# Hash length = 32
assert_equal(32, lshf.hash_functions_[0].components_.shape[0])
# Number of trees_ in the forest
assert_equal(n_estimators, len(lshf.trees_))
# Each tree has entries for every data point
assert_equal(n_samples, len(lshf.trees_[0]))
# Original indices after sorting the hashes
assert_equal(n_estimators, len(lshf.original_indices_))
# Each set of original indices in a tree has entries for every data point
assert_equal(n_samples, len(lshf.original_indices_[0]))
示例11: test_partial_fit
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_partial_fit():
# Checks whether inserting array is consistent with fitted data.
# `partial_fit` method should set all attribute values correctly.
n_samples = 12
n_samples_partial_fit = 3
n_features = 2
rng = np.random.RandomState(42)
X = rng.rand(n_samples, n_features)
X_partial_fit = rng.rand(n_samples_partial_fit, n_features)
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)()
# Test unfitted estimator
ignore_warnings(lshf.partial_fit)(X)
assert_array_equal(X, lshf._fit_X)
ignore_warnings(lshf.fit)(X)
# Insert wrong dimension
assert_raises(ValueError, lshf.partial_fit,
np.random.randn(n_samples_partial_fit, n_features - 1))
ignore_warnings(lshf.partial_fit)(X_partial_fit)
# size of _input_array = samples + 1 after insertion
assert_equal(lshf._fit_X.shape[0],
n_samples + n_samples_partial_fit)
# size of original_indices_[1] = samples + 1
assert_equal(len(lshf.original_indices_[0]),
n_samples + n_samples_partial_fit)
# size of trees_[1] = samples + 1
assert_equal(len(lshf.trees_[1]),
n_samples + n_samples_partial_fit)
示例12: test_hash_functions
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_hash_functions():
# Checks randomness of hash functions.
# Variance and mean of each hash function (projection vector)
# should be different from flattened array of hash functions.
# If hash functions are not randomly built (seeded with
# same value), variances and means of all functions are equal.
n_samples = 12
n_features = 2
n_estimators = 5
rng = np.random.RandomState(42)
X = rng.rand(n_samples, n_features)
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
n_estimators=n_estimators,
random_state=rng.randint(0, np.iinfo(np.int32).max))
ignore_warnings(lshf.fit)(X)
hash_functions = []
for i in range(n_estimators):
hash_functions.append(lshf.hash_functions_[i].components_)
for i in range(n_estimators):
assert_not_equal(np.var(hash_functions),
np.var(lshf.hash_functions_[i].components_))
for i in range(n_estimators):
assert_not_equal(np.mean(hash_functions),
np.mean(lshf.hash_functions_[i].components_))
示例13: test_candidates
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_candidates():
# Checks whether candidates are sufficient.
# This should handle the cases when number of candidates is 0.
# User should be warned when number of candidates is less than
# requested number of neighbors.
X_train = np.array([[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1],
[6, 10, 2]], dtype=np.float32)
X_test = np.array([7, 10, 3], dtype=np.float32).reshape(1, -1)
# For zero candidates
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
min_hash_match=32)
ignore_warnings(lshf.fit)(X_train)
message = ("Number of candidates is not sufficient to retrieve"
" %i neighbors with"
" min_hash_match = %i. Candidates are filled up"
" uniformly from unselected"
" indices." % (3, 32))
assert_warns_message(UserWarning, message, lshf.kneighbors,
X_test, n_neighbors=3)
distances, neighbors = lshf.kneighbors(X_test, n_neighbors=3)
assert_equal(distances.shape[1], 3)
# For candidates less than n_neighbors
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
min_hash_match=31)
ignore_warnings(lshf.fit)(X_train)
message = ("Number of candidates is not sufficient to retrieve"
" %i neighbors with"
" min_hash_match = %i. Candidates are filled up"
" uniformly from unselected"
" indices." % (5, 31))
assert_warns_message(UserWarning, message, lshf.kneighbors,
X_test, n_neighbors=5)
distances, neighbors = lshf.kneighbors(X_test, n_neighbors=5)
assert_equal(distances.shape[1], 5)
示例14: test_kneighbors
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_kneighbors():
# Checks whether desired number of neighbors are returned.
# It is guaranteed to return the requested number of neighbors
# if `min_hash_match` is set to 0. Returned distances should be
# in ascending order.
n_samples = 12
n_features = 2
n_iter = 10
rng = np.random.RandomState(42)
X = rng.rand(n_samples, n_features)
lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
min_hash_match=0)
# Test unfitted estimator
assert_raises(ValueError, lshf.kneighbors, X[0])
ignore_warnings(lshf.fit)(X)
for i in range(n_iter):
n_neighbors = rng.randint(0, n_samples)
query = X[rng.randint(0, n_samples)].reshape(1, -1)
neighbors = lshf.kneighbors(query, n_neighbors=n_neighbors,
return_distance=False)
# Desired number of neighbors should be returned.
assert_equal(neighbors.shape[1], n_neighbors)
# Multiple points
n_queries = 5
queries = X[rng.randint(0, n_samples, n_queries)]
distances, neighbors = lshf.kneighbors(queries,
n_neighbors=1,
return_distance=True)
assert_equal(neighbors.shape[0], n_queries)
assert_equal(distances.shape[0], n_queries)
# Test only neighbors
neighbors = lshf.kneighbors(queries, n_neighbors=1,
return_distance=False)
assert_equal(neighbors.shape[0], n_queries)
# Test random point(not in the data set)
query = rng.randn(n_features).reshape(1, -1)
lshf.kneighbors(query, n_neighbors=1,
return_distance=False)
# Test n_neighbors at initialization
neighbors = lshf.kneighbors(query, return_distance=False)
assert_equal(neighbors.shape[1], 5)
# Test `neighbors` has an integer dtype
assert_true(neighbors.dtype.kind == 'i',
msg="neighbors are not in integer dtype.")
示例15: test_radius_neighbors_boundary_handling
# 需要导入模块: from sklearn import neighbors [as 别名]
# 或者: from sklearn.neighbors import LSHForest [as 别名]
def test_radius_neighbors_boundary_handling():
X = [[0.999, 0.001], [0.5, 0.5], [0, 1.], [-1., 0.001]]
n_points = len(X)
# Build an exact nearest neighbors model as reference model to ensure
# consistency between exact and approximate methods
nnbrs = NearestNeighbors(algorithm='brute', metric='cosine').fit(X)
# Build a LSHForest model with hyperparameter values that always guarantee
# exact results on this toy dataset.
lsfh = ignore_warnings(LSHForest, category=DeprecationWarning)(
min_hash_match=0, n_candidates=n_points, random_state=42).fit(X)
# define a query aligned with the first axis
query = [[1., 0.]]
# Compute the exact cosine distances of the query to the four points of
# the dataset
dists = pairwise_distances(query, X, metric='cosine').ravel()
# The first point is almost aligned with the query (very small angle),
# the cosine distance should therefore be almost null:
assert_almost_equal(dists[0], 0, decimal=5)
# The second point form an angle of 45 degrees to the query vector
assert_almost_equal(dists[1], 1 - np.cos(np.pi / 4))
# The third point is orthogonal from the query vector hence at a distance
# exactly one:
assert_almost_equal(dists[2], 1)
# The last point is almost colinear but with opposite sign to the query
# therefore it has a cosine 'distance' very close to the maximum possible
# value of 2.
assert_almost_equal(dists[3], 2, decimal=5)
# If we query with a radius of one, all the samples except the last sample
# should be included in the results. This means that the third sample
# is lying on the boundary of the radius query:
exact_dists, exact_idx = nnbrs.radius_neighbors(query, radius=1)
approx_dists, approx_idx = lsfh.radius_neighbors(query, radius=1)
assert_array_equal(np.sort(exact_idx[0]), [0, 1, 2])
assert_array_equal(np.sort(approx_idx[0]), [0, 1, 2])
assert_array_almost_equal(np.sort(exact_dists[0]), dists[:-1])
assert_array_almost_equal(np.sort(approx_dists[0]), dists[:-1])
# If we perform the same query with a slightly lower radius, the third
# point of the dataset that lay on the boundary of the previous query
# is now rejected:
eps = np.finfo(np.float64).eps
exact_dists, exact_idx = nnbrs.radius_neighbors(query, radius=1 - eps)
approx_dists, approx_idx = lsfh.radius_neighbors(query, radius=1 - eps)
assert_array_equal(np.sort(exact_idx[0]), [0, 1])
assert_array_equal(np.sort(approx_idx[0]), [0, 1])
assert_array_almost_equal(np.sort(exact_dists[0]), dists[:-2])
assert_array_almost_equal(np.sort(approx_dists[0]), dists[:-2])