本文整理汇总了Python中sklearn.neighbors.LSHForest.kneighbors方法的典型用法代码示例。如果您正苦于以下问题:Python LSHForest.kneighbors方法的具体用法?Python LSHForest.kneighbors怎么用?Python LSHForest.kneighbors使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.neighbors.LSHForest
的用法示例。
在下文中一共展示了LSHForest.kneighbors方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: search_neighbors
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def search_neighbors(request):
    """Return the designs most visually similar to a query image.

    GET params:
        num   -- how many neighbors to return.
        input -- file name of the query image under settings.DESIGN_PATH.

    Fits an LSH forest over every stored design image (flattened RGB,
    scaled to [0, 1]) and responds with JSON metadata for the nearest hits.
    """
    designs = Design.objects.all()
    image_list = [str(design.uid) + ".png" for design in designs]

    d_geometry = settings.D_GEOMETRY
    flat_dim = d_geometry[0] * d_geometry[1] * 3
    designed_images = np.empty((len(image_list), flat_dim), dtype="float32")
    for idx, fname in enumerate(image_list):
        designed_images[idx] = img2numpy_arr(
            settings.DESIGN_PATH + fname).reshape(flat_dim)
    designed_images /= 255

    lshf = LSHForest(random_state=42)
    lshf.fit(designed_images)

    num = int(request.GET['num'])
    input_fname = str(request.GET['input'])
    input_image = img2numpy_arr(settings.DESIGN_PATH + input_fname)
    input_image = input_image.reshape(1, -1)/255
    _, indices = lshf.kneighbors(input_image, n_neighbors=num)

    similar_images = [
        {
            "image": str(designs[i].uid) + ".png",
            "text": str(designs[i].history_text),
            "like": int(designs[i].like),
            "filtered": str(designs[i].filtered)
        }
        for i in list(indices.reshape(-1))
    ]
    return JsonResponse({
        "results": similar_images
    })
示例2: test_distances
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def test_distances():
    """Checks whether returned neighbors are from closest to farthest."""
    n_samples = 12
    n_features = 2
    n_iter = 10
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    lshf = LSHForest()
    lshf.fit(X)

    for i in range(n_iter):
        n_neighbors = rng.randint(0, n_samples)
        # Reshape to (1, n_features): sklearn estimators (and
        # pairwise_distances) expect 2-D (n_queries, n_features) input;
        # a bare 1-D row is deprecated/rejected.
        query = X[rng.randint(0, n_samples)].reshape(1, -1)
        distances, neighbors = lshf.kneighbors(query,
                                               n_neighbors=n_neighbors,
                                               return_distance=True)
        # Returned neighbors should be from closest to farthest.
        assert_true(np.all(np.diff(distances[0]) >= 0))

        mean_dist = np.mean(pairwise_distances(query, X, metric='cosine'))
        distances, neighbors = lshf.radius_neighbors(query,
                                                     radius=mean_dist,
                                                     return_distance=True)
        assert_true(np.all(np.diff(distances[0]) >= 0))
示例3: test_neighbors_accuracy_with_n_estimators
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def test_neighbors_accuracy_with_n_estimators():
    # Accuracy must be non-decreasing in the number of trees, and the
    # largest forest must strictly beat the smallest one.
    tree_counts = np.array([1, 10, 100])
    n_samples = 100
    n_features = 10
    n_iter = 10
    n_points = 5
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)
    accuracies = np.zeros(tree_counts.shape[0], dtype=float)

    for i, t in enumerate(tree_counts):
        forest = LSHForest(n_candidates=500, n_estimators=t)
        ignore_warnings(forest.fit)(X)
        hit_ratio_sum = 0.0
        for _ in range(n_iter):
            query = X[rng.randint(0, n_samples)].reshape(1, -1)
            approx = forest.kneighbors(query, n_neighbors=n_points,
                                       return_distance=False)
            # Exact cosine-distance ranking for comparison.
            exact = np.argsort(
                pairwise_distances(query, X, metric='cosine'))[0, :n_points]
            overlap = np.intersect1d(exact, approx).shape[0]
            hit_ratio_sum = hit_ratio_sum + overlap / float(n_points)
        accuracies[i] = hit_ratio_sum / float(n_iter)

    # Sorted accuracies should be equal to original accuracies
    assert_true(np.all(np.diff(accuracies) >= 0),
                msg="Accuracies are not non-decreasing.")
    # Highest accuracy should be strictly greater than the lowest
    assert_true(np.ptp(accuracies) > 0,
                msg="Highest accuracy is not strictly greater than lowest.")
示例4: get_nearest_neighbor_iterable
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def get_nearest_neighbor_iterable(self, graphlist, start_graphs, start_is_subset=True):
    """For each graph in `start_graphs`, yield its nearest neighbor in `graphlist`.

    Yields (start_graph, nearest_graph, vector_of_nearest_graph) triples,
    ordered by the matched graph's index in `graphlist`.

    If `start_is_subset` is True, `start_graphs` is assumed to be contained
    in `graphlist`, so the closest hit (the graph itself) is skipped and
    the second-nearest neighbor is used instead.
    """
    # vectorize all; transform_single mutates the graphs, so feed it deep copies
    graphlist = list(graphlist)
    graphlist_ = copy.deepcopy(graphlist)
    X = self.vectorizer.transform_single(graphlist_)
    start_graphs = list(start_graphs)
    graphlist_ = copy.deepcopy(start_graphs)
    Y = self.vectorizer.transform_single(graphlist_)

    forest = LSHForest()
    forest.fit(X)
    # http://scikit-learn.org/stable/modules/neighbors.html
    # k=2 so the self-match can be skipped when start_is_subset is True.
    distances, indices = forest.kneighbors(Y, n_neighbors=2)

    # we just assume that this is short...
    index = 0
    if start_is_subset:
        index += 1  # column 0 is (presumably) the graph itself

    # matches = (X_index, Y_index, distance)
    matches = [(indices[i, index], i, distances[i, index]) for i in range(len(indices))]
    matches.sort()  # order results by position in graphlist

    # NOTE(review): `matches` could likely be dropped in favor of using
    # `indices` directly, per the original author's own comment.
    for Xi, Yi, dist in matches:
        yield ((start_graphs[Yi], graphlist[Xi], X[Xi]))
示例5: get_heap_and_forest
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def get_heap_and_forest(self, griter, k):
    '''
    Build a score heap and an LSH nearest-neighbor forest over the graphs.

    Returns (heap, forest, avg_dist):
      heap     -- entries of (classifier score ~ dist to hyperplane, count, graph)
      forest   -- sklearn LSHForest fitted on the vectorized graphs
      avg_dist -- middle element of the 2nd-nearest-neighbor distances
                  (NOTE(review): list is unsorted, so not a true median)
    '''
    graphs = list(griter)
    graphs2 = copy.deepcopy(graphs)
    # transform does mess up the graph objects, hence the deep copy above
    X = self.vectorizer.transform(graphs)

    forest = LSHForest()
    forest.fit(X)
    print 'got forest'

    heap = []
    for vector, graph in zip(X, graphs2):
        graph2 = nx.Graph(graph)
        heapq.heappush(heap, (
            self.sampler.estimator.predict_proba(self.sampler.vectorizer.transform_single(graph2))[0][1],
            # score ~ dist from hyperplane
            k + 1,  # making sure that the counter is high so we dont output the startgraphz at the end
            graph))  # at last the actual graph
    print 'got heap'

    # k=2: column 0 is the point itself, column 1 its true nearest neighbor.
    distances, unused = forest.kneighbors(X, n_neighbors=2)
    distances = [a[1] for a in distances]  # the second element should be the dist we want
    avg_dist = distances[len(distances) / 2]  # sum(distances)/len(distances)
    print 'got dist'

    return heap, forest, avg_dist
示例6: __init__
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
class LHSForestEngine:
    """Thin nearest-neighbor engine backed by sklearn's LSHForest."""

    def __init__(self):
        # Fixed seed keeps the hash forest deterministic across runs.
        self.engine = LSHForest(random_state=42)
        self.name = "LHS"

    def fit(self, data):
        """Index `data` in the LSH forest."""
        self.engine.fit(data)

    def dist(self, data):
        """Distance from each row of `data` to its single nearest neighbor."""
        d, _ = self.engine.kneighbors(data, n_neighbors=1)
        return d.ravel()
示例7: calculate_duplication_number
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def calculate_duplication_number(self, text_list):
    """Report near-duplicate texts via 5-gram counts and LSH lookup.

    Vectorizes each text into word 5-gram term counts, fits an LSH forest
    on the count matrix, and prints each text's distance to / index of its
    single nearest neighbor (NOTE(review): with n_neighbors=1 each point
    may match itself — verify intent).
    """
    print "length is ", len(text_list)
    # 5-grams only: exact 5-word runs are the duplication signal here.
    tf_vectorizer = CountVectorizer(stop_words=None, analyzer='word', ngram_range=(5, 5))
    # print text_list
    tf = tf_vectorizer.fit_transform(text_list)
    # print tf_vectorizer.get_feature_names()
    print tf[0]
    # print tf[123]
    lshf = LSHForest()
    # print tf
    lshf.fit(tf)
    distance, index = lshf.kneighbors(tf, n_neighbors=1)
    print distance, index
示例8: test_sparse_input
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def test_sparse_input():
    # note: Fixed random state in sp.rand is not supported in older scipy.
    # The test should succeed regardless.
    X1 = sp.rand(50, 100)
    X2 = sp.rand(10, 100)

    # Same seed, same training data — only the input format differs.
    sparse_forest = LSHForest(radius=1, random_state=0).fit(X1)
    dense_forest = LSHForest(radius=1, random_state=0).fit(X1.A)

    d_sp, i_sp = sparse_forest.kneighbors(X2, return_distance=True)
    d_de, i_de = dense_forest.kneighbors(X2.A, return_distance=True)
    assert_array_equal(d_sp, d_de)
    assert_array_equal(i_sp, i_de)

    # radius_neighbors returns ragged per-query arrays: compare row by row.
    d_sp, i_sp = sparse_forest.radius_neighbors(X2, return_distance=True)
    d_de, i_de = dense_forest.radius_neighbors(X2.A, return_distance=True)
    assert_equal(d_sp.shape, d_de.shape)
    for row_sp, row_de in zip(d_sp, d_de):
        assert_array_equal(row_sp, row_de)
    for row_sp, row_de in zip(i_sp, i_de):
        assert_array_equal(row_sp, row_de)
示例9: startQuery
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def startQuery():
    """Interactive loop: read a document path, infer its LDA topic vector,
    and print the indices of the 20 most similar training documents.

    Relies on module-level globals: en_stop (stopword set), num_topic,
    LDA (trained gensim model), DOC_TOPICS (doc-topic matrix of the
    training corpus). Type `exit()` at the prompt to leave the loop.
    """
    while True:
        try:
            ipt = raw_input('Directory of query:')
        except ImportError:
            # NOTE(review): raw_input does not raise ImportError; this
            # handler looks like it was meant for a different exception.
            print 'invalid type'
        else:
            query = ipt
            if query == 'exit()':
                break
            print 'loading query...'
            try:
                token = get_tokens_by_dir(query)
            except IOError:
                print 'invalid file name'
            else:
                ########################################## query preprocessing
                print 'query pre-processing...'
                stopped_tokens = [i for i in token if not i in en_stop]
                p_stemmer = PorterStemmer()
                stemed_tokens = []
                for i in stopped_tokens:
                    try:
                        temp_token = str(p_stemmer.stem(i))
                        stemed_tokens.append(temp_token)
                    except IndexError:
                        # Some tokens make the stemmer blow up; skip them.
                        pass
                tokens = [stemed_tokens]
                ##############################################################
                dictionary_new = corpora.Dictionary(tokens)
                corpus_new = [dictionary_new.doc2bow(text) for text in tokens]
                QUERY_TOPIC = np.zeros([1, num_topic])  # topic vector for query
                new_topics = LDA[corpus_new]
                for i in new_topics[0]:
                    print(i)
                    QUERY_TOPIC[0, i[0]] = i[1]  # assign new topics to query doc-topic matrix
                print 'fetching results for you...'
                lshf = LSHForest(random_state=42)
                lshf.fit(DOC_TOPICS)  # fit the LSH forest with training data POINT_SET
                dist, indices = lshf.kneighbors(QUERY_TOPIC, n_neighbors=20)
                print indices
示例10: test_candidates
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def test_candidates():
    """Checks whether candidates are sufficient.
    This should handle the cases when number of candidates is 0.
    User should be warned when number of candidates is less than
    requested number of neighbors.
    """
    X_train = np.array([[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1],
                        [6, 10, 2]], dtype=np.float32)
    X_test = np.array([7, 10, 3], dtype=np.float32)

    # Same warning template for both scenarios below.
    warn_tmpl = ("Number of candidates is not sufficient to retrieve"
                 " %i neighbors with"
                 " min_hash_match = %i. Candidates are filled up"
                 " uniformly from unselected"
                 " indices.")

    # (min_hash_match, n_neighbors): first case yields zero candidates,
    # second yields fewer candidates than requested neighbors.
    for min_hash, k in ((32, 3), (31, 5)):
        forest = LSHForest(min_hash_match=min_hash)
        forest.fit(X_train)
        assert_warns_message(UserWarning, warn_tmpl % (k, min_hash),
                             forest.kneighbors, X_test, n_neighbors=k)
        dist, _ = forest.kneighbors(X_test, n_neighbors=k)
        assert_equal(dist.shape[1], k)
示例11: cal_acc
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def cal_acc(pack_file, stat_file, feature_dim):
    """Measure per-person recognition accuracy as training size grows.

    For each training size `num`, incrementally fits an LSH forest on the
    training features (partial_fit), labels every validation sample by its
    single nearest neighbor, and writes tab-separated
    (train_pic_num, person_name, acc) rows to `stat_file`.

    NOTE(review): counts are keyed by the PREDICTED label, so the ratio is
    per-person precision; cal_recall keys by the true label instead.
    Relies on module-level globals: max_person_num, split_train_valid
    (and sim_threshold in the commented-out filtering branch).
    """
    f = open(stat_file, 'w')
    f.write('train_pic_num'+'\t'+'person_name'+'\t'+'acc'+'\n')
    pic_num = range(1, max_person_num)
    for num in pic_num:
        all_train_data, all_train_label, all_valid_data, all_valid_label = split_train_valid(pack_file, train_pic_num=num, feature_dim=feature_dim)
        lshf = LSHForest(n_estimators=20, n_candidates=200, n_neighbors=5)
        for index in range(len(all_train_data)):
            try:
                # NOTE(review): `== None` on array-like data is ambiguous;
                # `is None` was probably intended.
                if all_train_data[index] == None:
                    continue
                lshf.partial_fit(all_train_data[index], all_train_label[index])
            except:
                traceback.print_exc()
                continue
        # Per-person accuracy bookkeeping.
        person_acc_dic = {}  # correct predictions per predicted person
        person_all_dic = {}  # total predictions per predicted person
        filter_num = 0
        all_num = 0
        for index in range(len(all_valid_data)):
            try:
                if all_valid_data[index] == None:
                    continue
                all_find_distance, all_find_index = lshf.kneighbors(all_valid_data[index], n_neighbors=5, return_distance=True)
                cos_sim = cosine_similarity(all_valid_data[index], all_train_data[all_find_index[0, 0]])
                label = all_train_label[all_find_index[0, 0]]
                # if cos_sim > sim_threshold:
                if True:  # similarity-threshold filtering disabled
                    if label == all_valid_label[index]:
                        person_acc_dic[label] = person_acc_dic.get(label, 0) + 1
                        person_all_dic[label] = person_all_dic.get(label, 0) + 1
                    else:
                        person_all_dic[label] = person_all_dic.get(label, 0) + 1
                else:
                    filter_num += 1
                all_num += 1
            except:
                print all_valid_label[index]
                continue
        print 'train_num :', num, 'filter_rate: ', (filter_num * 1.0 / all_num)
        for person in person_all_dic:
            all_num = person_all_dic[person]
            right_num = person_acc_dic.get(person, 0)
            f.write('\t'.join(map(str, [num, person, (right_num * 1.0 / all_num)]))+'\n')
示例12: cal_recall
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def cal_recall(pack_file, stat_file, feature_dim):
    """Measure per-person recognition recall as training size grows.

    For each training size `num`, incrementally fits an LSH forest on the
    training features, labels every validation sample by its single
    nearest neighbor, and writes tab-separated
    (train_pic_num, person_name, recall) rows to `stat_file`.

    Counts are keyed by the TRUE label (unlike cal_acc), so the ratio is
    recall per person. Relies on module-level globals: max_person_num,
    split_train_valid (and sim_threshold in the commented-out branch).
    """
    # f_model = open('verf.txt', 'w')
    f = open(stat_file, 'w')
    f.write('train_pic_num'+'\t'+'person_name'+'\t'+'recall'+'\n')
    pic_num = range(1, max_person_num)
    for num in pic_num:
        all_train_data, all_train_label, all_valid_data, all_valid_label = split_train_valid(pack_file, train_pic_num=num, feature_dim=feature_dim)
        lshf = LSHForest(n_estimators=20, n_candidates=200, n_neighbors=5)
        for index in range(len(all_train_data)):
            try:
                # NOTE(review): `== None` on array-like data is ambiguous;
                # `is None` was probably intended.
                if all_train_data[index] == None:
                    continue
                lshf.partial_fit(all_train_data[index], all_train_label[index])
            except:
                continue
        # Per-person recall bookkeeping.
        person_find_dic = {}  # correctly recognized samples per true person
        person_all_dic = {}  # total validation samples per true person
        for index in range(len(all_valid_data)):
            try:
                if all_valid_data[index] == None:
                    continue
                all_find_distance, all_find_index = lshf.kneighbors(all_valid_data[index], n_neighbors=5, return_distance=True)
                cos_sim = cosine_similarity(all_valid_data[index], all_train_data[all_find_index[0, 0]])
                label = all_train_label[all_find_index[0, 0]]
                real_label = all_valid_label[index]
                # if cos_sim > sim_threshold:
                if True:  # similarity-threshold filtering disabled
                    if label == real_label:
                        # f_model.write('0'+'\t'+str(cos_sim)+'\n')
                        person_find_dic[real_label] = person_find_dic.get(real_label, 0) + 1
                        person_all_dic[real_label] = person_all_dic.get(real_label, 0) + 1
                    else:
                        # f_model.write('1' + '\t' + str(cos_sim) + '\n')
                        person_all_dic[real_label] = person_all_dic.get(real_label, 0) + 1
            except:
                print all_valid_label[index]
                continue
        print 'train_num :', num
        for person in person_all_dic:
            all_num = person_all_dic[person]
            right_num = person_find_dic.get(person, 0)
            f.write('\t'.join(map(str, [num, person, (right_num * 1.0 / all_num)]))+'\n')
示例13: lshf_scikit
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def lshf_scikit(data, n_neighbors=4,
                n_estimators=10,
                min_hash_match=4,
                n_candidates=10,
                random_state=None):
    """Fit an LSH forest on `data` and return all-pairs neighbor queries.

    Parameters mirror sklearn's LSHForest; `n_neighbors` is incremented by
    one internally because each point's nearest neighbor is itself.

    Returns the (distances, indices) pair from LSHForest.kneighbors.

    Bug fix: the forest previously hard-coded n_estimators=10,
    min_hash_match=4, n_candidates=10, random_state=0, silently ignoring
    the caller's arguments; they are now passed through. (Default-call
    behavior is no longer seeded with 0 — random_state=None is honored.)
    """
    # account for the self-match in each query's result
    n_neighbors += 1

    # initialize nearest neighbor model with the caller's settings
    nbrs = LSHForest(n_neighbors=n_neighbors,
                     n_estimators=n_estimators,
                     min_hash_match=min_hash_match,
                     n_candidates=n_candidates,
                     random_state=random_state)

    # fit nearest neighbor model to the data
    nbrs.fit(data)

    # return the distances and indices
    return nbrs.kneighbors(data)
示例14: test_kneighbors
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def test_kneighbors():
    """Checks whether desired number of neighbors are returned.
    It is guaranteed to return the requested number of neighbors
    if `min_hash_match` is set to 0. Returned distances should be
    in ascending order.
    """
    n_samples, n_features, n_iter = 12, 2, 10
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    forest = LSHForest(min_hash_match=0)
    # Querying an unfitted estimator must raise.
    assert_raises(ValueError, forest.kneighbors, X[0])

    forest.fit(X)

    for _ in range(n_iter):
        k = rng.randint(0, n_samples)
        query = X[rng.randint(0, n_samples)]
        found = forest.kneighbors(query, n_neighbors=k,
                                  return_distance=False)
        # Desired number of neighbors should be returned.
        assert_equal(found.shape[1], k)

    # Several query points at once.
    n_queries = 5
    queries = X[rng.randint(0, n_samples, n_queries)]
    dist, ind = forest.kneighbors(queries, n_neighbors=1,
                                  return_distance=True)
    assert_equal(ind.shape[0], n_queries)
    assert_equal(dist.shape[0], n_queries)

    # Indices only, no distances.
    ind = forest.kneighbors(queries, n_neighbors=1, return_distance=False)
    assert_equal(ind.shape[0], n_queries)

    # A random point that is not in the training set.
    query = rng.randn(n_features)
    forest.kneighbors(query, n_neighbors=1, return_distance=False)

    # Omitting n_neighbors falls back to the constructor default (5).
    ind = forest.kneighbors(query, return_distance=False)
    assert_equal(ind.shape[1], 5)

    # Neighbor indices must be integers.
    assert_true(ind.dtype.kind == 'i',
                msg="neighbors are not in integer dtype.")
示例15: test_distances
# 需要导入模块: from sklearn.neighbors import LSHForest [as 别名]
# 或者: from sklearn.neighbors.LSHForest import kneighbors [as 别名]
def test_distances():
    # Checks whether returned neighbors are from closest to farthest.
    n_samples, n_features, n_iter = 12, 2, 10
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    forest = LSHForest()
    ignore_warnings(forest.fit)(X)

    for _ in range(n_iter):
        k = rng.randint(0, n_samples)
        query = X[rng.randint(0, n_samples)].reshape(1, -1)
        dist, ind = forest.kneighbors(query,
                                      n_neighbors=k,
                                      return_distance=True)
        # Distances must come back sorted ascending (closest first).
        assert_true(np.all(np.diff(dist[0]) >= 0))