本文整理汇总了Python中sklearn.neighbors.NearestNeighbors.fit方法的典型用法代码示例。如果您正苦于以下问题：Python NearestNeighbors.fit方法的具体用法？Python NearestNeighbors.fit怎么用？Python NearestNeighbors.fit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.neighbors.NearestNeighbors的用法示例。
在下文中一共展示了NearestNeighbors.fit方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_knn_score
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def get_knn_score(data, targetdata, filenames, num=20):
    """Copy the files whose text is closest to the first target document.

    A bag-of-words TF-IDF model is fitted on ``data``.  The documents in
    ``targetdata`` are projected into the same space and the nearest corpus
    documents of the *first* target document are looked up.  For each of
    those neighbours the file named ``basename(filenames[i])`` is copied from
    ``ORI_DIR`` to ``TARGET_DIR``.

    Parameters
    ----------
    data : iterable of str
        Corpus documents; ``data[i]`` is expected to belong to
        ``filenames[i]``.
    targetdata : iterable of str
        Query documents.  Only the neighbours of the first one are used.
    filenames : sequence of str
        Path of the file each corpus document came from.
    num : int, optional
        Maximum number of neighbours to copy (default 20).

    Returns
    -------
    None
        The function is used for its side effect (copying files).
    """
    vectorizer = CountVectorizer()
    tfidf = TfidfTransformer()
    tfidf_data = tfidf.fit_transform(vectorizer.fit_transform(data))

    # kneighbors() raises when asked for more neighbours than there are
    # fitted samples, so cap the request at the corpus size.
    knn = NearestNeighbors(n_neighbors=min(num, tfidf_data.shape[0]))
    knn.fit(tfidf_data)

    tfidf_target = tfidf.transform(vectorizer.transform(targetdata))
    neighbour_rows = knn.kneighbors(tfidf_target, return_distance=False)

    # Row 0 holds the neighbours of the first target document.
    for i in neighbour_rows[0].tolist():
        fname = basename(filenames[i])
        # ORI_DIR / TARGET_DIR are concatenated as-is, so they must already
        # end with a path separator.
        copy(ORI_DIR + fname, TARGET_DIR + fname)
示例2: sample
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def sample(s):
    """Over-sample the minority class by interpolating towards neighbours.

    For every minority sample its ``s.k`` nearest minority neighbours are
    found and synthetic points are drawn on the segments joining the sample
    to those neighbours.

    * If ``s.rate >= 0.5 * s.k``, ``int(s.rate)`` neighbours are drawn (with
      replacement) and one synthetic point is created per draw.
    * Otherwise one candidate is created per neighbour and each candidate is
      kept with probability ``s.rate / s.k``.

    Returns
    -------
    numpy.ndarray
        Rows of ``[features..., label]`` holding the original minority
        samples, the synthetic minority samples and the majority samples,
        shuffled in place along the first axis.

    Raises
    ------
    ValueError
        If ``s.data`` is ``None``.
    """
    if s.data is None:
        raise ValueError('data not loaded.')

    minoX = s.X[s.y == s.minolab]
    majX = s.X[s.y == s.majlab]

    mdl = NearestNeighbors(n_neighbors=s.k, n_jobs=-1)
    mdl.fit(minoX)
    # Without a query argument kneighbors() returns the neighbours of the
    # fitted points themselves.
    _, nei_table = mdl.kneighbors()

    # Collect the per-sample batches and stack once at the end instead of
    # re-allocating the whole array on every iteration.
    batches = []
    for cnt, nei_idx in enumerate(nei_table):
        x = minoX[cnt]
        if s.rate >= 0.5 * s.k:
            count = int(s.rate)
            nei = minoX[np.random.choice(nei_idx, count)]
            new = x + np.random.rand(count, 1) * (nei - x)
        else:
            nei = minoX[nei_idx]
            new = x + np.random.rand(s.k, 1) * (nei - x)
            # Each of the k synthesised points is kept with probability
            # rate / k, i.e. when its uniform draw falls BELOW that value.
            new = new[np.random.rand(s.k) < s.rate * 1.0 / s.k]
        batches.append(new)
    generated = np.vstack(batches)

    # number of generated instances
    N = len(generated)
    features = np.vstack((minoX, generated, majX))
    labels = np.array([s.minolab] * (minoX.shape[0] + N)
                      + [s.majlab] * majX.shape[0])[:, None]
    ret = np.hstack((features, labels))
    np.random.shuffle(ret)
    return ret
示例3: resample
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def resample(self):
    """Over-sample the minority class around its borderline samples.

    Minority samples flagged by ``in_danger`` (evaluated against a
    neighbour model fitted on the whole data set with ``self.m + 1``
    neighbours) are used as seeds for ``make_samples``.

    Returns
    -------
    tuple
        ``(x, y)``: the original data followed by the synthetic samples, or
        the original data unchanged when no minority sample is flagged.
    """
    from sklearn.neighbors import NearestNeighbors

    # Minority-class subset.
    minority_mask = self.y == self.minc
    minx = self.x[minority_mask]
    miny = self.y[minority_mask]

    # Neighbour model over every sample of the data set.
    print("Finding the %i nearest neighbours..." % self.m, end = "")
    NN = NearestNeighbors(n_neighbors=self.m + 1)
    NN.fit(self.x)
    print("done!")

    # One boolean per minority sample, as decided by in_danger().
    index = asarray([in_danger(point, self.y, self.m, miny[0], NN)
                     for point in minx])

    # Nothing flagged: hand back the data untouched.
    if not any(index):
        print('There are no samples in danger. No borderline synthetic samples created.')
        return self.x, self.y

    # Re-fit on the minority class only; column 0 of the result is dropped
    # (the query points are part of the fitted set).
    NN.set_params(n_neighbors=self.k + 1)
    NN.fit(minx)
    borderline = minx[index]
    nns = NN.kneighbors(borderline, return_distance=False)[:, 1:]

    # Synthesise new minority samples from the flagged seeds.
    n_new = int(self.ratio * len(miny))
    sx, sy = make_samples(borderline, minx, miny[0], nns, n_new,
                          random_state=self.rs)

    # Append the synthetic samples to the original data set.
    return (concatenate((self.x, sx), axis=0),
            concatenate((self.y, sy), axis=0))
示例4: resample
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def resample(self):
    """Under-sample the data set using a nearest-neighbour cleaning rule.

    A neighbour model with ``self.size_ngh`` neighbours is fitted on the
    whole data set.  For every class key in ``self.ucd`` the samples whose
    neighbourhood is not entirely of their own class are located and an
    exclusion list is built from them.  The result is the minority samples
    followed by every sample that was not excluded.

    Returns
    -------
    tuple
        ``(underx, undery)``: the resampled features and labels.
    """
    from sklearn.neighbors import NearestNeighbors

    # Start with the minority class.
    minority_mask = self.y == self.minc
    underx = self.x[minority_mask]
    undery = self.y[minority_mask]

    # Neighbour model over the whole data set.
    nn_obj = NearestNeighbors(n_neighbors=self.size_ngh)
    nn_obj.fit(self.x)

    idx_to_exclude = []
    for key in self.ucd.keys():
        class_mask = self.y == key
        # Samples of the current class and their positions in self.x.
        sub_samples_x = self.x[class_mask]
        idx_sub_sample = np.nonzero(class_mask)[0]

        # Neighbourhood of each sample of the class, as indices into self.x.
        nnhood_idx = nn_obj.kneighbors(sub_samples_x, return_distance=False)
        # True where a neighbour carries the same label as the class.
        nnhood_label = self.y[nnhood_idx] == key
        # True for samples with at least one neighbour of another class.
        nnhood_bool = np.logical_not(np.all(nnhood_label, axis=1))
        mixed_rows = np.nonzero(nnhood_bool)

        if key == self.minc:
            # NOTE(review): the (row, col) pairs computed here are relative
            # to the filtered sub-array but are applied to the unfiltered
            # nnhood_idx, and they select same-label neighbours -- confirm
            # this is the intended selection.
            picked = np.nonzero(nnhood_label[mixed_rows])
            idx_to_exclude += nnhood_idx[picked].tolist()
        else:
            idx_to_exclude += idx_sub_sample[mixed_rows].tolist()

    # 1 marks a sample to keep, 0 a sample to drop.
    sel_idx = np.ones(self.y.shape)
    sel_idx[idx_to_exclude] = 0
    kept = np.nonzero(sel_idx)

    # Gather the kept samples and append them to the minority samples.
    sel_x = np.squeeze(self.x[kept, :])
    sel_y = self.y[kept]
    underx = concatenate((underx, sel_x), axis=0)
    undery = concatenate((undery, sel_y), axis=0)

    if self.verbose:
        print("Under-sampling performed: " + str(Counter(undery)))
    return underx, undery
示例5: findKNN
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def findKNN(frequencyVector, newVector, n_neighbors=5):
    """Return the indices of the nearest rows of ``frequencyVector``.

    Parameters
    ----------
    frequencyVector : array-like
        Reference samples, one per row.
    newVector : array-like
        Query sample(s).  A single 1-D vector is treated as one query.
    n_neighbors : int, optional
        Number of neighbours to return per query (default 5).

    Returns
    -------
    list of list of int
        One list of row indices into ``frequencyVector`` per query, ordered
        by increasing Euclidean distance.
    """
    samples = np.array(frequencyVector)

    # scikit-learn only accepts 2-D queries; promote a lone vector to a
    # one-row matrix.
    if np.ndim(newVector) == 1:
        newVector = np.reshape(newVector, (1, -1))

    neigh = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean")
    neigh.fit(samples)
    return neigh.kneighbors(newVector, return_distance=False).tolist()
示例6: adasyn_sample
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def adasyn_sample(X, Y, minclass, K=5, n=200):
    """Resample the minority class to ``n`` rows with an ADASYN-like scheme.

    Parameters
    ----------
    X : sparse matrix
        Feature matrix, one sample per row (the result is assembled with
        ``sparse.vstack``).
    Y : numpy.ndarray
        Class label of each row of ``X``.
    minclass : object
        Label of the minority class.
    K : int, optional
        Neighbourhood size (default 5).
    n : int, optional
        Requested number of minority rows in the output (default 200).

    Returns
    -------
    sparse matrix
        If ``n`` does not exceed the minority count, ``n`` minority rows
        drawn without replacement.  Otherwise the original minority rows
        followed by synthetic rows.  Because per-sample counts are rounded,
        the total may differ slightly from ``n``.
    """
    indices = np.nonzero(Y == minclass)
    Xmin = X[indices]
    Cmin = len(indices[0])

    if n <= Cmin:
        # simple random without replacement undersampling
        return Xmin[random.sample(range(Cmin), n)]

    Xs = [Xmin]
    n -= Cmin

    # Weight every minority sample by the share of minority labels among
    # its K nearest neighbours in the full data set.
    # NOTE(review): published ADASYN weights by the share of *majority*
    # neighbours; the original behaviour is kept here -- confirm intent.
    neigh = NearestNeighbors(n_neighbors=K)
    neigh.fit(X)
    nindices = neigh.kneighbors(Xmin, return_distance=False)
    gamma = np.array([float(np.sum(Y[i] == minclass)) / K for i in nindices])
    total = gamma.sum()
    if total > 0:
        gamma = gamma / total
    else:
        # No weight anywhere: fall back to a uniform split rather than
        # dividing by zero.
        gamma = np.full(Cmin, 1.0 / Cmin)
    N = np.round(gamma * n).astype(int)

    # Neighbours within the minority class, queried once for all samples.
    neigh = NearestNeighbors(n_neighbors=K)
    neigh.fit(Xmin)
    min_neighbours = neigh.kneighbors(Xmin, return_distance=False)

    for i, count in enumerate(N):
        for _ in range(count):
            alpha = random.random()
            # The indices refer to rows of Xmin (the fitted set), not X.
            Xnn = Xmin[random.choice(min_neighbours[i])]
            Xs.append((1. - alpha) * Xmin[i] + alpha * Xnn)

    return sparse.vstack(Xs)
示例7: removeRedundantFrames
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def removeRedundantFrames(self):
    """Return one representative key frame per colour-histogram cluster.

    ``self.frameHistFeats`` is clustered into ``int(sqrt(n))`` groups with
    k-means (``n`` being the number of key frames).  For every cluster
    centre the key frame with the closest colour histogram is selected.

    Returns
    -------
    list
        The selected key frames in their original order, without
        duplicates.  Empty when there are no key frames.
    """
    n = len(self.keyframes)
    if n == 0:
        return []

    # One 256-bin colour histogram per key frame.
    frames = np.zeros((n, 256))
    for i, kf in enumerate(self.keyframes):
        frames[i] = tools.getColorHist(kf).ravel()

    k = int(np.sqrt(n))
    kmeans = KMeans(k)
    print("Clustering frames into {0} code vectors.".format(k))
    # NOTE(review): the clustering runs on self.frameHistFeats while the
    # lookup below runs on the histograms computed above -- confirm both
    # hold the same features.
    kmeans.fit(self.frameHistFeats)

    NN = NearestNeighbors(n_neighbors=1)
    NN.fit(frames)
    # Query all centres at once; the result has one column (n_neighbors=1).
    nearest = NN.kneighbors(kmeans.cluster_centers_, return_distance=False)

    # Two centres can map to the same frame; keep each frame only once and
    # use plain ints so they are valid list indices.
    bestFrameIndices = sorted(set(int(i) for i in nearest[:, 0]))
    return [self.keyframes[i] for i in bestFrameIndices]
示例8: select
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def select(self, x_test, metric='minkowski', p=2):
    """
    Dynamically select a classifier from the pool for a test pattern.

    If every classifier of the pool predicts the same label for ``x_test``
    the first one is returned.  Otherwise the ``self.knn`` nearest
    validation samples of ``x_test`` are found and the classifier with the
    highest accuracy on them is returned.

    Parameters
    ----------
    x_test : numpy.ndarray
        A single test pattern; it is reshaped to one row before use.
    metric : str, optional
        Distance metric handed to ``NearestNeighbors``.
    p : int, optional
        Power parameter handed to ``NearestNeighbors``.

    Returns
    ----------
    best classifier : best classifier according to the dynamic selection scheme.

    Raises
    ------
    ValueError
        If the ensemble has no fitted estimators.
    """
    # An unfitted ensemble has no ``estimators_`` attribute at all, so look
    # it up defensively to reach the intended error below.
    pool = getattr(self.ensemble_clf, 'estimators_', None)
    if pool is None or len(pool) == 0:
        raise ValueError("Fit the ensemble method before throwing it to "
                         "the dynamic selection algorithm")

    query = x_test.reshape(1, -1)
    predicted_labels = [clf.predict(query) for clf in pool]
    if len(np.unique(predicted_labels)) == 1:
        # All the classifiers agree on the predicted class
        return pool[0]

    # Disagreement: judge the classifiers on the local neighbourhood of
    # the test pattern inside the validation set.
    knn = NearestNeighbors(n_neighbors=self.knn, metric=metric, p=p)
    knn.fit(self.X_val)
    iknn = knn.kneighbors(query, return_distance=False)[0]
    X_knn, y_knn = self.X_val[iknn], self.y_val[iknn]
    accuracies = [accuracy_score(y_knn, clf.predict(X_knn)) for clf in pool]
    return pool[int(np.argmax(accuracies))]
示例9: load_data_with_SMOTE
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def load_data_with_SMOTE(size=150):
    """Load the data set and balance it to ``size`` rows per class.

    Rows of class ``'B'`` are bootstrapped and replaced by SMOTE-style
    synthetic points; rows of classes ``'L'`` and ``'R'`` are bootstrapped
    as they are.  Column 0 of the raw data is used as the class label and
    columns 1-4 as features.

    Parameters
    ----------
    size : int, optional
        Number of rows drawn per class (default 150).

    Returns
    -------
    X : numpy.ndarray
        Feature columns of the ``3 * size`` resulting rows.
    y : numpy.ndarray
        Integer-encoded class labels produced by ``LabelEncoder``.
    """
    rawdata = read_file()

    def _bootstrap(label):
        # Draw `size` rows of one class, with replacement.
        rows = rawdata[rawdata['class'] == label]
        return rows.iloc[np.random.randint(0, rows.shape[0], size)]

    small = _bootstrap('B')
    X = small.iloc[:, 1:5].values
    y = small.iloc[:, 0].values

    # Column 0 of the neighbour table is the query point itself (or an
    # identical bootstrap copy); column 1 is the neighbour to move towards.
    knn = NearestNeighbors(n_neighbors=2)
    knn.fit(X)
    nearest = knn.kneighbors(X, return_distance=False)[:, 1]

    # SMOTE: place each synthetic point on the segment between the sample
    # and its neighbour, using one random gap in [0, 1) per sample.
    gap = np.random.random((X.shape[0], 1))
    X = X + gap * (X[nearest] - X)
    B = np.concatenate([y[:, np.newaxis], X], axis=1)

    L = _bootstrap('L')
    R = _bootstrap('R')
    d = np.concatenate([B, L.values, R.values])

    le = LabelEncoder()
    X = d[:, 1:5]
    y = le.fit_transform(d[:, 0])
    return X, y
示例10: draw_voronoi
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def draw_voronoi(ax, rrt):
    """Shade the nearest-node regions of the tree's states on ``ax``.

    The current axes limits are sampled on a 500 x 500 grid, every grid
    point is assigned the index of the nearest tree state, and the
    resulting label image is drawn semi-transparently with the ``prism``
    colour map.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on; its current x/y limits define the sampled area.
    rrt : object
        Must expose ``rrt.tree.nodes()`` and ``rrt.tree.node[i]['state']``.
    """
    from sklearn.neighbors import NearestNeighbors

    xr = ax.get_xlim()
    yr = ax.get_ylim()
    xres = 500
    yres = 500
    xs = np.linspace(xr[0], xr[1], xres)
    ys = np.linspace(yr[0], yr[1], yres)

    # meshgrid returns two (yres, xres) arrays; flatten them into one
    # (x, y) row per grid point.
    grid = np.array(np.meshgrid(xs, ys)).reshape((2, -1)).T

    nn = NearestNeighbors(algorithm='kd_tree', n_neighbors=1)
    nodes = np.array(rrt.tree.nodes())
    states = np.array([rrt.tree.node[i]['state'] for i in nodes])
    nn.fit(states, nodes)

    # nn_res[i, j] is the row of `states` nearest to grid point
    # (xs[j], ys[i]); rows follow y and columns follow x, which is the
    # layout imshow expects.
    nn_res = nn.kneighbors(grid, return_distance=False)
    nn_res = nn_res.reshape((yres, xres)).astype(float)
    print('regions %s' % (np.unique(nn_res),))

    # Normalise to [0, 1] for the colour map (a crude way of assigning a
    # colour to each region).
    peak = np.max(nn_res)
    if peak > 0:
        nn_res /= float(peak)

    ax.imshow(nn_res, origin='lower', extent=[xr[0], xr[1], yr[0], yr[1]],
              alpha=.5, zorder=2, cmap=mpl.cm.get_cmap(name='prism'))
    ax.figure.canvas.draw()
示例11: SimilaritySearch
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
class SimilaritySearch():
    """k-nearest-neighbour search over histograms.

    Neighbours are ranked with the (un-normalised) histogram-intersection
    distance implemented by :meth:`intersection`.
    """

    def euclidean(self, x, y):
        """Return the squared Euclidean distance (no square root is taken)."""
        return np.sum((x-y)**2)

    def intersection(self, x, y):
        """Return ``sum(x) - sum(min(x, y))``; no normalisation is applied.

        NOTE(review): this value is not symmetric in ``x`` and ``y`` while
        tree-based indexes assume a true metric -- confirm this is
        acceptable for the data used.
        """
        return np.sum(x) - np.sum(np.minimum(x,y))

    def __init__(self, k=C.KNN_DEFAULT, data=None, labels=None):
        """Create the index and, when ``data`` is given, fit it immediately.

        Parameters
        ----------
        k : int
            Number of neighbours returned by :meth:`findk`.
        data : sequence of histograms, optional
            Initial samples.
        labels : sequence, optional
            One label per initial sample.
        """
        # A callable is passed directly as the metric, which is the
        # documented way of using a custom distance in scikit-learn.
        self.knn = NearestNeighbors(n_neighbors=k, algorithm='ball_tree',
                                    metric=self.intersection)
        self.k = k
        if data is not None:
            # Stored as lists so that addHistogram() keeps working even
            # when arrays (or no labels) were passed in.
            self.data = list(data)
            self.labels = list(labels) if labels is not None else []
            self.train()
        else:
            self.data = []
            self.labels = []

    def addHistogram(self, hist, label):
        """Queue one more sample; call :meth:`train` to re-fit the index."""
        self.data.append(hist)
        self.labels.append(label)

    def train(self):
        """(Re)fit the neighbour index on all stored histograms."""
        self.knn.fit(np.array(self.data), np.array(self.labels))

    def findk(self, hist):
        """Return ``(indices, distances)`` of the ``k`` nearest histograms.

        Note the order: indices first, distances second.
        """
        dist, neigh = self.knn.kneighbors(np.array([hist]))
        return neigh, dist
示例12: rrt
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def rrt(self):
    """
    Basic RRT Algorithm

    Starting from the robot position, repeatedly sample a free point, find
    the nearest vertex of the tree and extend the tree towards the sample
    when the connecting segment is safe.  After
    ``self._max_rrt_iterations`` iterations the vertices and edges are
    passed to ``self.publish_rrt``.

    The tree is held in two locals: ``V`` (list of vertex positions) and
    ``E`` (dict mapping a parent vertex index to its child indices).
    """
    probot = np.array([self._robot_pose.pose.position.x,
                       self._robot_pose.pose.position.y])
    V = [probot]
    E = {}
    nbrs = NearestNeighbors(n_neighbors=1)
    nbrs.fit(V)
    t1 = time.time()
    rrt_iter = 0
    while rrt_iter < self._max_rrt_iterations:
        prand = self.sample_free_uniform()
        # kneighbors() needs a 2-D query; the result arrays have shape
        # (1, 1) because there is one query and one neighbour.
        dist, idx = nbrs.kneighbors(np.reshape(prand, (1, -1)))
        idx = int(idx[0, 0])
        if dist[0, 0] < self._rrt_eta:
            # The sample is within reach: connect to it directly.
            pnew = prand
        else:
            pnew = self.steer(V[idx], prand)
        if self.segment_safe(V[idx], pnew) is True:
            E.setdefault(idx, []).append(len(V))
            V.append(pnew)
            # The index has no incremental insert, so re-fit on all
            # vertices after each accepted extension.
            nbrs.fit(V)
        # NOTE(review): every attempt is counted, including rejected ones;
        # confirm against the original (indentation was lost in this copy).
        rrt_iter += 1
    print('total time:  %s' % (time.time() - t1))
    self.publish_rrt(V, E)
示例13: on_mouse_move
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def on_mouse_move(self, event):
    """Grow a nearest-neighbour selection while dragging in 'point' mode.

    After delegating to the parent handler, and only when scikit-learn is
    available, the left button is held, the mouse is dragging and the tool
    is in ``'point'`` mode: the number of selected neighbours of
    ``self.selection_origin`` is made proportional to the drag distance
    relative to the canvas diagonal.

    Parameters
    ----------
    event : mouse event
        Must provide ``button``, ``is_dragging`` and ``pos``.
    """
    # add the knn scheme to decide selected region when moving mouse
    super(ScatterSelectionToolbar, self).on_mouse_move(event=event)
    if not SKLEARN_INSTALLED:
        return
    # Strings must be compared by value; `is` only happens to work when
    # the interpreter interns both operands.
    if not (event.button == 1 and event.is_dragging and self.mode == 'point'):
        return

    visible_data, _ = self.get_visible_data()
    data = self.get_map_data()
    visible_data = np.nan_to_num(visible_data)

    # calculate the threshold and call draw visual
    width = event.pos[0] - self.selection_origin[0]
    height = event.pos[1] - self.selection_origin[1]
    drag_distance = math.sqrt(width**2 + height**2)
    canvas_size = self._vispy_widget.canvas.size
    canvas_diag = math.sqrt(canvas_size[0]**2 + canvas_size[1]**2)

    # neighbor num proportioned to mouse moving distance; it has to be an
    # integer between 1 and the number of fitted samples.
    n_points = visible_data[0].data.shape[0]
    n_neighbors = int(drag_distance / canvas_diag * n_points)
    n_neighbors = max(1, min(n_neighbors, np.shape(data)[0]))

    neigh = NearestNeighbors(n_neighbors=n_neighbors)
    neigh.fit(data)
    select_index = neigh.kneighbors([self.selection_origin])[1]

    mask = np.zeros(visible_data[0].data.shape)
    mask[select_index] = 1
    self.mark_selected(mask, visible_data)
示例14: pts_to_surface
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
def pts_to_surface(skel, im_depth, thresh=10):
    '''
    Ensures that the joint positions lie within the silhouette of the person

    ---Parameters---
    skel : joints as (x, y, z) rows in image coordinates (integer x, y)
    im_depth : 2-D depth image, masked so that background pixels are 0
    thresh : if the nearest silhouette pixel is this far or farther, don't
             move the joint there; its x and y are set to 0 instead
    ---Return---
    A new skeleton array where every joint is clamped to the image and,
    when it fell on a background pixel, snapped to the nearest silhouette
    pixel (or zeroed if none is close enough)
    '''
    height, width = im_depth.shape
    # Clamp x and y to the image bounds; z is left untouched.
    skel = np.array([[max(min(p[0], width-1), 0), max(min(p[1], height-1), 0), p[2]] for p in skel])
    out_of_bounds = np.where(np.array([im_depth[p[1], p[0]] for p in skel]) == 0)[0]
    if len(out_of_bounds) == 0:
        return skel

    # Silhouette pixels as (row, col) pairs.
    inds = np.array(np.nonzero(im_depth)).T
    if len(inds) == 0:
        # Empty mask: there is nothing to snap to.
        skel[out_of_bounds, :2] = 0
        return skel

    from sklearn.neighbors import NearestNeighbors
    NN = NearestNeighbors(n_neighbors=1)
    NN.fit(inds)

    # Query in (row, col) order -- the same axis order as `inds` -- so the
    # returned distances are true pixel distances.
    queries = skel[out_of_bounds][:, [1, 0]]
    dists, nearest = NN.kneighbors(queries, 1)
    for joint, dist, hit in zip(out_of_bounds, dists[:, 0], nearest[:, 0]):
        if dist < thresh:
            row, col = inds[hit]
            skel[joint, 0] = col
            skel[joint, 1] = row
        else:
            skel[joint, :2] = 0
    return skel
示例15: ContentBased
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import fit [as 别名]
class ContentBased(object):
    """
    Article recommendation model based on the most relevant tags of each
    article.  Every article is vectorised so that the similarity between
    articles can be computed.
    """

    def __init__(self, stop_words=None, token_pattern=None, metric='cosine', n_neighbors=5):
        """Configure the TF-IDF vectoriser and the neighbour search.

        Parameters
        ----------
        stop_words : list of str, optional
            Words to ignore; defaults to the English stop-word list.
        token_pattern : str, optional
            Token regular expression; by default tokens must start with a
            letter and be at least three characters long.
        metric : str, optional
            Distance used by the neighbour search (default ``'cosine'``).
        n_neighbors : int, optional
            Number of articles to recommend (default 5).
        """
        if stop_words is None:
            stop_words = stopwords.words("english")
        if token_pattern is None:
            token_pattern = r'(?u)\b[a-zA-Z]\w\w+\b'
        self.tfidf_vectorizer = TfidfVectorizer(stop_words=stop_words, token_pattern=token_pattern)
        self.nearest_neigbors = NearestNeighbors(metric=metric, n_neighbors=n_neighbors, algorithm='brute')

    def fit(self, datos, columna_descripcion):
        """
        Train the model:
        1/ Vectorise every article (feature extraction and weighting)
        2/ Index the articles for the nearest-neighbour search

        Parameters
        ----------
        datos : pandas.DataFrame
            One article per row.
        columna_descripcion : str
            Name of the column holding the text to vectorise.
        """
        self.datos = datos
        datos_por_tags = self.tfidf_vectorizer.fit_transform(datos[columna_descripcion])
        self.nearest_neigbors.fit(datos_por_tags)

    def predict(self, descripcion):
        """
        Return the articles most similar to the given description.

        Parameters
        ----------
        descripcion : str or iterable of str
            The description to match.  A bare string is treated as a single
            document; for an iterable only the first document is used.

        Returns
        -------
        pandas.DataFrame
            The matching rows of the fitted data, closest first; empty
            (same columns) when the description shares no term with the
            fitted vocabulary.
        """
        # The vectoriser expects an iterable of documents and rejects a
        # bare string.
        if isinstance(descripcion, str):
            descripcion = [descripcion]
        descripcion_tags = self.tfidf_vectorizer.transform(descripcion)
        if descripcion_tags.sum() == 0:
            return pd.DataFrame(columns=self.datos.columns)
        # Never ask for more neighbours than there are fitted articles.
        n_neighbors = min(self.nearest_neigbors.n_neighbors, len(self.datos))
        _, indices = self.nearest_neigbors.kneighbors(descripcion_tags, n_neighbors=n_neighbors)
        return self.datos.iloc[indices[0], :]