

Python NearestNeighbors.fit Method Code Examples

This article collects and summarizes typical usage examples of the Python method sklearn.neighbors.NearestNeighbors.fit. If you are wondering how NearestNeighbors.fit works, how to call it, or what real uses of it look like, the hand-picked code examples below should help. You can also explore further usage examples of the class it belongs to, sklearn.neighbors.NearestNeighbors.


The sections below present 15 code examples of NearestNeighbors.fit, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
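
Before the examples, here is a minimal self-contained sketch of the fit/kneighbors workflow they all share (the toy data below is invented for illustration and is not taken from any of the projects listed): fit indexes a set of reference points, and kneighbors then returns the closest reference points for new queries.

import numpy as np
from sklearn.neighbors import NearestNeighbors

# Four reference points in 2-D (toy data).
X = np.array([[0.0, 0.0], [1.0, 0.5], [2.0, 2.0], [3.0, 2.5]])

nn = NearestNeighbors(n_neighbors=2)
nn.fit(X)  # index the reference points

# Query the two nearest reference points for one new sample.
distances, indices = nn.kneighbors([[1.1, 0.4]])
print(indices)    # row indices of the nearest reference points, e.g. [[1 0]]
print(distances)  # the corresponding Euclidean distances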

Example 1: get_knn_score

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
def get_knn_score(data, targetdata, filenames, num=20):
    vectorizer = CountVectorizer()
    tfidfvectorizer = TfidfTransformer()

    counts = vectorizer.fit_transform(data)
    tfidf_data = tfidfvectorizer.fit_transform(counts)

    knn = NearestNeighbors(n_neighbors=num)
    knn.fit(tfidf_data)

    counts = vectorizer.transform(targetdata)
    tfidf_target_data = tfidfvectorizer.transform(counts)

    result = knn.kneighbors(tfidf_target_data)
    score = result[0][0]
    index = result[1][0]

    """
    for i in index.tolist():
        print files[i]
    for i in index.tolist():
    print map(float, score)
    print index.tolist()
    """
    #return index.tolist(), score.tolist()
    for i in index.tolist():
        fname = basename(filenames[i])
        copy(ORI_DIR + fname, TARGET_DIR + fname)
Developer: brenden17, Project: immi, Lines: 30, Source: find_nearest.py
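
The pattern in Example 1 (vectorize a corpus, fit NearestNeighbors on the tf-idf matrix, query with transformed target documents), reduced to a self-contained sketch that only returns indices; the corpus and query are invented, and TfidfVectorizer stands in for the CountVectorizer + TfidfTransformer pair:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

corpus = ["the cat sat on the mat", "dogs and cats living together",
          "stock market news today", "the market crashed this morning"]
queries = ["a cat on a mat"]

vec = TfidfVectorizer()
X = vec.fit_transform(corpus)   # sparse tf-idf matrix, one row per document

nn = NearestNeighbors(n_neighbors=2)
nn.fit(X)

dist, idx = nn.kneighbors(vec.transform(queries))
print(idx[0])                   # indices of the two corpus documents closest to the query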

Example 2: sample

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
    def sample(s):
        if s.data is None:
            raise ValueError('data not loaded.')
        mdl = NearestNeighbors(n_neighbors=s.k, n_jobs=-1)
        minoX = s.X[s.y == s.minolab]
        majX = s.X[s.y == s.majlab]
        mdl.fit(minoX)
        _, nei_table = mdl.kneighbors()

        generated = None
        for cnt, nei_idx in enumerate(nei_table):
            x = minoX[cnt]
            if s.rate >= 0.5 * s.k:
                nei = minoX[np.random.choice(nei_idx, int(s.rate))]
                new = x + np.random.rand(int(s.rate), 1) * (nei - x)

            else:
                nei = minoX[nei_idx]
                new = x + np.random.rand(s.k, 1) * (nei - x)
                # each of the k synthesized points has an N/k * 100 % probability of being chosen
                new = new[np.random.rand(s.k) > s.rate * 1.0 / s.k]
            if generated is None:
                generated = new
            else:
                generated = np.vstack((generated, new))
        # number of generated instances
        N = len(generated)
        ret = np.hstack((np.vstack((minoX, generated, majX)),
                         np.array([s.minolab] * (minoX.shape[0] + N) + [s.majlab] * majX.shape[0])[:, None]))
        np.random.shuffle(ret)
        return ret
Developer: tianfudhe, Project: ids, Lines: 33, Source: sampling.py

Example 3: resample

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
    def resample(self):
        from sklearn.neighbors import NearestNeighbors

        # Start with the minority class
        minx = self.x[self.y == self.minc]
        miny = self.y[self.y == self.minc]

        # Find the NNs for all samples in the data set.
        print("Finding the %i nearest neighbours..." % self.m, end = "")
        NN = NearestNeighbors(n_neighbors = self.m + 1)
        NN.fit(self.x)

        print("done!")

        # Boolean array with True for minority samples in danger
        index = asarray([in_danger(x, self.y, self.m, miny[0], NN) for x in minx])

        # If all minority samples are safe, return the original data set.
        if not any(index):
            print('There are no samples in danger. No borderline synthetic samples created.')
            return self.x, self.y

        # Find the NNs among the minority class
        NN.set_params(**{'n_neighbors' : self.k + 1})
        NN.fit(minx)
        nns = NN.kneighbors(minx[index], return_distance=False)[:, 1:]

        # Create synthetic samples for borderline points.
        sx, sy = make_samples(minx[index], minx, miny[0], nns, int(self.ratio * len(miny)), random_state=self.rs)

        # Concatenate the newly generated samples to the original data set
        ret_x = concatenate((self.x, sx), axis = 0)
        ret_y = concatenate((self.y, sy), axis = 0)

        return ret_x, ret_y
Developer: ajcobo, Project: tweetclassifier, Lines: 37, Source: UnbalancedDataset.py

Example 4: resample

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
    def resample(self):
        """
        """

        # Start with the minority class
        underx = self.x[self.y == self.minc]
        undery = self.y[self.y == self.minc]

        # Import the k-NN classifier
        from sklearn.neighbors import NearestNeighbors

        # Create a k-NN to fit the whole data
        nn_obj = NearestNeighbors(n_neighbors=self.size_ngh)

        # Fit the whole dataset
        nn_obj.fit(self.x)

        idx_to_exclude = []
        # Loop over the other classes, under-picking at random
        for key in self.ucd.keys():

            # Get the sample of the current class
            sub_samples_x = self.x[self.y == key]

            # Get the samples associated
            idx_sub_sample = np.nonzero(self.y == key)[0]

            # Find the NN for the current class
            nnhood_idx = nn_obj.kneighbors(sub_samples_x, return_distance=False)

            # Get the labels corresponding to those indices
            nnhood_label = (self.y[nnhood_idx] == key)

            # Check which ones have the same label as the current class
            # by taking an AND operation across the neighbours
            nnhood_bool = np.logical_not(np.all(nnhood_label, axis=1))

            # If this is the minority class, remove the majority samples (as in politics!!!! ;))
            if key == self.minc:
                # Get the index to exclude
                idx_to_exclude += nnhood_idx[np.nonzero(nnhood_label[np.nonzero(nnhood_bool)])].tolist()
            else:
                # Get the index to exclude
                idx_to_exclude += idx_sub_sample[np.nonzero(nnhood_bool)].tolist()

        # Create a vector with the sample to select
        sel_idx = np.ones(self.y.shape)
        sel_idx[idx_to_exclude] = 0

        # Get the samples from the majority classes
        sel_x = np.squeeze(self.x[np.nonzero(sel_idx), :])
        sel_y = self.y[np.nonzero(sel_idx)]

        underx = concatenate((underx, sel_x), axis=0)
        undery = concatenate((undery, sel_y), axis=0)

        if self.verbose:
            print("Under-sampling performed: " + str(Counter(undery)))

        return underx, undery
Developer: MGolubeva, Project: Ubalanced_classes, Lines: 62, Source: under_sampling.py

Example 5: findKNN

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
def findKNN(frequencyVector, newVector):
    samples = np.array(frequencyVector)
    neigh = NearestNeighbors(n_neighbors = 5, metric = "euclidean")
    neigh.fit(samples)
    indexList = neigh.kneighbors(newVector, return_distance = False).tolist()

    return indexList
Developer: alynsther, Project: RIPSHKCICC, Lines: 9, Source: nltkTesting.py
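
A hypothetical call to findKNN with invented frequency vectors (note that recent scikit-learn releases expect the query to be a 2-D array, hence the nested list):

freq_vectors = [[3, 0, 1], [0, 2, 0], [1, 1, 4], [2, 0, 0], [0, 3, 1], [1, 0, 2]]
new_vector = [[1, 0, 1]]                  # a single query, as a 2-D list
print(findKNN(freq_vectors, new_vector))  # indices of the 5 nearest rows of freq_vectors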

Example 6: adasyn_sample

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
def adasyn_sample(X,Y,minclass,K=5,n=200):
    indices = np.nonzero(Y==minclass)
    Ymin = Y[indices]
    Xmin = X[indices]
    Cmin = len(indices[0])
    Xs = []
    if n > Cmin:
        Xs.append(Xmin)
        n -= len(Ymin)
    else:
        # simple random without replacement undersampling
        return Xmin[random.sample(range(Cmin),n)]
    neigh = NearestNeighbors(n_neighbors=30)
    neigh.fit(X)
    nindices = neigh.kneighbors(Xmin,K,False)
    gamma = [float(sum(Y[i]==minclass))/K for i in nindices]
    gamma = gamma / np.linalg.norm(gamma,ord = 1)
    neigh = NearestNeighbors(n_neighbors=30)
    neigh.fit(Xmin)
    N = np.round(gamma*n).astype(int)
    assert len(N) == Cmin
    for (i,nn) in enumerate(N):
        nindices = neigh.kneighbors(Xmin[i],K,False)[0]
        for j in range(nn):
            alpha = random.random()
            Xnn = X[random.choice(nindices)]
            Xs.append((1.-alpha)*Xmin[i]+alpha*Xnn)
    Xadasyn = sparse.vstack(Xs)  
    return Xadasyn
Developer: KenHollandWHY, Project: kaggle, Lines: 31, Source: utils.py

Example 7: removeRedundantFrames

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
	def removeRedundantFrames(self):
		h, w, d = self.keyframes[0].shape
		n = len(self.keyframes)
		frames = np.zeros((n, 256))
		self.frameHistFeats
		for i, kf in enumerate(self.keyframes):
			frames[i] = tools.getColorHist(kf).ravel()
		
		k = int(np.sqrt(n))
		kmeans = KMeans(k)
		print("Clustering frames into {0} code vectors.".format(k))
		kmeans.fit(self.frameHistFeats)

		bestFrameIndices = []
		bestFrames = []
		NN = NearestNeighbors(1)
		NN.fit(frames)
		centers = kmeans.cluster_centers_
		for center in centers:
			nearest = NN.kneighbors(center, return_distance=False)
			bestFrameIndices.append(nearest[0])
		bestFrameIndices.sort()
		for i in bestFrameIndices:
			bestFrames.append(self.keyframes[i])
		return bestFrames
Developer: kaledj, Project: YTKeyframes, Lines: 27, Source: kfextractor.py

Example 8: select

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
	def select(self, x_test, metric='minkowski', p=2):
		"""
		Dynamically select classifiers in the pool relative to a test
		pattern x_test

		Parameters
		----------
		x_test : test pattern 

		Returns
		----------
		best classifier : best classifier according to the dynamic selection scheme.
		"""
		pool = self.ensemble_clf.estimators_
		if not pool:
			raise ValueError("Fit the ensemble methiod before throwing it to \
							  the dynamic selection algorithm")

		predicted_labels = [clf.predict(x_test.reshape(1, -1)) for clf in pool]

		if len(np.unique(predicted_labels)) == 1:
			# All the classifiers agree on the predicted class
			return pool[0]
		else:
			knn = NearestNeighbors(n_neighbors=self.knn, metric=metric, p=p)
			knn.fit(self.X_val)
			iknn = knn.kneighbors(x_test.reshape(1, -1), return_distance=False)[0]
			X_knn, y_knn = self.X_val[iknn], self.y_val[iknn]

			accuracies = [accuracy_score(clf.predict(X_knn), y_knn) \
						  for clf in pool]
			i_best = np.argmax(accuracies)

			return pool[i_best]
Developer: naranil, Project: ensemble_pruning, Lines: 36, Source: local_accuracy.py
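
For context, the same local-accuracy selection idea in a standalone form; the data, the pool of decision trees, and k=7 are all invented for illustration, so this is only a sketch of the scheme, not the class above:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.neighbors import NearestNeighbors
from sklearn.tree import DecisionTreeClassifier

# Toy data: a training part for the pool and a validation part for the selection.
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
X_train, y_train, X_val, y_val = X[:100], y[:100], X[100:], y[100:]

# A small pool of classifiers with different capacities.
pool = [DecisionTreeClassifier(max_depth=d, random_state=0).fit(X_train, y_train)
        for d in (1, 3, 5)]

# For one test pattern, find its k nearest validation points...
x_test = X_val[0]
knn = NearestNeighbors(n_neighbors=7).fit(X_val)
iknn = knn.kneighbors(x_test.reshape(1, -1), return_distance=False)[0]

# ...and keep the pool member that is most accurate on that local neighbourhood.
local_acc = [accuracy_score(y_val[iknn], clf.predict(X_val[iknn])) for clf in pool]
best_clf = pool[int(np.argmax(local_acc))]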

Example 9: load_data_with_SMOTE

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
def load_data_with_SMOTE():
    rawdata = read_file()
    size = 150
    small = rawdata[rawdata['class'] == 'B']
    n_sample = small.shape[0]
    idx = np.random.randint(0, n_sample, size)
    X = small.iloc[idx, range(1, 5)].values
    y = small.iloc[idx, 0].values
    knn = NearestNeighbors(n_neighbors=2)
    knn.fit(X)
    _d, i = knn.kneighbors(X)
    idx2 = i[:, 1]
    diff = X - X[idx2]
    X = X + np.random.random(4) * diff
    B = np.concatenate([np.transpose(y[np.newaxis]), X], axis=1)
    B = pd.DataFrame(B)

    n_sample = rawdata[rawdata['class'] == 'L'].shape[0]
    idx = np.random.randint(0, n_sample, size)
    L = rawdata[rawdata['class'] == 'L'].iloc[idx]

    n_sample = rawdata[rawdata['class'] == 'R'].shape[0]
    idx = np.random.randint(0, n_sample, size)
    R = rawdata[rawdata['class'] == 'R'].iloc[idx]

    d = np.concatenate([B.values, L.values, R.values])

    le = LabelEncoder()
    X = d[:, 1:5]
    y = le.fit_transform(d[:, 0])
    return X, y
Developer: brenden17, Project: imbalanced-data, Lines: 33, Source: imbalanceddata.py
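
The interpolation step above (new point = original point plus a random fraction of the difference to a neighbour) is the core of SMOTE; a standalone sketch with invented data:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.array([[0.0, 0.0], [0.2, 0.1], [1.0, 1.0], [1.1, 0.9]])
nn = NearestNeighbors(n_neighbors=2).fit(X)
_, idx = nn.kneighbors(X)       # idx[:, 0] is each point itself, idx[:, 1] its nearest neighbour
neighbours = X[idx[:, 1]]
synthetic = X + np.random.rand(len(X), 1) * (neighbours - X)   # points on the segment towards the neighbour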

Example 10: draw_voronoi

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
def draw_voronoi(ax,rrt):
    xr = ax.get_xlim()
    yr = ax.get_ylim()
    
    xres = 500
    yres = 500

    xs= np.linspace(xr[0],xr[1],xres)
    ys= np.linspace(yr[0],yr[1],yres)
    
    grid = np.array(np.meshgrid(xs,ys))
    grid = grid.reshape((2,-1))
    grid = grid.T
    # grid is now an (xres*yres)-by-2 array; we want the nearest node for each of those points

    from sklearn.neighbors import NearestNeighbors
    nn = NearestNeighbors(algorithm='kd_tree',n_neighbors=1)
    nodes = np.array(rrt.tree.nodes())
    states = np.array([rrt.tree.node[i]['state'] for i in nodes])
    nn.fit(states,nodes)
    nn_res = nn.kneighbors(grid,return_distance=False)
    nn_res = nn_res.reshape((xres,yres))
    nn_res = np.array(nn_res,dtype=np.float)
    
    # nn_res is an xres-by-yres array and contains the node-id of the nearest neighbor at each [i,j]
    print 'regions' , np.unique(nn_res)
    if np.max(nn_res)> 0: nn_res /= float(np.max(nn_res))   #normalize for color map. this is a stupid way to assign colors to regions
    ax.imshow(nn_res,origin='lower',extent=[xr[0],xr[1],yr[0],yr[1]],alpha=.5,zorder=2,cmap=mpl.cm.get_cmap(name='prism'))    
    ax.figure.canvas.draw()
Developer: goretkin, Project: kinodyn, Lines: 31, Source: rrt_2d_example.py

Example 11: SimilaritySearch

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
class SimilaritySearch():

    def euclidean(self, x, y):
        return np.sum((x-y)**2)
    
    #no normalization
    def intersection(self, x, y):
        return np.sum(x) - np.sum(np.minimum(x,y))

    def __init__(self, k=C.KNN_DEFAULT, data=None, labels=None):
        self.knn = NearestNeighbors(n_neighbors=k, algorithm='ball_tree', metric='pyfunc', func=self.intersection)
        #self.knn = NearestNeighbors(n_neighbors=k, algorithm='ball_tree', metric='minkowski')
        self.k = k
        if not data is None:
            self.data = data
            self.labels = labels
            self.train()
        else:
            self.data = []
            self.labels = []

    def addHistogram(self, hist, label):
        self.data.append(hist)
        self.labels.append(label)

    def train(self):
        self.knn.fit(np.array(self.data), np.array(self.labels))
    
    def findk(self, hist):
        dist, neigh = self.knn.kneighbors(np.array([hist]))
        return neigh, dist
Developer: maeotaku, Project: leaf_recognition_sdk, Lines: 33, Source: SimilaritySearch.py
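
Note that metric='pyfunc' with a func= keyword comes from an older scikit-learn API; in newer releases a custom metric is passed directly as a callable. A minimal sketch of the same histogram-intersection idea with invented toy histograms (brute-force search is used here to keep the sketch simple):

import numpy as np
from sklearn.neighbors import NearestNeighbors

def intersection(x, y):
    # histogram-intersection "distance", as defined in the class above
    return np.sum(x) - np.sum(np.minimum(x, y))

hists = np.array([[3.0, 1.0, 0.0], [2.0, 2.0, 1.0], [0.0, 4.0, 4.0]])
nn = NearestNeighbors(n_neighbors=2, algorithm='brute', metric=intersection)
nn.fit(hists)
dist, idx = nn.kneighbors(hists[:1])   # the 2 histograms closest to the first one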

Example 12: rrt

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
 def rrt(self):
     """
     Basic RRT Algorithm
     """
     probot = np.array([self._robot_pose.pose.position.x,self._robot_pose.pose.position.y])
     V = [probot]
     E = {}
     nbrs = NearestNeighbors(n_neighbors=1)
     nbrs.fit(V)
     t1 = time.time()
     rrt_iter = 0
     while rrt_iter < self._max_rrt_iterations:
         prand = self.sample_free_uniform()
         (dist, idx) = nbrs.kneighbors(prand)
         idx = idx.flatten(1)[0]
         if dist < self._rrt_eta:
             pnew = prand
         else:
             pnew = self.steer(V[idx], prand)
         if self.segment_safe(V[idx],pnew) is True:
             if E.has_key(idx):
                 E[idx].append(len(V))
             else:
                 E[idx] = [len(V)]
             V.append(pnew)
             nbrs.fit(V)
         rrt_iter += 1
     print 'total time: ', time.time()-t1
     self.publish_rrt(V,E) 
Developer: jmessias, Project: monarch_active_perception, Lines: 31, Source: motion_planner_sam.py

Example 13: on_mouse_move

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
    def on_mouse_move(self, event):
        # add a k-NN scheme to decide the selected region while the mouse is moving
        super(ScatterSelectionToolbar, self).on_mouse_move(event=event)

        if SKLEARN_INSTALLED:
            if event.button == 1 and event.is_dragging and self.mode is 'point':
                visible_data, visual = self.get_visible_data()
                data = self.get_map_data()

                visible_data = np.nan_to_num(visible_data)

                # calculate the threshold and call draw visual
                width = event.pos[0] - self.selection_origin[0]
                height = event.pos[1] - self.selection_origin[1]
                drag_distance = math.sqrt(width**2+height**2)
                canvas_diag = math.sqrt(self._vispy_widget.canvas.size[0]**2
                                        + self._vispy_widget.canvas.size[1]**2)

                # number of neighbors proportional to the mouse drag distance
                n_neighbors = drag_distance / canvas_diag * visible_data[0].data.shape[0]
                neigh = NearestNeighbors(n_neighbors=n_neighbors)
                neigh.fit(data)
                select_index = neigh.kneighbors([self.selection_origin])[1]

                mask = np.zeros(visible_data[0].data.shape)
                mask[select_index] = 1
                self.mark_selected(mask, visible_data)
Developer: pllim, Project: glue-3d-viewer, Lines: 29, Source: scatter_toolbar.py

Example 14: pts_to_surface

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
def pts_to_surface(skel, im_depth, thresh=10):
	'''
	Ensures that the joint positions lie within the silhouette of the person
	---Parameters---
	skel : should be in image coordinates
	im_depth : should be masked
	thresh : if distance is too large, don't move!
	---Return---
	The same skeleton where all joints are within the mask
	'''

	height, width = im_depth.shape
	skel = np.array([[max(min(p[0], width-1), 0), max(min(p[1], height-1), 0), p[2]] for p in skel] )
	out_of_bounds = np.where(np.array([im_depth[p[1],p[0]] for p in skel]) == 0)[0]

	# embed()
	# If a pixel is outside the mask, find the closest 'in' neighbor
	if len(out_of_bounds) > 0:
		from sklearn.neighbors import NearestNeighbors
		NN = NearestNeighbors(n_neighbors=1)
		inds = np.array(np.nonzero(im_depth)).T
		NN.fit(inds)

		for i in out_of_bounds:
			pos = skel[i]
			closest_ind = NN.kneighbors([pos[1],pos[0]], 1, return_distance=False)[0]
			closest_pos = inds[closest_ind][0]
			if np.linalg.norm(closest_pos[:2]-pos[:2], 2) < thresh:
				skel[i][0] = closest_pos[1]
				skel[i][1] = closest_pos[0]
			else:
				skel[i][0] = 0
				skel[i][1] = 0

	return skel
Developer: MerDane, Project: pyKinectTools, Lines: 37, Source: RandomForestPose.py

Example 15: ContentBased

# Required module import: from sklearn.neighbors import NearestNeighbors [as alias]
# Or: from sklearn.neighbors.NearestNeighbors import fit [as alias]
class ContentBased(object):
    """
    Recommendation model for articles based on each article's most relevant tags.
    The model vectorizes every article so that the similarity between articles can be computed.
    """
    def __init__(self, stop_words=None, token_pattern=None, metric='cosine', n_neighbors=5):
        if stop_words is None:
            stop_words =  stopwords.words("english")
            
        if token_pattern is None:
            token_pattern = '(?u)\\b[a-zA-Z]\\w\\w+\\b'
            
        self.tfidf_vectorizer = TfidfVectorizer(stop_words=stop_words, token_pattern=token_pattern)
        self.nearest_neigbors = NearestNeighbors(metric=metric, n_neighbors=n_neighbors, algorithm='brute')
        
    def fit(self, datos, columna_descripcion):
        """
        Train the model:
        1/ Vectorize every article (feature extraction and weighting)
        2/ Compute the nearest articles
        """
        self.datos = datos
        datos_por_tags = self.tfidf_vectorizer.fit_transform(datos[columna_descripcion])        
        self.nearest_neigbors.fit(datos_por_tags)
        
    def predict(self, descripcion):
        """
        Return the articles most similar to the proposed description
        """
        descripcion_tags = self.tfidf_vectorizer.transform(descripcion)        
        if descripcion_tags.sum() == 0:
            return pd.DataFrame(columns=self.datos.columns)
        else:
            _, indices = self.nearest_neigbors.kneighbors(descripcion_tags)
            return self.datos.iloc[indices[0], :]
Developer: pvalienteverde, Project: ElCuadernillo, Lines: 37, Source: ContendBased.py
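
A hypothetical usage sketch for the ContentBased class above; the DataFrame, column name, and query are invented, and stop_words="english" uses scikit-learn's built-in list so the sketch does not depend on the nltk stopwords download the class would otherwise need:

import pandas as pd

articles = pd.DataFrame({
    "title": ["Intro to SVMs", "Gradient boosting basics", "Deep learning for images"],
    "tags": ["svm kernel classification margins",
             "boosting trees ensembles gradient",
             "cnn convolution images deep learning"],
})

model = ContentBased(stop_words="english", n_neighbors=2)
model.fit(articles, columna_descripcion="tags")

# Rows of `articles` whose tags are most similar to the query description.
print(model.predict(["gradient boosting with trees"]))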


Note: the sklearn.neighbors.NearestNeighbors.fit examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and any distribution or use must follow the corresponding project's license. Please do not reproduce without permission.