当前位置: 首页>>代码示例>>Python>>正文


Python NearestNeighbors.kneighbors方法代码示例

本文整理汇总了Python中sklearn.neighbors.NearestNeighbors.kneighbors方法的典型用法代码示例。如果您正苦于以下问题:Python NearestNeighbors.kneighbors方法的具体用法?Python NearestNeighbors.kneighbors怎么用?Python NearestNeighbors.kneighbors使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.neighbors.NearestNeighbors的用法示例。


在下文中一共展示了NearestNeighbors.kneighbors方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
class KDTrees:
    """Nearest-neighbour index over points, backed by scikit-learn.

    Despite the class name, the estimator is created with
    ``algorithm='ball_tree'`` and the ``'haversine'`` metric.
    """

    # Distance reported by mapDistance when no transit duration can be read
    # from the directions response.
    UNREACHABLE = 1000000000

    def __init__(self, nb_neighbours, leaf_size):
        """Create the (unfitted) neighbour index.

        nb_neighbours -- number of neighbours returned per query.
        leaf_size     -- leaf size forwarded to the ball tree.
        """
        self.nbrs = NearestNeighbors(n_neighbors=nb_neighbours,
                                     algorithm='ball_tree',
                                     metric='haversine',
                                     leaf_size=leaf_size)

    def mapDistance(self, x, y):
        """Return a distance between the points ``x`` and ``y``.

        For points with more than two components the squared Euclidean
        distance is returned.  Otherwise both points are formatted as
        ``"a, b"`` strings and a transit route is requested from ``gmaps``;
        the result is arrival time minus departure time of the first leg of
        the first route, or ``UNREACHABLE`` if the response does not contain
        those fields.
        """
        if len(x) > 2:
            return np.sum((x - y) ** 2)

        # Order the pair so the point with the larger first coordinate is the
        # origin of the request.
        if x[0] < y[0]:
            x, y = y, x
        pos1 = str(x[0]) + ", " + str(x[1])
        pos2 = str(y[0]) + ", " + str(y[1])

        # Departure time: seconds from 1970-01-01 to the start of the current
        # (naive) day, shifted by +32 h and -2 h.
        # NOTE(review): the -2 h looks like a timezone correction -- confirm.
        timestamp = datetime.now()
        sec_to_add = (32 * 3600
                      + (timestamp - datetime(1970, 1, 1)).total_seconds()
                      - 2 * 3600
                      - timestamp.hour * 3600
                      - timestamp.minute * 60
                      - timestamp.second)
        traject = gmaps.directions(pos1, pos2, mode="transit",
                                   departure_time=datetime.fromtimestamp(sec_to_add))
        try:
            leg = traject[0]["legs"][0]
            duration = leg["arrival_time"]["value"] - leg["departure_time"]["value"]
        except (IndexError, KeyError, TypeError):
            # No route, or a route without transit arrival/departure times.
            print('bug')
            return self.UNREACHABLE
        print('ok')
        return duration

    def addPoints(self, points):
        """Fit the neighbour index on ``points``."""
        self.nbrs.fit(points)

    def getNeighbours(self, points):
        """Return the ``kneighbors`` result for ``points``.

        The original implementation ran the query but dropped its result,
        so callers always received ``None``.
        """
        return self.nbrs.kneighbors(points)
开发者ID:AWehenkel,项目名称:Hive,代码行数:33,代码来源:KDTree.py

示例2: eucl_distance

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def eucl_distance(a, b):
    """Return nearest-neighbour distances between two point sets.

    The result is ``[distances_a, distances_b]``: ``distances_a`` holds, for
    every point of ``b``, the distance to its closest point in ``a``, and
    ``distances_b`` the same with the roles swapped.  Whenever one of the two
    sets is empty the corresponding entry is the placeholder ``[np.inf]``.
    """
    # Build one single-neighbour kd-tree per non-empty point set.
    tree_a = None
    tree_b = None
    if a.size > 0:
        tree_a = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(a)
    if b.size > 0:
        tree_b = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(b)

    # Query each tree with the other set; fall back to infinity when either
    # side of the query is missing.
    if tree_a and b.size > 0:
        distances_a, _ = tree_a.kneighbors(b)
    else:
        distances_a = [np.inf]

    if tree_b and a.size > 0:
        distances_b, _ = tree_b.kneighbors(a)
    else:
        distances_b = [np.inf]

    return [distances_a, distances_b]
开发者ID:marianocabezas,项目名称:data_manipulation,代码行数:9,代码来源:metrics.py

示例3: on_pick

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
 def on_pick(self, event):
     """Highlight the 50 nearest neighbours of a picked point.

     Reads the picked index and artist from ``event``, records which layer
     of ``nld`` owns the artist, then draws the neighbours of the picked
     point in yellow on ``nld`` and in green on a second axes taken from
     ``self.fig``.  ``nld`` and ``X`` are names resolved outside this
     method (not visible here).
     """
     # First picked index and the artist that received the pick.
     ind = event.ind[0]
     arty = event.artist
     # Remember the key of the layer whose plots contain the picked artist.
     # The ``break`` only leaves the inner loop, so the outer loop keeps
     # scanning the remaining layers.
     for key in nld.layers.keys():
         layer = nld.layers[key]
         for plot in layer.plots:
             if plot is arty:
                 self.neighb_sec = key
                 break
     # 50-nearest-neighbour table for every row of X (row ``ind`` is used below).
     nbrs = NearestNeighbors(n_neighbors=50, n_jobs=1).fit(X)
     distances, indices = nbrs.kneighbors(X)
     #nbrs.fit(X)
     #W = barycenter_kneighbors_graph(
     #    nbrs, n_neighbors=50, reg=1e-3, n_jobs=1)
     #knn = kneighbors_graph(X, 10).to_array()
     # Best-effort removal of the highlights drawn by a previous pick; any
     # error (e.g. the attributes not existing yet) is ignored.
     try:
         self.scatters.remove()
         self.two_scatters.remove()
     except:
         pass
     # Neighbour indices of the picked row, then the corresponding rows of X.
     self.points = indices[ind]
     neighb_layer = nld.get_layer(self.neighb_sec)
     self.points = X[self.points]
     # self.points = [neighb_layer.x_data[0][ind], neighb_layer.y_data[0][self.points], neighb_layer.z_data[0][self.points]]
     # The last character of the layer key is parsed as a number and used
     # (offset by one) to select an axes of the figure.
     section_num = int(self.neighb_sec[-1])
     section_ax = self.fig.get_axes()[section_num + 1]
     section_layer = section_ax.get_layer(section_ax.title._text + ' proj')
     # Highlight the neighbours on ``nld`` using the first three columns.
     self.scatters = nld.scatter(self.points[:, 0], self.points[:, 1], self.points[:, 2], c='yellow', s=80)
     # Repeat the neighbour search on the x/y data of the ' proj' layer and
     # highlight the neighbours of the same index on that axes.
     two_mat = np.column_stack((section_layer.x_data[0], section_layer.y_data[0]))
     two_nbrs = NearestNeighbors(n_neighbors=50, n_jobs=1).fit(two_mat)
     two_dists, two_inds = two_nbrs.kneighbors(two_mat)
     two_points = two_mat[two_inds[ind]] 
     self.two_scatters = section_ax.scatter(two_points[:, 0], two_points[:, 1], c='green', s=80)
开发者ID:mcneela,项目名称:Retina,代码行数:35,代码来源:swiss_roll2.py

示例4: k_nearest_neighbors_scores

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def k_nearest_neighbors_scores(k, eng_vec_dict, fr_vec_dict):
	"""Compare the k-nearest-neighbour structure of two parallel vector sets.

	Two row-aligned matrices are built with ``build_parallel_mats_from_dicts``
	(which also receives the module-level ``translation_dict``).  For every row
	the k nearest neighbours are computed independently in each matrix, and the
	fraction of neighbour indices shared by both is taken as that row's recall.

	Returns a tuple ``(average_recall, knearest_map_en, knearest_map_fr)``
	where each map sends a word to the list of ``index_map`` entries of its
	neighbours (only the first occurrence of a word is recorded).

	Raises ``ZeroDivisionError`` when the matrices contain no rows.
	"""
	eng_mat, fr_mat, index_map = build_parallel_mats_from_dicts(eng_vec_dict, fr_vec_dict, translation_dict)
	# k + 1 since we discard the top neighbor, which is the point itself.
	neighbors_en = NearestNeighbors(n_neighbors=k+1, algorithm='ball_tree').fit(eng_mat)
	_, indices_en = neighbors_en.kneighbors(eng_mat)
	neighbors_fr = NearestNeighbors(n_neighbors=k+1, algorithm='ball_tree').fit(fr_mat)
	_, indices_fr = neighbors_fr.kneighbors(fr_mat)

	# The matrices were built in parallel, so row indices correspond to each
	# other and the overlap of the neighbour index sets measures agreement.
	num_points = len(indices_en)
	total_recall = 0.
	knearest_map_en = dict()
	knearest_map_fr = dict()
	for row in range(num_points):
		w_en = index_map[row][0]
		w_fr = index_map[row][1]
		index_set_en = set(indices_en[row][1:])  # should be size k
		index_set_fr = set(indices_fr[row][1:])  # should be size k
		# Lists rather than map(): on Python 3 map() returns a one-shot
		# iterator, which would leave callers with exhausted values.
		if w_en not in knearest_map_en:
			knearest_map_en[w_en] = [index_map[z] for z in index_set_en]
		if w_fr not in knearest_map_fr:
			knearest_map_fr[w_fr] = [index_map[z] for z in index_set_fr]
		# precision = recall for this task
		shared = len(index_set_en & index_set_fr)
		total_recall += float(shared) / len(index_set_en)
	return (total_recall / num_points), knearest_map_en, knearest_map_fr
开发者ID:kiranvodrahalli,项目名称:hebb_vectors,代码行数:30,代码来源:analysis.py

示例5: estimator_knn_cv

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def _majority_label(labels):
    """Return the most frequent value of the 1-D array ``labels``."""
    # np.atleast_1d accepts both SciPy conventions: the mode is returned as a
    # length-1 array by older releases and as a scalar by newer ones.
    return np.atleast_1d(stats.mode(labels)[0])[0]


def estimator_knn_cv(X, y, clf, n_neigh):
    """Cross-validate k-NN voting in the space of ensemble-member predictions.

    For each of 5 stratified folds ``clf`` is fitted and every member of
    ``clf.estimators_`` predicts the train and test samples.  Test samples are
    then labelled by majority vote among

    * their ``i`` nearest training samples in prediction space (Manhattan
      distance), for ``i`` in ``range(1, n_neigh, 2)``;
    * ``clf.predict`` itself;
    * the union of the 30 nearest training samples in feature space
      (Euclidean) and in prediction space.

    The mean accuracy of each variant is printed on one line followed by the
    maximum, and the array of mean accuracies is returned.

    NOTE(review): ``StratifiedKFold(y, 5)`` is the constructor form of the
    legacy ``sklearn.cross_validation`` module -- confirm the import in use.
    """
    neigh = NearestNeighbors(n_neighbors=n_neigh, metric="euclidean", algorithm="brute")
    neigh_est = NearestNeighbors(n_neighbors=n_neigh, metric="manhattan", algorithm="brute")
    acc = []
    for train, test in StratifiedKFold(y, 5):
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]
        clf.fit(X_train, y_train)
        estimators = clf.estimators_
        # One column per ensemble member.  List comprehensions are used
        # because np.array(map(...)) wraps the map object itself on Python 3.
        preds_train = np.array([e.predict(X_train) for e in estimators]).T
        preds_test = np.array([e.predict(X_test) for e in estimators]).T

        neigh.fit(X_train)
        _, knn = neigh.kneighbors(X_test)
        neigh_est.fit(preds_train)
        _, knn_est = neigh_est.kneighbors(preds_test)

        # Union of both 30-neighbour lists per test sample.  Rows may differ
        # in length after de-duplication, so keep them in a plain list.
        combined = [np.unique(row) for row in np.hstack((knn[:, :30], knn_est[:, :30]))]
        pp_uniq = np.array([_majority_label(y_train[nn]) for nn in combined])
        preds_test_est_knn = np.array(
            [[_majority_label(y_train[nn]) for nn in knn_est[:, :i]] for i in range(1, n_neigh, 2)]
        )
        acc.append(
            [accuracy_score(y_test, pred) for pred in np.vstack((preds_test_est_knn, clf.predict(X_test), pp_uniq))]
        )
    mean_acc = np.mean(acc, axis=0)
    # Single string so the output is identical under Python 2 and 3.
    print(" ".join("{:.3f}".format(v) for v in mean_acc) + " " + " max:{:.3f}".format(mean_acc.max()))
    return mean_acc
开发者ID:hippozhu,项目名称:dcs,代码行数:35,代码来源:dcs_rank.py

示例6: resample

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
    def resample(self):
        """Over-sample the minority class, then drop Tomek links.

        Synthetic minority samples are produced by ``self.make_samples`` from
        the ``self.k`` nearest minority neighbours of each minority sample and
        appended to the data.  Afterwards every sample's nearest neighbour in
        the enlarged set is passed to ``self.is_tomek`` and the samples it
        flags are removed.

        Returns the filtered ``(x, y)`` pair.
        """
        from sklearn.neighbors import NearestNeighbors

        # Minority-class subset.
        is_minority = self.y == self.minc
        minx = self.x[is_minority]
        miny = self.y[is_minority]

        # k nearest minority neighbours of each minority sample; column 0 is
        # dropped because it is the sample itself.
        minority_index = NearestNeighbors(n_neighbors=self.k + 1)
        minority_index.fit(minx)
        neighbour_ids = minority_index.kneighbors(minx, return_distance=False)[:, 1:]

        # Generate the synthetic samples.
        n_synthetic = int(self.ratio * len(miny))
        sx, sy = self.make_samples(
            minx, minx, self.minc, neighbour_ids, n_synthetic, random_state=self.rs, verbose=self.verbose
        )

        # Original data followed by the synthetic samples.
        ret_x = concatenate((self.x, sx), axis=0)
        ret_y = concatenate((self.y, sy), axis=0)

        # Nearest neighbour (other than itself) of every sample.
        full_index = NearestNeighbors(n_neighbors=2)
        full_index.fit(ret_x)
        closest = full_index.kneighbors(ret_x, return_distance=False)[:, 1]

        # Boolean vector marking the samples to discard.
        links = self.is_tomek(ret_y, closest, self.minc, self.verbose)
        keep = logical_not(links)

        if self.verbose:
            print("Over-sampling performed:" " " + str(Counter(ret_y[keep])))

        return ret_x[keep], ret_y[keep]
开发者ID:aarshayj,项目名称:UnbalancedDataset,代码行数:37,代码来源:pipeline.py

示例7: estimate_dimension

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def estimate_dimension(X, n_neighbors='auto', neighbors_estimator=None):
    """Estimate the intrinsic dimensionality of a data set.

    Implements the estimator of "Manifold-Adaptive Dimension Estimation",
    Farahmand, Szepavari, Audibert, ICML 2007: for every sample the distance
    to its k-th neighbour is compared with the distance to its (k // 2)-th
    neighbour, and the per-sample estimates are averaged.

    Parameters
    ----------
    X : nd-array, shape (n_samples, n_features)
        Input data.

    n_neighbors : int or auto, default='auto'
        Number of neighbors used for the estimate.
        'auto' means ``np.floor(2 * np.log(n_samples))``.

    neighbors_estimator : NearestNeighbors object or None, default=None
        A pre-fitted neighbors object to speed up calculations.

    Returns
    -------
    The rounded mean of the per-sample estimates, each capped at
    ``n_features``.
    """
    if n_neighbors == 'auto':
        n_neighbors = np.floor(2 * np.log(X.shape[0])).astype("int")

    if neighbors_estimator is None:
        neighbors_estimator = NearestNeighbors(n_neighbors=n_neighbors)
        neighbors_estimator.fit(X)

    def kth_distance(k):
        # Distance from every sample to the last of its k returned neighbours.
        distances = neighbors_estimator.kneighbors(X, n_neighbors=k)[0]
        return distances[:, -1]

    ratio = kth_distance(n_neighbors) / kth_distance(n_neighbors // 2)
    per_sample = np.log(2) / np.log(ratio)
    capped = np.minimum(per_sample, X.shape[1])
    return np.round(np.mean(capped))
开发者ID:amueller,项目名称:information-theoretic-mst,代码行数:31,代码来源:infer_dimensionality.py

示例8: adasyn_sample

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def adasyn_sample(X,Y,minclass,K=5,n=200):
    """Return ``n`` samples of class ``minclass``, synthesising extras if needed.

    X        -- sample matrix, one row per sample.
    Y        -- label array aligned with the rows of ``X``.
    minclass -- label of the class to sample.
    K        -- number of neighbours used for weighting and interpolation.
    n        -- requested number of samples.

    If the class already holds at least ``n`` samples, ``n`` of them are drawn
    at random without replacement and returned.  Otherwise all of them are
    kept and the shortfall is filled with synthetic samples: every class
    member receives a share proportional to the fraction of its ``K`` nearest
    neighbours in ``X`` that carry ``minclass``, and each synthetic sample is
    a random convex combination of the member and one of its ``K`` nearest
    neighbours within the class.  The result of the second case is assembled
    with ``sparse.vstack``.
    """
    indices = np.nonzero(Y==minclass)
    Xmin = X[indices]
    Cmin = len(indices[0])
    if n <= Cmin:
        # simple random without replacement undersampling
        return Xmin[random.sample(range(Cmin),n)]

    Xs = [Xmin]
    n -= Cmin

    # Weight of each class member: share of its K neighbours (searched in the
    # whole data set) that belong to the same class, normalised to sum to 1.
    neigh = NearestNeighbors(n_neighbors=K)
    neigh.fit(X)
    nindices = neigh.kneighbors(Xmin, n_neighbors=K, return_distance=False)
    gamma = np.array([float(sum(Y[i]==minclass))/K for i in nindices])
    gamma = gamma / np.linalg.norm(gamma,ord = 1)
    N = np.round(gamma*n).astype(int)

    # Neighbours *within the class*, queried once for all members.
    neigh = NearestNeighbors(n_neighbors=K)
    neigh.fit(Xmin)
    min_neighbours = neigh.kneighbors(Xmin, n_neighbors=K, return_distance=False)

    for (i,nn) in enumerate(N):
        for _ in range(nn):
            alpha = random.random()
            # The indices refer to rows of Xmin (the data this index was
            # fitted on); the original indexed X with them and therefore
            # interpolated towards unrelated samples.
            Xnn = Xmin[random.choice(min_neighbours[i])]
            Xs.append((1.-alpha)*Xmin[i]+alpha*Xnn)
    Xadasyn = sparse.vstack(Xs)
    return Xadasyn
开发者ID:KenHollandWHY,项目名称:kaggle,代码行数:31,代码来源:utils.py

示例9: main

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def main():
    """Write a nearest-neighbour graph of the comments as a JSON file.

    Comments are vectorised as uni-/bi-gram counts and compared with the
    Jaccard metric.  The output ``<n_neighbors>-nn.json``, written next to
    this script, has one node per comment (training comments first, then new
    comments) and one link from every comment to each of its ``n_neighbors``
    nearest *training* comments, with value ``10 * (1 - distance)``.

    Reads the module-level names ``training_set``, ``new_comments``,
    ``groups``, ``new_groups`` and ``n_neighbors``.
    """
    vectorizer = CountVectorizer(ngram_range=(1,2),max_df=1.0, min_df=0.0)

    nei = NearestNeighbors(algorithm='brute', metric='jaccard')
    # Plain ndarrays: recent scikit-learn releases reject np.matrix input.
    matrix = vectorizer.fit_transform(training_set).toarray()
    new_matrix = vectorizer.transform(new_comments).toarray()
    nei.fit(matrix)

    all_comments = training_set + new_comments
    all_groups = groups + new_groups
    nodes = [{'name': comment, 'group': all_groups[i]}
             for i, comment in enumerate(all_comments)]
    links = []

    # Training comments: ask for one extra neighbour and skip the first one,
    # which is the comment itself.
    dists, ids = nei.kneighbors(matrix, n_neighbors=n_neighbors + 1)
    for i in range(len(matrix)):
        for target, dist in zip(ids[i][1:], dists[i][1:]):
            # int()/float(): numpy scalars are not JSON serialisable on Python 3.
            links.append({"source": i, "target": int(target), "value": 10 * (1 - float(dist))})

    # New comments are not part of the fitted set, so every returned
    # neighbour is a real one.  The original paired neighbour j with the
    # distance of neighbour j+1; keep each index with its own distance.
    if len(new_matrix):
        dists, ids = nei.kneighbors(new_matrix, n_neighbors=n_neighbors)
        for i in range(len(new_matrix)):
            for target, dist in zip(ids[i], dists[i]):
                links.append({"source": len(matrix) + i, "target": int(target),
                              "value": 10 * (1 - float(dist))})

    jsondumped = json.dumps({'nodes':nodes, 'links':links}, indent=2)

    path =  '{0}/'.format(pathsplit(abspath(__file__))[0])
    # Context manager so the file is flushed and closed deterministically.
    with open(path + '{0}-nn.json'.format(n_neighbors), 'w') as jsonfile:
        jsonfile.write(jsondumped)
开发者ID:opoirion,项目名称:pres_ml,代码行数:33,代码来源:generate_json.py

示例10: nearestN

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def nearestN():
    """Print the nearest neighbour of a fixed query point in a toy data set.

    Fits ``NN`` on ten two-feature samples and prints the
    ``(distances, indices)`` pair returned by ``kneighbors`` for the query
    ``[98., 0.]``.
    """
    X = [[125,1], [200,0], [70,0], [240,1], [114,0], [120,0], [264,1], [85,0], [150,0], [90,0]]
    model = NN(n_neighbors=1, radius=1)
    model.fit(X)
    y = [98.,0.]
    # kneighbors expects a 2-D array of queries, so the single point is
    # wrapped in a list; print() with one argument behaves the same on
    # Python 2 and 3.
    print(model.kneighbors([y]))
开发者ID:B-Rich,项目名称:gsinghal_python_src,代码行数:9,代码来源:prob1.py

示例11: get_minolab

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
class NNScope:
    """Neighbourhood statistics of the minority class of a labelled data set.

    The features are standardised on construction.  Two neighbour indexes
    are fitted: one on all samples and one on the samples that do not carry
    the minority label.
    """

    def __init__(self, X, y, k):
        """Standardise ``X`` and fit both neighbour indexes.

        X -- samples, converted to a float64 array (a copy is made).
        y -- labels aligned with the rows of ``X``.
        k -- number of neighbours used by both indexes.
        """
        self.X = np.array(X, dtype='float64')
        self.normalization()
        # An array is required for the element-wise comparisons below; with a
        # plain list ``y != label`` would collapse to a single boolean.
        self.y = np.asarray(y)
        self.minolab = self.get_minolab()
        self.nn = NearestNeighbors(n_neighbors=k, n_jobs=-1)
        self.nn.fit(self.X)
        self.nn_maj = NearestNeighbors(n_neighbors=k, n_jobs=-1)
        self.nn_maj.fit(self.X[self.y != self.minolab])
        self.distr = None
        # Filled in by calc_ratio().
        self.WBNR = None

    def get_minolab(self):
        """Return the label with the fewest occurrences in ``self.y``."""
        counts = pd.Series(self.y).value_counts()
        return min(counts.keys(), key=lambda o: counts[o])

    def normalization(self):
        """Standardise ``self.X`` in place to zero mean and unit variance.

        Columns with zero variance are only centred; dividing them by their
        standard deviation would turn them into NaN.
        """
        self.X -= np.mean(self.X, axis=0)
        std = np.sqrt(np.var(self.X, axis=0))
        self.X /= np.where(std == 0, 1.0, std)

    def calc_ratio(self):
        """Compute ``self.WBNR`` for every minority sample.

        For each minority sample this is the root-mean-square distance to its
        k nearest neighbours among all samples (the sample itself excluded)
        divided by the root-mean-square distance to its k nearest
        non-minority samples.
        """
        is_minority = self.y == self.minolab
        dis_all, _ = self.nn.kneighbors()
        dis_all = dis_all[is_minority]
        dis_maj, _ = self.nn_maj.kneighbors(self.X[is_minority])
        self.WBNR = np.sqrt(np.mean(dis_all ** 2, axis=1) /
                            np.mean(dis_maj ** 2, axis=1))

    def show_ratio_distr(self):
        """Plot a 20-bin histogram of ``self.WBNR``, computing it if needed."""
        if self.WBNR is None:
            self.calc_ratio()
        plt.hist(self.WBNR, bins=20)
开发者ID:tianfudhe,项目名称:ids,代码行数:34,代码来源:nn.py

示例12: RunAllKnnScikit

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
    def RunAllKnnScikit(q):
      """Time an all-k-nearest-neighbours run and report it through ``q``.

      ``self`` and ``options`` are taken from the enclosing scope.  ``options``
      is searched for ``-k <int>`` (required) and ``-l <int>`` (leaf size,
      default 20).  The elapsed time is put on ``q`` and returned; on invalid
      options or any failure of the computation ``-1`` is put and returned
      instead.
      """
      totalTimer = Timer()

      # Load input dataset.
      # If the dataset contains two files then the second file is the query
      # file.
      Log.Info("Loading dataset", self.verbose)
      if len(self.dataset) == 2:
        referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
        queryData = np.genfromtxt(self.dataset[1], delimiter=',')
      else:
        referenceData = np.genfromtxt(self.dataset, delimiter=',')

      with totalTimer:
        # Get all the parameters.
        kMatch = re.search(r"-k (\d+)", options)
        leafSize = re.search(r"-l (\d+)", options)

        if not kMatch:
          Log.Fatal("Required option: Number of furthest neighbors to find.")
          q.put(-1)
          return -1

        k = int(kMatch.group(1))
        if (k < 1 or k > referenceData.shape[0]):
          # Report the matched text; the original called .group() on the
          # already converted integer and raised AttributeError here.
          Log.Fatal("Invalid k: " + kMatch.group(1) + "; must be greater than 0"
            + " and less or equal than " + str(referenceData.shape[0]))
          q.put(-1)
          return -1

        if not leafSize:
          l = 20
        elif int(leafSize.group(1)) < 0:
          Log.Fatal("Invalid leaf size: " + str(leafSize.group(1)) + ". Must" +
              " be greater than or equal to 0.")
          q.put(-1)
          return -1
        else:
          l = int(leafSize.group(1))

        try:
          # Perform All K-Nearest-Neighbors.
          model = NearestNeighbors(n_neighbors=k, algorithm='kd_tree', leaf_size=l)
          model.fit(referenceData)

          if len(self.dataset) == 2:
            model.kneighbors(queryData, k, return_distance=True)
          else:
            # We have to increment k by one because mlpack ignores the
            # self-neighbor, whereas scikit-learn will happily return the
            # nearest neighbor of point 0 as point 0.
            model.kneighbors(referenceData, k + 1, return_distance=True)
        except Exception as e:
          # Benchmark boundary: record the failure instead of hiding it.
          Log.Fatal("Exception: " + str(e))
          q.put(-1)
          return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
开发者ID:rancho93,项目名称:benchmarks,代码行数:61,代码来源:allknn.py

示例13: findKNN

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def findKNN(frequencyVector,newVector,n_neighbors=5):
    """Return the indices of the nearest samples for each query vector.

    frequencyVector -- reference samples, one row per sample.
    newVector       -- query vectors passed to ``kneighbors``.
    n_neighbors     -- number of neighbours per query (default 5, the value
                       that used to be hard-coded).

    The ``(distances, indices)`` pair is printed, and the indices are
    returned as a nested list with one inner list per query.
    """
    samples = np.array(frequencyVector)
    neigh = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean")
    neigh.fit(samples)
    # One query yields both distances and indices; the original ran the same
    # search twice.
    a = neigh.kneighbors(newVector)
    print(a)
    return a[1].tolist()
开发者ID:alynsther,项目名称:RIPSHKCICC,代码行数:10,代码来源:KNN_test_win.py

示例14: KNearestNeighbours

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
class KNearestNeighbours(MLClassifierBase):
    """k Nearest Neighbours multi-label classifier."""
    BRIEFNAME = "MLkNN"

    def __init__(self, k = 10, s = 1.0):
        """Store the neighbour count ``k`` and the smoothing parameter ``s``."""
        super(KNearestNeighbours, self).__init__(None)
        self.k = k # Number of neighbours
        self.s = s # Smooth parameter

    def compute_prior(self, y):
        """Return the smoothed prior of each label being set / not set.

        The result is a pair of lists ``(prior_prob_true, prior_prob_false)``
        with one entry per label.
        """
        prior_prob_true = []
        prior_prob_false = []
        for label in range(self.num_labels):
            positives = sum(instance[label] == 1 for instance in y)
            prior_prob_true.append(float(self.s + positives) / (self.s * 2 + self.num_instances))
            prior_prob_false.append(1 - prior_prob_true[-1])
        return prior_prob_true, prior_prob_false

    def compute_cond(self, X, y):
        """Fit the neighbour index and return the conditional probabilities.

        For every label, entry ``j`` of the returned tables is the smoothed
        probability that exactly ``j`` of an instance's ``k`` neighbours carry
        the label, given that the instance itself does
        (``cond_prob_true``) or does not (``cond_prob_false``).

        NOTE(review): the neighbours are queried on the training data itself,
        so each instance is presumably counted among its own neighbours --
        confirm this is intended.
        """
        self.knn = NearestNeighbors(n_neighbors=self.k).fit(X)
        # One batched query instead of one call per instance; it also avoids
        # passing 1-D rows, which kneighbors does not accept.
        neighbors = self.knn.kneighbors(X, self.k, return_distance=False)
        c = [[0] * (self.k + 1) for label in range(self.num_labels)]
        cn = [[0] * (self.k + 1) for label in range(self.num_labels)]
        for instance in range(self.num_instances):
            for label in range(self.num_labels):
                delta = sum(y[neighbor][label] for neighbor in neighbors[instance])
                (c if y[instance][label] == 1 else cn)[label][delta] += 1

        cond_prob_true = [[0] * (self.k + 1) for label in range(self.num_labels)]
        cond_prob_false = [[0] * (self.k + 1) for label in range(self.num_labels)]
        for label in range(self.num_labels):
            for neighbor in range(self.k + 1):
                # float(): keep true division on Python 2 even for an integer s.
                cond_prob_true[label][neighbor] = float(self.s + c[label][neighbor]) / (self.s * (self.k + 1) + sum(c[label]))
                cond_prob_false[label][neighbor] = float(self.s + cn[label][neighbor]) / (self.s * (self.k + 1) + sum(cn[label]))
        return cond_prob_true, cond_prob_false

    def fit(self, X, y):
        """Fit the classifier on samples ``X`` and label matrix ``y``.

        ``y`` is indexed as ``y[instance][label]``.  Returns ``self``.
        """
        self.predictions = y
        self.num_instances = len(y)
        self.num_labels = len(y[0])
        # Computing the prior probabilities
        self.prior_prob_true, self.prior_prob_false = self.compute_prior(y)
        # Computing the posterior probabilities
        self.cond_prob_true, self.cond_prob_false = self.compute_cond(X, y)
        return self

    def predict(self, X):
        """Return a 0/1 matrix of shape ``(len(X), num_labels)``.

        A label is predicted when prior times conditional probability of it
        being set is at least that of it not being set, given how many of the
        instance's ``k`` training neighbours carry the label.
        """
        neighbors = self.knn.kneighbors(X, self.k, return_distance=False)
        result = np.zeros((len(neighbors), self.num_labels), dtype='i8')
        for instance in range(len(neighbors)):
            for label in range(self.num_labels):
                delta = sum(self.predictions[neighbor][label] for neighbor in neighbors[instance])
                p_true = self.prior_prob_true[label] * self.cond_prob_true[label][delta]
                p_false = self.prior_prob_false[label] * self.cond_prob_false[label][delta]
                result[instance][label] = int(p_true >= p_false)
        return result
开发者ID:crazyfu,项目名称:scikit-multilearn,代码行数:58,代码来源:mlknn.py

示例15: sample

# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
    def sample(s):
        """Return the data set enlarged by ``s.N`` synthetic minority samples.

        Steps, as implemented below:

        1. keep the minority samples that have at least one minority sample
           among their ``s.k1`` nearest neighbours;
        2. collect the ``s.k2`` nearest majority samples of those (the
           boundary majority set);
        3. collect the ``s.k3`` nearest minority samples of the boundary
           majority set (the boundary minority set);
        4. weight every boundary minority sample from its distances to the
           boundary majority samples and draw ``s.N`` of them with
           probability proportional to the weight;
        5. interpolate each drawn sample with a random one of its ``s.k1``
           nearest minority neighbours.

        Returns a shuffled array whose rows are the minority, synthetic and
        majority samples with the class label appended as the last column.

        Raises ``ValueError`` if ``s.data`` is ``None``.
        """
        if s.data is None:
            raise ValueError('data not loaded.')
        mdl = NearestNeighbors(n_neighbors=s.k1, n_jobs=-1)
        mdl.fit(s.X)
        _, nei_table = mdl.kneighbors()
        # Indices of the minority points that have minority neighbors.  A list
        # is required: on Python 3 filter() returns an iterator, which cannot
        # be used to index an array.
        filtered_mino_idx = [o for o in range(s.X.shape[0])
                             if s.y[o] == s.minolab
                             and sum(s.y[nei_table[o]] == s.minolab) != 0]
        minoX = s.X[s.y == s.minolab]
        majX = s.X[s.y == s.majlab]

        mdl_maj = NearestNeighbors(n_neighbors=s.k2, n_jobs=-1)
        mdl_maj.fit(majX)
        # all majority examples on the bound, duplicates removed
        # (np.unique flattens its input)
        _, tmp = mdl_maj.kneighbors(s.X[filtered_mino_idx])
        bound_maj_idx = np.unique(tmp)

        mdl_mino = NearestNeighbors(n_neighbors=s.k3, n_jobs=-1)
        mdl_mino.fit(minoX)
        # find minority examples on the bound backward
        _, tmp = mdl_mino.kneighbors(majX[bound_maj_idx])
        bound_mino_idx = np.unique(tmp)

        bound_maj = majX[bound_maj_idx]
        bound_mino = minoX[bound_mino_idx]

        # Difference tensor: through broadcasting diff[i][j] is the vector
        # bound_maj[i] - bound_mino[j].
        diff = bound_maj[:, None, :] - bound_mino

        def closeness(vec):
            # Feature count over the Euclidean length of vec, capped at
            # s.Cfth and scaled by 1 / s.Cfth.
            return min(s.X.shape[1] / np.linalg.norm(vec, 2), s.Cfth) * 1.0 / s.Cfth

        CM = np.apply_along_axis(closeness, 2, diff)

        W = np.mean(((CM * CM).T / np.sum(CM, axis=1)).T, axis=0)

        # P is the normalized weight vector: the probability of each boundary
        # minority sample being chosen for synthesis.
        P = W / np.sum(W)

        # choose N boundary minority examples according to their weight
        chosen = np.random.choice(range(len(P)), size=s.N, p=P)
        chosenp = bound_mino[chosen]

        # The clustering step of MWMOTE is deliberately not implemented; the
        # partner of each chosen sample is a random one of its k1 nearest
        # minority neighbours.
        _, nei = mdl_mino.kneighbors(chosenp, s.k1)
        dualp = minoX[[i[int(np.random.rand() * s.k1)] for i in nei]]

        generated = chosenp + np.random.rand(s.N, 1) * (dualp - chosenp)
        labels = np.array([s.minolab] * (minoX.shape[0] + s.N) + [s.majlab] * majX.shape[0])
        ret = np.hstack((np.vstack((minoX, generated, majX)), labels[:, None]))
        np.random.shuffle(ret)

        return ret
开发者ID:tianfudhe,项目名称:ids,代码行数:58,代码来源:sampling.py


注:本文中的sklearn.neighbors.NearestNeighbors.kneighbors方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。