本文整理汇总了 Python 中 sklearn.neighbors.NearestNeighbors.kneighbors 方法的典型用法代码示例。如果您正苦于以下问题：Python NearestNeighbors.kneighbors 方法的具体用法？Python NearestNeighbors.kneighbors 怎么用？Python NearestNeighbors.kneighbors 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.neighbors.NearestNeighbors 的用法示例。
在下文中一共展示了 NearestNeighbors.kneighbors 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
class KDTrees:
    """Nearest-neighbour index built on a ``NearestNeighbors`` estimator.

    The estimator is configured with the ball-tree algorithm and the
    haversine metric.
    """

    def __init__(self, nb_neighbours, leaf_size):
        """Create the underlying estimator.

        Parameters
        ----------
        nb_neighbours : int
            Forwarded to the estimator as ``n_neighbors``.
        leaf_size : int
            Forwarded to the estimator as ``leaf_size``.
        """
        self.nbrs = NearestNeighbors(
            n_neighbors=nb_neighbours,
            algorithm='ball_tree',
            metric='haversine',
            leaf_size=leaf_size,
        )

    def mapDistance(self, x, y):
        """Compute distance in time between two points on the map.

        Inputs with more than two components are compared with the squared
        Euclidean distance instead.  Two-component inputs are sent to
        ``gmaps.directions`` in transit mode and the difference between the
        arrival and departure values of the first leg is returned.

        Returns
        -------
        number
            The squared distance, the travel duration, or the sentinel
            ``1000000000`` when the directions response lacks the expected
            fields.
        """
        if len(x) > 2:
            return np.sum((x - y) ** 2)

        # Always query from the point with the larger first coordinate.
        if x[0] < y[0]:
            x, y = y, x
        pos1 = str(x[0]) + ", " + str(x[1])
        pos2 = str(y[0]) + ", " + str(y[1])

        timestamp = datetime.now()
        # Epoch seconds of today's midnight (taken from the naive local
        # clock) plus 32 hours minus 2 hours.
        # NOTE(review): the 2-hour term looks like a timezone correction --
        # confirm before relying on the resulting departure time.
        sec_to_add = (
            32 * 3600
            + (timestamp - datetime(1970, 1, 1)).total_seconds()
            - 2 * 3600
            - timestamp.hour * 3600
            - timestamp.minute * 60
            - timestamp.second
        )
        traject = gmaps.directions(
            pos1, pos2, mode="transit",
            departure_time=timestamp.fromtimestamp(sec_to_add))

        try:
            leg = traject[0]["legs"][0]
            duration = leg["arrival_time"]["value"] - leg["departure_time"]["value"]
        except (IndexError, KeyError, TypeError):
            # No route, or a route without arrival/departure times.
            print('bug')
            return 1000000000
        # Report success only once the duration has really been extracted.
        print('ok')
        return duration

    def addPoints(self, points):
        """Fit the estimator on ``points``."""
        self.nbrs.fit(points)

    def getNeighbours(self, points):
        """Return the result of ``kneighbors(points)`` on the fitted estimator.

        Previously the result was computed and then discarded.
        """
        return self.nbrs.kneighbors(points)
示例2: eucl_distance
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def eucl_distance(a, b):
    """Return nearest-neighbour distances between two point sets.

    The first element holds, for every row of ``b``, the distance to its
    nearest row of ``a``; the second element is the same with the roles
    swapped.  A side that cannot be computed because one of the inputs is
    empty is reported as ``[np.inf]``.
    """
    tree_a = None
    if a.size > 0:
        tree_a = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(a)

    tree_b = None
    if b.size > 0:
        tree_b = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(b)

    if tree_a and b.size > 0:
        distances_a = tree_a.kneighbors(b)[0]
    else:
        distances_a = [np.inf]

    if tree_b and a.size > 0:
        distances_b = tree_b.kneighbors(a)[0]
    else:
        distances_b = [np.inf]

    return [distances_a, distances_b]
示例3: on_pick
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def on_pick(self, event):
    """Pick handler: highlight the 50 nearest neighbours of the picked point.

    Neighbours are looked up twice: in the module-level data ``X`` (drawn in
    yellow through ``nld.scatter``) and in the 2-D projection layer of the
    section the picked artist belongs to (drawn in green on that section's
    axes).
    """
    picked = event.ind[0]
    artist = event.artist

    # Remember which layer owns the picked artist.
    for key in nld.layers.keys():
        if any(plot is artist for plot in nld.layers[key].plots):
            self.neighb_sec = key

    _, indices = NearestNeighbors(n_neighbors=50, n_jobs=1).fit(X).kneighbors(X)

    # Best-effort removal of the previous highlight; on the first pick the
    # attributes do not exist yet.
    try:
        self.scatters.remove()
        self.two_scatters.remove()
    except:
        pass

    self.points = indices[picked]
    nld.get_layer(self.neighb_sec)
    self.points = X[self.points]

    # The last character of the layer key selects the section axes.
    section_num = int(self.neighb_sec[-1])
    section_ax = self.fig.get_axes()[section_num + 1]
    section_layer = section_ax.get_layer(section_ax.title._text + ' proj')

    self.scatters = nld.scatter(
        self.points[:, 0], self.points[:, 1], self.points[:, 2],
        c='yellow', s=80)

    two_mat = np.column_stack((section_layer.x_data[0], section_layer.y_data[0]))
    two_nbrs = NearestNeighbors(n_neighbors=50, n_jobs=1).fit(two_mat)
    _, two_inds = two_nbrs.kneighbors(two_mat)
    two_points = two_mat[two_inds[picked]]
    self.two_scatters = section_ax.scatter(
        two_points[:, 0], two_points[:, 1], c='green', s=80)
示例4: k_nearest_neighbors_scores
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def k_nearest_neighbors_scores(k, eng_vec_dict, fr_vec_dict):
    """Measure how well k-nearest-neighbour sets agree across two spaces.

    The two matrices are built in parallel by
    ``build_parallel_mats_from_dicts`` (which also reads the module-level
    ``translation_dict``), so row ``i`` of each matrix refers to the same
    entry of ``index_map``.

    Returns
    -------
    tuple
        ``(average recall, knearest_map_en, knearest_map_fr)``.  Each map
        goes from a word to the list of ``index_map`` entries of its
        neighbours.  The lists are materialized so they can be read more
        than once on Python 3, where ``map`` returns a one-shot iterator.
    """
    eng_mat, fr_mat, index_map = build_parallel_mats_from_dicts(
        eng_vec_dict, fr_vec_dict, translation_dict)

    # k + 1 since we discard the top neighbor, which is itself
    neighbors_en = NearestNeighbors(n_neighbors=k + 1, algorithm='ball_tree').fit(eng_mat)
    _, indices_en = neighbors_en.kneighbors(eng_mat)
    neighbors_fr = NearestNeighbors(n_neighbors=k + 1, algorithm='ball_tree').fit(fr_mat)
    _, indices_fr = neighbors_fr.kneighbors(fr_mat)

    # Rows of both matrices map to each other, so the overlap of the two
    # neighbour sets gives the recall (precision equals recall here).
    num_points = len(indices_en)
    total_recall = 0.
    knearest_map_en = dict()
    knearest_map_fr = dict()
    for i in range(num_points):
        w_en = index_map[i][0]
        w_fr = index_map[i][1]
        index_set_en = set(indices_en[i][1:])  # should be size k
        index_set_fr = set(indices_fr[i][1:])  # should be size k
        if w_en not in knearest_map_en:
            knearest_map_en[w_en] = [index_map[z] for z in index_set_en]
        if w_fr not in knearest_map_fr:
            knearest_map_fr[w_fr] = [index_map[z] for z in index_set_fr]
        recall_count = len(index_set_fr & index_set_en)
        total_recall += float(recall_count) / len(index_set_en)

    return (total_recall / num_points), knearest_map_en, knearest_map_fr
示例5: estimator_knn_cv
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def estimator_knn_cv(X, y, clf, n_neigh):
    """Cross-validate neighbour-based voting against an ensemble classifier.

    For each of the 5 stratified folds the ensemble ``clf`` is fitted and
    neighbours of the test samples are searched twice: in feature space
    (euclidean) and in the space of the per-estimator predictions
    (manhattan).  Accuracies are collected for majority votes over the first
    ``i`` prediction-space neighbours (odd ``i`` below ``n_neigh``), for
    ``clf`` itself, and for a vote over the union of the first 30 neighbours
    of both searches.  The fold-averaged accuracies are printed; nothing is
    returned.
    """
    feature_nn = NearestNeighbors(n_neighbors=n_neigh, metric="euclidean", algorithm="brute")
    prediction_nn = NearestNeighbors(n_neighbors=n_neigh, metric="manhattan", algorithm="brute")

    def majority(neighbour_ids, labels):
        # Most frequent label among the given training indices.
        return stats.mode(labels[neighbour_ids])[0][0]

    fold_scores = []
    for train, test in StratifiedKFold(y, 5):
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]

        clf.fit(X_train, y_train)
        members = clf.estimators_

        preds_train = np.array([e.predict(X_train) for e in members]).T
        preds_test = np.array([e.predict(X_test) for e in members]).T
        # The probability views are computed but not used further down.
        preds_train_proba = np.array([e.predict_proba(X_train) for e in members])
        preds_test_proba = np.array([e.predict_proba(X_test) for e in members])
        p_train = preds_train_proba.swapaxes(0, 1)[:, :, 0]
        p_test = preds_test_proba.swapaxes(0, 1)[:, :, 0]

        feature_nn.fit(X_train)
        knn = feature_nn.kneighbors(X_test)[1]
        prediction_nn.fit(preds_train)
        knn_est = prediction_nn.kneighbors(preds_test)[1]

        # Union of the first 30 neighbours from both searches, per sample.
        merged = np.hstack((knn[:, :30], knn_est[:, :30]))
        knn_combined_uniq = np.array([np.unique(row) for row in merged])
        pp_uniq = np.array([majority(nn, y_train) for nn in knn_combined_uniq])

        preds_test_est_knn = np.array([
            [majority(nn, y_train) for nn in knn_est[:, :i]]
            for i in range(1, n_neigh, 2)
        ])

        candidates = np.vstack((preds_test_est_knn, clf.predict(X_test), pp_uniq))
        fold_scores.append([accuracy_score(y_test, pred) for pred in candidates])

    mean_acc = np.mean(fold_scores, axis=0)
    summary = " ".join("{:.3f}".format(v) for v in mean_acc)
    print(summary + " " + " max:{:.3f}".format(mean_acc.max()))
示例6: resample
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def resample(self):
    """Over-sample the minority class, then drop the flagged samples.

    Synthetic minority samples are produced by ``self.make_samples`` from
    the ``self.k`` nearest minority neighbours of every minority point and
    appended to the data.  The nearest neighbour of every resulting point is
    then passed to ``self.is_tomek``; rows it flags are removed.

    Returns
    -------
    tuple
        ``(x, y)`` of the resampled data without the flagged rows.
    """
    # The estimator is imported locally.
    from sklearn.neighbors import NearestNeighbors

    minority_mask = self.y == self.minc
    minx = self.x[minority_mask]
    miny = self.y[minority_mask]

    # k + 1 neighbours, then drop the first column (the point itself).
    minority_nn = NearestNeighbors(n_neighbors=self.k + 1)
    minority_nn.fit(minx)
    nns = minority_nn.kneighbors(minx, return_distance=False)[:, 1:]

    n_synthetic = int(self.ratio * len(miny))
    sx, sy = self.make_samples(
        minx, minx, self.minc, nns, n_synthetic,
        random_state=self.rs, verbose=self.verbose)

    # Original data followed by the synthetic samples.
    ret_x = concatenate((self.x, sx), axis=0)
    ret_y = concatenate((self.y, sy), axis=0)

    # Single nearest neighbour (excluding itself) of every point.
    all_nn = NearestNeighbors(n_neighbors=2)
    all_nn.fit(ret_x)
    nns = all_nn.kneighbors(ret_x, return_distance=False)[:, 1]

    # Boolean vector marking the rows to drop.
    links = self.is_tomek(ret_y, nns, self.minc, self.verbose)
    keep = logical_not(links)

    if self.verbose:
        print("Over-sampling performed: " + str(Counter(ret_y[keep])))

    return ret_x[keep], ret_y[keep]
示例7: estimate_dimension
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def estimate_dimension(X, n_neighbors='auto', neighbors_estimator=None):
    """Estimate intrinsic dimensionality.

    Based on "Manifold-Adaptive Dimension Estimation"
    Farahmand, Szepesvari, Audibert ICML 2007.

    Parameters
    ----------
    X : nd-array, shape (n_samples, n_features)
        Input data.
    n_neighbors : int or auto, default='auto'
        Number of neighbors used for estimate.
        'auto' means ``np.floor(2 * np.log(n_samples))``.
    neighbors_estimator : NearestNeighbors object or None, default=None
        A pre-fitted neighbors object to speed up calculations.

    Returns
    -------
    float
        Rounded mean of the per-sample estimates, each capped at
        ``n_features``.
    """
    if n_neighbors == 'auto':
        n_neighbors = np.floor(2 * np.log(X.shape[0])).astype("int")

    if neighbors_estimator is None:
        neighbors_estimator = NearestNeighbors(n_neighbors=n_neighbors)
        neighbors_estimator.fit(X)

    def kth_distance(k):
        # Distance from every sample to the last of its k neighbours.
        return neighbors_estimator.kneighbors(X, n_neighbors=k)[0][:, -1]

    ratio = kth_distance(n_neighbors) / kth_distance(n_neighbors // 2)
    per_sample = np.minimum(np.log(2) / np.log(ratio), X.shape[1])
    return np.round(np.mean(per_sample))
示例8: adasyn_sample
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def adasyn_sample(X, Y, minclass, K=5, n=200):
    """Return ``n`` samples of class ``minclass``, synthesizing if needed.

    When ``n`` does not exceed the number of minority samples, a random
    subset (without replacement) of the minority rows is returned.
    Otherwise all minority rows are kept and the missing samples are
    interpolated between a minority point and one of its ``K`` nearest
    minority neighbours.  The number generated per point is proportional
    to ``gamma``, the share of ``minclass`` labels among the point's ``K``
    nearest neighbours in the full data.

    Parameters
    ----------
    X : matrix of samples, one per row
        NOTE(review): the synthesizing branch stacks with ``sparse.vstack``,
        so a scipy sparse matrix is presumably expected -- confirm.
    Y : array of labels aligned with the rows of ``X``.
    minclass : label of the class to sample.
    K : int, number of neighbours used.
    n : int, number of samples wanted.

    Returns
    -------
    The selected rows of ``X`` (undersampling branch) or the result of
    ``sparse.vstack`` over the minority rows and the synthetic rows.
    """
    indices = np.nonzero(Y == minclass)
    Ymin = Y[indices]
    Xmin = X[indices]
    Cmin = len(indices[0])

    if n <= Cmin:
        # simple random without replacement undersampling
        return Xmin[random.sample(range(Cmin), n)]

    Xs = [Xmin]
    n -= len(Ymin)

    # Share of minority labels among the K neighbours in the full data.
    neigh = NearestNeighbors(n_neighbors=30)
    neigh.fit(X)
    nindices = neigh.kneighbors(Xmin, K, False)
    gamma = [float(sum(Y[i] == minclass)) / K for i in nindices]
    gamma = gamma / np.linalg.norm(gamma, ord=1)

    # Neighbours for interpolation are searched among minority rows only.
    neigh = NearestNeighbors(n_neighbors=30)
    neigh.fit(Xmin)
    N = np.round(gamma * n).astype(int)
    assert len(N) == Cmin

    for i, nn in enumerate(N):
        # Row slice keeps the query 2-D for dense and sparse input alike.
        nindices = neigh.kneighbors(Xmin[i:i + 1], K, False)[0]
        for _ in range(nn):
            alpha = random.random()
            # The indices refer to rows of Xmin (the data this model was
            # fitted on), not to rows of X.
            Xnn = Xmin[random.choice(nindices)]
            Xs.append((1. - alpha) * Xmin[i] + alpha * Xnn)

    return sparse.vstack(Xs)
示例9: main
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def main():
    """Write a nearest-neighbour graph of the comments as JSON.

    Comments from the module-level ``training_set`` and ``new_comments`` are
    vectorized as 1- and 2-gram counts.  Every comment is linked to its
    nearest training comments under the Jaccard metric, and the graph is
    written to ``<n_neighbors>-nn.json`` next to this file.
    """
    vectorizer = CountVectorizer(ngram_range=(1, 2), max_df=1.0, min_df=0.0)
    nei = NearestNeighbors(algorithm='brute', metric='jaccard')
    matrix = vectorizer.fit_transform(training_set).todense()
    new_matrix = vectorizer.transform(new_comments).todense()
    nei.fit(matrix)

    # Build the concatenations once, not once per node.
    all_comments = training_set + new_comments
    all_groups = groups + new_groups
    nodes = [{'name': all_comments[i], 'group': all_groups[i]}
             for i in range(len(all_comments))]

    links = []
    n_train = len(matrix)
    for i in range(n_train):
        dist, idnei = nei.kneighbors(matrix[i], n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]
        # Position 0 is skipped for training comments.
        for j in range(1, len(idnei)):
            # Plain int/float so json can serialize the numpy scalars.
            links.append({"source": i,
                          "target": int(idnei[j]),
                          "value": 10 * (1 - float(dist[j]))})

    for i in range(len(new_comments)):
        dist, idnei = nei.kneighbors(new_matrix[i], n_neighbors=n_neighbors + 1)
        dist, idnei = dist[0], idnei[0]
        # NOTE(review): target uses position j but the distance uses
        # position j + 1; one of the two is probably off by one. Kept as-is.
        for j in range(len(idnei) - 1):
            links.append({"source": n_train + i,
                          "target": int(idnei[j]),
                          "value": 10 * (1 - float(dist[j + 1]))})

    jsondumped = json.dumps({'nodes': nodes, 'links': links}, indent=2)

    path = '{0}/'.format(pathsplit(abspath(__file__))[0])
    # The context manager closes the file, which the original never did.
    with open(path + '{0}-nn.json'.format(n_neighbors), 'w') as jsonfile:
        jsonfile.write(jsondumped)
示例10: nearestN
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def nearestN():
    """Print the nearest neighbour of a fixed query point.

    Fits the ``NN`` estimator on ten hard-coded 2-feature samples and prints
    what ``kneighbors`` returns for the query ``[98., 0.]``.
    """
    X = [[125, 1], [200, 0], [70, 0], [240, 1], [114, 0],
         [120, 0], [264, 1], [85, 0], [150, 0], [90, 0]]
    model = NN(n_neighbors=1, radius=1)
    model.fit(X)
    y = [98., 0.]
    # kneighbors expects a 2-D array of queries; a bare 1-D sample is
    # rejected by newer scikit-learn releases.
    print(model.kneighbors([y]))
示例11: get_minolab
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
class NNScope:
    """Compare neighbour distances of minority samples.

    For every minority sample the distances to its neighbours in the whole
    data set are compared with the distances to its neighbours among the
    non-minority samples.
    """

    def get_minolab(self):
        """Return the label that occurs least often in ``self.y``."""
        counts = pd.Series(self.y).value_counts()
        return min(counts.keys(), key=lambda o: counts[o])

    def normalization(self):
        """Standardize ``self.X`` in place, column by column."""
        self.X -= np.mean(self.X, axis=0)
        self.X /= np.std(self.X, axis=0)

    def __init__(self, X, y, k):
        """Standardize the data and fit both neighbour models.

        Parameters
        ----------
        X : array-like of samples; copied into a float64 array.
        y : sequence of labels aligned with the rows of ``X``.
        k : int, number of neighbours for both models.
        """
        self.X = np.array(X, dtype='float64')
        self.normalization()
        self.y = y
        self.minolab = self.get_minolab()
        self.nn = NearestNeighbors(n_neighbors=k, n_jobs=-1)
        self.nn.fit(self.X)
        self.nn_maj = NearestNeighbors(n_neighbors=k, n_jobs=-1)
        # Compare through an array: with a plain list, ``y != label`` is a
        # single bool rather than an element-wise mask.
        self.nn_maj.fit(self.X[np.asarray(y) != self.minolab])
        self.distr = None

    def calc_ratio(self):
        """Store the per-minority-sample distance ratio in ``self.WBNR``.

        The ratio is the root of the mean squared distance to the
        neighbours in the whole data over the mean squared distance to the
        neighbours among the non-minority samples.
        """
        is_minority = np.asarray(self.y) == self.minolab
        dis_all, _ = self.nn.kneighbors()
        dis_all = dis_all[is_minority]
        dis_maj, _ = self.nn_maj.kneighbors(self.X[is_minority])
        self.WBNR = np.sqrt(np.mean(dis_all ** 2, axis=1) /
                            np.mean(dis_maj ** 2, axis=1))

    def show_ratio_distr(self):
        """Plot a 20-bin histogram of ``self.WBNR``."""
        plt.hist(self.WBNR, bins=20)
示例12: RunAllKnnScikit
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def RunAllKnnScikit(q):
    """Time an all-k-nearest-neighbours run and report it through ``q``.

    Reads ``self.dataset``, ``self.verbose`` and ``options`` from the
    enclosing scope.  ``options`` must contain ``-k <int>`` and may contain
    ``-l <int>`` (leaf size, default 20).

    Parameters
    ----------
    q : queue-like object with a ``put`` method that receives the result.

    Returns
    -------
    The elapsed time reported by the timer, or ``-1`` when an option is
    invalid or the computation raises.  The same value is put on ``q``.
    """
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the query
    # file.
    Log.Info("Loading dataset", self.verbose)
    if len(self.dataset) == 2:
        referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
        queryData = np.genfromtxt(self.dataset[1], delimiter=',')
    else:
        referenceData = np.genfromtxt(self.dataset, delimiter=',')

    with totalTimer:
        # Get all the parameters.
        k_match = re.search(r"-k (\d+)", options)
        leaf_match = re.search(r"-l (\d+)", options)

        if not k_match:
            Log.Fatal("Required option: Number of furthest neighbors to find.")
            q.put(-1)
            return -1
        k = int(k_match.group(1))
        if k < 1 or k > referenceData.shape[0]:
            # The message is built from the parsed integer; the match
            # object is no longer bound to ``k`` at this point.
            Log.Fatal("Invalid k: " + str(k) + "; must be greater than 0"
                      + " and less or equal than " + str(referenceData.shape[0]))
            q.put(-1)
            return -1

        if not leaf_match:
            leaf_size = 20
        elif int(leaf_match.group(1)) < 0:
            Log.Fatal("Invalid leaf size: " + str(leaf_match.group(1)) + ". Must" +
                      " be greater than or equal to 0.")
            q.put(-1)
            return -1
        else:
            leaf_size = int(leaf_match.group(1))

        try:
            # Perform All K-Nearest-Neighbors.
            model = NearestNeighbors(n_neighbors=k, algorithm='kd_tree',
                                     leaf_size=leaf_size)
            model.fit(referenceData)
            if len(self.dataset) == 2:
                model.kneighbors(queryData, k, return_distance=True)
            else:
                # We have to increment k by one because mlpack ignores the
                # self-neighbor, whereas scikit-learn will happily return the
                # nearest neighbor of point 0 as point 0.
                model.kneighbors(referenceData, k + 1, return_distance=True)
        except Exception:
            # Any failure of the run is reported as -1.
            q.put(-1)
            return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
示例13: findKNN
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def findKNN(frequencyVector, newVector):
    """Return the indices of the 5 nearest samples for each query.

    Parameters
    ----------
    frequencyVector : array-like of reference samples, one per row.
    newVector : array-like of queries passed to ``kneighbors``.

    Returns
    -------
    list
        Neighbour indices as nested lists, one inner list per query.  The
        ``(distances, indices)`` pair is also printed.
    """
    samples = np.array(frequencyVector)
    neigh = NearestNeighbors(n_neighbors=5, metric="euclidean")
    neigh.fit(samples)
    # One query serves both the printed pair and the returned indices.
    a = neigh.kneighbors(newVector)
    print(a)
    return a[1].tolist()
示例14: KNearestNeighbours
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
class KNearestNeighbours(MLClassifierBase):
    """k Nearest Neighbours multi-label classifier."""

    BRIEFNAME = "MLkNN"

    def __init__(self, k=10, s=1.0):
        """Store the hyper-parameters.

        Parameters
        ----------
        k : int, number of neighbours.
        s : number, smoothing parameter.
        """
        super(KNearestNeighbours, self).__init__(None)
        self.k = k  # Number of neighbours
        self.s = s  # Smooth parameter

    def compute_prior(self, y):
        """Return smoothed prior probabilities for every label.

        Returns
        -------
        tuple of two lists
            ``(P(label present), P(label absent))``, indexed by label.
        """
        prior_prob_true = []
        prior_prob_false = []
        for label in range(self.num_labels):
            positives = sum(instance[label] == 1 for instance in y)
            prior_prob_true.append(
                float(self.s + positives) / (self.s * 2 + self.num_instances))
            prior_prob_false.append(1 - prior_prob_true[-1])
        return prior_prob_true, prior_prob_false

    def compute_cond(self, X, y):
        """Fit the neighbour model and return the conditional probabilities.

        For every label and every possible count ``d`` (0..k) of neighbours
        carrying the label, the training instances with and without the
        label are counted and turned into smoothed probabilities.

        Returns
        -------
        tuple of two nested lists
            ``(cond_prob_true, cond_prob_false)``, indexed ``[label][d]``.
        """
        self.knn = NearestNeighbors(n_neighbors=self.k).fit(X)
        c = [[0] * (self.k + 1) for _ in range(self.num_labels)]
        cn = [[0] * (self.k + 1) for _ in range(self.num_labels)]
        for instance in range(self.num_instances):
            # Slice (not index) so the query stays 2-D.
            neighbors = self.knn.kneighbors(
                X[instance:instance + 1], self.k, return_distance=False)
            for label in range(self.num_labels):
                delta = sum(y[neighbor][label] for neighbor in neighbors[0])
                (c if y[instance][label] == 1 else cn)[label][delta] += 1

        cond_prob_true = [[0] * (self.k + 1) for _ in range(self.num_labels)]
        cond_prob_false = [[0] * (self.k + 1) for _ in range(self.num_labels)]
        for label in range(self.num_labels):
            true_total = self.s * (self.k + 1) + sum(c[label])
            false_total = self.s * (self.k + 1) + sum(cn[label])
            for neighbor in range(self.k + 1):
                # float() mirrors compute_prior, so an integer ``s`` does
                # not truncate under Python 2 division.
                cond_prob_true[label][neighbor] = float(self.s + c[label][neighbor]) / true_total
                cond_prob_false[label][neighbor] = float(self.s + cn[label][neighbor]) / false_total
        return cond_prob_true, cond_prob_false

    def fit(self, X, y):
        """Learn prior and conditional probabilities from ``X`` and ``y``.

        ``y`` is a sequence of per-instance label vectors.  Returns
        ``self``.
        """
        self.predictions = y
        self.num_instances = len(y)
        self.num_labels = len(y[0])
        # Computing the prior probabilities
        self.prior_prob_true, self.prior_prob_false = self.compute_prior(y)
        # Computing the posterior probabilities
        self.cond_prob_true, self.cond_prob_false = self.compute_cond(X, y)
        return self

    def predict(self, X):
        """Return a ``(len(X), num_labels)`` integer array of 0/1 predictions."""
        result = np.zeros((len(X), self.num_labels), dtype='i8')
        for instance in range(len(X)):
            neighbors = self.knn.kneighbors(
                X[instance:instance + 1], self.k, return_distance=False)
            for label in range(self.num_labels):
                delta = sum(self.predictions[neighbor][label] for neighbor in neighbors[0])
                p_true = self.prior_prob_true[label] * self.cond_prob_true[label][delta]
                p_false = self.prior_prob_false[label] * self.cond_prob_false[label][delta]
                result[instance][label] = int(p_true >= p_false)
        return result
示例15: sample
# 需要导入模块: from sklearn.neighbors import NearestNeighbors [as 别名]
# 或者: from sklearn.neighbors.NearestNeighbors import kneighbors [as 别名]
def sample(s):
    """Generate ``s.N`` synthetic minority samples near the class boundary.

    Minority and majority points on the boundary are found with three
    neighbour searches (``s.k1``, ``s.k2``, ``s.k3``).  Each boundary
    minority point gets a weight from its closeness to the boundary
    majority points, and the weights drive the random choice of the points
    to interpolate from.

    Returns
    -------
    ndarray
        Shuffled rows of minority, generated and majority samples with the
        label appended as the last column.

    Raises
    ------
    ValueError
        If ``s.data`` is None.
    """
    if s.data is None:
        raise ValueError('data not loaded.')

    mdl = NearestNeighbors(n_neighbors=s.k1, n_jobs=-1)
    mdl.fit(s.X)
    nei_table = mdl.kneighbors()[1]

    # the index of those minority points with minority neighbors
    noise_mino_idx = [
        o for o in range(s.X.shape[0])
        if sum(s.y[nei_table[o]] == s.minolab) != 0 and s.y[o] == s.minolab
    ]

    minoX = s.X[s.y == s.minolab]
    majX = s.X[s.y == s.majlab]

    # all majority examples on the bound, duplicates removed
    mdl_maj = NearestNeighbors(n_neighbors=s.k2, n_jobs=-1)
    mdl_maj.fit(majX)
    maj_hits = mdl_maj.kneighbors(s.X[noise_mino_idx])[1]
    bound_maj_idx = np.unique(np.reshape(maj_hits, (1, -1))[0])

    # find minority examples on the bound backward
    mdl_mino = NearestNeighbors(n_neighbors=s.k3, n_jobs=-1)
    mdl_mino.fit(minoX)
    mino_hits = mdl_mino.kneighbors(majX[bound_maj_idx])[1]
    bound_mino_idx = np.unique(np.reshape(mino_hits, (1, -1))[0])

    bound_maj = majX[bound_maj_idx]
    bound_mino = minoX[bound_mino_idx]

    # Pairwise differences via broadcasting:
    # diff[i][j] = bound_maj[i] - bound_mino[j].
    diff = bound_maj[:, None, :] - bound_mino

    def closeness(vec):
        # Inverse distance scaled by the feature count, capped at s.Cfth
        # and normalized by it.
        return min(s.X.shape[1] / np.linalg.norm(vec, 2), s.Cfth) * 1.0 / s.Cfth

    CM = np.apply_along_axis(closeness, 2, diff)
    W = np.mean(((CM * CM).T / np.sum(CM, axis=1)).T, axis=0)
    # P is the normalized weight vector: the probability of being chosen
    P = W / np.sum(W)

    # choose N bound minority examples according to their weight
    chosen = np.random.choice(range(len(P)), size=s.N, p=P)
    chosenp = bound_mino[chosen]

    # Pair every chosen point with one random neighbour among its k1
    # nearest minority points, then interpolate between the two.
    nei = mdl_mino.kneighbors(chosenp, s.k1)[1]
    partner_idx = [row[int(np.random.rand() * s.k1)] for row in nei]
    dualp = minoX[partner_idx]
    generated = chosenp + np.random.rand(s.N, 1) * (dualp - chosenp)

    features = np.vstack((minoX, generated, majX))
    labels = np.array(
        [s.minolab] * (minoX.shape[0] + s.N) + [s.majlab] * majX.shape[0])[:, None]
    ret = np.hstack((features, labels))
    np.random.shuffle(ret)
    return ret