This article collects and organizes typical usage examples of the Python method sklearn.utils.shuffle. If you have been wondering what utils.shuffle does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore the wider module the method lives in, sklearn.utils.
The following shows 15 code examples of utils.shuffle, sorted by popularity by default.
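Before the examples, here is a minimal sketch of the behavior they all rely on: sklearn.utils.shuffle applies one consistent random permutation to any number of equal-length arrays or lists and returns shuffled copies, leaving the inputs unchanged. The arrays below are illustrative, not taken from any of the examples.

import numpy as np
from sklearn.utils import shuffle

X = np.arange(10).reshape(5, 2)   # 5 samples, 2 features
y = np.array([0, 1, 0, 1, 0])     # 5 matching labels

# One permutation is applied to both arrays, so rows stay aligned.
X_shuf, y_shuf = shuffle(X, y, random_state=42)

# The same random_state reproduces the same permutation.
X_again, y_again = shuffle(X, y, random_state=42)
assert np.array_equal(X_shuf, X_again) and np.array_equal(y_shuf, y_again)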
Example 1: reset
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def reset(self):
    """Resets the iterator to the beginning of the data."""
    self.curr_idx = 0
    # shuffle data in each bucket
    random.shuffle(self.idx)
    for i, buck in enumerate(self.sentences):
        self.indices[i], self.sentences[i], self.characters[i], self.label[i] = shuffle(self.indices[i],
                                                                                        self.sentences[i],
                                                                                        self.characters[i],
                                                                                        self.label[i])
    self.ndindex = []
    self.ndsent = []
    self.ndchar = []
    self.ndlabel = []
    # for each bucket of data
    for i, buck in enumerate(self.sentences):
        # append the lists with an array
        self.ndindex.append(ndarray.array(self.indices[i], dtype=self.dtype))
        self.ndsent.append(ndarray.array(self.sentences[i], dtype=self.dtype))
        self.ndchar.append(ndarray.array(self.characters[i], dtype=self.dtype))
        self.ndlabel.append(ndarray.array(self.label[i], dtype=self.dtype))
Example 2: test_aom_static_norepeat
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_aom_static_norepeat(self):
    score = aom(self.scores, 3, method='static',
                bootstrap_estimators=False,
                random_state=42)

    assert_equal(score.shape, (4,))

    # Reproduce the detector-subspace selection with the same random_state.
    shuffled_list = shuffle(list(range(0, 6, 1)), random_state=42)
    manual_scores = np.zeros([4, 3])
    manual_scores[:, 0] = np.max(self.scores[:, shuffled_list[0:2]],
                                 axis=1)
    manual_scores[:, 1] = np.max(self.scores[:, shuffled_list[2:4]],
                                 axis=1)
    manual_scores[:, 2] = np.max(self.scores[:, shuffled_list[4:6]],
                                 axis=1)

    manual_score = np.mean(manual_scores, axis=1)
    assert_array_equal(score, manual_score)
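A detail this test relies on: unlike random.shuffle, which permutes a list in place and returns None, sklearn.utils.shuffle returns a new shuffled sequence and leaves its argument untouched, which is why the result can be assigned directly to shuffled_list. A minimal sketch (variable names are illustrative):

import random
from sklearn.utils import shuffle

items = list(range(6))
shuffled = shuffle(items, random_state=42)  # new list; `items` is unchanged
assert items == list(range(6))

random.shuffle(items)  # by contrast: in-place, returns None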
Example 3: test_moa_static_norepeat
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_moa_static_norepeat(self):
    score = moa(self.scores, 3, method='static',
                bootstrap_estimators=False, random_state=42)

    assert_equal(score.shape, (4,))

    shuffled_list = shuffle(list(range(0, 6, 1)), random_state=42)
    manual_scores = np.zeros([4, 3])
    manual_scores[:, 0] = np.mean(self.scores[:, shuffled_list[0:2]],
                                  axis=1)
    manual_scores[:, 1] = np.mean(self.scores[:, shuffled_list[2:4]],
                                  axis=1)
    manual_scores[:, 2] = np.mean(self.scores[:, shuffled_list[4:6]],
                                  axis=1)

    manual_score = np.max(manual_scores, axis=1)
    assert_array_equal(score, manual_score)
Example 4: fit
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def fit(self,
        texts: List[List[str]],
        adjust_passes=True,
        test_size=0.1,
        random_state=123,
        dictionary: Optional[gensim.corpora.Dictionary] = None) -> None:
    texts = shuffle(texts)
    dictionary = dictionary or self._make_dictionary(texts)
    corpus = self._make_corpus(texts=texts, dictionary=dictionary)
    train, test = train_test_split(corpus, test_size=test_size, random_state=random_state)
    passes = np.clip(int(round(100000 / (len(corpus) + 1))), 1, 20) if adjust_passes else 1
    self._lda = gensim.models.LdaModel(
        alpha='auto',
        corpus=train,
        num_topics=self.n_topics,
        id2word=dictionary,
        iterations=self.iterations,
        passes=passes)
    self.log_perplexity = self._lda.log_perplexity(test)
    logger.info('log_perplexity=%s', self.log_perplexity)
Example 5: test_importances
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_importances():
    # Check variable importances.
    # Note: `shuffle=False` below is make_classification's own parameter,
    # not sklearn.utils.shuffle.
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=1)

    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg)
        clf.fit(X, y)
        importances = clf.feature_importances_

        assert_equal(importances.shape[0], 10)
        assert_equal((importances[:3, np.newaxis] >= importances[3:]).all(),
                     True)
Example 6: test_learning_curve_batch_and_incremental_learning_are_equal
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(max_iter=1, tol=None,
                                            shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))
Example 7: check_cross_val_predict_multiclass
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions)
Example 8: test_power_transformer_nans
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_power_transformer_nans(method):
    # Make sure lambda estimation is not influenced by NaN values
    # and that transform() supports NaN silently
    X = np.abs(X_1col)
    pt = PowerTransformer(method=method)
    pt.fit(X)
    lmbda_no_nans = pt.lambdas_[0]

    # concat nans at the end and check lambda stays the same
    X = np.concatenate([X, np.full_like(X, np.nan)])
    X = shuffle(X, random_state=0)

    pt.fit(X)
    lmbda_nans = pt.lambdas_[0]

    assert_almost_equal(lmbda_no_nans, lmbda_nans, decimal=5)

    X_trans = pt.transform(X)
    assert_array_equal(np.isnan(X_trans), np.isnan(X))
Example 9: read_train_sets
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def read_train_sets(train_path, image_size, classes, validation_size):
    class DataSets(object):
        pass
    data_sets = DataSets()

    images, labels, img_names, cls = load_train(train_path, image_size, classes)
    # One shuffle call keeps all four parallel arrays aligned.
    images, labels, img_names, cls = shuffle(images, labels, img_names, cls)

    # A float validation_size is interpreted as a fraction of the data.
    if isinstance(validation_size, float):
        validation_size = int(validation_size * images.shape[0])

    validation_images = images[:validation_size]
    validation_labels = labels[:validation_size]
    validation_img_names = img_names[:validation_size]
    validation_cls = cls[:validation_size]

    train_images = images[validation_size:]
    train_labels = labels[validation_size:]
    train_img_names = img_names[validation_size:]
    train_cls = cls[validation_size:]

    data_sets.train = DataSet(train_images, train_labels, train_img_names, train_cls)
    data_sets.valid = DataSet(validation_images, validation_labels, validation_img_names, validation_cls)

    return data_sets
Example 10: next_batch
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def next_batch(self, batch_size):
    """Return the next `batch_size` examples from this data set."""
    start = self._index_in_epoch
    self._index_in_epoch += batch_size

    if self._index_in_epoch > self._num_examples:
        # Finished epoch
        self._epochs_completed += 1

        # # Shuffle the data (maybe)
        # perm = np.arange(self._num_examples)
        # np.random.shuffle(perm)
        # self._images = self._images[perm]
        # self._labels = self._labels[perm]

        # Start next epoch
        start = 0
        self._index_in_epoch = batch_size
        assert batch_size <= self._num_examples

    end = self._index_in_epoch
    return self._images[start:end], self._labels[start:end], self._ids[start:end], self._cls[start:end]
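The per-epoch shuffle in this example is commented out. If it were re-enabled with sklearn.utils.shuffle instead of a manual permutation, one call could keep all four parallel arrays aligned. A possible sketch, not part of the original code (the attribute names are taken from the example above):

# inside the `if self._index_in_epoch > self._num_examples:` branch,
# right after incrementing self._epochs_completed:
self._images, self._labels, self._ids, self._cls = shuffle(
    self._images, self._labels, self._ids, self._cls)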
Example 11: read_train_sets
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def read_train_sets(train_path, image_size, classes, validation_size=0):
    class DataSets(object):
        pass
    data_sets = DataSets()

    images, labels, ids, cls = load_train(train_path, image_size, classes)
    images, labels, ids, cls = shuffle(images, labels, ids, cls)  # shuffle the data

    if isinstance(validation_size, float):
        validation_size = int(validation_size * images.shape[0])

    validation_images = images[:validation_size]
    validation_labels = labels[:validation_size]
    validation_ids = ids[:validation_size]
    validation_cls = cls[:validation_size]

    train_images = images[validation_size:]
    train_labels = labels[validation_size:]
    train_ids = ids[validation_size:]
    train_cls = cls[validation_size:]

    data_sets.train = DataSet(train_images, train_labels, train_ids, train_cls)
    data_sets.valid = DataSet(validation_images, validation_labels, validation_ids, validation_cls)

    return data_sets
Example 12: load_mnist
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def load_mnist():
    # Parse the raw IDX files; the headers are 16 bytes (images) and
    # 8 bytes (labels), hence the offsets below.
    with open('mnist/train-images-idx3-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    X_train = data[16:].reshape(60000, 28 * 28).astype(np.float32)

    with open('mnist/train-labels-idx1-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    y_train = data[8:].reshape(60000).astype(np.uint8)

    with open('mnist/t10k-images-idx3-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    X_test = data[16:].reshape(10000, 28 * 28).astype(np.float32)

    with open('mnist/t10k-labels-idx1-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    y_test = data[8:].reshape(10000).astype(np.uint8)

    X_train, y_train = shuffle(X_train, y_train)
    X_test, y_test = shuffle(X_test, y_test)

    X_train /= 255.
    X_test /= 255.
    return X_train, y_train, X_test, y_test
Example 13: gen_biased_data
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def gen_biased_data(func, pos_ratio, N):
    '''
    Generate N data points on function func, with a fraction pos_ratio of
    the points having a positive label (func(x) > 0).
    '''
    pos = []
    neg = []
    while len(pos) < pos_ratio * N or len(neg) < N - pos_ratio * N:
        x = np.random.uniform(func.x_range[0], func.x_range[1])
        y = func(x)
        if y > 0:
            if len(pos) < pos_ratio * N:
                pos.append(np.hstack((x, y)))
        elif len(neg) < N - pos_ratio * N:
            neg.append(np.hstack((x, y)))
    xy = np.vstack((pos, neg))
    xy = shuffle(xy)
    return xy[:, :-1], xy[:, -1]
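A note on the design of Example 13: stacking features and labels into one array and shuffling it once keeps each x aligned with its y. In current scikit-learn versions the permutation drawn for a given random_state depends only on the number of samples, so this is interchangeable with passing both arrays to shuffle in one call; treat the check below as an illustration of that implementation detail, not a guaranteed API contract. Array names are illustrative.

import numpy as np
from sklearn.utils import shuffle

X = np.random.rand(10, 3)
y = np.random.rand(10)

# Variant 1 (as in Example 13): shuffle the stacked array, then split it.
xy = shuffle(np.hstack([X, y[:, None]]), random_state=0)
X1, y1 = xy[:, :-1], xy[:, -1]

# Variant 2: shuffle both arrays with a single call.
X2, y2 = shuffle(X, y, random_state=0)

assert np.array_equal(X1, X2) and np.array_equal(y1, y2)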
Example 14: getKaggleMNIST
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def getKaggleMNIST(file_path):
    # MNIST data:
    # column 0 is labels
    # columns 1-784 are pixel data, with values 0..255
    # total size of CSV: 42000 rows of 28x28 images
    train = pd.read_csv(file_path)
    train = train.to_numpy()  # `as_matrix()` was deprecated and removed in pandas 1.0
    train = shuffle(train)

    Xtrain = train[:-1000, 1:] / 255
    Ytrain = train[:-1000, 0].astype(np.int32)
    Xtest = train[-1000:, 1:] / 255
    Ytest = train[-1000:, 0].astype(np.int32)

    return Xtrain, Ytrain, Xtest, Ytest
Example 15: getSpecs
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def getSpecs(path):
    specs = []
    noise = []

    # Get mel-specs for file
    for spec in audio.specsFromFile(path,
                                    rate=cfg.SAMPLE_RATE,
                                    seconds=cfg.SPEC_LENGTH,
                                    overlap=cfg.SPEC_OVERLAP,
                                    minlen=cfg.SPEC_MINLEN,
                                    fmin=cfg.SPEC_FMIN,
                                    fmax=cfg.SPEC_FMAX,
                                    spec_type=cfg.SPEC_TYPE,
                                    shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0])):

        # Determine signal to noise ratio
        s2n = audio.signal2noise(spec)
        specs.append(spec)
        noise.append(s2n)

    # Shuffle arrays (we want to select randomly later)
    specs, noise = shuffle(specs, noise, random_state=RANDOM)

    return specs, noise