This article collects and organizes typical usage examples of the Python method sklearn.utils.shuffle. If you have been wondering what utils.shuffle does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore the wider module the method lives in, sklearn.utils.
The following shows 15 code examples of utils.shuffle, sorted by popularity by default.
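Before the examples, here is a minimal sketch of the behavior they all rely on: sklearn.utils.shuffle applies one consistent random permutation to any number of equal-length arrays or lists and returns shuffled copies, leaving the inputs unchanged. The arrays below are illustrative, not taken from any of the examples.

import numpy as np
from sklearn.utils import shuffle

X = np.arange(10).reshape(5, 2)   # 5 samples, 2 features
y = np.array([0, 1, 0, 1, 0])     # 5 matching labels

# One permutation is applied to both arrays, so rows stay aligned.
X_shuf, y_shuf = shuffle(X, y, random_state=42)

# The same random_state reproduces the same permutation.
X_again, y_again = shuffle(X, y, random_state=42)
assert np.array_equal(X_shuf, X_again) and np.array_equal(y_shuf, y_again)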
Example 1: reset
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def reset(self):
    """Resets the iterator to the beginning of the data."""
    self.curr_idx = 0
    # shuffle data in each bucket
    random.shuffle(self.idx)
    for i, buck in enumerate(self.sentences):
        self.indices[i], self.sentences[i], self.characters[i], self.label[i] = shuffle(self.indices[i],
                                                                                        self.sentences[i],
                                                                                        self.characters[i],
                                                                                        self.label[i])
    self.ndindex = []
    self.ndsent = []
    self.ndchar = []
    self.ndlabel = []
    # for each bucket of data
    for i, buck in enumerate(self.sentences):
        # append the lists with an array
        self.ndindex.append(ndarray.array(self.indices[i], dtype=self.dtype))
        self.ndsent.append(ndarray.array(self.sentences[i], dtype=self.dtype))
        self.ndchar.append(ndarray.array(self.characters[i], dtype=self.dtype))
        self.ndlabel.append(ndarray.array(self.label[i], dtype=self.dtype))
Example 2: test_aom_static_norepeat
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_aom_static_norepeat(self):
    score = aom(self.scores, 3, method='static',
                bootstrap_estimators=False,
                random_state=42)

    assert_equal(score.shape, (4,))

    # Reproduce the detector-subspace selection with the same random_state.
    shuffled_list = shuffle(list(range(0, 6, 1)), random_state=42)
    manual_scores = np.zeros([4, 3])
    manual_scores[:, 0] = np.max(self.scores[:, shuffled_list[0:2]],
                                 axis=1)
    manual_scores[:, 1] = np.max(self.scores[:, shuffled_list[2:4]],
                                 axis=1)
    manual_scores[:, 2] = np.max(self.scores[:, shuffled_list[4:6]],
                                 axis=1)

    manual_score = np.mean(manual_scores, axis=1)
    assert_array_equal(score, manual_score)
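A detail this test relies on: unlike random.shuffle, which permutes a list in place and returns None, sklearn.utils.shuffle returns a new shuffled sequence and leaves its argument untouched, which is why the result can be assigned directly to shuffled_list. A minimal sketch (variable names are illustrative):

import random
from sklearn.utils import shuffle

items = list(range(6))
shuffled = shuffle(items, random_state=42)  # new list; `items` is unchanged
assert items == list(range(6))

random.shuffle(items)  # by contrast: in-place, returns None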
Example 3: test_moa_static_norepeat
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_moa_static_norepeat(self):
    score = moa(self.scores, 3, method='static',
                bootstrap_estimators=False, random_state=42)

    assert_equal(score.shape, (4,))

    shuffled_list = shuffle(list(range(0, 6, 1)), random_state=42)
    manual_scores = np.zeros([4, 3])
    manual_scores[:, 0] = np.mean(self.scores[:, shuffled_list[0:2]],
                                  axis=1)
    manual_scores[:, 1] = np.mean(self.scores[:, shuffled_list[2:4]],
                                  axis=1)
    manual_scores[:, 2] = np.mean(self.scores[:, shuffled_list[4:6]],
                                  axis=1)

    manual_score = np.max(manual_scores, axis=1)
    assert_array_equal(score, manual_score)
Example 4: fit
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def fit(self,
        texts: List[List[str]],
        adjust_passes=True,
        test_size=0.1,
        random_state=123,
        dictionary: Optional[gensim.corpora.Dictionary] = None) -> None:
    texts = shuffle(texts)
    dictionary = dictionary or self._make_dictionary(texts)
    corpus = self._make_corpus(texts=texts, dictionary=dictionary)
    train, test = train_test_split(corpus, test_size=test_size, random_state=random_state)
    passes = np.clip(int(round(100000 / (len(corpus) + 1))), 1, 20) if adjust_passes else 1
    self._lda = gensim.models.LdaModel(
        alpha='auto',
        corpus=train,
        num_topics=self.n_topics,
        id2word=dictionary,
        iterations=self.iterations,
        passes=passes)
    self.log_perplexity = self._lda.log_perplexity(test)
    logger.info('log_perplexity=%s', self.log_perplexity)
Example 5: test_importances
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_importances():
    # Check variable importances.
    # Note: `shuffle=False` below is make_classification's own parameter,
    # not sklearn.utils.shuffle.
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=1)

    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg)
        clf.fit(X, y)
        importances = clf.feature_importances_

        assert_equal(importances.shape[0], 10)
        assert_equal((importances[:3, np.newaxis] >= importances[3:]).all(),
                     True)
Example 6: test_learning_curve_batch_and_incremental_learning_are_equal
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(max_iter=1, tol=None,
                                            shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))
Example 7: check_cross_val_predict_multiclass
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions)
Example 8: test_power_transformer_nans
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def test_power_transformer_nans(method):
    # Make sure lambda estimation is not influenced by NaN values
    # and that transform() supports NaN silently
    X = np.abs(X_1col)
    pt = PowerTransformer(method=method)
    pt.fit(X)
    lmbda_no_nans = pt.lambdas_[0]

    # concat nans at the end and check lambda stays the same
    X = np.concatenate([X, np.full_like(X, np.nan)])
    X = shuffle(X, random_state=0)

    pt.fit(X)
    lmbda_nans = pt.lambdas_[0]

    assert_almost_equal(lmbda_no_nans, lmbda_nans, decimal=5)

    X_trans = pt.transform(X)
    assert_array_equal(np.isnan(X_trans), np.isnan(X))
Example 9: read_train_sets
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def read_train_sets(train_path, image_size, classes, validation_size):
    class DataSets(object):
        pass
    data_sets = DataSets()

    images, labels, img_names, cls = load_train(train_path, image_size, classes)
    # One shuffle call keeps all four parallel arrays aligned.
    images, labels, img_names, cls = shuffle(images, labels, img_names, cls)

    # A float validation_size is interpreted as a fraction of the data.
    if isinstance(validation_size, float):
        validation_size = int(validation_size * images.shape[0])

    validation_images = images[:validation_size]
    validation_labels = labels[:validation_size]
    validation_img_names = img_names[:validation_size]
    validation_cls = cls[:validation_size]

    train_images = images[validation_size:]
    train_labels = labels[validation_size:]
    train_img_names = img_names[validation_size:]
    train_cls = cls[validation_size:]

    data_sets.train = DataSet(train_images, train_labels, train_img_names, train_cls)
    data_sets.valid = DataSet(validation_images, validation_labels, validation_img_names, validation_cls)

    return data_sets
Example 10: next_batch
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def next_batch(self, batch_size):
    """Return the next `batch_size` examples from this data set."""
    start = self._index_in_epoch
    self._index_in_epoch += batch_size

    if self._index_in_epoch > self._num_examples:
        # Finished epoch
        self._epochs_completed += 1

        # # Shuffle the data (maybe)
        # perm = np.arange(self._num_examples)
        # np.random.shuffle(perm)
        # self._images = self._images[perm]
        # self._labels = self._labels[perm]

        # Start next epoch
        start = 0
        self._index_in_epoch = batch_size
        assert batch_size <= self._num_examples

    end = self._index_in_epoch
    return self._images[start:end], self._labels[start:end], self._ids[start:end], self._cls[start:end]
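The per-epoch shuffle in this example is commented out. If it were re-enabled with sklearn.utils.shuffle instead of a manual permutation, one call could keep all four parallel arrays aligned. A possible sketch, not part of the original code (the attribute names are taken from the example above):

# inside the `if self._index_in_epoch > self._num_examples:` branch,
# right after incrementing self._epochs_completed:
self._images, self._labels, self._ids, self._cls = shuffle(
    self._images, self._labels, self._ids, self._cls)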
Example 11: read_train_sets
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def read_train_sets(train_path, image_size, classes, validation_size=0):
    class DataSets(object):
        pass
    data_sets = DataSets()

    images, labels, ids, cls = load_train(train_path, image_size, classes)
    images, labels, ids, cls = shuffle(images, labels, ids, cls)  # shuffle the data

    if isinstance(validation_size, float):
        validation_size = int(validation_size * images.shape[0])

    validation_images = images[:validation_size]
    validation_labels = labels[:validation_size]
    validation_ids = ids[:validation_size]
    validation_cls = cls[:validation_size]

    train_images = images[validation_size:]
    train_labels = labels[validation_size:]
    train_ids = ids[validation_size:]
    train_cls = cls[validation_size:]

    data_sets.train = DataSet(train_images, train_labels, train_ids, train_cls)
    data_sets.valid = DataSet(validation_images, validation_labels, validation_ids, validation_cls)

    return data_sets
Example 12: load_mnist
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def load_mnist():
    # Parse the raw IDX files; the headers are 16 bytes (images) and
    # 8 bytes (labels), hence the offsets below.
    with open('mnist/train-images-idx3-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    X_train = data[16:].reshape(60000, 28 * 28).astype(np.float32)

    with open('mnist/train-labels-idx1-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    y_train = data[8:].reshape(60000).astype(np.uint8)

    with open('mnist/t10k-images-idx3-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    X_test = data[16:].reshape(10000, 28 * 28).astype(np.float32)

    with open('mnist/t10k-labels-idx1-ubyte', 'rb') as f:
        data = np.fromfile(file=f, dtype=np.uint8)
    y_test = data[8:].reshape(10000).astype(np.uint8)

    X_train, y_train = shuffle(X_train, y_train)
    X_test, y_test = shuffle(X_test, y_test)

    X_train /= 255.
    X_test /= 255.
    return X_train, y_train, X_test, y_test
Example 13: gen_biased_data
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def gen_biased_data(func, pos_ratio, N):
    '''
    Generate N data points on function func, with a fraction pos_ratio of
    the points having a positive label (func(x) > 0).
    '''
    pos = []
    neg = []
    while len(pos) < pos_ratio * N or len(neg) < N - pos_ratio * N:
        x = np.random.uniform(func.x_range[0], func.x_range[1])
        y = func(x)
        if y > 0:
            if len(pos) < pos_ratio * N:
                pos.append(np.hstack((x, y)))
        elif len(neg) < N - pos_ratio * N:
            neg.append(np.hstack((x, y)))
    xy = np.vstack((pos, neg))
    xy = shuffle(xy)
    return xy[:, :-1], xy[:, -1]
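A note on the design of Example 13: stacking features and labels into one array and shuffling it once keeps each x aligned with its y. In current scikit-learn versions the permutation drawn for a given random_state depends only on the number of samples, so this is interchangeable with passing both arrays to shuffle in one call; treat the check below as an illustration of that implementation detail, not a guaranteed API contract. Array names are illustrative.

import numpy as np
from sklearn.utils import shuffle

X = np.random.rand(10, 3)
y = np.random.rand(10)

# Variant 1 (as in Example 13): shuffle the stacked array, then split it.
xy = shuffle(np.hstack([X, y[:, None]]), random_state=0)
X1, y1 = xy[:, :-1], xy[:, -1]

# Variant 2: shuffle both arrays with a single call.
X2, y2 = shuffle(X, y, random_state=0)

assert np.array_equal(X1, X2) and np.array_equal(y1, y2)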
Example 14: getKaggleMNIST
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def getKaggleMNIST(file_path):
    # MNIST data:
    # column 0 is labels
    # columns 1-784 are pixel data, with values 0..255
    # total size of CSV: 42000 rows of 28x28 images
    train = pd.read_csv(file_path)
    train = train.to_numpy()  # `as_matrix()` was deprecated and removed in pandas 1.0
    train = shuffle(train)

    Xtrain = train[:-1000, 1:] / 255
    Ytrain = train[:-1000, 0].astype(np.int32)
    Xtest = train[-1000:, 1:] / 255
    Ytest = train[-1000:, 0].astype(np.int32)

    return Xtrain, Ytrain, Xtest, Ytest
Example 15: getSpecs
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import shuffle [as alias]
def getSpecs(path):
    specs = []
    noise = []

    # Get mel-specs for file
    for spec in audio.specsFromFile(path,
                                    rate=cfg.SAMPLE_RATE,
                                    seconds=cfg.SPEC_LENGTH,
                                    overlap=cfg.SPEC_OVERLAP,
                                    minlen=cfg.SPEC_MINLEN,
                                    fmin=cfg.SPEC_FMIN,
                                    fmax=cfg.SPEC_FMAX,
                                    spec_type=cfg.SPEC_TYPE,
                                    shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0])):

        # Determine signal to noise ratio
        s2n = audio.signal2noise(spec)
        specs.append(spec)
        noise.append(s2n)

    # Shuffle arrays (we want to select randomly later)
    specs, noise = shuffle(specs, noise, random_state=RANDOM)

    return specs, noise