

Python cross_validation.train_test_split Method Code Examples

This article collects and summarizes typical usage examples of the sklearn.cross_validation.train_test_split method in Python. If you are wondering exactly how to use cross_validation.train_test_split, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples from the containing module, sklearn.cross_validation.


The following presents 15 code examples of the cross_validation.train_test_split method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
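Before diving into the examples, here is a minimal usage sketch (not taken from any of the projects below). Note that the sklearn.cross_validation module was deprecated in scikit-learn 0.18 and removed in 0.20; in current releases the same train_test_split function lives in sklearn.model_selection. The iris dataset, the 80/20 split, the fixed random_state, and the stratify argument are illustrative choices, not requirements.

# Minimal sketch: prefer the modern import location, fall back to the old one.
try:
    from sklearn.model_selection import train_test_split   # scikit-learn >= 0.18
except ImportError:
    from sklearn.cross_validation import train_test_split  # older scikit-learn
from sklearn.datasets import load_iris

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target,   # features and labels are split in parallel
    test_size=0.2,            # hold out 20% of the samples for testing
    random_state=42,          # fix the seed so the split is reproducible
    stratify=iris.target)     # keep class proportions equal in both subsets

print(X_train.shape, X_test.shape)  # (120, 4) (30, 4)

Every example below follows the same pattern: pass the arrays to be split together, optionally set test_size/train_size, random_state, and stratify, and unpack the resulting train/test pairs in order.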

Example 1: split_to_test_and_train

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def split_to_test_and_train(data, labels, entities, test_size=DEFAULT_TEST_SIZE):
    d_train, d_test, l_train, l_test, c_train, c_test = train_test_split(data, labels, entities, test_size=test_size)
    d_test_2 = []
    l_test_2 = []
    c_test_2 = []

    train_dict = {}
    for d in d_train:
        train_dict[d] = 1

    for d,l,c in zip(d_test, l_test, c_test):
        if d in train_dict:
            continue
        d_test_2.append(d)
        l_test_2.append(l)
        c_test_2.append(c)

    return (d_train, d_test_2, l_train, l_test_2, c_train, c_test_2)

# utility to extract entities from preprocessed files
Developer: CatalystCode, Project: corpus-to-graph-ml, Lines: 22, Source: data_preparation_tools.py

Example 2: split_train_test

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def split_train_test(n_classes):
    from sklearn.datasets import load_digits

    n_labeled = 5
    digits = load_digits(n_class=n_classes)  # consider binary case
    X = digits.data
    y = digits.target
    print(np.shape(X))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    while len(np.unique(y_train[:n_labeled])) < n_classes:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33)

    trn_ds = Dataset(X_train, np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)]))
    tst_ds = Dataset(X_test, y_test)

    return trn_ds, tst_ds, digits 
Developer: ntucllab, Project: libact, Lines: 21, Source: label_digits.py

Example 3: fit

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def fit(self, X, y):
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=self.test_size,
                             random_state=self.random_state)
        dim = X_train.shape[1]
        self.indices_ = tuple(range(dim))
        self.subsets_ = [self.indices_]
        score = self._calc_score(X_train, y_train,
                                 X_test, y_test, self.indices_)
        self.scores_ = [score]
        while dim > self.k_features:
            scores = []
            subsets = []
            for p in combinations(self.indices_, r=dim-1):
                score = self._calc_score(X_train, y_train,
                                         X_test, y_test, p)
                scores.append(score)
                subsets.append(p)
            best = np.argmax(scores)
            self.indices_ = subsets[best]
            self.subsets_.append(self.indices_)
            dim -= 1
            self.scores_.append(scores[best])
        self.k_score_ = self.scores_[-1]
        return self 
Developer: rrlyman, Project: PythonMachineLearningExamples, Lines: 27, Source: p119_squential_backward_selection.py

Example 4: main

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main(unused_argv):
  # Load dataset.
  iris = datasets.load_iris()
  x_train, x_test, y_train, y_test = cross_validation.train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)

  # You can define your own configurations by providing a RunConfig object to
  # estimator to control session configurations, e.g. num_cores
  # and gpu_memory_fraction
  run_config = tf.contrib.learn.estimators.RunConfig(
      num_cores=3, gpu_memory_fraction=0.6)

  # Build 3 layer DNN with 10, 20, 10 units respectively.
  feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
      x_train)
  classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                              hidden_units=[10, 20, 10],
                                              n_classes=3,
                                              config=run_config)

  # Fit and predict.
  classifier.fit(x_train, y_train, steps=200)
  predictions = list(classifier.predict(x_test, as_iterable=True))
  score = metrics.accuracy_score(y_test, predictions)
  print('Accuracy: {0:f}'.format(score)) 
Developer: tobegit3hub, Project: deep_image_model, Lines: 27, Source: iris_run_config.py

Example 5: main

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main(unused_argv):
  # Load dataset.
  iris = learn.datasets.load_dataset('iris')
  x_train, x_test, y_train, y_test = cross_validation.train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)

  # Build 3 layer DNN with 10, 20, 10 units respectively.
  feature_columns = learn.infer_real_valued_columns_from_input(x_train)
  classifier = learn.DNNClassifier(
      feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=3)

  # Fit and predict.
  classifier.fit(x_train, y_train, steps=200)
  predictions = list(classifier.predict(x_test, as_iterable=True))
  score = metrics.accuracy_score(y_test, predictions)
  print('Accuracy: {0:f}'.format(score)) 
Developer: tobegit3hub, Project: deep_image_model, Lines: 18, Source: iris.py

Example 6: main

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main(unused_argv):
  iris = datasets.load_iris()
  x_train, x_test, y_train, y_test = train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)

  feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
      x_train)
  classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                              hidden_units=[10, 20, 10],
                                              n_classes=3,
                                              optimizer=optimizer_exp_decay)

  classifier.fit(x_train, y_train, steps=800)
  predictions = list(classifier.predict(x_test, as_iterable=True))
  score = metrics.accuracy_score(y_test, predictions)
  print('Accuracy: {0:f}'.format(score)) 
Developer: tobegit3hub, Project: deep_image_model, Lines: 18, Source: iris_custom_decay_dnn.py

Example 7: test_get_top_features

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def test_get_top_features(self):
        M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
        M = utils.cast_np_sa_to_nd(M)
        M_train, M_test, labels_train, labels_test = train_test_split(
                M, 
                labels)
        clf = RandomForestClassifier(random_state=0)
        clf.fit(M_train, labels_train)

        ctrl_feat_importances = clf.feature_importances_
        ctrl_col_names = ['f{}'.format(i) for i in xrange(15)]
        ctrl_feat_ranks = np.argsort(ctrl_feat_importances)[::-1][:10]
        ctrl = utils.convert_to_sa(
                zip(ctrl_col_names, ctrl_feat_importances),
                col_names=('feat_name', 'score'))[ctrl_feat_ranks]

        res = dsp.get_top_features(clf, M, verbose=False)
        self.assertTrue(uft.array_equal(ctrl, res))

        res = dsp.get_top_features(clf, col_names=['f{}'.format(i) for i in xrange(15)], verbose=False)
        self.assertTrue(uft.array_equal(ctrl, res)) 
Developer: dssg, Project: diogenes, Lines: 23, Source: test_display.py

Example 8: classify

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def classify(X, y, cl, name=''):
    """Classification using gene features"""

    from sklearn.metrics import classification_report, accuracy_score
    np.random.seed()
    ind = np.random.permutation(len(X))

    from sklearn.cross_validation import train_test_split
    Xtrain, Xtest, ytrain, ytest  = train_test_split(X, y, test_size=0.4)
    #print X
    cl.fit(Xtrain, ytrain)
    ypred = cl.predict(Xtest)

    print (classification_report(ytest, ypred))
    #print accuracy_score(ytest, ypred)
    from sklearn import cross_validation
    yl = pd.Categorical(y).labels
    sc = cross_validation.cross_val_score(cl, X, yl, scoring='roc_auc', cv=5)
    print("AUC: %0.2f (+/- %0.2f)" % (sc.mean(), sc.std() * 2))
    return cl 
Developer: dmnfarrell, Project: smallrnaseq, Lines: 22, Source: analysis.py

Example 9: load_data

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def load_data():
    global training_data, testing_data

    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

    xs = lfw_people.data
    ys = lfw_people.target

    inputs = []
    labels = list(ys)

    for face in xs:
        V = Vol(50, 37, 1, 0.0)
        V.w = list(face)
        inputs.append(augment(V, 30))

    x_tr, x_te, y_tr, y_te = train_test_split(inputs, labels, test_size=0.25)

    training_data = zip(x_tr, y_tr)
    testing_data = zip(x_te, y_te)

    print 'Dataset made...' 
Developer: benglard, Project: ConvNetPy, Lines: 24, Source: faces.py

Example 10: load_train_val_test

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def load_train_val_test(self, only_test=False):
        X_train, X_rest, y_train, y_rest = train_test_split(self.X, self.y,
                                                            test_size=0.3,
                                                            stratify=self.y,
                                                            random_state=42)
        X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest,
                                                        test_size=0.5,
                                                        stratify=y_rest,
                                                        random_state=42)

        if not only_test:
            print("\nPreparing training set...")
            training = prepare_dataset(X_train, y_train, self.pipeline,
                                       self.y_one_hot)
            print("\nPreparing validation set...")
            validation = prepare_dataset(X_val, y_val, self.pipeline,
                                         self.y_one_hot)
        print("\nPreparing test set...")
        testing = prepare_dataset(X_test, y_test, self.pipeline,
                                  self.y_one_hot)

        if only_test:
            return testing
        else:
            return training, validation, testing 
Developer: cbaziotis, Project: datastories-semeval2017-task4, Lines: 27, Source: data_loader.py

Example 11: _shuffle_images_for_target

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def _shuffle_images_for_target(self, data, target):
    """
    Takes all the non-paired images for a given person, slices them into training, validation, and
    test sets, and shuffles within each of these sets.
    """
    # train_test_split can only partition into two sets, so we first split off the training set,
    # then further partition the remainder into validation and test sets.
    (train_data, other_data, train_target, other_target) = train_test_split(data, target,
      train_size=0.7, test_size=0.3, random_state=0)
    self._train["data"].extend(train_data)
    self._train["target"].extend(train_target)

    (validation_data, test_data, validation_target, test_target) = train_test_split(other_data,
      other_target, train_size=0.9, test_size=0.1, random_state=0)
    self._validation["data"].extend(validation_data)
    self._validation["target"].extend(validation_target)
    self._test["data"].extend(test_data)
    self._test["target"].extend(test_target) 
Developer: BradNeuberg, Project: personal-photos-model, Lines: 20, Source: prepare_data.py

Example 12: splitData

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def splitData(self, dataFile, test_size):
        # Load the dataset
        header = ['user_id', 'item_id', 'rating', 'timestamp']
        df = pd.read_csv(dataFile, sep='\t', names=header)

        self.n_users = df.user_id.unique().shape[0]
        self.n_items = df.item_id.unique().shape[0]

        print('Number of users = ' + str(self.n_users) +
              ' | Number of items = ' + str(self.n_items))

        # Split the dataset: users + movies
        self.train_data, self.test_data = cv.train_test_split(
            df, test_size=test_size)
        print('Train/test split succeeded', file=sys.stderr)
        print('len(train) = %s' % np.shape(self.train_data)[0], file=sys.stderr)
        print('len(test) = %s' % np.shape(self.test_data)[0], file=sys.stderr) 
Developer: apachecn, Project: AiLearning, Lines: 19, Source: sklearn-RS-demo-cf-item-test.py

Example 13: _split_data_sets

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def _split_data_sets(details):
    """
    Shuffles and splits our datasets into training and validation sets.
    """
    image_paths = details["image_paths"]
    targets = details["targets"]

    print "\tShuffling data..."
    (image_paths, targets) = shuffle(image_paths, targets, random_state=0)

    print "\tSplitting data 80% training, 20% validation..."
    return train_test_split(image_paths, targets, train_size=0.8, test_size=0.2, \
      random_state=0) 
Developer: BradNeuberg, Project: cloudless, Lines: 15, Source: prepare_data.py

Example 14: split_train_test

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def split_train_test(test_size):
    # choose a dataset with unbalanced class instances
    data = make_multilabel_classification(
        n_samples=300, n_classes=10, allow_unlabeled=False)
    X = StandardScaler().fit_transform(data[0])
    Y = data[1]

    X_trn, X_tst, Y_trn, Y_tst = train_test_split(X, Y, test_size=test_size)

    trn_ds = Dataset(X_trn, Y_trn[:5].tolist() + [None] * (len(Y_trn) - 5))
    tst_ds = Dataset(X_tst, Y_tst.tolist())

    fully_labeled_trn_ds = Dataset(X_trn, Y_trn)

    return trn_ds, tst_ds, fully_labeled_trn_ds 
Developer: ntucllab, Project: libact, Lines: 17, Source: multilabel_plot.py

Example 15: main

# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main():
        # Define X and y
        # Load data
        PATH = "./data/64_64_1/offset_1.3/"
        X = np.load(PATH + "basic_dataset_img.npz")
        y = np.load(PATH + "basic_dataset_pts.npz")
        X = X['arr_0']
        y = y['arr_0'].reshape(-1, 136)
        

        print("Define X and Y")
        print("=======================================")
        
        # Split train / test dataset
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        print("Success of getting train / test dataset")
        print("=======================================")
        print("X_train: ", X_train.shape)
        print("y_train: ", y_train.shape)
        print("X_test: ", X_test.shape)
        print("y_test: ", y_test.shape)
        print("=======================================")

        model.compile(loss=smoothL1, optimizer=keras.optimizers.Adam(lr=1e-3), metrics=['mape'])
        print(model.summary())
        # checkpoint
        filepath="./mobilenet_checkpoints/smooth_L1-{epoch:02d}-{val_mean_absolute_percentage_error:.5f}.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        history = model.fit(X_train, y_train, batch_size=64, epochs=10000, shuffle=True,\
                            verbose=1, validation_data=(X_test, y_test), callbacks=callbacks_list)

        # Save model
        model.save("./model/face_landmark_dnn.h5")
        print("=======================================")
        print("Save Final Model")
        print("=======================================") 
Developer: junhwanjang, Project: face_landmark_dnn, Lines: 39, Source: train_mobilenets.py


Note: The sklearn.cross_validation.train_test_split examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and the source code is copyrighted by its original authors. Please consult each project's license before distributing or using the code; do not reproduce this article without permission.