

Python model_selection.train_test_split Function Code Examples

This article collects and summarizes typical usage examples of the Python function sklearn.model_selection.train_test_split. If you have been wondering what exactly train_test_split does, how to call it, or where to find worked examples, the curated code samples below should help.


The sections below show 15 code examples of train_test_split, sorted by popularity by default.
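
Before diving in, here is a minimal, self-contained sketch of the typical call pattern (the dataset and parameter choices here are illustrative, not taken from the examples below):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
# Hold out 20% for testing; fixing random_state makes the split reproducible,
# and stratify=y keeps the class proportions equal across both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)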

Example 1: learning

	def learning(self):

		X = self.X
		y = self.y
		print("Shape of X and y are", X.shape, y.shape)

		# 80/20 train/test split, then carve a validation set out of the
		# training portion (64/16/20 overall).
		X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y,
			test_size=0.2, random_state=42)
		X_train, X_val, y_train, y_val = model_selection.train_test_split(X_train, y_train,
			test_size=0.2, random_state=42)

		val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val,
			early_stopping_rounds=200)
		model = skflow.TensorFlowDNNRegressor(hidden_units=[100, 50, 10], steps=5000)
		model.fit(X_train, y_train, val_monitor)

		yP = model.predict(X_test)
		score_r2 = metrics.r2_score(y_test, yP)
		score_MedAE = metrics.median_absolute_error(y_test, yP)
		print('Accuracy')
		print('--------')
		print('R2: {0:f}, MedAE: {1:f}'.format(score_r2, score_MedAE))

		if self.graph:
			kutil.regress_show4(y_test, yP)
Developer: jskDr, Project: jamespy_py3, Lines: 25, Source: jmultidk.py
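
skflow (the early TensorFlow Learn wrapper used above) has long been removed; as a hedged sketch under that assumption, the same split-and-evaluate pattern with scikit-learn's built-in MLPRegressor standing in for the DNN regressor might look like this (dataset and hyperparameters are illustrative):

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn import metrics

X, y = make_regression(n_samples=500, n_features=10, noise=0.1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# early_stopping=True internally holds out validation_fraction of the training
# data, playing roughly the role of the ValidationMonitor above.
model = MLPRegressor(hidden_layer_sizes=(100, 50, 10), max_iter=5000,
                     early_stopping=True, validation_fraction=0.2, random_state=42)
model.fit(X_train, y_train)
yP = model.predict(X_test)
print('R2: {0:f}, MedAE: {1:f}'.format(metrics.r2_score(y_test, yP),
                                       metrics.median_absolute_error(y_test, yP)))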

Example 2: lda_tuner

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import LatentDirichletAllocation

def lda_tuner(ingroup_otu, best_models):

    best_score = -1*np.inf
    dtp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    twp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    topic_series = [3]
    X = ingroup_otu.values
    eval_counter = 0

    for topics in topic_series:
        for dtp in dtp_series:
            for twp in twp_series:
                eval_counter += 1
                X_train, X_test = train_test_split(X, test_size=0.5)
                lda = LatentDirichletAllocation(n_components=topics,  # 'n_topics' in older scikit-learn
                                                doc_topic_prior=dtp,
                                                topic_word_prior=twp,
                                                learning_method='batch',
                                                random_state=42,
                                                max_iter=20)
                lda.fit(X_train)
                this_score = lda.score(X_test)
                this_perplexity = lda.perplexity(X_test)
                if this_score > best_score:
                    best_score = this_score
                    print("New Max Likelihood: {}".format(best_score))

                print("#{}: n:{}, dtp:{}, twp:{}, score:{}, perp:{}".format(
                    eval_counter, topics, dtp, twp, this_score, this_perplexity))

                best_models.append({'n': topics, 'dtp': dtp, 'twp': twp,
                                    'score': this_score, 'perp': this_perplexity})
                if (dtp == dtp_series[-1]) and (twp == twp_series[-1]):
                    eval_counter += 1
                    X_train, X_test = train_test_split(X, test_size=0.5)
                    lda = LatentDirichletAllocation(n_components=topics,
                                                    doc_topic_prior=1./topics,
                                                    topic_word_prior=1./topics,
                                                    learning_method='batch',
                                                    random_state=42,
                                                    max_iter=20)
                    lda.fit(X_train)
                    this_score = lda.score(X_test)
                    this_perplexity = lda.perplexity(X_test)
                    if this_score > best_score:
                        best_score = this_score
                        print("New Max Likelihood: {}".format(best_score))

                    print("#{}: n:{}, dtp:{}, twp:{}, score:{} perp: {}".format(
                        eval_counter, topics, (1./topics), (1./topics),
                        this_score, this_perplexity))

                    best_models.append({'n': topics, 'dtp': (1./topics),
                                        'twp': (1./topics), 'score': this_score,
                                        'perp': this_perplexity})
    return best_models
Developer: karoraw1, Project: GLM_Wrapper, Lines: 60, Source: otu_ts_support.py
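
Each trial above re-splits the data without a seed, so runs are not reproducible. A hedged sketch of a tighter variant (assuming X is a non-negative document-term matrix, as LDA requires): split once with a fixed random_state and walk the grid with itertools.product:

from itertools import product
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.model_selection import train_test_split

def lda_grid(X, topic_series, dtp_series, twp_series):
    # One reproducible split, so every (n, dtp, twp) combination is scored
    # against the same held-out half of the data.
    X_train, X_test = train_test_split(X, test_size=0.5, random_state=42)
    results = []
    for topics, dtp, twp in product(topic_series, dtp_series, twp_series):
        lda = LatentDirichletAllocation(n_components=topics,
                                        doc_topic_prior=dtp,
                                        topic_word_prior=twp,
                                        learning_method='batch',
                                        random_state=42, max_iter=20).fit(X_train)
        results.append({'n': topics, 'dtp': dtp, 'twp': twp,
                        'score': lda.score(X_test),
                        'perp': lda.perplexity(X_test)})
    return results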

Example 3: test_base_estimator

def test_base_estimator():
    # Check base_estimator and its default values.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=rng)

    ensemble = BaggingClassifier(None, n_jobs=3, random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = BaggingClassifier(DecisionTreeClassifier(), n_jobs=3, random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = BaggingClassifier(Perceptron(), n_jobs=3, random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, Perceptron))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=rng)

    ensemble = BaggingRegressor(None, n_jobs=3, random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = BaggingRegressor(DecisionTreeRegressor(), n_jobs=3, random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = BaggingRegressor(SVR(), n_jobs=3, random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, SVR))
Developer: agamemnonc, Project: scikit-learn, Lines: 32, Source: test_bagging.py

Example 4: stacking

def stacking():
    # x, y, classifiers and the cross-validator sss (e.g. a StratifiedShuffleSplit)
    # are defined in the enclosing module.
    X_train, X_test, Y_train, Y_test = train_test_split(x, y,
                                                        random_state=35,
                                                        test_size=0.2)
    x1_test = np.zeros((X_test.shape[0], len(classifiers)))   # first-level predictions on the held-out test set
    x1_train = np.zeros((X_train.shape[0], len(classifiers)))
    print('x1.shape', np.shape(x1_train))
    print('y....', np.shape(Y_train))
    accuracy = np.zeros(len(classifiers))  # accuracy of each first-level model
    for train_index, test_index in sss.split(X_train, Y_train):
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf_num = 0
        for clf in classifiers:
            clf_name = clf.__class__.__name__
            clf.fit(x_train, y_train)
            # The second-level training input is each first-level model's
            # prediction on its held-out fold.
            x1_train[test_index, clf_num] = clf.predict(x_test)
            # Predict the full test set on every fold; the sums are averaged later.
            x1_test[:, clf_num] += clf.predict(X_test)
            # This model's accuracy, accumulated across the folds.
            accuracy[clf_num] += (y_test == x1_train[test_index, clf_num]).mean()
            clf_num += 1

    print(np.shape(x1_train))
    print(np.shape(y_train))
    x2_train, x2_test, y2_train, y2_test = train_test_split(x1_train, Y_train, test_size=0.1)
    lr = LogisticRegression()
    lr.fit(x2_train, y2_train)
    print(lr.predict(x1_test))
    print(Y_test)
Developer: Xls1994, Project: DeepLearning, Lines: 29, Source: stackmodel.py
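
scikit-learn 0.22+ packages this out-of-fold stacking pattern as StackingClassifier; a hedged sketch of the same idea (the base estimators and toy data here are illustrative):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = make_classification(n_samples=500, random_state=35)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=35)
# cv=5 feeds the final LogisticRegression with out-of-fold predictions,
# which is exactly the role the manual loop above plays.
stack = StackingClassifier(
    estimators=[('rf', RandomForestClassifier(random_state=0)),
                ('svc', SVC(random_state=0))],
    final_estimator=LogisticRegression(),
    cv=5)
print(stack.fit(X_train, y_train).score(X_test, y_test))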

Example 5: test_thresholded_scorers

def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    assert_raises(ValueError, get_scorer('roc_auc'), clf, X_test, y_test)
Developer: AlexanderFabisch, Project: scikit-learn, Lines: 35, Source: test_score_objects.py

Example 6: train_test_split_mock_pandas

def train_test_split_mock_pandas():
    # X mock dataframe
    X_df = MockDataFrame(X)
    X_train, X_test = train_test_split(X_df)
    assert_true(isinstance(X_train, MockDataFrame))
    assert_true(isinstance(X_test, MockDataFrame))
    X_train_arr, X_test_arr = train_test_split(X_df)
Developer: absolutelyNoWarranty, Project: scikit-learn, Lines: 7, Source: test_split.py
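
train_test_split passes container types through unchanged, which is what this test checks with a mock; a quick hedged sketch with a real DataFrame (made-up toy data):

import pandas as pd
from sklearn.model_selection import train_test_split

X_df = pd.DataFrame({'a': range(10), 'b': range(10, 20)})
X_train, X_test = train_test_split(X_df, test_size=0.25, random_state=0)
print(type(X_train))  # pandas.core.frame.DataFrame: the split keeps the input type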

Example 7: read

def read(d):
    # path, uni and label_rate are defined in the enclosing module.
    data = pd.read_table(path+uni+"_"+d+".txt", delimiter='\t')
    data['label'] = 0
    for i in range(len(data.index)):
        if data.iloc[i, 3] < 1000:
            data.iloc[i, len(data.columns)-1] = 1
        else:
            data.iloc[i, len(data.columns)-1] = 0
    X_0 = data.iloc[:, 7:len(data.columns)-1]
    y_0 = data.iloc[:, len(data.columns)-1]
    # test_size=0.0 only shuffles the data here; note that newer scikit-learn
    # versions may reject a 0.0 float test_size.
    X_0, X_, y_0, y_ = train_test_split(X_0, y_0, test_size=0.0, random_state=3421)
    X_1, X_test, y_1, y_test = train_test_split(X_0, y_0, test_size=0.2, random_state=1257)
    X_2, X_3, y_2, y_3 = train_test_split(X_1, y_1, test_size=1-label_rate, random_state=11)

##############  Whole-set prediction and cross-validation  ###########
#    scores_all = cross_val_score(RandomForestClassifier(n_estimators=500), X_1, y_1, cv=5, scoring='accuracy')
#    score_all_mean = scores_all.mean()
#    print(d+' 5-fold CV accuracy: '+str(score_all_mean))
#    rf_all = RandomForestClassifier(n_estimators=500).fit(X_1, y_1)
#    answer_rf_all = rf_all.predict(X_test)
#    accuracy_all = metrics.accuracy_score(y_test, answer_rf_all)
#    print(d+' whole-set accuracy: '+str(accuracy_all))
################################################

    return data, X_2, y_2, X_3, y_3, X_test, y_test
Developer: IamCatkin, Project: Learning-Python, Lines: 25, Source: SSL-2.py

Example 8: reduce_dataset

def reduce_dataset(uid):
    ds = load_validation_dataframe(uid)
    X_train, X_valid, X_test, y_train, y_valid, y_test = ds

    X=pd.concat((X_train,X_valid,X_test))
    y=np.concatenate((y_train,y_valid,y_test))

    if len(y) > 5000:
        neg_inds = [i for i, v in enumerate(y) if v==0]
        pos_inds = [i for i, v in enumerate(y) if v==1]

        n_neg = 5000 - len(pos_inds)
        neg_inds = sample(neg_inds, n_neg)
        inds = sorted(neg_inds + pos_inds)
        X = X.iloc[inds,:]
        y = y[inds]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test, test_size=0.66666, random_state=42)

    Xtrain_fname = join(DATAFRAMES_FOLDER, "dfXtrain_%d_small.pickle" % uid)
    Xvalid_fname = join(DATAFRAMES_FOLDER, "dfXvalid_%d_small.pickle" % uid)
    Xtest_fname = join(DATAFRAMES_FOLDER, "dfXtestv_%d_small.pickle" % uid)
    ys_fname = join(DATAFRAMES_FOLDER, "ysv_%d_small.pickle" % uid)

    X_train.to_pickle(Xtrain_fname)
    X_valid.to_pickle(Xvalid_fname)
    X_test.to_pickle(Xtest_fname)
    pickle.dump((y_train, y_valid, y_test), open(ys_fname, 'wb'))

    return X_train, X_valid, X_test, y_train, y_valid, y_test
Developer: pablocelayes, Project: sna_classifier, Lines: 30, Source: datasets.py
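
The chained calls above realize a 70/10/20 split: 30% is held out first, then two thirds of it (test_size=0.66666) becomes the test set. A hedged helper that derives the second fraction instead of hand-rounding it (names are illustrative):

from sklearn.model_selection import train_test_split

def three_way_split(X, y, valid=0.1, test=0.2, seed=42):
    # First peel off the validation+test pool, then split it so that the
    # second fraction is exactly test / (valid + test) rather than 0.66666.
    X_train, X_rest, y_train, y_rest = train_test_split(
        X, y, test_size=valid + test, random_state=seed)
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_rest, y_rest, test_size=test / (valid + test), random_state=seed)
    return X_train, X_valid, X_test, y_train, y_valid, y_test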

Example 9: test_classification_with_validation

    def test_classification_with_validation(self):
        tol_places = 4
        data_x, data_y = make_classification(n_samples=100, n_features=7,
                                             n_redundant=0, n_informative=7,
                                             n_clusters_per_class=2,
                                             random_state=3227)
        label_y = np.where(data_y == 0, 'A', 'B')

        train_x, test_x, train_y, test_y = train_test_split(data_x, label_y,
                                                            test_size=0.25,
                                                            random_state=3227)

        train_x, validate_x, train_y, validate_y = train_test_split(
            train_x, train_y, test_size=0.5, random_state=3227)

        params = {
            'ref_functions': ('linear_cov',),
            'criterion_type': 'bias_retrain',
            'criterion_minimum_width': 5,
            'max_layer_count': 5,
            'verbose': 0,
            'n_jobs': 'max'
        }
        model = Classifier(**params)
        model.fit(train_x, train_y, validation_data=(validate_x, validate_y))
        pred_y = model.predict_proba(test_x)
        roc_auc = roc_auc_score(model.le.transform(test_y), pred_y)
        self.assertAlmostEqual(roc_auc, 0.76, places=tol_places)

        no1 = model.predict_neuron_output(test_x, 0, 0)
        no2 = model.predict_neuron_output(test_x, 1, 0)
Developer: kvoyager, Project: GmdhPy, Lines: 31, Source: test_model.py

Example 10: __init__

    def __init__(self, root, train=True, val=False, color_space='lab', transform=None, test_size=0.9, val_size=0.125, location='cpu'):
        """
            color_space: 'rgb' or 'lab'
        """
        self.root_dir = root
        all_files = []
        for r, _, files in walk(self.root_dir):
            for f in files:
                if f.endswith('.jpg'):
                    all_files.append(join(r, f))
        train_val_files, test_files = train_test_split(
            all_files, test_size=test_size, random_state=69)
        train_files, val_files = train_test_split(train_val_files,
                                                  test_size=val_size, random_state=69)
        if (train and val):
            self.filenames = val_files
        elif train:
            self.filenames = train_files
        else:
            self.filenames = test_files

        self.color_space = color_space
        if self.color_space not in ['rgb', 'lab']:
            raise NotImplementedError
        self.transform = transform
        self.location = location
        self.nnenc = NNEncode(location=self.location)
        self.train = train
Developer: stanleynguyen, Project: corolization, Lines: 28, Source: dataset.py

Example 11: main

def main(_):

    if FLAGS.dataset == 'cifar10':
        (X_train, y_train), (_, _) = cifar10.load_data()
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
    else:
        with open('data/train.p', mode='rb') as f:
            train = pickle.load(f)
        X_train, X_val, y_train, y_val = train_test_split(train['features'], train['labels'], test_size=0.33, random_state=0)

    train_output_file = "{}_{}_{}.p".format(FLAGS.network, FLAGS.dataset, 'bottleneck_features_train')
    validation_output_file = "{}_{}_{}.p".format(FLAGS.network, FLAGS.dataset, 'bottleneck_features_validation')

    print("Resizing to", (w, h, ch))
    print("Saving to ...")
    print(train_output_file)
    print(validation_output_file)

    with tf.Session() as sess:
        K.set_session(sess)
        K.set_learning_phase(1)

        model = create_model()

        print('Bottleneck training')
        train_gen = gen(sess, X_train, y_train, batch_size)
        bottleneck_features_train = model.predict_generator(train_gen(), X_train.shape[0])
        data = {'features': bottleneck_features_train, 'labels': y_train}
        pickle.dump(data, open(train_output_file, 'wb'))

        print('Bottleneck validation')
        val_gen = gen(sess, X_val, y_val, batch_size)
        bottleneck_features_validation = model.predict_generator(val_gen(), X_val.shape[0])
        data = {'features': bottleneck_features_validation, 'labels': y_val}
        pickle.dump(data, open(validation_output_file, 'wb'))
Developer: AbdulTheProgrammer, Project: CarND-Transfer-Learning-Lab, Lines: 35, Source: run_bottleneck.py

Example 12: split_data

def split_data(data):
    # With shuffle=False the splits are contiguous (order-preserving), and
    # random_state has no effect.
    X_train, X_test, Y_train, Y_test = train_test_split(data.loc[:, data.columns != label], data[label],
                                                        train_size=train_size + validation_size, test_size=test_size,
                                                        shuffle=False, random_state=0)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train,
                                                      train_size=train_size / (train_size + validation_size),
                                                      test_size=validation_size / (train_size + validation_size),
                                                      shuffle=False, random_state=0)
    return X_train, X_val, X_test, Y_train, Y_val, Y_test
Developer: michaeltur3, Project: ML_HW3, Lines: 9, Source: prepare_data.py

Example 13: test_split

  def test_split(self):
    ds = self.create_dataset()
    indexes = list(range(len(ds)))
    train, test = train_test_split(indexes)
    train, valid = train_test_split(train)

    splitter = SpecifiedIndexSplitter(train, valid, test)
    train_ds, valid_ds, test_ds = splitter.train_valid_test_split(ds)

    self.assertTrue(np.all(train_ds.X == ds.X[train]))
    self.assertTrue(np.all(valid_ds.X == ds.X[valid]))
    self.assertTrue(np.all(test_ds.X == ds.X[test]))
Developer: ktaneishi, Project: deepchem, Lines: 12, Source: test_specified_index_splitter.py

Example 14: get_train_valid_test_split

def get_train_valid_test_split(n, train=0.7, valid=0.1, test=0.2, shuffle=False):
    other_split = valid + test
    if abs(train + other_split - 1) > 1e-9:  # tolerate float rounding in the sum
        raise ValueError("Train, Valid, Test splits should sum to 1")
    # Give each call a single fraction: passing both train_size and test_size
    # lets float dust (e.g. 0.1 + 0.2 = 0.30000000000000004) push
    # floor(train*n) + ceil(test*n) past n and raise a ValueError.
    train_set, other_set = train_test_split(range(1, n+1),
                                            train_size=train, shuffle=shuffle)
    valid_set, test_set = train_test_split(other_set,
                                           test_size=test/other_split,
                                           shuffle=False)
    print("train:{} valid:{} test:{}".format(len(train_set), len(valid_set), len(test_set)))
    return train_set, valid_set, test_set
Developer: chesterxgchen, Project: DeepLearningFrameworks, Lines: 12, Source: utils.py
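
A quick usage check of the helper above; with the single-fraction sizing, the default 70/10/20 split of 100 indices should come out exact:

train_idx, valid_idx, test_idx = get_train_valid_test_split(100)
# expected output: train:70 valid:10 test:20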

Example 15: preprocess

def preprocess(data, test_size, sample=None, scale=True):

    data_frame_all = pandas.read_table(data)
    df = data_frame_all

    # Only 11093 rows (<3% of the data) contain nulls, so for simplicity we drop them.
    no_null_df = df.dropna(axis=0, how='any')

    # Confirm there are no null values left.
    no_null_df.isnull().values.any()

    # Rename the cleaned frame back to df; 238907 rows remain.
    df = no_null_df
    df_unprocessed = df

    if sample:
        df = df.sample(frac=sample)
        print("sampled")

    df = df[['order_estimated_driving_time_min', 'order_estimated_shopping_time_min']]
    df['total_time_min'] = df.sum(axis=1)
    df['time_in_hours'] = df.total_time_min.divide(60)

    target = df.time_in_hours * 15
    df = df.drop(['time_in_hours', 'total_time_min'], axis=1)

    s1 = target.std()
    s2 = 7.5  # our chosen standard deviation

    m1 = target.mean()
    m2 = 15   # our chosen mean

    # Rescale the target to mean 15 and standard deviation 7.5.
    target = m2 + (target - m1) * s2/s1

    X = df
    y = target

    if scale:
        df_pp = preprocessing.scale(df)
        print("scaled")
        X_train, X_test, y_train, y_test = train_test_split(df_pp, target, test_size=test_size, random_state=42)
    else:
        df_pp = None
        X_train, X_test, y_train, y_test = train_test_split(df, target, test_size=test_size, random_state=42)

    return df_unprocessed, df, df_pp, target, X, X_train, X_test, y, y_train, y_test
Developer: alexjacobs08, Project: ShiptProject, Lines: 53, Source: order_pay_model.py
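
Note that this example scales the whole frame before splitting, so test-set statistics leak into the training data. A hedged sketch of the leak-free ordering with StandardScaler, assuming df and target as prepared above:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(df, target, test_size=0.2, random_state=42)
scaler = StandardScaler().fit(X_train)   # fit the scaling statistics on the training split only
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)      # reuse the same statistics for the test split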


Note: The sklearn.model_selection.train_test_split examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their original authors; copyright remains with those authors, and any distribution or use should follow the corresponding project's license. Please do not repost without permission.