当前位置: 首页>>代码示例>>Python>>正文


Python model_selection.train_test_split方法代码示例

本文整理汇总了Python中sklearn.model_selection.train_test_split方法的典型用法代码示例。如果您正苦于以下问题：Python model_selection.train_test_split方法的具体用法？Python model_selection.train_test_split怎么用？Python model_selection.train_test_split使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.model_selection的用法示例。


在下文中一共展示了model_selection.train_test_split方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: optimize_hyperparam

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
        """Search ``self.space`` for the best hyper-parameters with hyperopt.

        ``(X, y)`` is split once into a training and a validation part; every
        trial fits a fresh ``XGBModel`` on the training part and is scored on
        the validation part.

        Args:
            X: features, forwarded to ``train_test_split``.
            y: targets, forwarded to ``train_test_split``.
            test_size: forwarded to ``train_test_split`` as ``test_size``.
            n_eval: passed to ``hyperopt.fmin`` as ``max_evals``.

        Returns:
            A tuple ``(hyperparams, trials)``: the best point resolved through
            ``space_eval`` and the ``Trials`` object that recorded the search.
        """
        X_trn, X_val, y_trn, y_val = train_test_split(
            X, y, test_size=test_size, shuffle=self.shuffle
        )

        def objective(hyperparams):
            # One trial: fit a candidate with early stopping on the hold-out set.
            candidate = XGBModel(n_estimators=self.n_est, **self.params, **hyperparams)
            candidate.fit(
                X=X_trn,
                y=y_trn,
                eval_set=[(X_val, y_val)],
                eval_metric=self.metric,
                early_stopping_rounds=self.n_stop,
                verbose=False,
            )
            # Metric history of the single eval_set entry, read at the best round.
            history = candidate.evals_result()['validation_0'][self.metric]
            # NOTE(review): loss_sign presumably flips metrics that should be
            # maximised so that hyperopt can minimise them -- confirm.
            loss = history[candidate.best_iteration] * self.loss_sign

            return {'loss': loss, 'status': STATUS_OK, 'model': candidate}

        trials = Trials()
        best_point = hyperopt.fmin(
            fn=objective,
            space=self.space,
            trials=trials,
            algo=tpe.suggest,
            max_evals=n_eval,
            verbose=1,
            rstate=self.random_state,
        )

        return space_eval(self.space, best_point), trials
开发者ID:jeongyoonlee,项目名称:Kaggler,代码行数:23,代码来源:automl.py

示例2: test_different_results

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def test_different_results(self):
        """Two fits of the in-scope ``LogisticRegression`` must not agree exactly.

        The unqualified ``LogisticRegression`` (constructed with ``data_norm``)
        is fitted twice on the same iris split and compared with scikit-learn's
        ``linear_model.LogisticRegression``.
        """
        from sklearn import datasets
        from sklearn import linear_model
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=0.2)

        def fit_predict(model):
            # Train on the shared split and return predictions for the test rows.
            model.fit(X_train, y_train)
            return model.predict(X_test)

        # Construction, fit and predict happen in the same order as three
        # sequential fit/predict stanzas would.
        predict1 = fit_predict(LogisticRegression(data_norm=12))
        predict2 = fit_predict(LogisticRegression(data_norm=12))
        predict3 = fit_predict(linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr"))

        self.assertFalse(np.all(predict1 == predict2))
        # The reference model may match one run, but not both.
        self.assertFalse(np.all(predict3 == predict1) and np.all(predict3 == predict2))
开发者ID:IBM,项目名称:differential-privacy-library,代码行数:27,代码来源:test_LogisticRegression.py

示例3: test_same_results

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def test_same_results(self):
        """With ``epsilon=inf`` the in-scope model must predict like scikit-learn.

        The unqualified ``LogisticRegression`` and scikit-learn's
        ``linear_model.LogisticRegression`` are fitted on the same iris split
        and their test-set predictions are required to be identical.
        """
        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn import linear_model

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=0.2)

        candidate = LogisticRegression(data_norm=12, epsilon=float("inf"))
        candidate.fit(X_train, y_train)
        predict1 = candidate.predict(X_test)

        reference = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
        reference.fit(X_train, y_train)
        predict2 = reference.predict(X_test)

        self.assertTrue(np.all(predict1 == predict2))
开发者ID:IBM,项目名称:differential-privacy-library,代码行数:21,代码来源:test_LogisticRegression.py

示例4: test_different_results

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def test_different_results(self):
        """Two fits of the in-scope ``LinearRegression`` must not agree exactly.

        The unqualified ``LinearRegression`` (constructed with ``data_norm`` and
        explicit bounds) is fitted twice on the same iris split and compared
        with scikit-learn's ``linear_model.LinearRegression``.
        """
        from sklearn import datasets
        from sklearn import linear_model
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=0.2)

        def bounded_model():
            # A fresh estimator (with fresh bound lists) on every call.
            return LinearRegression(
                data_norm=12,
                bounds_X=([4.3, 2.0, 1.1, 0.1], [7.9, 4.4, 6.9, 2.5]),
                bounds_y=(0, 2),
            )

        first = bounded_model()
        first.fit(X_train, y_train)
        predict1 = first.predict(X_test)

        second = bounded_model()
        second.fit(X_train, y_train)
        predict2 = second.predict(X_test)

        reference = linear_model.LinearRegression()
        reference.fit(X_train, y_train)
        predict3 = reference.predict(X_test)

        self.assertFalse(np.all(predict1 == predict2))
        # The reference model may match one run, but not both.
        self.assertFalse(np.all(predict3 == predict1) and np.all(predict3 == predict2))
开发者ID:IBM,项目名称:differential-privacy-library,代码行数:27,代码来源:test_LinearRegression.py

示例5: test_same_results

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def test_same_results(self):
        """With ``epsilon=inf`` the in-scope model must predict like scikit-learn.

        The unqualified ``LinearRegression`` (constructed with ``data_norm``,
        ``epsilon=inf`` and explicit bounds) and scikit-learn's
        ``linear_model.LinearRegression`` are fitted on the same iris split;
        their test-set predictions must agree up to ``np.allclose`` tolerance.
        """
        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn import linear_model

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        clf = LinearRegression(data_norm=12, epsilon=float("inf"),
                               bounds_X=([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5]), bounds_y=(0, 2))
        clf.fit(X_train, y_train)

        predict1 = clf.predict(X_test)

        # `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2, so
        # passing it raises TypeError there. False was the default anyway, so
        # omitting it gives the same estimator on every version.
        clf = linear_model.LinearRegression()
        clf.fit(X_train, y_train)

        predict2 = clf.predict(X_test)

        self.assertTrue(np.allclose(predict1, predict2))
开发者ID:IBM,项目名称:differential-privacy-library,代码行数:22,代码来源:test_LinearRegression.py

示例6: test_with_iris

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def test_with_iris(self):
        """Fit ``GaussianNB`` on iris, check accuracy, then check ``partial_fit`` counts.

        After one ``fit`` the test accuracy must exceed 0.5; after an additional
        ``partial_fit`` on the same training data the summed ``class_count_``
        must be exactly twice the summed counts taken after the first fit.
        """
        global_seed(12345)
        from sklearn import datasets
        iris = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

        lower = [4.3, 2.0, 1.0, 0.1]
        upper = [7.9, 4.4, 6.9, 2.5]

        model = GaussianNB(epsilon=5.0, bounds=(lower, upper))
        model.fit(x_train, y_train)

        accuracy = model.score(x_test, y_test)
        # Snapshot the counts: partial_fit below may update class_count_.
        counts_after_fit = model.class_count_.copy()
        self.assertGreater(accuracy, 0.5)

        model.partial_fit(x_train, y_train)
        counts_after_partial_fit = model.class_count_
        self.assertEqual(np.sum(counts_after_partial_fit), np.sum(counts_after_fit) * 2)
开发者ID:IBM,项目名称:differential-privacy-library,代码行数:21,代码来源:test_GaussianNB.py

示例7: create_cancer_data

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def create_cancer_data():
    """Load the breast-cancer training CSV and split it 80/20.

    The file is read through ``retrieve_dataset`` with ``"?"`` treated as
    missing, gaps are interpolated and everything is cast to ``int64``.
    Column 0 is used as the target, the remaining columns as features.

    Returns:
        ``(x_train, x_test, y_train, y_test, feature_names, target_names)``
    """
    frame = retrieve_dataset("breast-cancer.train.csv", na_values="?")
    frame = frame.interpolate().astype("int64")

    labels = frame.iloc[:, 0]
    features = frame.iloc[:, 1:]
    feature_names = features.columns.values
    target_names = ["no_cancer", "cancer"]

    # Fixed seed so the split is reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=0
    )
    return x_train, x_test, y_train, y_test, feature_names, target_names
开发者ID:interpretml,项目名称:interpret-text,代码行数:18,代码来源:common_utils.py

示例8: create_simple_titanic_data

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def create_simple_titanic_data():
    """Download the titanic3 CSV and return an 80/20 train/test split.

    Missing values are filled forward and then backward, so a gap in the
    first rows is covered as well. The features are the columns ``embarked``,
    ``sex``, ``pclass``, ``age`` and ``fare``; the target is ``survived``.

    Returns:
        ``(X_train, X_test, y_train, y_test, numeric_features,
        categorical_features)``
    """
    titanic_url = (
        "https://raw.githubusercontent.com/amueller/"
        "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv"
    )
    data = read_csv(titanic_url)
    # `fillna(method=...)` is deprecated in recent pandas releases; the
    # dedicated ffill()/bfill() methods perform the same fill.
    data = data.ffill().bfill()
    numeric_features = ["age", "fare"]
    categorical_features = ["embarked", "sex", "pclass"]

    y = data["survived"].values
    X = data[categorical_features + numeric_features]

    # Fixed seed so the split is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return X_train, X_test, y_train, y_test, numeric_features, categorical_features
开发者ID:interpretml,项目名称:interpret-text,代码行数:21,代码来源:common_utils.py

示例9: mmb_evaluate_model

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def mmb_evaluate_model(self):
        """
        Returns scores from cross validation evaluation on the malicious / benign classifier

        ``self.clf_X`` and ``self.clf_y`` are (re)populated from
        ``self.modeldata``. Of a fixed 80/20 split only the training part is
        used; it is evaluated with 5-fold cross validation of a random forest.

        :return: dict with keys ``accuracy``, ``f1``, ``precision`` and
            ``recall``, each holding the array of per-fold scores
        """
        from sklearn.model_selection import cross_validate

        predictive_features = self.features['predictive_features']
        self.clf_X = self.modeldata[predictive_features].values
        self.clf_y = np.array(self.modeldata['label'])

        # The held-out 20% is not used by this evaluation.
        X_train, _, y_train, _ = train_test_split(self.clf_X, self.clf_y, test_size=0.2, random_state=0)

        # Keep only the first column of the binarised labels as a flat vector.
        y_train = LabelBinarizer().fit_transform(y_train)[:, 0]

        eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)

        # One multi-metric pass: every metric is computed on the same fitted
        # fold models, rather than refitting the forest once per metric. No
        # explicit fit() is needed because cross validation clones the estimator.
        scoring = {
            'accuracy': 'accuracy',
            'f1': 'f1_macro',
            'precision': 'precision',
            'recall': 'recall',
        }
        fold_scores = cross_validate(eval_cls, X_train, y_train, cv=5, scoring=scoring)

        return {name: fold_scores['test_' + name] for name in scoring}
开发者ID:egaus,项目名称:MaliciousMacroBot,代码行数:22,代码来源:mmbot.py

示例10: bootstrapped_split

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def bootstrapped_split(car_ids, seed=args.seed):
    """
    Split car ids into train/validation and bootstrap the training part.

    # Arguments
        car_ids: sequence of car ids to split.
        seed: seed used both for the train/validation split and for
            drawing the bootstrap sample.

    # Returns
        A tuple (train_filenames, valid_filenames) produced by
        `generate_filenames`. The training ids are a bootstrap sample
        (drawn with replacement, same size as the training split).
    """
    # Split the Series, not the raw input, so that `.iloc` / `.values` below
    # work for any input sequence type.
    all_ids = pd.Series(car_ids)
    train_ids, valid_ids = train_test_split(all_ids, test_size=args.test_size_float,
                                            random_state=seed)

    np.random.seed(seed)
    n_train = len(train_ids)
    # Draw n_train positions with replacement; randint's upper bound is
    # exclusive, so every position is a valid index.
    bootstrapped_idx = np.random.randint(0, n_train, size=n_train)
    bootstrapped_train_ids = train_ids.iloc[bootstrapped_idx]

    return generate_filenames(bootstrapped_train_ids.values), generate_filenames(valid_ids.values)
开发者ID:killthekitten,项目名称:kaggle-carvana-2017,代码行数:20,代码来源:datasets.py

示例11: main

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def main():
    """Train MLELM and ELM on the OpenML ``diabetes`` data and print accuracies.

    Features are row-normalised; the target is mapped to ``1`` for
    ``"tested_positive"`` and ``-1`` otherwise. 40% of the rows are held out
    for scoring.
    """
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import train_test_split

    data_set = fetch_mldata('diabetes')
    data_set.data = preprocessing.normalize(data_set.data)

    # Encode the string labels as +1 / -1.
    data_set.target = [
        1 if label == "tested_positive" else -1
        for label in data_set.target
    ]

    X_train, X_test, y_train, y_test = train_test_split(
        data_set.data, data_set.target, test_size=0.4)

    # Both models are fitted before either is scored.
    mlelm = MLELM(hidden_units=(10, 30, 200)).fit(X_train, y_train)
    elm = ELM(200).fit(X_train, y_train)

    mlelm_accuracy = mlelm.score(X_test, y_test)
    print("MLELM Accuracy %0.3f " % mlelm_accuracy)
    elm_accuracy = elm.score(X_test, y_test)
    print("ELM Accuracy %0.3f " % elm_accuracy)
开发者ID:masaponto,项目名称:Python-ELM,代码行数:23,代码来源:ml_elm.py

示例12: train_test_val_split

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def train_test_val_split(X, Y, split=(0.2, 0.1), shuffle=True):
    """Split dataset into train/test/val subsets by 70:20:10 (default).

    Both ratios in `split` are fractions of the *whole* dataset.

    Args:
      X: List of data.
      Y: List of labels corresponding to data.
      split: Tuple of split ratio in `test:val` order.
      shuffle: Bool of shuffle or not (applies to the train vs. held-out
        split; the held-out part is then divided in order).

    Returns:
      Three `(data, labels)` pairs in `train:test:val` order.
    """
    from sklearn.model_selection import train_test_split
    assert len(X) == len(Y), 'The length of X and Y must be consistent.'

    test_ratio, val_ratio = split[0], split[1]
    held_out_ratio = test_ratio + val_ratio

    X_train, X_test_val, Y_train, Y_test_val = train_test_split(
        X, Y, test_size=held_out_ratio, shuffle=shuffle)

    # The second split only sees the held-out part, so the validation ratio
    # must be rescaled relative to it. Passing `val_ratio` unchanged would
    # give the validation set val_ratio * held_out_ratio of the data
    # (3% instead of 10% with the defaults).
    X_test, X_val, Y_test, Y_val = train_test_split(
        X_test_val, Y_test_val,
        test_size=val_ratio / held_out_ratio, shuffle=False)

    return (X_train, Y_train), (X_test, Y_test), (X_val, Y_val)
开发者ID:luuil,项目名称:Tensorflow-Audio-Classification,代码行数:21,代码来源:audio_util.py

示例13: pipeline

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def pipeline(args):
    '''
    Runs the model loop.

    Reads ``args.filename`` as CSV, fills missing values of the
    ``args.x_label`` column with the string "None", optionally de-duplicates
    on ``content`` and restricts sources, then splits the data and hands it
    to ``ModelLoop``.
    '''
    df = pd.read_csv(args.filename)
    df.loc[:,args.x_label] = df[args.x_label].fillna("None")

    if args.dedupe:
        df = df.drop_duplicates(subset='content')
    if args.reduce:
        df = restrict_sources(df)

    X, y = df[args.x_label], df[args.y_label]

    # NOTE(review): the loaded spaCy pipeline is never used in this function;
    # confirm whether the load is kept on purpose (e.g. to fail fast).
    parser = spacy.load('en')

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model_loop = ModelLoop(
        X_train, X_test, y_train, y_test,
        args.models,
        args.iterations,
        args.output_dir,
        thresholds=args.thresholds,
        ks=args.ks,
        setting=args.features[0],
    )
    model_loop.run()
开发者ID:aldengolab,项目名称:fake-news-detection,代码行数:21,代码来源:run.py

示例14: plot_learning_curve

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def plot_learning_curve(x_train, y_train, subsets=20, mmr=None, cv=5, tool='matplotlib'):
    """Plot error versus training-subset size.

    20% of the given data is held out as a test set. ``evaluate`` is then run
    on growing prefixes of the remaining data, with sizes spaced evenly on a
    log scale from ``exp(3)`` up to the full training length.

    Args:
        x_train: features.
        y_train: labels.
        subsets: number of subset sizes to evaluate.
        mmr: forwarded unchanged to the plotting helper.
        cv: forwarded to ``evaluate`` as ``cv``.
        tool: ``'matplotlib'`` selects ``_plot_matplotlib``; any other value
            selects ``_plot_plotly``.
    """
    x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.2)

    log_upper = np.log(len(y_train))
    subset_sizes = np.exp(np.linspace(3, log_upper, subsets)).astype(int)

    cv_errors = []
    auc_errors = []

    for subset_size in subset_sizes:
        logger.info('Performing cross validation on subset_size %d', subset_size)
        train_part = [x_train[:subset_size], y_train[:subset_size]]
        test_part = [x_test, y_test]
        _, _, cv_score, roc_auc, _ = evaluate(train_part, test_part, cv=cv)

        # Store errors (1 - score) rather than scores.
        cv_errors.append(1 - cv_score)
        auc_errors.append(1 - roc_auc)

    results_list = [cv_errors, auc_errors]

    plot = _plot_matplotlib if tool == 'matplotlib' else _plot_plotly
    plot(subset_sizes, results_list, mmr)
开发者ID:andreiapostoae,项目名称:dota2-predictor,代码行数:21,代码来源:learning_curve.py

示例15: return_train_dataset

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import train_test_split [as 别名]
def return_train_dataset(self):
        """Returns train data set

        The main data set is returned as is unless
        ``self.test_dataset['method']`` equals ``'split_from_main'``; in that
        case a stratified test split is carved off and only the remaining
        training portion is returned.

        Returns:
            X (numpy.ndarray): Features

            y (numpy.ndarray): Labels
        """
        features, labels = self.return_main_dataset()

        if self.test_dataset['method'] != 'split_from_main':
            return features, labels

        # Drop the test portion; keep only the training part of the split.
        train_features, _, train_labels, _ = train_test_split(
            features,
            labels,
            test_size=self.test_dataset['split_ratio'],
            random_state=self.test_dataset['split_seed'],
            stratify=labels
        )
        return train_features, train_labels
开发者ID:reiinakano,项目名称:xcessiv,代码行数:22,代码来源:models.py


注:本文中的sklearn.model_selection.train_test_split方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。