Python RandomForestClassifier.apply方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.apply方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestClassifier.apply方法的具体用法？Python RandomForestClassifier.apply怎么用？Python RandomForestClassifier.apply使用的例子？那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestClassifier的用法示例。

在下文中一共展示了RandomForestClassifier.apply方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: modelselect

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def modelselect(input_filename, num_test_examples, block_size, n_estimators=100):
    # Perform some model selection to determine good parameters
    # Load data
    X_train, y_train, X_test, y_test, scaler = loaddata(input_filename, num_test_examples, block_size)

    # Feature generation using random forests
    forest = RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1)
    forest.fit(X_train, y_train)
    encoder = OneHotEncoder()
    encoder.fit(forest.apply(X_train))
    X_train = encoder.transform(forest.apply(X_train))
    learner = SGDClassifier(
        loss="hinge",
        penalty="l2",
        learning_rate="invscaling",
        alpha=0.001,
        average=10 ** 4,
        eta0=0.5,
        class_weight="balanced",
    )

    metric = "f1"
    losses = ["log", "hinge", "modified_huber", "squared_hinge", "perceptron"]
    penalties = ["l2", "l1", "elasticnet"]
    alphas = 10.0 ** numpy.arange(-5, 0)
    learning_rates = ["constant", "optimal", "invscaling"]
    param_grid = [{"alpha": alphas, "loss": losses, "penalty": penalties, "learning_rate": learning_rates}]
    grid_search = GridSearchCV(learner, param_grid, n_jobs=-1, verbose=2, scoring=metric, refit=True)

    grid_search.fit(X_train, y_train)
    print(grid_search.best_params_, grid_search.best_score_)
    return grid_search

开发者ID:charanpald，项目名称:tyre-hug，代码行数:34，代码来源:outofcore.py

示例2: test_classification_toy

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def test_classification_toy():
    """Check classification on a toy dataset."""
    # Random forest
    clf = RandomForestClassifier(n_estimators=10, random_state=1)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf))

    clf = RandomForestClassifier(n_estimators=10, max_features=1,
                                 random_state=1)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf))

    # also test apply
    leaf_indices = clf.apply(X)
    assert_equal(leaf_indices.shape, (len(X), clf.n_estimators))

    # Extra-trees
    clf = ExtraTreesClassifier(n_estimators=10, random_state=1)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf))

    clf = ExtraTreesClassifier(n_estimators=10, max_features=1,
                               random_state=1)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf))

    # also test apply
    leaf_indices = clf.apply(X)
    assert_equal(leaf_indices.shape, (len(X), clf.n_estimators))

开发者ID:DaveYuan，项目名称:recommendersystem，代码行数:35，代码来源:test_forest.py

示例3: main

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def main():
	# initialize sklearn objects
	rf = RandomForestClassifier(n_estimators = 300, max_depth = 3, verbose = 1, random_state = SEED)
	logitsgd = SGDClassifier(loss ='log', n_jobs = -1, verbose = 1)
	encoder = OneHotEncoder()
	
	train, click = load_train_data(train_loc)
	
	# rf feature transformation
	rf.fit(train, click)
	train_rf = rf.apply(train)
	train = None
	
	# encode rf features for logit
	print('fitting encoder ... ')
	encoder.fit(train_rf)
	print('transforming ...')
	embedded = []
	for row in train_rf:
		embedded = vstack((embedded, encoder.transform(row)))
	
	train_rf = None
	
	# train model
	logitsgd.fit(X = embedded, y = click)
	embedded = None
	
	# load testing data
	test = load_test_data(test_loc)
	
	# rf transform test
	test_rf = rf.apply(test)
	test = None
	
	# encode test
	print('transforming ...')
	embedded = []
	for row in test_rf:
		embedded = vstack((embedded, encoder.transform(row)))
	
	test_rf = None
	
	# make predictions
	prediction = logitsgd.predict_proba(embedded_test)
	
	# save predictions
	prediction = np.array(prediction)
	np.savetxt("predictions.csv", prediction, delimiter = ",")

开发者ID:FlorianMuellerklein，项目名称:Click-Rate，代码行数:50，代码来源:tree.features.py

示例4: test_drf_classifier_backupsklearn

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def test_drf_classifier_backupsklearn(backend='auto'):
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.RandomForestClassifier

    #Run h2o4gpu version of RandomForest Regression
    drf = Solver(backend=backend, random_state=1234, oob_score=True)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    #Run Sklearn version of RandomForest Regression
    from sklearn.ensemble import RandomForestClassifier
    drf_sk = RandomForestClassifier(random_state=1234, oob_score=True, max_depth=3)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    if backend == "sklearn":
        assert (drf.predict(X) == drf_sk.predict(X)).all() == True
        assert (drf.predict_log_proba(X) == drf_sk.predict_log_proba(X)).all() == True
        assert (drf.predict_proba(X) == drf_sk.predict_proba(X)).all() == True
        assert (drf.score(X, y) == drf_sk.score(X, y)).all() == True
        assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all() == True
        assert (drf.apply(X) == drf_sk.apply(X)).all() == True

        print("Estimators")
        print(drf.estimators_)
        print(drf_sk.estimators_)

        print("n_features")
        print(drf.n_features_)
        print(drf_sk.n_features_)
        assert drf.n_features_ == drf_sk.n_features_

        print("n_classes_")
        print(drf.n_classes_)
        print(drf_sk.n_classes_)
        assert drf.n_classes_ == drf_sk.n_classes_

        print("n_features")
        print(drf.classes_)
        print(drf_sk.classes_)
        assert (drf.classes_ == drf_sk.classes_).all() == True

        print("n_outputs")
        print(drf.n_outputs_)
        print(drf_sk.n_outputs_)
        assert drf.n_outputs_ == drf_sk.n_outputs_

        print("Feature importance")
        print(drf.feature_importances_)
        print(drf_sk.feature_importances_)
        assert (drf.feature_importances_ == drf_sk.feature_importances_).all() == True

        print("oob_score")
        print(drf.oob_score_)
        print(drf_sk.oob_score_)
        assert drf.oob_score_ == drf_sk.oob_score_

开发者ID:wamsiv，项目名称:h2o4gpu，代码行数:61，代码来源:test_xgb_sklearn_wrapper.py

示例5: train

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def train(input_filename, num_train_examples, num_test_examples, block_size):
    # Load initial training data and test data
    X_train, y_train, X_test, y_test, scaler = loaddata(input_filename, num_test_examples, block_size)

    # Feature generation using random forests
    forest = RandomForestClassifier(n_estimators=150, n_jobs=-1)
    forest.fit(X_train, y_train)
    encoder = OneHotEncoder()
    encoder.fit(forest.apply(X_train))
    X_test = encoder.transform(forest.apply(X_test))
    # Make sure that classes are weighted inversely to their frequencies
    weights = float(y_train.shape[0]) / (2 * numpy.bincount(y_train))
    class_weights = {0: weights[0], 1: weights[1]}
    learner = SGDClassifier(
        loss="hinge",
        penalty="l2",
        learning_rate="invscaling",
        alpha=0.0001,
        average=10 ** 4,
        eta0=1.0,
        class_weight=class_weights,
    )

    num_passes = 3
    aucs = []

    for j in range(num_passes):
        for i in range(0, num_train_examples, block_size):
            df = pandas.read_csv(input_filename, header=None, skiprows=i, nrows=block_size)
            X_train = df.values[:, 1:]
            X_train = scaler.transform(X_train)
            X_train = encoder.transform(forest.apply(X_train))
            y_train = numpy.array(df.values[:, 0], numpy.int)
            del df

            learner.partial_fit(X_train, y_train, classes=numpy.array([0, 1]))
            y_pred_prob = learner.decision_function(X_test)
            auc = roc_auc_score(y_test, y_pred_prob)
            aucs.append([i + num_train_examples * j, auc])
            print(aucs[-1])

    df = pandas.DataFrame(aucs, columns=["Iterations", "AUC"])
    df = df.set_index("Iterations")
    return df

开发者ID:charanpald，项目名称:tyre-hug，代码行数:46，代码来源:outofcore.py

示例6: RF_openworld

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def RF_openworld(mon_type, path_to_dict = dic_of_feature_data):
    '''Produces leaf vectors used for classification.'''

    mon_training, mon_test = mon_train_test_references(mon_type, path_to_dict)
    unmon_training, unmon_test = unmon_train_test_references(path_to_dict)

    training = mon_training + unmon_training
    test = mon_test + unmon_test

    tr_data, tr_label1 = zip(*training)
    tr_label = zip(*tr_label1)[0]
    te_data, te_label1 = zip(*test)
    te_label = zip(*te_label1)[0]

    print "Training ..."
    model = RandomForestClassifier(n_jobs=-1, n_estimators=num_Trees, oob_score=True)
    model.fit(tr_data, tr_label)

    train_leaf = zip(model.apply(tr_data), tr_label)
    test_leaf = zip(model.apply(te_data), te_label)
    return train_leaf, test_leaf

开发者ID:jhayes14，项目名称:k-FP，代码行数:23，代码来源:k-FP.py

示例7: Train

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
class Train(object):
    """docstring for TrainModel"""
    def preprocess_model(self):
        '''This allows preprocessing using logistic regression'''
        X_train, X_train_lr, y_train, y_train_lr = train_test_split(self.train,
                                                                    self.predictors,
                                                                    test_size=0.5)
        encode = OneHotEncoder()
        logistic = LogisticRegression()
        self.clf = RandomForestClassifier(n_estimators=512,
                                          oob_score=True, n_jobs=-1)
        self.clf.fit(X_train, y_train)
        encode.fit(self.clf.apply(X_train))
        self.predmodel = logistic.fit(encode.transform(self.clf.apply(X_train_lr)), y_train_lr)

    def train_model(self):
        '''This is standard model training'''
        '''For RandomForestClassifier to work their must be no nan values, one
        way of handling this is to use the --impute option. This uses mean
        imputation, which is the least information imputer, imputation is done
        by feature
        '''
        if np.any(np.isnan(self.train)):
            warnings.warn('RandomForestClassifier requires no missing data,\
                           features being imputed by mean')
            X = self.train
            imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
            imp.fit(X)
            self.train = imp.transform(X)
        self.clf = RandomForestClassifier(n_estimators=512,
                                          oob_score=True, n_jobs=-1)
        self.predmodel = self.clf.fit(X=self.train, y=self.predictors,
                                      sample_weight=self.weights)

    def __init__(self, train):
        self.train = train.train
        self.predictors = train.predictors
        self.features = train.feature_names
        self.weights = train.weights

开发者ID:sandialabs，项目名称:BioCompoundML，代码行数:41，代码来源:train_model.py

示例8: show_roc

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def show_roc(classifier, with_probas):
    cv = StratifiedKFold(labels[:-1], n_folds=5)

    for i, (train, test) in enumerate(cv):

        vectorizer = CountVectorizer(vocabulary=vocab)
        features = vectorizer.fit_transform(data[train])
        #transformer = TfidfTransformer()
        #tfidf_features = transformer.fit(features).transform(features)
        #X = np.array(tfidf_features.todense())

        #X = preprocess(features.toarray())
        X = features.toarray()
        y = labels[train]

        X, X1, y, y1 = train_test_split(X, y, test_size=0.5)
        clf1 = RandomForestClassifier(n_estimators=20)
        enc = OneHotEncoder()
        clf2 = RandomForestClassifier(n_estimators=10)
        clf1.fit(X, y)
        enc.fit(clf1.apply(X))
        clf2.fit(enc.transform(clf1.apply(X1)), y1)

      
        #clf = classifier.fit(X, y)

        X_test = vectorizer.transform(data[test])
        #t_f = preprocess(t_features.toarray())
        y_test = labels[test]
        #res = clf.predict(t_f)
        
        res = clf2.predict(enc.transform(clf1.apply(X_test)))  

        if with_probas:
            res_p = clf2.predict_proba(enc.transform(clf1.apply(X_test)))
            #res_p = clf.predict_proba(t_features)
            fpr, tpr, _ = roc_curve(y_test, res_p[:,1])
            roc_auc = auc(fpr, tpr)
            plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

        check = zip(y_test, res)
        tp, tn, fp, fn = 0, 0, 0, 0
        for value, prediction in check:
            if (prediction and value):
                tp += 1
            if (prediction and not value):
                fp += 1
            if (not prediction and value):
                fn += 1
            if (not prediction and not value):
                tn += 1
        print ('TP: {0}, TN: {1}, FP: {2}, FN: {3}'.format(tp, tn, fp, fn))
        print ("Precision Score : %f" % metrics.precision_score(y_test, res))
        print ("Recall Score : %f" % metrics.recall_score(y_test, res))
        print ("Accuracy : %.4g" % metrics.accuracy_score(y_test, res))
        print ("AUC Score (Train): %f" % metrics.roc_auc_score(y_test, res))

    if with_probas:
        plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
        plt.xlim([-0.05, 1.05])
        plt.ylim([-0.05, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic example')
        plt.legend(loc="lower right")
        plt.show()

开发者ID:AnastasiaSulyagina，项目名称:NLP，代码行数:68，代码来源:analyze.py

注：本文中的sklearn.ensemble.RandomForestClassifier.apply方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。