本文整理汇总了 Python 中 sklearn.ensemble.RandomForestClassifier.apply 方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.apply 方法的具体用法?Python RandomForestClassifier.apply 怎么用?Python RandomForestClassifier.apply 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.ensemble.RandomForestClassifier 的用法示例。
在下文中一共展示了 RandomForestClassifier.apply 方法的 8 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: modelselect
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def modelselect(input_filename, num_test_examples, block_size, n_estimators=100):
    """Grid-search SGDClassifier hyper-parameters on random-forest leaf features.

    The training split returned by ``loaddata`` is used to fit a random
    forest; every training sample is then re-encoded as the one-hot vector of
    the leaf it reaches in each tree, and a grid search over SGD loss,
    penalty, alpha and learning-rate schedule is run on that encoding.

    Args:
        input_filename: Forwarded to ``loaddata``.
        num_test_examples: Forwarded to ``loaddata``.
        block_size: Forwarded to ``loaddata``.
        n_estimators: Number of trees in the feature-generating forest.

    Returns:
        The fitted ``GridSearchCV`` instance (refit on the best parameters).
    """
    # Only the training split is used here; the test split and the scaler
    # returned by loaddata are not needed for model selection.
    X_train, y_train, _X_test, _y_test, _scaler = loaddata(
        input_filename, num_test_examples, block_size
    )

    # Feature generation using random forests
    forest = RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1)
    forest.fit(X_train, y_train)

    # Run the forest over the training data once and reuse the leaf indices
    # for both fitting the encoder and producing the encoded features.
    train_leaves = forest.apply(X_train)
    encoder = OneHotEncoder()
    X_train = encoder.fit_transform(train_leaves)

    # Base estimator; the grid below overrides loss, penalty, alpha and
    # learning_rate for each candidate.
    learner = SGDClassifier(
        loss="hinge",
        penalty="l2",
        learning_rate="invscaling",
        alpha=0.001,
        average=10 ** 4,
        eta0=0.5,
        class_weight="balanced",
    )

    metric = "f1"
    losses = ["log", "hinge", "modified_huber", "squared_hinge", "perceptron"]
    penalties = ["l2", "l1", "elasticnet"]
    alphas = 10.0 ** numpy.arange(-5, 0)
    learning_rates = ["constant", "optimal", "invscaling"]
    param_grid = [
        {
            "alpha": alphas,
            "loss": losses,
            "penalty": penalties,
            "learning_rate": learning_rates,
        }
    ]

    grid_search = GridSearchCV(
        learner, param_grid, n_jobs=-1, verbose=2, scoring=metric, refit=True
    )
    grid_search.fit(X_train, y_train)
    print(grid_search.best_params_, grid_search.best_score_)
    return grid_search
示例2: test_classification_toy
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def test_classification_toy():
    """Check classification on a toy dataset."""
    n_trees = 10
    # Random forest first, then extra-trees. Each ensemble is checked once
    # with default feature sampling and once with max_features=1.
    for ensemble_cls in (RandomForestClassifier, ExtraTreesClassifier):
        model = ensemble_cls(n_estimators=n_trees, random_state=1)
        model.fit(X, y)
        assert_array_equal(model.predict(T), true_result)
        assert_equal(n_trees, len(model))

        model = ensemble_cls(n_estimators=n_trees, max_features=1, random_state=1)
        model.fit(X, y)
        assert_array_equal(model.predict(T), true_result)
        assert_equal(n_trees, len(model))

        # also test apply: one leaf index per (sample, tree) pair
        leaves = model.apply(X)
        expected_shape = (len(X), model.n_estimators)
        assert_equal(leaves.shape, expected_shape)
示例3: main
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def main():
    """Train a logistic SGD model on random-forest leaf encodings and save test predictions.

    Steps: fit a random forest on the training data, one-hot encode the leaf
    indices returned by ``rf.apply``, fit an SGD classifier with log loss on
    that encoding, apply the same transformation to the test data and write
    the predicted probabilities to ``predictions.csv``.
    """
    # initialize sklearn objects
    rf = RandomForestClassifier(n_estimators=300, max_depth=3, verbose=1, random_state=SEED)
    logitsgd = SGDClassifier(loss='log', n_jobs=-1, verbose=1)
    encoder = OneHotEncoder()

    train, click = load_train_data(train_loc)

    # rf feature transformation
    rf.fit(train, click)
    train_rf = rf.apply(train)
    train = None  # drop our reference once the raw features are no longer needed

    # encode rf features for logit
    print('fitting encoder ... ')
    encoder.fit(train_rf)
    print('transforming ...')
    # Transform the whole leaf matrix in one call; stacking row by row onto an
    # empty list is neither needed nor a valid input for the encoder.
    embedded = encoder.transform(train_rf)
    train_rf = None

    # train model
    logitsgd.fit(X=embedded, y=click)
    embedded = None

    # load testing data
    test = load_test_data(test_loc)

    # rf transform test
    test_rf = rf.apply(test)
    test = None

    # encode test
    print('transforming ...')
    embedded_test = encoder.transform(test_rf)
    test_rf = None

    # make predictions
    prediction = logitsgd.predict_proba(embedded_test)

    # save predictions
    prediction = np.array(prediction)
    np.savetxt("predictions.csv", prediction, delimiter=",")
示例4: test_drf_classifier_backupsklearn
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def test_drf_classifier_backupsklearn(backend='auto'):
    """Compare h2o4gpu's RandomForestClassifier against scikit-learn's.

    Both classifiers are fitted on ``./open_data/creditcard.csv`` (every
    column but the last as features, the last column as target). When
    ``backend`` is ``"sklearn"`` the predictions, score, decision paths, leaf
    indices and fitted attributes of the two models are asserted to be equal.

    Args:
        backend: Backend name forwarded to the h2o4gpu solver.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; all comparisons are assumed to be guarded by the
    # ``backend == "sklearn"`` check below - confirm against upstream.
    df = pd.read_csv("./open_data/creditcard.csv")
    n_cols = df.shape[1]
    X = np.array(df.iloc[:, :n_cols - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, n_cols - 1], dtype='float32', order='C')

    import h2o4gpu
    Solver = h2o4gpu.RandomForestClassifier

    # Run h2o4gpu version of the random forest classifier
    drf = Solver(backend=backend, random_state=1234, oob_score=True)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    # Run scikit-learn version of the random forest classifier
    from sklearn.ensemble import RandomForestClassifier
    drf_sk = RandomForestClassifier(random_state=1234, oob_score=True, max_depth=3)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    if backend == "sklearn":
        assert (drf.predict(X) == drf_sk.predict(X)).all()
        assert (drf.predict_log_proba(X) == drf_sk.predict_log_proba(X)).all()
        assert (drf.predict_proba(X) == drf_sk.predict_proba(X)).all()
        # score() yields a scalar, so compare it directly
        assert drf.score(X, y) == drf_sk.score(X, y)
        assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all()
        assert (drf.apply(X) == drf_sk.apply(X)).all()

        print("Estimators")
        print(drf.estimators_)
        print(drf_sk.estimators_)

        print("n_features")
        print(drf.n_features_)
        print(drf_sk.n_features_)
        assert drf.n_features_ == drf_sk.n_features_

        print("n_classes_")
        print(drf.n_classes_)
        print(drf_sk.n_classes_)
        assert drf.n_classes_ == drf_sk.n_classes_

        # Label corrected: this section dumps classes_, not n_features.
        print("classes_")
        print(drf.classes_)
        print(drf_sk.classes_)
        assert (drf.classes_ == drf_sk.classes_).all()

        print("n_outputs")
        print(drf.n_outputs_)
        print(drf_sk.n_outputs_)
        assert drf.n_outputs_ == drf_sk.n_outputs_

        print("Feature importance")
        print(drf.feature_importances_)
        print(drf_sk.feature_importances_)
        assert (drf.feature_importances_ == drf_sk.feature_importances_).all()

        print("oob_score")
        print(drf.oob_score_)
        print(drf_sk.oob_score_)
        assert drf.oob_score_ == drf_sk.oob_score_
示例5: train
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def train(input_filename, num_train_examples, num_test_examples, block_size):
    """Train an SGD classifier block by block on random-forest leaf features.

    A random forest fitted on the initial training split produces leaf
    indices that are one-hot encoded. The CSV file is then streamed in blocks
    of ``block_size`` rows for several passes; each block is scaled, encoded
    and fed to ``partial_fit``, and the test AUC is recorded after every block.

    Args:
        input_filename: Header-less CSV file; column 0 is read as the label
            and the remaining columns as features.
        num_train_examples: Number of rows streamed from the file per pass.
        num_test_examples: Forwarded to ``loaddata``.
        block_size: Rows per streamed block (also forwarded to ``loaddata``).

    Returns:
        A ``pandas.DataFrame`` indexed by "Iterations" with an "AUC" column.
    """
    # Load initial training data and test data
    X_train, y_train, X_test, y_test, scaler = loaddata(input_filename, num_test_examples, block_size)

    # Feature generation using random forests
    forest = RandomForestClassifier(n_estimators=150, n_jobs=-1)
    forest.fit(X_train, y_train)
    encoder = OneHotEncoder()
    encoder.fit(forest.apply(X_train))
    X_test = encoder.transform(forest.apply(X_test))

    # Make sure that classes are weighted inversely to their frequencies
    weights = float(y_train.shape[0]) / (2 * numpy.bincount(y_train))
    class_weights = {0: weights[0], 1: weights[1]}

    learner = SGDClassifier(
        loss="hinge",
        penalty="l2",
        learning_rate="invscaling",
        alpha=0.0001,
        average=10 ** 4,
        eta0=1.0,
        class_weight=class_weights,
    )

    num_passes = 3
    classes = numpy.array([0, 1])  # loop-invariant, so built once
    aucs = []
    for j in range(num_passes):
        for i in range(0, num_train_examples, block_size):
            df = pandas.read_csv(input_filename, header=None, skiprows=i, nrows=block_size)
            X_block = scaler.transform(df.values[:, 1:])
            X_block = encoder.transform(forest.apply(X_block))
            # Builtin int replaces the numpy.int alias, which newer NumPy
            # releases no longer provide.
            y_block = numpy.array(df.values[:, 0], int)
            del df

            learner.partial_fit(X_block, y_block, classes=classes)

            y_pred_prob = learner.decision_function(X_test)
            auc = roc_auc_score(y_test, y_pred_prob)
            aucs.append([i + num_train_examples * j, auc])
            print(aucs[-1])

    df = pandas.DataFrame(aucs, columns=["Iterations", "AUC"])
    df = df.set_index("Iterations")
    return df
示例6: RF_openworld
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def RF_openworld(mon_type, path_to_dict = dic_of_feature_data):
    '''Produces leaf vectors used for classification.

    Builds the training and test sets from the monitored and unmonitored
    references, fits a random forest on the training set and pairs each
    sample's leaf-index vector (from ``model.apply``) with its label.

    Args:
        mon_type: Forwarded to ``mon_train_test_references``.
        path_to_dict: Forwarded to both reference-loading helpers.

    Returns:
        ``(train_leaf, test_leaf)``: two lists of ``(leaf_vector, label)``
        pairs.
    '''
    mon_training, mon_test = mon_train_test_references(mon_type, path_to_dict)
    unmon_training, unmon_test = unmon_train_test_references(path_to_dict)

    training = mon_training + unmon_training
    test = mon_test + unmon_test

    # Each sample is a (features, label_entry) pair; the label actually used
    # is the first component of every label_entry. zip() is materialised with
    # list() so indexing also works where zip returns an iterator (Python 3).
    tr_data, tr_label1 = zip(*training)
    tr_label = list(zip(*tr_label1))[0]
    te_data, te_label1 = zip(*test)
    te_label = list(zip(*te_label1))[0]

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Training ...")
    model = RandomForestClassifier(n_jobs=-1, n_estimators=num_Trees, oob_score=True)
    model.fit(tr_data, tr_label)

    # Return concrete lists so callers can index or iterate more than once.
    train_leaf = list(zip(model.apply(tr_data), tr_label))
    test_leaf = list(zip(model.apply(te_data), te_label))
    return train_leaf, test_leaf
示例7: Train
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
class Train(object):
    """Fits random-forest based models on a prepared training set.

    The constructor copies the training matrix, targets, feature names and
    sample weights from the object it is given; ``preprocess_model`` and
    ``train_model`` each store the fitted forest in ``self.clf`` and the final
    predictive model in ``self.predmodel``.
    """

    def __init__(self, train):
        """Copy the training data off ``train``.

        Args:
            train: Object exposing ``train``, ``predictors``,
                ``feature_names`` and ``weights`` attributes.
        """
        self.train = train.train
        self.predictors = train.predictors
        self.features = train.feature_names
        self.weights = train.weights

    def preprocess_model(self):
        """Fit a forest on half the data and a logistic regression on its leaves.

        The data is split 50/50. The forest and the one-hot encoder of its
        leaf indices are fitted on the first half; the logistic regression is
        fitted on the encoded leaves of the second half and stored in
        ``self.predmodel``.
        """
        X_train, X_train_lr, y_train, y_train_lr = train_test_split(
            self.train, self.predictors, test_size=0.5
        )
        encode = OneHotEncoder()
        logistic = LogisticRegression()
        self.clf = RandomForestClassifier(n_estimators=512,
                                          oob_score=True, n_jobs=-1)
        self.clf.fit(X_train, y_train)
        encode.fit(self.clf.apply(X_train))
        lr_features = encode.transform(self.clf.apply(X_train_lr))
        self.predmodel = logistic.fit(lr_features, y_train_lr)

    def train_model(self):
        """Fit a weighted random forest on the full training set.

        For RandomForestClassifier to work there must be no NaN values; one
        way of handling this is to use the --impute option. If NaNs are still
        present here, a warning is issued and they are replaced per feature
        by the mean, which is the least informative imputation.
        """
        if np.any(np.isnan(self.train)):
            # Message assembled from adjacent literals so its spacing does not
            # depend on how the source lines happen to be indented.
            warnings.warn('RandomForestClassifier requires no missing data, '
                          'features being imputed by mean')
            X = self.train
            imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
            imp.fit(X)
            self.train = imp.transform(X)
        self.clf = RandomForestClassifier(n_estimators=512,
                                          oob_score=True, n_jobs=-1)
        self.predmodel = self.clf.fit(X=self.train, y=self.predictors,
                                      sample_weight=self.weights)
示例8: show_roc
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import apply [as 别名]
def show_roc(classifier, with_probas):
    """Cross-validate a two-stage forest model and report per-fold metrics.

    For each of 5 stratified folds, a first random forest is fitted on half
    of the fold's training data; its one-hot encoded leaf indices on the
    other half are used to fit a second random forest. Confusion counts,
    precision, recall, accuracy and AUC are printed for the held-out fold,
    and optionally a ROC curve per fold is plotted.

    Args:
        classifier: Not used by the current implementation.
        with_probas: When true, plot one ROC curve per fold and show the
            figure at the end.
    """
    cv = StratifiedKFold(labels[:-1], n_folds=5)
    for i, (train, test) in enumerate(cv):
        vectorizer = CountVectorizer(vocabulary=vocab)
        features = vectorizer.fit_transform(data[train])
        X = features.toarray()
        y = labels[train]
        # First half fits the leaf-generating forest, second half the
        # classifier trained on the encoded leaves.
        X, X1, y, y1 = train_test_split(X, y, test_size=0.5)

        clf1 = RandomForestClassifier(n_estimators=20)
        enc = OneHotEncoder()
        clf2 = RandomForestClassifier(n_estimators=10)
        clf1.fit(X, y)
        enc.fit(clf1.apply(X))
        clf2.fit(enc.transform(clf1.apply(X1)), y1)

        X_test = vectorizer.transform(data[test])
        y_test = labels[test]

        # Encode the held-out fold once and reuse it for both the hard
        # predictions and the class probabilities.
        X_test_encoded = enc.transform(clf1.apply(X_test))
        res = clf2.predict(X_test_encoded)

        if with_probas:
            res_p = clf2.predict_proba(X_test_encoded)
            fpr, tpr, _ = roc_curve(y_test, res_p[:, 1])
            roc_auc = auc(fpr, tpr)
            plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

        # Confusion counts; exactly one branch applies per sample.
        tp, tn, fp, fn = 0, 0, 0, 0
        for value, prediction in zip(y_test, res):
            if prediction:
                if value:
                    tp += 1
                else:
                    fp += 1
            elif value:
                fn += 1
            else:
                tn += 1

        print('TP: {0}, TN: {1}, FP: {2}, FN: {3}'.format(tp, tn, fp, fn))
        print("Precision Score : %f" % metrics.precision_score(y_test, res))
        print("Recall Score : %f" % metrics.recall_score(y_test, res))
        print("Accuracy : %.4g" % metrics.accuracy_score(y_test, res))
        # NOTE(review): the label says "Train" but the score is computed on
        # the held-out fold - confirm the intended wording.
        print("AUC Score (Train): %f" % metrics.roc_auc_score(y_test, res))

    if with_probas:
        plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
        plt.xlim([-0.05, 1.05])
        plt.ylim([-0.05, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic example')
        plt.legend(loc="lower right")
        plt.show()