本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.fit_transform方法的具体用法?Python RandomForestClassifier.fit_transform怎么用?Python RandomForestClassifier.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestClassifier
的用法示例。
在下文中一共展示了RandomForestClassifier.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: random_forest
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def random_forest(train_vec,train_label, n_estimators = 10, min_samples_split = 2, min_samples_leaf = 1, criterion = "entropy"):
model = RandomForestClassifier(n_estimators = 15, min_samples_split = 2, min_samples_leaf = 2,criterion = "gini")
model.fit_transform(train_vec,train_label)
print 'Random Forest Classification Accu: ' + str(model.score(train_vec,train_label))
return model
示例2: predict_on_test_set
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def predict_on_test_set(label_group):
sample = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_train.csv"))
test = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_test.csv"))
preprocessed = Preprocess(sample, which_labels = label_group)
rf = RandomForestClassifier(n_estimators = 80, criterion = "entropy", bootstrap = True, max_features = 'sqrt', max_depth = 40)
rf.fit_transform(X = preprocessed.features, y = preprocessed.labels.values.ravel())
test_preprocessed = Preprocess(test, which_labels = label_group)
predicted_labels = rf.predict(test_preprocessed.features)
error_rate, _ = benchmark(predicted_labels.ravel(), test_preprocessed.labels.values)
plot_feature_importances(preprocessed.features.columns.values, rf.feature_importances_, label_group)
示例3: RandomForestModel
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
class RandomForestModel(FreshnessModel):
def __init__(self, trainfile, testfile, extra_features_file = None):
super(RandomForestModel, self).__init__(trainfile, testfile, extra_features_file)
self.clf = RandomForestClassifier(n_estimators=10, max_depth=None)
def train(self, data = None, target = None):
if data is None:
data = self.data
if target is None:
target = self.target
self.clf.fit_transform(data, target)
def pred(self, X):
return self.clf.predict(X)
示例4: main
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def main():
#Loading the training set and test set
path1 = "C:\Python32\A2PW1.csv"
path2 = "C:\Python32\A2PW3.csv"
train = read_csv(path1, has_header = True)
target = [x[0] for x in train]
train = [x[1:] for x in train]
test = read_csv(path2, has_header = True)
test = [x[1:] for x in test]
print('The training set is:')
print(train)
print('The test set is:')
print(test)
#create the model
rf = RandomForestClassifier(n_estimators = 100)
#throw the data into model
rf.fit(train, target)
predicted_probs = rf.predict_log_proba(test)
print(predicted_probs)
output_file_path = "C:\Python32\pythontoday.txt"
numpy.savetxt(output_file_path, predicted_probs,delimiter=',',fmt='%1.4e')
newArr = rf.fit_transform(test,target)
print('newArr becomes: ',newArr)
示例5: cross_validate_number_of_trees
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def cross_validate_number_of_trees(label_group):
sample = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_train.csv"))
test = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_test.csv"))
preprocessed = Preprocess(sample, which_labels = label_group)
n_trees = (5, 10, 30, 60, 80)
oob_scores = []
for n_tree in n_trees:
rf = RandomForestClassifier(n_estimators = n_tree, criterion = "entropy", oob_score = True, bootstrap = True, max_features = 'sqrt', max_depth = 40)
rf.fit_transform(X = preprocessed.features, y = preprocessed.labels.values.ravel())
score = 1.0 - rf.oob_score_
oob_scores.append(score)
print "Out-of-Bag Error for Number of Trees %s: %s" % (n_tree, score)
plot_oob_error_n_tress(n_trees, oob_scores, label_group)
示例6: cross_validate_depth
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def cross_validate_depth(label_group):
sample = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_train.csv"))
test = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_test.csv"))
preprocessed = Preprocess(sample, which_labels = label_group)
depths = (2, 40, 60, 80)
oob_scores = []
for depth in depths:
rf = RandomForestClassifier(n_estimators = 80, criterion = "entropy", oob_score = True, bootstrap = True, max_features = 'sqrt', max_depth = depth)
rf.fit_transform(X = preprocessed.features, y = preprocessed.labels.values.ravel())
score = 1.0 - rf.oob_score_
oob_scores.append(score)
print "Out-of-Bag Error for Depth %s: %s" % (depth, score)
plot_oob_error_depth(depths, oob_scores, label_group)
示例7: ExtraTreesClassifier
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
del labels
# Parameters for Randomforest
random_state = 5342
n_jobs = 8
verbose = 2
clf1 = ExtraTreesClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)
clf2 = ExtraTreesClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)
clf3 = RandomForestClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)
clf4 = RandomForestClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)
# Start training
print('training started')
clf1.fit(train[:, :-1], train[:, -1])
X_new1 = clf1.transform(train[:, :-1])
X_new2 = clf3.fit_transform(train[:, :-1], train[:, -1])
# print('importances', clf1.feature_importances_)
clf2.fit(X_new1, train[:, -1])
clf4.fit(X_new2, train[:, -1])
print('training completed')
print('n_components = ', len(X_new1[0]), len(X_new2[0]))
# We don't need training set now
del train
# Dimensions for train set
ntest = 10873
nfeature = 16 ** 2 + 1 # For two_byte_codes, no_que_marks
test = np.zeros((ntest, nfeature), dtype=int)
Ids = [] # Required test set ids
示例8: scale_data
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
X = scale_data(X)
print("Features Data scaled")
# SGD = SGDClassifier(penalty='elasticnet',class_weight='auto',n_jobs=-1,n_iter=35,l1_ratio =0.2)
svc = LinearSVC(class_weight='auto')
model_rf = RandomForestClassifier(n_jobs=-1, bootstrap=True, n_estimators=180,
min_samples_leaf=3, min_samples_split =3,
criterion='gini',compute_importances=True, max_depth=6)
SVC_RBF= SVC(kernel="rbf", class_weight="auto", cache_size=2600, shrinking=True)
SVC_linear= SVC(kernel="poly", cache_size=2700, shrinking=True)
# model_rf.fit(X,y)
# X_SGD = model_rf.transform(X, threshold='1.5*mean') # forests!
X_SGD = model_rf.fit_transform(X,y)
print('X Reduced (by RF) features amount:')
print(X_SGD.shape)
def ReducedFeaturesDF(X,y):
'''
Returns a dataframe with only a subset of features/columns retained
'''
from sklearn.feature_selection import RFE
est = LinearSVC( penalty='l1', loss='l2', dual=False, class_weight='auto')
# selectK = SelectKBest(score_func = f_classif, k=45)
selectRFE = RFE(estimator=est, n_features_to_select=22, step=0.15)
selectK=selectRFE
selectK.fit(X,y)
selectK_mask=selectK.get_support()
示例9: RandomForest
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
#.........这里部分代码省略.........
raise BLE(ValueError('RandomForest can only classify CATEGORICAL '
'columns. Received {}'.format(targets)))
self.targets = [targets[0][0]]
# Obtain the condition columns.
if len(conditions) < 1:
raise BLE(ValueError('RandomForest requires at least one column in '
'conditions. Received {}'.format(conditions)))
self.conditions_categorical = []
self.conditions_numerical = []
for c in conditions:
if c[1].lower() == 'categorical':
self.conditions_categorical.append(c[0])
else:
self.conditions_numerical.append(c[0])
self.conditions = self.conditions_numerical + \
self.conditions_categorical
# The dataset.
self.dataset = pd.DataFrame()
# Lookup for categoricals to code.
self.categories_to_val_map = dict()
# Training set (regressors and labels)
self.X_numerical = np.ndarray(0)
self.X_categorical = np.ndarray(0)
self.Y = np.ndarray(0)
# Random Forests.
self.rf_partial = RandomForestClassifier(n_estimators=100)
self.rf_full = RandomForestClassifier(n_estimators=100)
# Preprocess the data.
self.dataset = utils.extract_sklearn_dataset(self.conditions,
self.targets, df)
self.categories_to_val_map = utils.build_categorical_to_value_map(
self.conditions_categorical, self.dataset)
self.X_categorical = utils.extract_sklearn_features_categorical(
self.conditions_categorical, self.categories_to_val_map,
self.dataset)
self.X_numerical = utils.extract_sklearn_features_numerical(
self.conditions_numerical, self.dataset)
self.Y = utils.extract_sklearn_univariate_target(self.targets,
self.dataset)
# Train the random forest.
self._train_rf()
def _train_rf(self):
"""Trains the random forests classifiers.
We train two classifiers, `partial` which is just trained on
`conditions_numerical`, and `full` which is trained on
`conditions_numerical+conditions_categorical`.
This safe-guard feature is critical for querying; otherwise sklearn
would crash whenever a categorical value unseen in training due to
filtering (but existant in df nevertheless) was passed in.
"""
# pylint: disable=no-member
self.rf_partial.fit_transform(self.X_numerical, self.Y)
self.rf_full.fit_transform(
np.hstack((self.X_numerical, self.X_categorical)), self.Y)
def _compute_targets_distribution(self, conditions):
"""Given conditions dict {feature_col:val}, returns the
distribution and (class mapping for lookup) of the random label
self.targets|conditions.
"""
if not set(self.conditions).issubset(set(conditions.keys())):
raise BLE(ValueError(
'Must specify values for all the conditionals.\n'
'Received: {}\n'
'Expected: {}'.format(conditions, self.conditions_numerical +
self.conditions_categorical)))
# Are there any category values in conditions which never appeared during
# training? If yes, we need to run the partial RF.
unseen = any([conditions[cat] not in self.categories_to_val_map[cat]
for cat in self.conditions_categorical])
X_numerical = [conditions[col] for col in self.conditions_numerical]
if unseen:
distribution = self.rf_partial.predict_proba(X_numerical)
classes = self.rf_partial.classes_
else:
X_categorical = [conditions[col] for col in
self.conditions_categorical]
X_categorical = utils.binarize_categorical_row(
self.conditions_categorical, self.categories_to_val_map,
X_categorical)
distribution = self.rf_full.predict_proba(
np.hstack((X_numerical, X_categorical)))
classes = self.rf_partial.classes_
return distribution[0], classes
def simulate(self, n_samples, conditions):
distribution, classes = self._compute_targets_distribution(conditions)
draws = self.prng.multinomial(1, distribution, size=n_samples)
return [classes[np.where(d==1)[0][0]] for d in draws]
def logpdf(self, value, conditions):
distribution, classes = self._compute_targets_distribution(conditions)
if value not in classes:
return -float('inf')
return np.log(distribution[np.where(classes==value)[0][0]])
示例10: make_test
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def make_test(train_source, test_source, light_type=None, validation=False, v_size=0.5, estimators=85):
train = read_csv(train_source)
tmp = open(train_source)
feature_count = None
for line in tmp:
feature_count = len(line.split(","))
break
trainX = np.asarray(train[range(1, feature_count)])
trainY = np.asarray(train[[0]]).ravel()
# print "All Data size: " + str(len(trainX))
testX = None
testY = None
if validation:
# --- CROSS VALIDATION ---
trainX, testX, trainY, testY = cross_validation.train_test_split(
trainX, trainY, test_size=v_size, random_state=0)
else:
# --- TEST DATA ---
test = read_csv(test_source)
testX = np.asarray(test[range(1, feature_count)])
testY = np.asarray(test[[0]]).ravel()
if len(testX) < 100:
return 0
print "Train size: " + str(len(trainX))
print "Test size: " + str(len(testX))
# --- KNN ---
# clf = KNeighborsClassifier(metric='minkowski', n_neighbors=1, p=2)
# --- SVM ---
# clf = svm.SVC()
# SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
# gamma=0.0, kernel='rbf', max_iter=-1, probability=False, random_state=None,
# shrinking=True, tol=0.001, verbose=False)
# --- Random Forest ---
clf = RandomForestClassifier(n_estimators=estimators)
clf.fit_transform(trainX, trainY)
true_false = 0
true_true = 0
false_true = 0
false_false = 0
true = 0
false = 0
for i in range(len(testY)):
answer = clf.predict(testX[i])
if testY[i] == True:
true += 1
else:
false += 1
# print str(answer[0]) + " " + str(testY[i])
if answer[0] == True and testY[i] == False:
true_false += 1
if answer[0] == True and testY[i] == True:
true_true += 1
if answer[0] == False and testY[i] == False:
false_false += 1
if answer[0] == False and testY[i] == True:
false_true += 1
if validation:
if true > 0:
print light_type + " true_true (precision): " + str(float(true_true)/float(true))
print light_type + " false_true: " + str(float(false_true)/float(true))
if false > 0:
print light_type + " true_false: " + str(float(true_false)/float(false))
print light_type + " false_false (precision): " + str(float(false_false)/float(false))
result = clf.score(testX, testY)
print "Main precision for " + light_type + ": " + str(result)
return result
示例11:
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
#binning model matrix,binned matrix==stat
stat, bin_edges, binnum = stats.binned_statistic(range(X.shape[1]), X, 'median', bins=int(bin_num))
#MODULO3
#APPLY THE MODEL AND PRINT THE RESULT
if model == 'svm.LinearSVC()':
clf=svm.LinearSVC(C=parameter)
if model == 'RandomForestClassifier()':
clf=RandomForestClassifier(n_estimators=parameters,n_jobs=-1)
if model == 'LinearDiscriminantAnlysis()':
clf=LinearDiscriminantAnalysis()
out = clf.fit(stat,y)
output = clf.fit_transform(stat, y)
'''
# Plot SVM contour
# Can't plot it because it is 160 dimensions
h = .02 # step size in the mesh
x_min, x_max = output[:, 0].min() - 1, output[:, 0].max() + 1
y_min, y_max = output[:, 1].min() - 1, output[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),np.arange(y_min, y_max, h))
arr_conc = np.c_[xx.ravel(), yy.ravel()] # concatenate two arrays together
#print (arr_conc)
#print (arr_conc.shape)
Z = clf.predict(arr_conc)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
示例12: xrange
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
StartRow = 1
StartTest = 12000
EndTest = 13000
train_file = [fin[x] for x in xrange(StartRow, StartTest) if operating[x] == 'PreK-12 Operating']
train_targets = [targets[x] for x in xrange(StartRow, StartTest) if operating[x] == 'PreK-12 Operating']
test_file = [fin[x] for x in xrange(StartTest, EndTest)]
#from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(min_df=1,charset_error='ignore')
X_train = vectorizer.fit_transform(train_file)
X_train = X_train.todense()
#from sklearn.naive_bayes import MultinomialNB
#from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
#clf = MultinomialNB()
#clf = SGDClassifier(loss="hinge", alpha=0.01, n_iter=200, fit_intercept=True)
clf = RandomForestClassifier(n_estimators = 500, compute_importances=True)
#MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
clf.fit_transform(X_train, train_targets)
testdata = vectorizer.transform(test_file)
testdata = testdata.todense()
output_predictions('predictions.csv',testdata, clf)
示例13: print
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
from sklearn.metrics import classification_report
print(classification_report(test_y, bnb_preds))
print(classification_report(test_y, gnb_preds))
print(classification_report(test_y, mnb_preds))
#%% Random Forest
from sklearn.ensemble import RandomForestClassifier
# Initialize a random forest
rc = RandomForestClassifier(n_estimators = 100,
max_features = 'auto',
max_depth = None,
min_samples_split = 2,
min_samples_leaf = 1,
verbose = 1,
warm_start = False,
class_weight = None)
# Train the forest
rc.fit_transform(train_X, train_y)
# Score the forest
rc_preds = rc.predict(test_X)
# Asses
print(classification_report(test_y, rc_preds))
示例14:
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
from sklearn.ensemble import RandomForestClassifier
import pickle
import sys
import numpy as np
X1=np.array(pickle.load(open('X2g_train.p', 'rb')))
X2=np.array(pickle.load(open('X3g_train.p', 'rb')))
X3=np.array(pickle.load(open('X4g_train.p', 'rb')))
X4=np.array(pickle.load(open('Xhead_train.p', 'rb')))
X=np.hstack((X2,X1,X3,X4))
y=np.array(pickle.load(open('y.p', 'rb')))
rf=RandomForestClassifier(n_estimators=200)
Xr=rf.fit_transform(X,y)
pickle.dump(Xr,open('X33_train_reproduce.p','wb'))
print(Xr.shape)
del X,X1,X2,X3,X4,Xr
X1=np.array(pickle.load(open('X2g_test.p', 'rb')))
X2=np.array(pickle.load(open('X3g_test.p', 'rb')))
X3=np.array(pickle.load(open('X4g_test.p', 'rb')))
X4=np.array(pickle.load(open('Xhead_test.p', 'rb')))
X=np.hstack((X2,X1,X3,X4))
Xr=rf.transform(X)
pickle.dump(Xr,open('X33_test_reproduce.p','wb'))
print(Xr.shape)
示例15: RFClassify
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def RFClassify(trainData,trainLabel,testData):
rfClf=RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False, class_weight=None)
rfClf.fit_transform(trainData, trainLabel)
testlabel=rfClf.predict(testData)
return testlabel