Python RandomForestClassifier.fit_transform方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.fit_transform方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestClassifier.fit_transform方法的具体用法？Python RandomForestClassifier.fit_transform怎么用？Python RandomForestClassifier.fit_transform使用的例子？那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestClassifier的用法示例。

在下文中一共展示了RandomForestClassifier.fit_transform方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: random_forest

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def random_forest(train_vec,train_label, n_estimators = 10, min_samples_split = 2, min_samples_leaf = 1, criterion = "entropy"):
	model = RandomForestClassifier(n_estimators = 15, min_samples_split = 2, min_samples_leaf = 2,criterion = "gini")
	model.fit_transform(train_vec,train_label)
	
	print 'Random Forest Classification Accu: ' + str(model.score(train_vec,train_label))
	
	return model

开发者ID:zhenv5，项目名称:WayToPython，代码行数:9，代码来源:classification_models.py

示例2: predict_on_test_set

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def predict_on_test_set(label_group):
  sample = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_train.csv"))
  test = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_test.csv"))

  preprocessed = Preprocess(sample, which_labels = label_group)

  rf = RandomForestClassifier(n_estimators = 80, criterion = "entropy", bootstrap = True, max_features = 'sqrt', max_depth = 40)
  rf.fit_transform(X = preprocessed.features, y = preprocessed.labels.values.ravel())
  test_preprocessed = Preprocess(test, which_labels = label_group)
  predicted_labels = rf.predict(test_preprocessed.features)
  error_rate, _ = benchmark(predicted_labels.ravel(), test_preprocessed.labels.values)

  plot_feature_importances(preprocessed.features.columns.values, rf.feature_importances_, label_group)

开发者ID:LiamFengLin，项目名称:151a-final-project，代码行数:15，代码来源:random_forest_why.py

示例3: RandomForestModel

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
class RandomForestModel(FreshnessModel):
  def __init__(self, trainfile, testfile, extra_features_file = None):
    super(RandomForestModel, self).__init__(trainfile, testfile, extra_features_file)
    self.clf = RandomForestClassifier(n_estimators=10, max_depth=None)

  def train(self, data = None, target = None):
    if data is None:
      data = self.data
    if target is None:
      target = self.target
    self.clf.fit_transform(data, target)

  def pred(self, X):
    return self.clf.predict(X)

开发者ID:arvs，项目名称:carlton，代码行数:16，代码来源:baseline.py

示例4: main

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def main():
    #Loading the training set and test set
    path1 = "C:\Python32\A2PW1.csv"
    path2 = "C:\Python32\A2PW3.csv"
    train = read_csv(path1, has_header = True)
    target = [x[0] for x in train]
    train = [x[1:] for x in train]
    test = read_csv(path2, has_header = True)
    test = [x[1:] for x in test]
    print('The training set is:')
    print(train)
    print('The test set is:')
    print(test)

    #create the model
    rf = RandomForestClassifier(n_estimators = 100)
    #throw the data into model
    rf.fit(train, target)
    predicted_probs = rf.predict_log_proba(test)
    print(predicted_probs)
    output_file_path = "C:\Python32\pythontoday.txt"
    numpy.savetxt(output_file_path, predicted_probs,delimiter=',',fmt='%1.4e')

    newArr = rf.fit_transform(test,target)
    print('newArr becomes: ',newArr)

开发者ID:vivafung，项目名称:Algorithms，代码行数:27，代码来源:sklearn.RandomForest.py

示例5: cross_validate_number_of_trees

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def cross_validate_number_of_trees(label_group):

  sample = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_train.csv"))
  test = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_test.csv"))

  preprocessed = Preprocess(sample, which_labels = label_group)

  n_trees = (5, 10, 30, 60, 80)
  oob_scores = []
  for n_tree in n_trees:
    rf = RandomForestClassifier(n_estimators = n_tree, criterion = "entropy", oob_score = True, bootstrap = True, max_features = 'sqrt', max_depth = 40)
    rf.fit_transform(X = preprocessed.features, y = preprocessed.labels.values.ravel())

    score = 1.0 - rf.oob_score_
    
    oob_scores.append(score)
    print "Out-of-Bag Error for Number of Trees %s: %s" % (n_tree, score)

  plot_oob_error_n_tress(n_trees, oob_scores, label_group)

开发者ID:LiamFengLin，项目名称:151a-final-project，代码行数:21，代码来源:random_forest_why.py

示例6: cross_validate_depth

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def cross_validate_depth(label_group):

  sample = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_train.csv"))
  test = pd.read_csv(join(SAMPLES_FILE_PATH, "sample_test.csv"))

  preprocessed = Preprocess(sample, which_labels = label_group)

  depths = (2, 40, 60, 80)
  oob_scores = []
  for depth in depths:
    rf = RandomForestClassifier(n_estimators = 80, criterion = "entropy", oob_score = True, bootstrap = True, max_features = 'sqrt', max_depth = depth)
    rf.fit_transform(X = preprocessed.features, y = preprocessed.labels.values.ravel())

    score = 1.0 - rf.oob_score_
    
    oob_scores.append(score)
    print "Out-of-Bag Error for Depth %s: %s" % (depth, score)

  plot_oob_error_depth(depths, oob_scores, label_group)

开发者ID:LiamFengLin，项目名称:151a-final-project，代码行数:21，代码来源:random_forest_why.py

示例7: ExtraTreesClassifier

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
del labels

# Parameters for Randomforest
random_state = 5342
n_jobs = 8
verbose = 2
clf1 = ExtraTreesClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)
clf2 = ExtraTreesClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)
clf3 = RandomForestClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)
clf4 = RandomForestClassifier(criterion='entropy', random_state=random_state, n_jobs=n_jobs, verbose=verbose)

# Start training
print('training started')
clf1.fit(train[:, :-1], train[:, -1])
X_new1 = clf1.transform(train[:, :-1])
X_new2 = clf3.fit_transform(train[:, :-1], train[:, -1])
# print('importances', clf1.feature_importances_)
clf2.fit(X_new1, train[:, -1])
clf4.fit(X_new2, train[:, -1])
print('training completed')

print('n_components = ', len(X_new1[0]), len(X_new2[0]))

# We don't need training set now
del train

# Dimensions for train set
ntest = 10873
nfeature = 16 ** 2 + 1  # For two_byte_codes, no_que_marks
test = np.zeros((ntest, nfeature), dtype=int)
Ids = []  # Required test set ids

开发者ID:canast02，项目名称:microsoft-malware-classification-challenge，代码行数:33，代码来源:solution4.py

示例8: scale_data

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
    X = scale_data(X)
    print("Features Data scaled")

#    SGD = SGDClassifier(penalty='elasticnet',class_weight='auto',n_jobs=-1,n_iter=35,l1_ratio =0.2)
    svc = LinearSVC(class_weight='auto')
    model_rf = RandomForestClassifier(n_jobs=-1, bootstrap=True, n_estimators=180,
                                        min_samples_leaf=3, min_samples_split =3,
                                        criterion='gini',compute_importances=True, max_depth=6)

    SVC_RBF= SVC(kernel="rbf", class_weight="auto", cache_size=2600, shrinking=True)
    SVC_linear= SVC(kernel="poly", cache_size=2700, shrinking=True)


    # model_rf.fit(X,y)
    # X_SGD = model_rf.transform(X, threshold='1.5*mean') # forests!
    X_SGD = model_rf.fit_transform(X,y)
    print('X Reduced (by RF) features amount:')
    print(X_SGD.shape)

    def ReducedFeaturesDF(X,y):
        '''
        Returns a dataframe with only a subset of features/columns retained
        '''
        from sklearn.feature_selection import RFE
        est = LinearSVC( penalty='l1', loss='l2', dual=False, class_weight='auto')
#        selectK = SelectKBest(score_func = f_classif, k=45)
        selectRFE = RFE(estimator=est, n_features_to_select=22, step=0.15)
        selectK=selectRFE

        selectK.fit(X,y)
        selectK_mask=selectK.get_support()

开发者ID:MichaelDoron，项目名称:ProFET，代码行数:33，代码来源:Model_Parameters_CV.py

示例9: RandomForest

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]

#.........这里部分代码省略.........
            raise BLE(ValueError('RandomForest can only classify CATEGORICAL '
                'columns. Received {}'.format(targets)))
        self.targets = [targets[0][0]]
        # Obtain the condition columns.
        if len(conditions) < 1:
            raise BLE(ValueError('RandomForest requires at least one column in '
                'conditions. Received {}'.format(conditions)))
        self.conditions_categorical = []
        self.conditions_numerical = []
        for c in conditions:
            if c[1].lower() == 'categorical':
                self.conditions_categorical.append(c[0])
            else:
                self.conditions_numerical.append(c[0])
        self.conditions = self.conditions_numerical + \
            self.conditions_categorical
        # The dataset.
        self.dataset = pd.DataFrame()
        # Lookup for categoricals to code.
        self.categories_to_val_map = dict()
        # Training set (regressors and labels)
        self.X_numerical = np.ndarray(0)
        self.X_categorical = np.ndarray(0)
        self.Y = np.ndarray(0)
        # Random Forests.
        self.rf_partial = RandomForestClassifier(n_estimators=100)
        self.rf_full = RandomForestClassifier(n_estimators=100)
        # Preprocess the data.
        self.dataset = utils.extract_sklearn_dataset(self.conditions,
            self.targets, df)
        self.categories_to_val_map = utils.build_categorical_to_value_map(
            self.conditions_categorical, self.dataset)
        self.X_categorical = utils.extract_sklearn_features_categorical(
            self.conditions_categorical, self.categories_to_val_map,
            self.dataset)
        self.X_numerical = utils.extract_sklearn_features_numerical(
            self.conditions_numerical, self.dataset)
        self.Y = utils.extract_sklearn_univariate_target(self.targets,
            self.dataset)
        # Train the random forest.
        self._train_rf()

    def _train_rf(self):
        """Trains the random forests classifiers.

        We train two classifiers, `partial` which is just trained on
        `conditions_numerical`, and `full` which is trained on
        `conditions_numerical+conditions_categorical`.

        This safe-guard feature is critical for querying; otherwise sklearn
        would crash whenever a categorical value unseen in training due to
        filtering (but existant in df nevertheless) was passed in.
        """
        # pylint: disable=no-member
        self.rf_partial.fit_transform(self.X_numerical, self.Y)
        self.rf_full.fit_transform(
            np.hstack((self.X_numerical, self.X_categorical)), self.Y)

    def _compute_targets_distribution(self, conditions):
        """Given conditions dict {feature_col:val}, returns the
        distribution and (class mapping for lookup) of the random label
        self.targets|conditions.
        """
        if not set(self.conditions).issubset(set(conditions.keys())):
            raise BLE(ValueError(
                'Must specify values for all the conditionals.\n'
                'Received: {}\n'
                'Expected: {}'.format(conditions, self.conditions_numerical +
                self.conditions_categorical)))

        # Are there any category values in conditions which never appeared during
        # training? If yes, we need to run the partial RF.
        unseen = any([conditions[cat] not in self.categories_to_val_map[cat]
            for cat in self.conditions_categorical])

        X_numerical = [conditions[col] for col in self.conditions_numerical]
        if unseen:
            distribution = self.rf_partial.predict_proba(X_numerical)
            classes = self.rf_partial.classes_
        else:
            X_categorical = [conditions[col] for col in
                self.conditions_categorical]
            X_categorical = utils.binarize_categorical_row(
                self.conditions_categorical, self.categories_to_val_map,
                X_categorical)
            distribution = self.rf_full.predict_proba(
                np.hstack((X_numerical, X_categorical)))
            classes = self.rf_partial.classes_
        return distribution[0], classes

    def simulate(self, n_samples, conditions):
        distribution, classes = self._compute_targets_distribution(conditions)
        draws = self.prng.multinomial(1, distribution, size=n_samples)
        return [classes[np.where(d==1)[0][0]] for d in draws]

    def logpdf(self, value, conditions):
        distribution, classes = self._compute_targets_distribution(conditions)
        if value not in classes:
            return -float('inf')
        return np.log(distribution[np.where(classes==value)[0][0]])

开发者ID:jayelm，项目名称:bdbcontrib，代码行数:104，代码来源:random_forest.py

示例10: make_test

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def make_test(train_source, test_source, light_type=None, validation=False, v_size=0.5, estimators=85):
	train = read_csv(train_source)
	tmp = open(train_source)
	feature_count = None
	for line in tmp:
		feature_count = len(line.split(","))
		break

	trainX = np.asarray(train[range(1, feature_count)])
	trainY = np.asarray(train[[0]]).ravel()
	# print "All Data size: " + str(len(trainX))
	testX = None
	testY = None

	if validation:
		# --- CROSS VALIDATION ---
		trainX, testX, trainY, testY = cross_validation.train_test_split(
			trainX, trainY, test_size=v_size, random_state=0)
	else:
		# --- TEST DATA ---
		test = read_csv(test_source)
		testX = np.asarray(test[range(1, feature_count)])
		testY = np.asarray(test[[0]]).ravel()
	if len(testX) < 100:
		return 0
	print "Train size: " + str(len(trainX))
	print "Test size: " + str(len(testX))

	# --- KNN ---
	# clf = KNeighborsClassifier(metric='minkowski', n_neighbors=1, p=2)

	# --- SVM ---
	# clf = svm.SVC()
	# SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
	# gamma=0.0, kernel='rbf', max_iter=-1, probability=False, random_state=None,
	# shrinking=True, tol=0.001, verbose=False)

	# --- Random Forest ---
	clf = RandomForestClassifier(n_estimators=estimators)


	clf.fit_transform(trainX, trainY)

	true_false = 0
	true_true = 0
	false_true = 0
	false_false = 0
	true = 0
	false = 0
	for i in range(len(testY)):
		answer = clf.predict(testX[i])
		if testY[i] == True:
			true += 1
		else:
			false += 1
		# print str(answer[0]) + " " + str(testY[i])
		if answer[0] == True and testY[i] == False:
			true_false += 1
		if answer[0] == True and testY[i] == True:
			true_true += 1
		if answer[0] == False and testY[i] == False:
			false_false += 1
		if answer[0] == False and testY[i] == True:
			false_true += 1
	if validation:
		if true > 0:
			print light_type + " true_true (precision): " + str(float(true_true)/float(true))
			print light_type + " false_true: " + str(float(false_true)/float(true))
		if false > 0:
			print light_type + " true_false: " + str(float(true_false)/float(false))
			print light_type + " false_false (precision): " + str(float(false_false)/float(false))

	result = clf.score(testX, testY)
	print "Main precision for " + light_type + ": " + str(result)
	return result

开发者ID:junk2112，项目名称:detector，代码行数:77，代码来源:classificator.py

示例11:

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
#binning model matrix,binned matrix==stat 
stat, bin_edges, binnum = stats.binned_statistic(range(X.shape[1]), X, 'median', bins=int(bin_num))

#MODULO3
#APPLY THE MODEL AND PRINT THE RESULT

if model == 'svm.LinearSVC()':
    clf=svm.LinearSVC(C=parameter)
if model == 'RandomForestClassifier()': 
    clf=RandomForestClassifier(n_estimators=parameters,n_jobs=-1)
if model == 'LinearDiscriminantAnlysis()': 
    clf=LinearDiscriminantAnalysis()

out = clf.fit(stat,y)
output = clf.fit_transform(stat, y)

'''
# Plot SVM contour
# Can't plot it because it is 160 dimensions
h = .02  # step size in the mesh
x_min, x_max = output[:, 0].min() - 1, output[:, 0].max() + 1
y_min, y_max = output[:, 1].min() - 1, output[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),np.arange(y_min, y_max, h))
arr_conc = np.c_[xx.ravel(), yy.ravel()] # concatenate two arrays together
#print (arr_conc)
#print (arr_conc.shape)
Z = clf.predict(arr_conc)
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

开发者ID:vikul-gupta，项目名称:wv-ml-spectra，代码行数:32，代码来源:final_result.py

示例12: xrange

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
StartRow = 1
StartTest = 12000
EndTest = 13000

train_file = [fin[x] for x in xrange(StartRow, StartTest) if operating[x] == 'PreK-12 Operating']
train_targets = [targets[x] for x in xrange(StartRow, StartTest) if operating[x] == 'PreK-12 Operating']
test_file = [fin[x] for x in xrange(StartTest, EndTest)]


#from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(min_df=1,charset_error='ignore')

X_train = vectorizer.fit_transform(train_file)
X_train = X_train.todense()

#from sklearn.naive_bayes import MultinomialNB
#from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
#clf = MultinomialNB()
#clf = SGDClassifier(loss="hinge", alpha=0.01, n_iter=200, fit_intercept=True)
clf = RandomForestClassifier(n_estimators = 500, compute_importances=True)
#MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
clf.fit_transform(X_train, train_targets)

testdata = vectorizer.transform(test_file)
testdata = testdata.todense()


output_predictions('predictions.csv',testdata, clf)

开发者ID:datu925，项目名称:codomator，代码行数:32，代码来源:randomforest1.py

示例13: print

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
from sklearn.metrics import classification_report

print(classification_report(test_y, bnb_preds))
print(classification_report(test_y, gnb_preds))
print(classification_report(test_y, mnb_preds))

#%% Random Forest
from sklearn.ensemble import RandomForestClassifier

# Initialize a random forest
rc = RandomForestClassifier(n_estimators = 100,
                            max_features = 'auto',
                            max_depth = None,
                            min_samples_split = 2,
                            min_samples_leaf = 1,
                            verbose = 1,
                            warm_start = False,
                            class_weight = None)

# Train the forest
rc.fit_transform(train_X, train_y)

# Score the forest
rc_preds = rc.predict(test_X)

# Asses
print(classification_report(test_y, rc_preds))

开发者ID:brandoncwn，项目名称:Gartner，代码行数:29，代码来源:benchmark1.py

示例14:

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
from sklearn.ensemble import RandomForestClassifier
import pickle
import sys
import numpy as np

X1=np.array(pickle.load(open('X2g_train.p', 'rb')))
X2=np.array(pickle.load(open('X3g_train.p', 'rb')))
X3=np.array(pickle.load(open('X4g_train.p', 'rb')))
X4=np.array(pickle.load(open('Xhead_train.p', 'rb')))

X=np.hstack((X2,X1,X3,X4))
y=np.array(pickle.load(open('y.p', 'rb')))
rf=RandomForestClassifier(n_estimators=200)
Xr=rf.fit_transform(X,y)
pickle.dump(Xr,open('X33_train_reproduce.p','wb'))
print(Xr.shape)
del X,X1,X2,X3,X4,Xr

X1=np.array(pickle.load(open('X2g_test.p', 'rb')))
X2=np.array(pickle.load(open('X3g_test.p', 'rb')))
X3=np.array(pickle.load(open('X4g_test.p', 'rb')))
X4=np.array(pickle.load(open('Xhead_test.p', 'rb')))
X=np.hstack((X2,X1,X3,X4))
Xr=rf.transform(X)
pickle.dump(Xr,open('X33_test_reproduce.p','wb'))
print(Xr.shape)

开发者ID:ybdesire，项目名称:malware_analysis，代码行数:28，代码来源:getfea.py

示例15: RFClassify

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit_transform [as 别名]
def RFClassify(trainData,trainLabel,testData):
    rfClf=RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False, class_weight=None)
    rfClf.fit_transform(trainData, trainLabel)
    testlabel=rfClf.predict(testData)
    return testlabel

开发者ID:doriszyj，项目名称:SVM-RF，代码行数:7，代码来源:SVM&RF.py

注：本文中的sklearn.ensemble.RandomForestClassifier.fit_transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。