

Python ExtraTreesClassifier.transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.ensemble.ExtraTreesClassifier.transform. If you are wondering what ExtraTreesClassifier.transform does, how to call it, or where it is used in practice, the curated examples below should help. You can also explore the broader usage of sklearn.ensemble.ExtraTreesClassifier, the class this method belongs to.


This article presents 15 code examples of the ExtraTreesClassifier.transform method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
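
Note on API status: the importance-based transform method of ExtraTreesClassifier was deprecated in scikit-learn 0.17 and removed in 0.19, so the examples below only run on older releases. As a minimal sketch of the modern equivalent (data and threshold are illustrative, not taken from any example below), wrap a fitted forest in sklearn.feature_selection.SelectFromModel:

# Modern replacement for forest.transform(X), scikit-learn >= 0.17
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

X = np.random.rand(100, 20)
y = np.random.randint(0, 2, size=100)

forest = ExtraTreesClassifier(n_estimators=100).fit(X, y)
selector = SelectFromModel(forest, prefit=True, threshold="mean")
X_selected = selector.transform(X)  # keeps features whose importance >= the mean importance
print(X_selected.shape)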

Example 1: feature_engineering_common

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
def feature_engineering_common(Y, X, X1):
    print "### Shape of training set (X)", X.shape
    print "### Shape of labels (Y)", Y.shape
    print "### Shape of Kaggle Test set (X1)", X1.shape

    # Scale features
    scaler = preprocessing.StandardScaler()
    X_SCALED = scaler.fit_transform(X)
    X1_SCALED = scaler.transform(X1)
    print "### (After scaling) Shape of training set", X_SCALED.shape
    print "### (After scaling ) Shape of Kaggle Test set", X1_SCALED.shape

    # Find important features using extremely randomized trees
    xtClf = ExtraTreesClassifier().fit(X_SCALED, Y)
    X_SCALED_SUBSET = xtClf.transform(X_SCALED)
    X1_SCALED_SUBSET = xtClf.transform(X1_SCALED)
    importances = xtClf.feature_importances_
    print xtClf.feature_importances_
    print "### (After scaling & feature selection using Random Forrest) Shape of training set", X_SCALED_SUBSET.shape
    print "### (After scaling & feature selection using Random Forrest) Shape of Kaggle Test set", X1_SCALED_SUBSET.shape

    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in xrange(10):
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
Author: sayantansatpati, Project: ml, Lines: 29, Source: bike-sharing.py
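
A hypothetical invocation of the function above, with synthetic data and illustrative shapes; it assumes Python 2 and a pre-0.19 scikit-learn, and that feature_engineering_common is defined in the same module:

# Illustrative call with random data; the shapes are made up for the sketch
import numpy as np
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesClassifier

Y = np.random.randint(0, 4, size=200)   # 200 labels across 4 classes
X = np.random.rand(200, 12)             # training features
X1 = np.random.rand(50, 12)             # held-out "Kaggle" test features
feature_engineering_common(Y, X, X1)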

Example 2: model

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
def model(TRAIN_FX, KAGGLE_TEST_FX):
    # Feature Selection & Scaling
    TRAIN_FX_FS, KAGGLE_TEST_FX_FS  = feature_selection(TRAIN_FX, KAGGLE_TEST_FX)
    Y = TRAIN_FX_FS['count'].values
    X = TRAIN_FX_FS.drop('count', axis=1).values
    X1 = KAGGLE_TEST_FX_FS.values
    print "### Shape of training set (X)", X.shape
    print "### Shape of labels (Y)", Y.shape
    print "### Shape of Kaggle Test set (X1)", X1.shape

    # Scale features
    scaler = preprocessing.StandardScaler()
    X_SCALED = scaler.fit_transform(X)
    X1_SCALED = scaler.transform(X1)
    print "### (After scaling) Shape of training set", X_SCALED.shape
    print "### (After scaling ) Shape of Kaggle Test set", X1_SCALED.shape

    # Find important features using extremely randomized trees
    xtClf = ExtraTreesClassifier().fit(X_SCALED, Y)
    X_SCALED_SUBSET = xtClf.transform(X_SCALED)
    X1_SCALED_SUBSET = xtClf.transform(X1_SCALED)
    importances = xtClf.feature_importances_
    print xtClf.feature_importances_
    print "### (After scaling & feature selection using Random Forrest) Shape of training set", X_SCALED_SUBSET.shape
    print "### (After scaling & feature selection using Random Forrest) Shape of Kaggle Test set", X1_SCALED_SUBSET.shape

    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in xrange(10):
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

    # Random Forest with cross-validation
    rf = ensemble.RandomForestRegressor(n_estimators=100)
    ss = cross_validation.ShuffleSplit(X.shape[0], n_iter=5, test_size=0.25, random_state=0)
    count = 1
    for train_index, test_index in ss:
        print("\n [Iteration:%d] Num of Training: %s,  Num of Test: %s" % (count, len(train_index), len(test_index)))
        # Train the model using the training sets
        rf.fit(X[train_index], Y[train_index])

        # Model Summary
        output_model_summary(rf, X[test_index], Y[test_index])

        count += 1

    # Train the model using the entire data set
    rf.fit(X, Y)
    pred = rf.predict(X1)
    generate_kaggle_submission(pred, "rf.csv")
Author: sayantansatpati, Project: ml, Lines: 53, Source: bike-sharing.py

Example 3: multiclass_predict

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
def multiclass_predict(train_data,labels,valid_data,test_data,output_dir,time_budget,target_num, is_sparse):
    print(strftime("%Y-%m-%d %H:%M:%S"))
    print("make multiclass prediction\n")
    np_seed = int(time.time())
    np.random.seed(np_seed)
    print ("np seed = " , np_seed)
    print(train_data.shape)

    print("train_data.shape == (%d,%d)\n"%train_data.shape)
    n_features = train_data.shape[1]
    n_samples = train_data.shape[0]
    start_time = time.time()
    if is_sparse:
        print("no FS, it is sparse data\n")
        train_data=train_data.toarray()
        valid_data=valid_data.toarray()
        test_data=test_data.toarray()
        # train_data = select_clf.transform(train_data,threshold=my_mean )
        # valid_data = select_clf.transform(valid_data,threshold=my_mean )
        # test_data = select_clf.transform(test_data,threshold=my_mean)
        print("sparse converting time = ", time.time() - start_time)
        start_time = time.time()


    FS_iterations = max(1, int(5000 / target_num * (5000. / n_samples) * 2000. / n_features))
    print("FS_iterations = %d\n" % FS_iterations)
    select_clf = ExtraTreesClassifier(n_estimators=FS_iterations, max_depth=3)
    select_clf.fit(train_data, labels)
    print("FS time = ", time.time() - start_time)

    my_mean = 1. / (10 * n_features)
    train_data = select_clf.transform(train_data, threshold=my_mean)
    valid_data = select_clf.transform(valid_data, threshold=my_mean)
    test_data = select_clf.transform(test_data, threshold=my_mean)
    print(my_mean)
    print(train_data.shape)

    ######################### Make validation/test predictions
    n_features=train_data.shape[1]
    if n_features < 100:
        gbt_features=n_features
    else:
        gbt_features=int(n_features**0.5)
    gbt_iterations= int((time_budget / 3000.) * 3000000/(gbt_features * target_num) * (7000./n_samples))
    gbt_params=GBT_params(n_iterations=gbt_iterations,depth=int(10 * np.log2(gbt_iterations)/14.3), learning_rate=0.01,subsample_part=0.6,n_max_features=gbt_features,min_samples_split=5, min_samples_leaf=3)
    gbt_params.print_params()
    (y_valid, y_test) = make_classification(gbt_params, train_data, labels, valid_data, test_data)
    print("y_valid.shape = ",y_valid.shape )
    print("y_test.shape = ",y_test.shape )
    return (y_valid, y_test)
Author: vkocheganov, Project: AutoML_Phase2, Lines: 52, Source: multiclass.py
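
The iteration budget and the selection threshold above are heuristics scaled by the class count, sample count, and feature count. A quick numeric check of those formulas at reference sizes (the sizes are mine, purely illustrative):

# Evaluating the budget heuristics at target_num=10, n_samples=5000, n_features=2000
target_num, n_samples, n_features = 10, 5000, 2000

FS_iterations = max(1, int(5000 / target_num * (5000. / n_samples) * 2000. / n_features))
print(FS_iterations)  # 500 shallow trees for feature selection

my_mean = 1. / (10 * n_features)
print(my_mean)        # 5e-05: discards only features with near-zero importance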

Example 4: remove_feature_tree_based

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
def remove_feature_tree_based(train_X,train_Y):
    '''
    Removes features based on trees - see sklearn:
    http://scikit-learn.org/dev/auto_examples/ensemble/plot_forest_importances.html#example-ensemble-plot-forest-importances-py

    Actually removes based on "importance"
    '''
    forest = ExtraTreesClassifier(n_estimators=1000,
                                  compute_importances=True,
                                  random_state=0)

    forest.fit(train_X, train_Y)
    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_],
                  axis=0)
    indices = np.argsort(importances)[::-1]

    x_labels = ['rc1', 'rc2', 'dca1', 'dca2','dcm1', 'dcm2','ace1','ace2','acsc1', 'acsc2', 'acsv1', 'acsv2', 'acss1','acss2', 'acsk1', 'acsk2', 'taca1', 'taca2', 'tdc1', 'tdc2', 'gmin', 'gmean', 'trd','ep111','ep112','ep211', 'ep212', 'ep311','ep312', 'ep411','ep412','ep511','ep512','ep611','ep612','ep121','ep122','ep221', 'ep222', 'ep321','ep322', 'ep421','ep422','ep521','ep522','ep621','ep622']

    # Print the feature ranking
    print "Feature ranking:"

    for f in xrange(46):
        print "%d. feature %s (%f)" % (f + 1, x_labels[indices[f]], importances[indices[f]])

    # Transform the data to have only the features that are important
    x_new = forest.transform(train_X)

    return (forest, x_new)
Author: IanTheEngineer, Project: Penn-haptics-bolt, Lines: 31, Source: train_adjective_phase_feature_selection.py

Example 5: process_data

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
    def process_data(self):
        test = pandas.read_csv("test.csv")
        testMat = test.as_matrix()

        train = pandas.read_csv("train.csv")
        trainMat = train.as_matrix()
        trainResult = trainMat[:, 0]
        trainMat = trainMat[:, 1:]

        # trainInd = np.where(trainResult == 0)[0]
        # how_many = (trainResult == 1).sum() - len(trainInd)
        # np.random.shuffle(trainInd)
        # addedResult = trainResult[trainInd[:how_many],:]
        # addedData = trainMat[trainInd[:how_many],:]
        # trainResult = np.append(trainResult,addedResult)
        # trainMat = np.vstack((trainMat,addedData))

        cv = StratifiedKFold(trainResult, 2)
        # cv = KFold(n=trainResult.shape[0],k=2)
        reduceFeatures = ExtraTreesClassifier(
            compute_importances=True, random_state=1234, n_jobs=self.cpus, n_estimators=1000, criterion="gini"
        )
        reduceFeatures.fit(trainMat, trainResult)
        trainScaler = Scaler()

        self.cv_data = []
        self.cv_data_nonreduced = []
        for train, test in cv:
            X_train, X_test = trainMat[train, :], trainMat[test, :]
            # trainResult is 1-D, so it is indexed without a second axis
            Y_train, Y_test = trainResult[train], trainResult[test]
            X_train = trainScaler.fit_transform(X_train)
            X_test = trainScaler.transform(X_test)
            self.cv_data_nonreduced.append((X_train, X_test, Y_train, Y_test))
            X_train = reduceFeatures.transform(X_train)
            X_test = reduceFeatures.transform(X_test)
            self.cv_data.append((X_train, X_test, Y_train, Y_test))
        testMat = trainScaler.transform(testMat)
        self.testMat_nonreduced = testMat
        self.testMat = reduceFeatures.transform(testMat)
        allData = self.testMat, self.cv_data, self.testMat_nonreduced, self.cv_data_nonreduced
        data_handle = open("allData.pkl", "wb")  # binary mode for pickle
        pickle.dump(allData, data_handle)
        data_handle.close()
Author: JakeMick, Project: kaggle, Lines: 49, Source: holistic.py

Example 6: getSelectedValues

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
    def getSelectedValues(self):
        (train, trainLabels, test) = self.getScaledValues()

        selector = ExtraTreesClassifier(compute_importances=True, random_state=0)
        train = selector.fit_transform(train, trainLabels)
        test = selector.transform(test)  # apply the same selection to the test set

        return (train, trainLabels, test)
Author: jreiberkyle, Project: Kaggle_Data-Science-London, Lines: 10, Source: prediction.py

Example 7: tree_select

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
def tree_select(trainSet, testSet): 	# input as numpy array
	from sklearn.ensemble import ExtraTreesClassifier
	#import matplotlib.pyplot as plt
	import numpy
	
	X, y = trainSet[:,1:], trainSet[:,0]
	#print [X.shape, y.shape]
	clf = ExtraTreesClassifier(max_depth=10, n_jobs=-1, bootstrap=True, n_estimators=25)
	clf.fit(X, y)	
	importances = clf.feature_importances_
	#std = numpy.std([tree.feature_importances_ for tree in clf.estimators_], axis=0)
	indices = numpy.argsort(importances)[::-1]
	print("Feature ranking:")
	
	for f in range(importances.shape[0]):
	    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

	# Plot the feature importances of the forest
	'''
	plt.figure()
	plt.title("Feature importances")
	plt.bar(range(10), importances[indices],
	       color="r", yerr=std[indices], align="center")
	plt.xticks(range(10), indices)
	plt.xlim([-1, 10])
	plt.show()
	
	#clf.feature_importances_  	
	#print X_new.shape
	'''
	
	testSet = clf.transform(testSet)
	X_new = clf.transform(X)
	#raw_input('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
	#
	return numpy.hstack((numpy.reshape(y,(y.shape[0],1)), X_new)), testSet
Author: frankShih, Project: TimeSeriesVectorization, Lines: 38, Source: featSelet.py

Example 8: ExtraTreesClassifier

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
import numpy as np
from sklearn import preprocessing as pp
from sklearn import cross_validation as cv
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC

workDir = r'C:\users\Akshay\Downloads\kaggle\\'

# Read data
train = np.genfromtxt(open(workDir + 'train.csv','rb'), delimiter=',')
target = np.genfromtxt(open(workDir + 'trainLabels.csv','rb'), delimiter=',')
test = np.genfromtxt(open(workDir + 'test.csv','rb'), delimiter=',')

# Scale data
train = pp.scale(train)
test = pp.scale(test)

# Select features
selector = ExtraTreesClassifier(compute_importances=True, random_state=0)
train = selector.fit_transform(train, target)
test = selector.transform(test)

# Estimate score
classifier = SVC(C=8, gamma=0.17)
scores = cv.cross_val_score(classifier, train, target, cv=30)
print('Estimated score: %0.5f (+/- %0.5f)' % (scores.mean(), scores.std() / 2))

# Predict and save
result = classifier.fit(train, target).predict(test)
np.savetxt(workDir + 'a.csv', result, fmt='%d')
Author: akshayah3, Project: Kaggle, Lines: 32, Source: data_science_london.py

Example 9: ExtraTreesPreprocessor

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
class ExtraTreesPreprocessor(AutoSklearnPreprocessingAlgorithm):
    def __init__(self, n_estimators, criterion, min_samples_leaf,
                 min_samples_split, max_features,
                 max_leaf_nodes_or_max_depth="max_depth",
                 bootstrap=False, max_leaf_nodes=None, max_depth="None",
                 min_weight_fraction_leaf=0.0,
                 oob_score=False, n_jobs=1, random_state=None, verbose=0,
                 class_weight=None):

        self.n_estimators = int(n_estimators)
        self.estimator_increment = 10
        if criterion not in ("gini", "entropy"):
            raise ValueError("'criterion' is not in ('gini', 'entropy'): "
                             "%s" % criterion)
        self.criterion = criterion

        if max_leaf_nodes_or_max_depth == "max_depth":
            self.max_leaf_nodes = None
            if max_depth == "None":
                self.max_depth = None
            else:
                self.max_depth = int(max_depth)
                # if use_max_depth == "True":
                #    self.max_depth = int(max_depth)
                #elif use_max_depth == "False":
                #    self.max_depth = None
        else:
            if max_leaf_nodes == "None":
                self.max_leaf_nodes = None
            else:
                self.max_leaf_nodes = int(max_leaf_nodes)
            self.max_depth = None

        self.min_samples_leaf = int(min_samples_leaf)
        self.min_samples_split = int(min_samples_split)

        self.max_features = float(max_features)

        if bootstrap == "True":
            self.bootstrap = True
        elif bootstrap == "False":
            self.bootstrap = False

        self.oob_score = oob_score
        self.n_jobs = int(n_jobs)
        self.random_state = random_state
        self.verbose = int(verbose)
        self.class_weight = class_weight
        self.preprocessor = None

    def fit(self, X, Y, sample_weight=None):
        from sklearn.ensemble import ExtraTreesClassifier

        num_features = X.shape[1]
        max_features = int(
            float(self.max_features) * (np.log(num_features) + 1))
        # Use at most half of the features
        max_features = max(1, min(int(X.shape[1] / 2), max_features))
        self.preprocessor = ExtraTreesClassifier(
            n_estimators=0, criterion=self.criterion,
            max_depth=self.max_depth, min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf, bootstrap=self.bootstrap,
            max_features=max_features, max_leaf_nodes=self.max_leaf_nodes,
            oob_score=self.oob_score, n_jobs=self.n_jobs, verbose=self.verbose,
            random_state=self.random_state, class_weight=self.class_weight,
            warm_start=True
        )
        # JTS TODO: I think we might have to copy here if we want self.estimator
        # to always be consistent on sigabort
        while len(self.preprocessor.estimators_) < self.n_estimators:
            tmp = self.preprocessor  # TODO copy ?
            tmp.n_estimators += self.estimator_increment
            tmp.fit(X, Y, sample_weight=sample_weight)
            self.preprocessor = tmp
        return self

    def transform(self, X):
        if self.preprocessor is None:
            raise NotImplementedError
        return self.preprocessor.transform(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'ET',
                'name': 'Extra Trees Classifier Preprocessing',
                'handles_missing_values': False,
                'handles_nominal_values': False,
                'handles_numerical_features': True,
                'prefers_data_scaled': False,
                # TODO find out if this is good because of sparsity...
                'prefers_data_normalized': False,
                'handles_regression': False,
                'handles_classification': True,
                'handles_multiclass': True,
                'handles_multilabel': True,
                'is_deterministic': True,
                'handles_sparse': False,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (INPUT,),
                # TODO find out what is best used here!
#......... remaining code omitted .........
Author: stokasto, Project: auto-sklearn, Lines: 103, Source: extra_trees_preproc_for_classification.py
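
The fit loop above grows the forest in increments via warm_start rather than training it in one shot, so a run aborted on a time budget still leaves a usable model. A standalone sketch of that pattern with plain scikit-learn (the increment and the target of 100 trees are illustrative):

# Warm-start growth: each fit() call adds trees instead of retraining from scratch
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier

X = np.random.rand(300, 20)
y = np.random.randint(0, 2, size=300)

forest = ExtraTreesClassifier(n_estimators=10, warm_start=True)
forest.fit(X, y)
while len(forest.estimators_) < 100:
    forest.n_estimators += 10  # grow by 10 trees per round
    forest.fit(X, y)           # warm_start keeps the trees already built
print(len(forest.estimators_))  # 100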

Example 10: ExtraTreesClassifier

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
X = train[:, :-1]
y = train[:, -1]

del labels
del train

# Parameters for trees
random_state = 5342
n_jobs = 8
verbose = 1
n_estimators = 89
# ExtraTreesClassifier - feature selection
clf1 = ExtraTreesClassifier(criterion='gini', random_state=random_state, n_jobs=n_jobs, verbose=verbose, n_estimators=n_estimators, max_features=None)

clf1.fit(X, y)
X_new = clf1.transform(X, '0.5*median')
X = X_new

# Initialize classifier
clf = KNeighborsClassifier(n_neighbors=20, p=1)

# Start training
print('training started')

############################
# test log loss
print('computing log loss')
kf = cross_validation.KFold(ntrain, n_folds=4)

_logloss = 0.0
for trainIndex, testIndex in kf:
Author: canast02, Project: microsoft-malware-classification-challenge, Lines: 33, Source: solution5.py
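
The string threshold '0.5*median' keeps features whose importance is at least half the median importance. A minimal sketch of the same selection on current scikit-learn, where SelectFromModel accepts the same scaled-statistic strings (the data here are illustrative):

# Modern equivalent of clf1.transform(X, '0.5*median')
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

X = np.random.rand(200, 40)
y = np.random.randint(0, 9, size=200)

clf1 = ExtraTreesClassifier(n_estimators=89, criterion='gini').fit(X, y)
selector = SelectFromModel(clf1, prefit=True, threshold='0.5*median')
X_new = selector.transform(X)
print(X_new.shape)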

Example 11: LogisticRegression

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
            if algo == 'l1' or algo == 'l2':
                clf2 = LogisticRegression(C=reg, penalty=algo, random_state=0)
            elif algo == 'lsvm':
                clf2 = svm.SVC(kernel='linear', C=reg)
            elif algo == 'ksvm':
                clf2 = svm.SVC(kernel='rbf', C=reg, gamma=1e-5)
            clf2.fit(X_train, y_train)
            print "Validation set score filtered coeff: ", clf2.score(X_val, y_val)
        elif fe == 5:  # Tree-based feature selection
            forest = ExtraTreesClassifier(n_estimators=20, random_state=144)
            forest.fit(X_train, y_train)
            importances = forest.feature_importances_
            std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
            pl.figure(2)
            pl.bar(range(10), importances, color="r", yerr=std, align="center")
            # threshold must be the string 'mean' (the mean importance), not a bare name
            X_train = forest.transform(X_train, threshold="mean")
            X_val = forest.transform(X_val, threshold="mean")
            if algo == 'l1' or algo == 'l2':
                clf2 = LogisticRegression(C=reg, penalty=algo, random_state=0)
            elif algo == 'lsvm':
                clf2 = svm.SVC(kernel='linear', C=reg)
            elif algo == 'ksvm':
                clf2 = svm.SVC(kernel='rbf', C=reg, gamma=1e-5)
            clf2.fit(X_train, y_train)
            print "Validation set score filtered coeff: ", clf2.score(X_val, y_val)

    pl.show()

    if resfile != '':
        print "Creating the testset."
        subjects_test = range(17, 24)
Author: choudharydhruv, Project: dec-meg-2014, Lines: 33, Source: submit.py

Example 12: PCA

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
file_label.close()

# normalize the features in the train and test dataset
train_data_array_norm = preprocessing.scale(train_data_array)
test_data_array_norm = preprocessing.scale(test_data_array)

# run the module of PCA
#pca = PCA(n_components = 10)
#train_data_array_norm_pca = pca.fit_transform(train_data_array_norm, train_result_array)
#test_data_array_norm_pca = pca.transform(test_data_array_norm)
#print 'train data shape', train_data_array_norm_pca.shape

# tree-based feature selection
classifier = ExtraTreesClassifier()
train_data_array_norm_pca = classifier.fit_transform(train_data_array_norm, np.ravel(train_result_array))
test_data_array_norm_pca = classifier.transform(test_data_array_norm)
print 'train data shape', train_data_array_norm_pca.shape


## build SVM
# random shuffle
np.random.seed(0)
indices = np.random.permutation(len(train_result_array))

classifier_svm = svm.SVC(C=20, gamma=0.05)

# cross-validate the SVM classifier
scores = cv.cross_val_score(classifier_svm, train_data_array_norm_pca, np.ravel(train_result_array), cv=30)

classifier_svm.fit(train_data_array_norm_pca[indices[:-200], :], np.ravel(train_result_array[indices[:-200]]))
Author: huanqi, Project: kaggle-competitions, Lines: 33, Source: London2.py

Example 13: ExtraTreesClassifier

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]

#X=np.hstack((np.hstack((np.real(np.fft.fft(X,axis=-1)),np.imag(np.fft.fft(X,axis=-1)))),X))
#X_test = np.hstack((np.hstack((np.real(np.fft.fft(X_test,axis=-1)),np.imag(np.fft.fft(X_test,axis=-1)))),X_test))


clf = ExtraTreesClassifier()
X = clf.fit(X, y).transform(X)
X_test = clf.transform(X_test)

#drop features
#features= [34,48,16,39,62,68,60,67,22,18,14,11,43,87,75,42,59,45,15,55,26,1,56,38,64,70,29,85,32,50,21,40,69,9,86,72,91,36,33,90,41,73,23,74,93,53,77]
#feature =[ item-1 for item in features]
'''
import random 
feature = range(93)
feature = random.sample(feature,60)
X=X[:,feature]
X_test=X_test[:,feature]
'''



nb_classes = y.shape[1]
Author: WenchenLi, Project: kaggle, Lines: 32, Source: kaggle_otto_nn.py

Example 14: ExtraTreesClassifier

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
del labels
del train

# Parameters for trees
random_state = 5342
n_jobs = 8
verbose = 1
n_estimators = 89
estimator = ExtraTreesClassifier(criterion='gini', random_state=random_state, n_jobs=n_jobs, verbose=verbose, n_estimators=n_estimators, max_features=None)
clf = AdaBoostClassifier(base_estimator=estimator, random_state=random_state, learning_rate=0.8)

# Start training
print('training started')

estimator.fit(X, y)
X = estimator.transform(X, '1.25*median')

############################
# test log loss
print('computing log loss')
kf = cross_validation.KFold(ntrain, n_folds=4)

_logloss = 0.0
for trainIndex, testIndex in kf:
    print("TRAIN:", trainIndex, "TEST:", testIndex)
    X_train, X_test = X[trainIndex], X[testIndex]
    y_train, y_test = y[trainIndex], y[testIndex]

    clf.fit(X_train, y_train)
    pred = clf.predict_proba(X_test)
Author: canast02, Project: microsoft-malware-classification-challenge, Lines: 32, Source: solution6.py

Example 15: print

# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Alternatively: from sklearn.ensemble.ExtraTreesClassifier import transform [as alias]
import numpy as np
import pandas as pd
from sklearn import preprocessing as pp
from sklearn.ensemble import ExtraTreesClassifier

print("Preparing the data")
train = pd.io.parsers.read_csv(r"D:\shared\datascience\phy_train_clean.csv", sep=',', header=0)
test = pd.io.parsers.read_csv(r"D:\shared\datascience\phy_test_clean.csv", sep=',', header=0)

test_index = test.Id
test = test.iloc[:,2:]

target = train.kind
train_index = train.Id
train = train.iloc[:,2:]

print("Preparing an Feature classifier")
selector = ExtraTreesClassifier(compute_importances=True, random_state=0)

print("Transforming the original dataset")
train = pd.DataFrame(selector.fit_transform(train, target), index = train_index)
test = pd.DataFrame(selector.transform(test), index = test_index)
train['kind'] = target

print("Storing the data...")
train.to_csv(r"D:\shared\datascience\phy_train.csv", sep=',')
test.to_csv(r"D:\shared\datascience\phy_test.csv", sep=',')
print("Job finished")
Author: leonardodaniel, Project: phy-KDD2004, Lines: 30, Source: phy_clustering.py


Note: The sklearn.ensemble.ExtraTreesClassifier.transform examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.