

Python StandardScaler.transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.preprocessing.StandardScaler.transform, gathered from open-source projects. If you want to know what StandardScaler.transform does and how to call it, the curated examples below should help; see also the broader usage examples for its parent class, sklearn.preprocessing.StandardScaler.


The sections below present 15 code examples of StandardScaler.transform, ordered by popularity.
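
Before the examples, here is a minimal self-contained sketch (ours, not taken from any of the projects below) of the pattern nearly all of them follow: fit the scaler on the training data only, then reuse those statistics to transform both sets.

import numpy as np
from sklearn.preprocessing import StandardScaler

# Toy matrices standing in for real train/test features.
X_train = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])
X_test = np.array([[1.5, 250.0]])

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)  # learn per-feature mean/std on train
X_test_std = scaler.transform(X_test)        # reuse the training statistics

print(scaler.mean_)              # per-feature means learned from X_train
print(X_train_std.mean(axis=0))  # ~[0, 0] after standardization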

Example 1: process_data

# Required imports: import datetime
#                   from sklearn.preprocessing import LabelEncoder, StandardScaler
# ('goal' is a module-level variable defined elsewhere in the project)
def process_data(train,test,features,features_non_numeric):
    train['StreetNo'] = train['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    test['StreetNo'] = test['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    train['Address'] = train['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    test['Address'] = test['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    train['hour'] = train['Dates'].apply(lambda x: int(x[11:13]) if len(x) > 4 else 12)
    test['hour'] = test['Dates'].apply(lambda x: int(x[11:13]) if len(x) > 4 else 12)
    # "dark" marks roughly night-time records (18:00-06:00); the hour must be
    # cast to int, and the two bounds combined with "or", for the test to ever hold
    train['dark'] = train['Dates'].apply(lambda x: 1 if (len(x) > 4 and (int(x[11:13]) >= 18 or int(x[11:13]) < 6)) else 0)
    test['dark'] = test['Dates'].apply(lambda x: 1 if (len(x) > 4 and (int(x[11:13]) >= 18 or int(x[11:13]) < 6)) else 0)
    features += ['hour','dark','StreetNo']

    print "Filling N/As: " + str(datetime.datetime.now())
    train = train.fillna(train.mode().iloc[0])
    test = test.fillna(test.mode().iloc[0])
    # Pre-processing non-numeric values
    print "Label Encoder: " + str(datetime.datetime.now())
    le = LabelEncoder()
    for col in features:
        # print col
        le.fit(list(train[col])+list(test[col]))
        train[col] = le.transform(train[col])
        test[col] = le.transform(test[col])
    # Xgb requires goal to be numeric...
    le.fit(list(train[goal]))
    train[goal] = le.transform(train[goal])

    # Neural Network, Stochastic Gradient Descent is sensitive to feature scaling, so it is highly recommended to scale your data.
    print "Standard Scaler: " + str(datetime.datetime.now())
    scaler = StandardScaler()
    for col in set(features): # - set(features_non_numeric):
        # print col
        scaler.fit(list(train[col])+list(test[col]))
        train[col] = scaler.transform(train[col])
        test[col] = scaler.transform(test[col])
    return (train,test,features)
Author: AdityaRon | Project: kaggle-for-fun | Lines: 37 | Source: sf-crime-classification-xgb-native.py
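
One caveat on Example 1: calling fit/transform on a bare Python list of column values only works with old scikit-learn releases; current ones require 2-D input. A hedged sketch of the same per-column pattern adapted for modern scikit-learn (the 'hour' column and its values are illustrative):

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

train = pd.DataFrame({'hour': [3, 12, 23]})
test = pd.DataFrame({'hour': [7, 19]})

scaler = StandardScaler()
col = 'hour'
# Fit on the combined column reshaped to (n_samples, 1).
combined = np.asarray(list(train[col]) + list(test[col]), dtype=float).reshape(-1, 1)
scaler.fit(combined)
train[col] = scaler.transform(train[[col]].to_numpy()).ravel()  # double brackets keep it 2-D
test[col] = scaler.transform(test[[col]].to_numpy()).ravel()
print(train)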

Example 2: PCATransform

# Required imports: from sklearn.base import BaseEstimator, TransformerMixin
#                   from sklearn.decomposition import PCA
#                   from sklearn.preprocessing import StandardScaler
class PCATransform(BaseEstimator, TransformerMixin):
    """
    PCA with an argument that allows the user to skip the transform
    altogether.
    """
    def __init__(self, n_components=.1, skip=False, whiten=False, standard_scalar=True):
        print('PCA!')
        self.n_components = n_components
        self.skip = skip
        self.whiten = whiten
        self.standard_scalar = standard_scalar

    def fit(self, X, y=None):
        if not self.skip:
            if self.standard_scalar:
                self.std_scalar = StandardScaler().fit(X)
                X = self.std_scalar.transform(X)
            self.pca = PCA(n_components=self.n_components, whiten=self.whiten).fit(X)
        return self

    def transform(self, X, y=None):
        if not self.skip:
            if self.standard_scalar:
                X = self.std_scalar.transform(X)
            return self.pca.transform(X)
        return X
Author: ChiuYeeLau | Project: KaggleSFCrimePrediction | Lines: 28 | Source: Ensemble1.py
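
A short usage sketch (toy data, our own) using the PCATransform class above, showing why the skip flag is handy: a grid search can switch the entire PCA step on and off without rebuilding the pipeline.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

rng = np.random.RandomState(0)
X = rng.rand(100, 10)
y = (X[:, 0] > 0.5).astype(int)

pipe = Pipeline([('pca', PCATransform(n_components=5)),
                 ('clf', LogisticRegression())])
# skip=True bypasses PCA entirely; skip=False applies scaling + PCA.
grid = GridSearchCV(pipe, {'pca__skip': [True, False]}, cv=3)
grid.fit(X, y)
print(grid.best_params_)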

Example 3: generate_dataset

# Required imports: import gc
#                   from sklearn.datasets import make_regression
#                   from sklearn.model_selection import train_test_split
#                   from sklearn.preprocessing import StandardScaler
#                   from sklearn.utils import shuffle
def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=False):
    """Generate a regression dataset with the given parameters."""
    if verbose:
        print("generating dataset...")

    X, y, coef = make_regression(n_samples=n_train + n_test,
                                 n_features=n_features, noise=noise, coef=True)

    random_seed = 13
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=n_train, random_state=random_seed)
    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)

    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)

    y_scaler = StandardScaler()
    y_train = y_scaler.fit_transform(y_train[:, None])[:, 0]
    y_test = y_scaler.transform(y_test[:, None])[:, 0]

    gc.collect()
    if verbose:
        print("ok")
    return X_train, y_train, X_test, y_test
Author, project, and source file: not recorded | Lines: 27
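
When the target itself is standardized, as in Example 3, model output is in standardized units; inverse_transform maps predictions back to the original scale. A small sketch of that round trip (Ridge is an arbitrary stand-in model):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(13)
X = rng.rand(50, 3)
y = 100.0 * X[:, 0] + rng.rand(50)

y_scaler = StandardScaler()
y_std = y_scaler.fit_transform(y[:, None])[:, 0]

model = Ridge().fit(X, y_std)
# Predictions come out in standardized units; undo the scaling for reporting.
pred = y_scaler.inverse_transform(model.predict(X)[:, None])[:, 0]
print(pred[:3], y[:3])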

Example 4: feature_extraction_partialPCA

# Required imports: import numpy as np
#                   from sklearn.decomposition import PCA
#                   from sklearn.preprocessing import StandardScaler (also imported inline below)
def feature_extraction_partialPCA(X_grad_train,X_grad_test,X_mag_train,X_mag_test):
    # Flattens the data, centers it, runs PCA separately on data from each
    # sensor type (gradiometers & magnetometers), then z-scores the combined result

    from sklearn.preprocessing import StandardScaler
    def flat_n_standartize(Xtrain,Xtest):
        # Flatten the (times x channels) arrays and center them on the training mean
        Xtrain = Xtrain.reshape(Xtrain.shape[0],-1) #flatten array n_samples x n_time x n_channels to n_samples x n_features
        mean = Xtrain.mean(axis=0)
        Xtrain = Xtrain - mean
        Xtest = Xtest.reshape(Xtest.shape[0],-1)
        Xtest = Xtest - mean
        return Xtrain,Xtest # data from the same sensor type share the same scale
    X_grad_train,X_grad_test = flat_n_standartize(X_grad_train,X_grad_test)
    X_mag_train,X_mag_test = flat_n_standartize(X_mag_train,X_mag_test)

    effective_pca_num = 40 # PCA components

    # Whitening scales each component to unit variance; without it the downstream SVM performs poorly
    pca = PCA(n_components=effective_pca_num,whiten = True)
    X_grad_train=pca.fit_transform(X_grad_train)
    X_grad_test=pca.transform(X_grad_test)

    X_mag_train= pca.fit_transform(X_mag_train)
    X_mag_test=pca.transform(X_mag_test)
    Xtrain = np.hstack((X_grad_train,X_mag_train))
    Xtest = np.hstack((X_grad_test,X_mag_test))

    scaler = StandardScaler().fit(Xtrain)
    return scaler.transform(Xtrain),scaler.transform(Xtest)
Author: LIKAN-BLK | Project: MEGcluster | Lines: 32 | Source: main.py

Example 5: get_norm_nFoldData

# Required imports: import numpy as np
#                   from sklearn.preprocessing import StandardScaler
def get_norm_nFoldData(trainXY, testXY):
    trainX = trainXY[:,:-1]
    trainY = trainXY[:,-1]
    testX = testXY[:,:-1]
    testY = testXY[:,-1]

    # standardise only the x values, not the labels; reuse the training-set
    # statistics for the test set (re-fitting a scaler on testX would put
    # train and test on inconsistent scales)
    scaler = StandardScaler()
    scaler.fit(trainX)
    trainX = scaler.transform(trainX)
    testX = scaler.transform(testX)

    trainY = trainY.reshape((trainY.shape[0],1))
    testY = testY.reshape((testY.shape[0],1))
    train_X_Y = np.concatenate((trainX,trainY),axis=1)
    test_X_Y = np.concatenate((testX,testY),axis=1)

    folds_tr = []
    folds_te = []
    nfolds = 5
    for i in range(nfolds):
        xp = int(train_X_Y.shape[0]*.8)
        np.random.shuffle(train_X_Y)
        folds_tr.append(train_X_Y[:xp,:])
        folds_te.append(train_X_Y[xp:,:])
    return folds_tr, folds_te
Author: deepak242424 | Project: ml-temp | Lines: 30 | Source: 4_grid_search.py
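
The hand-rolled shuffle-and-slice loop in Example 5 builds five random 80/20 splits; scikit-learn's ShuffleSplit does the same with less code. A hedged equivalent on a toy array:

import numpy as np
from sklearn.model_selection import ShuffleSplit

train_X_Y = np.random.RandomState(0).rand(50, 4)  # stand-in for the real array

folds_tr, folds_te = [], []
splitter = ShuffleSplit(n_splits=5, train_size=0.8, random_state=0)
for tr_idx, te_idx in splitter.split(train_X_Y):
    folds_tr.append(train_X_Y[tr_idx])
    folds_te.append(train_X_Y[te_idx])
print(len(folds_tr), folds_tr[0].shape, folds_te[0].shape)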

Example 6: classify

# Required imports: import numpy as np
#                   from sklearn.preprocessing import StandardScaler
# (SparseFilter is a project-specific class, not part of scikit-learn)
    def classify(self):
        """Perform classification"""
        train_X = np.asarray(self.__rawtraindata)
        train_y = np.asarray(self.__trainlabels)
        test_X = np.asarray(self.__rawtestdata)

        train_feat_X = np.asarray(self.__traindata)
        test_feat_X = np.asarray(self.__testdata)
        # print train_feat_X.shape
        # print test_feat_X.shape

        scaler = StandardScaler().fit(np.r_[train_X, test_X])
        train_X = scaler.transform(train_X)
        test_X = scaler.transform(test_X)

        ## train a sparse filter on both train and test data
        sf = SparseFilter(n_features=20, n_iterations=1000)
        sf.fit(np.r_[train_X, test_X])
        train_sf_X = sf.transform(train_X)
        test_sf_X = sf.transform(test_X)
        print(train_sf_X)
        print(test_sf_X)

        ss = StandardScaler()
        train_combined_X = ss.fit_transform(np.c_[train_sf_X, train_feat_X])
        test_combined_X = ss.transform(np.c_[test_sf_X, test_feat_X])

        self.__clf.fit(train_combined_X, train_y.ravel())
        self.__y = self.__clf.predict(test_combined_X)
        feature_importance = self.__clf.feature_importances_
        feature_importance = 100.0 * (feature_importance / feature_importance.max())
        print(feature_importance)
Author: kevinhsu | Project: kaggle-axa | Lines: 34 | Source: SPRegressionDriver.py
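
Example 6 leans on NumPy's np.r_/np.c_ index tricks: np.r_ stacks train and test row-wise so the scaler and sparse filter see both, and np.c_ glues two feature blocks side by side. A toy illustration:

import numpy as np

a = np.array([[1, 2], [3, 4]])  # e.g. training rows
b = np.array([[5, 6]])          # e.g. test rows

stacked = np.r_[a, b]  # row-wise concatenation, like np.vstack: shape (3, 2)
paired = np.c_[a, a]   # column-wise concatenation, like np.hstack: shape (2, 4)
print(stacked.shape, paired.shape)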

Example 7: prepare_data

# Required import: import numpy as np
# (the sklearn imports are done inside the function)
def prepare_data():
    # prepare data
    from sklearn import datasets
    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]
    y = iris.target
    print('Class labels:', np.unique(y))
    print(X.shape, y.shape)
    
    # split train and test
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
    print(X_train.shape, X_test.shape)
    
    print('Labels counts in y:', np.bincount(y))
    print('Labels counts in y_train:', np.bincount(y_train))
    print('Labels counts in y_test:', np.bincount(y_test))
    
    # scaler
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    sc.fit(X_train)  # mean + sd of train data
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    return X_train_std, X_test_std, y_train, y_test
Author: dazhouze | Project: helloWorld | Lines: 27 | Source: ML_03_1_sklearn_perceptron.py
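
A quick sanity check on Example 7's scaler (our addition): after transform, the training features should have mean ≈ 0 and standard deviation ≈ 1, while the test set is only approximately centered because it reuses the training statistics.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = load_iris().data[:, [2, 3]]
X_train, X_test = train_test_split(X, test_size=0.3, random_state=1)

sc = StandardScaler().fit(X_train)
print(sc.transform(X_train).mean(axis=0))  # ~[0, 0]
print(sc.transform(X_train).std(axis=0))   # ~[1, 1]
print(sc.transform(X_test).mean(axis=0))   # near, but not exactly, 0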

Example 8: __init__

# Required imports: import numpy as np
#                   import pandas as pd
#                   from sklearn.model_selection import train_test_split
#                   from sklearn.preprocessing import StandardScaler
    def __init__(self):
        """
        Constructs a SimulateData object.
        """

        # Read the simulated data.
        simulated = pd.read_csv("simulated.csv", index_col=0)
        predictors = np.asarray(simulated)[:, 0:-1]
        responses = np.asarray(simulated)[:, -1]

        # Divide the simulated data into training and test sets.
        predictors_training, predictors_test,\
        self.responses_training, self.responses_test =\
            train_test_split(predictors, responses, test_size=0.33)

        # Standardize the predictors, both training and test.
        scaler = StandardScaler()
        scaler.fit(predictors_training)
        self.predictors_training = scaler.transform(predictors_training)
        self.predictors_test = scaler.transform(predictors_test)

        # Keep track of the number of samples in the training and test sets,
        # and also the number of features.
        self.training_sample_count = len(self.responses_training)
        self.test_sample_count = len(self.responses_test)
        self.feature_count = np.size(predictors, 1)
        return None
Author: garygr2002 | Project: erasmus | Lines: 29 | Source: use_simulated_data.py

Example 9: kfolds_cv

# Required imports: import numpy as np
#                   from sklearn.cross_validation import KFold  (pre-0.18 API; newer
#                    releases use sklearn.model_selection.KFold(n_splits=...).split(X))
#                   from sklearn.preprocessing import StandardScaler
# (rmsle is a project-specific metric function)
def kfolds_cv(estimator, X, y):
    num_folds = 10
    kf = KFold(len(X), n_folds=num_folds, shuffle=True)

    yhat_train = np.zeros(len(y), dtype = y.dtype)
    yhat_test  = np.zeros(len(y), dtype = y.dtype)
    train_err  = []
    test_err   = []

    for train_idx, test_idx in kf:
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        # Scale the data
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled  = scaler.transform(X_test)
        # fit the estimator (estimator.__class__.__name__)
        estimator  = estimator.fit(X_train_scaled, y_train)
        yhat_train = estimator.predict(X_train_scaled)
        yhat_test  = estimator.predict(X_test_scaled)
        # store train and test error
        train_err.append( rmsle(y_train, yhat_train) )
        test_err.append(  rmsle(y_test,  yhat_test) )

    return {"Model Name":(estimator.__class__.__name__),
            "Err Train": np.mean(train_err),
            "Err Test": np.mean(test_err)}
Author: mcbada | Project: daBus | Lines: 30 | Source: models.py
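
Example 9 correctly re-fits the scaler inside every fold. A Pipeline gives the same guarantee with less bookkeeping, because cross_val_score refits the whole pipeline per fold; a sketch with toy data (Ridge stands in for the estimator):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X, y = rng.rand(100, 5), rng.rand(100)

pipe = make_pipeline(StandardScaler(), Ridge())
# The scaler is re-fit on each fold's training split only, so the
# held-out fold never leaks into the mean/std estimates.
scores = cross_val_score(pipe, X, y, cv=KFold(n_splits=10, shuffle=True, random_state=0))
print(scores.mean())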

Example 10: testLogistic

# Required imports: from sklearn.decomposition import PCA
#                   from sklearn.feature_selection import SelectKBest
#                   from sklearn.linear_model import LogisticRegression
#                   from sklearn.pipeline import FeatureUnion, Pipeline
#                   from sklearn.preprocessing import StandardScaler
# (load_otto, load_testotto and save_submission are project-specific helpers)
def testLogistic(lbda=1.0, n_components=20, kbest=4):
	# X = otto.data[:1000, :20]
	# y = otto.target[:1000]
	otto = load_otto()
	X = otto.data[:, :]
	y = otto.target[:]
	# n_components = 20
	# kbest = 4
#	print 'y.shape =', y.shape

	scalar = StandardScaler().fit(X)
	X = scalar.transform(X)

	pca = PCA(n_components=n_components)
	selection = SelectKBest(k=kbest)

	combined_features = FeatureUnion(
		[("pca", pca), ('univ_select', selection)]
	)
	X_features = combined_features.fit(X,y).transform(X)  # unused: the pipeline below re-fits the features

	logistic = LogisticRegression(C=1.0/lbda)
	pipe = Pipeline(steps=[('features', combined_features), ('logistic', logistic)])
	trainData = X
	trainTarget = y
	pipe.fit(trainData, trainTarget)
	# print trainTarget
	test_otto = load_testotto()
	testData = test_otto.data
	testData = scalar.transform(testData)
	# logging.debug('lambda=%.3f: score is %.3f' % (lbda, pipe.score()))
	# save the prediction (note: 'prediction' duplicates 'proba' here and was
	# probably meant to be pipe.predict(testData))
	prediction = pipe.predict_proba(testData)
	proba = pipe.predict_proba(testData)
	save_submission(lbda, proba, prediction)
Author: Turf1013 | Project: Machine_Learning | Lines: 37 | Source: logistic_submission.py

Example 11: exp1

# Required imports: import numpy as np
#                   import pandas as pd
#                   from sklearn.preprocessing import StandardScaler
# (get_data_1, nn_features and build_nn2 are project-specific helpers)
def exp1():
	train,y,test,idx = get_data_1()
	train = np.log1p(train.astype(float))
	test = np.log1p(test.astype(float))
	scaler = StandardScaler().fit(train)
	train = scaler.transform(train)
	test = scaler.transform(test)
	mtrain = pd.read_csv('meta_features_train.csv')
	mtest = pd.read_csv('meta_features_test.csv')
	scaler2 = StandardScaler().fit(mtrain)
	mtrain = scaler2.transform(mtrain)
	mtest = scaler2.transform(mtest)
	train = np.column_stack((train,mtrain))
	test = np.column_stack((test,mtest))
	rtrain_nn,rtest_nn = nn_features(train,y,test,model=build_nn2,random_state=1,n_folds=5,early_stop=50)
	rtrain_nn_total = rtrain_nn
	rtest_nn_total = rtest_nn
	for i in range(9):
		rand_seed = i*113+9201
		rtrain_nn,rtest_nn = nn_features(train,y,test,model=build_nn2,random_state=rand_seed,n_folds=5,early_stop=50)
		rtrain_nn_total += rtrain_nn
		rtest_nn_total += rtest_nn
		pd.DataFrame(data=rtrain_nn_total).to_csv('rtrain_nn_last.csv',index=False)
		pd.DataFrame(data=rtest_nn_total).to_csv('rtest_nn_last.csv',index=False)
	
	pd.DataFrame(data=rtrain_nn_total/10).to_csv('rtrain_nn_final.csv',index=False)
	pd.DataFrame(data=rtest_nn_total/10).to_csv('rtest_nn_final.csv',index=False)
Author: hiendang | Project: kaggle-crowdflower-search | Lines: 29 | Source: nn_1.py
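
Example 11 applies log1p before standardizing, a common treatment for heavy-tailed features; standardization alone shifts and rescales but cannot remove skew. A small sketch of the effect on synthetic lognormal data:

import numpy as np
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(1)
x = rng.lognormal(mean=0.0, sigma=2.0, size=(1000, 1))  # heavy right tail

raw_std = StandardScaler().fit_transform(x)
log_std = StandardScaler().fit_transform(np.log1p(x))

def skewness(a):
    # Third standardized moment; 0 for a symmetric distribution.
    return float(((a - a.mean()) ** 3).mean() / a.std() ** 3)

print(skewness(raw_std), skewness(log_std))  # skew drops sharply after log1p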

Example 12: _xgboost_transform

# Required imports: import xgboost as xgb
#                   from copy import copy
#                   from sklearn.preprocessing import StandardScaler
# (a method of an imputer class; the self._* attributes are initialized elsewhere)
    def _xgboost_transform(self, X, X_new, y=None):
        for column_name in self._devided_features['class']:

            current_X_columns = copy(list(X.columns.values))
            current_X_columns.remove(column_name)

            current_X, _, _, X_test = self._get_X_and_y_by_column_name_with_imputs(X,
                                                                                   current_X_columns,
                                                                                   column_name)

            if X_test.empty is False:
                scaler = StandardScaler().fit(current_X)
                y_pred = self._classifiers[column_name].predict(xgb.DMatrix(scaler.transform(X_test)))
                y_pred = self._label_encoders[column_name].inverse_transform(y_pred.astype(int))

                self._set_pred_values_to_df(list(X_test.index.values), X_new, y_pred, column_name)

        for column_name in self._devided_features['regr']:

            current_X_columns = copy(list(X.columns.values))
            current_X_columns.remove(column_name)

            current_X, _, _, X_test = self._get_X_and_y_by_column_name_with_imputs(X,
                                                                                   current_X_columns,
                                                                                   column_name)

            if X_test.empty is False:
                scaler = StandardScaler().fit(current_X)
                y_pred = self._regressors[column_name].predict(xgb.DMatrix(scaler.transform(X_test)))
                y_pred = self._label_scalers[column_name].inverse_transform(y_pred)

                self._set_pred_values_to_df(list(X_test.index.values), X_new, y_pred, column_name)
Author: tyamana | Project: imputer | Lines: 34 | Source: imputers.py

Example 13: data_processing

# Required imports: from sklearn.preprocessing import LabelEncoder, StandardScaler
# ('target' is a module-level variable defined elsewhere in the project)
def data_processing(train,test,features):
    # train['StreetNo'] = train['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    # test['StreetNo'] = test['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    # train['Address'] = train['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    # test['Address'] = test['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    # train['hour'] = train['Dates'].apply(lambda x: x[11:13] if len(x) > 4 else 12)
    # test['hour'] = test['Dates'].apply(lambda x: x[11:13] if len(x) > 4 else 12)
    # train['dark'] = train['Dates'].apply(lambda x: 1 if (len(x) > 4 and int(x[11:13]) >= 18 and int(x[11:13]) < 6) else 0)
    # test['dark'] = test['Dates'].apply(lambda x: 1 if (len(x) > 4 and int(x[11:13]) >= 18 and int(x[11:13]) < 6) else 0)
    # features += ['hour','dark','StreetNo']

    print("Filling NAs")
    # print(train.mode())
    train = train.fillna(train.median().iloc[0])
    test = test.fillna(test.median().iloc[0])
    print("Label Encoder")
    le=LabelEncoder()
    for col in features:
        le.fit(list(train[col])+list(test[col]))
        train[col]=le.transform(train[col])
        test[col]=le.transform(test[col])

    le.fit(list(train[target]))
    train[target]=le.transform(train[target])

    print("Standard Scalaer")
    scaler=StandardScaler()
    for col in features:
        scaler.fit(list(train[col]))
        train[col]=scaler.transform(train[col])
        test[col]=scaler.transform(test[col])

    return train,test,features
Author: ssdf93 | Project: kaggle | Lines: 35 | Source: xgboost_native.py

Example 14: load_data

# Required imports: import numpy, scipy.sparse
#                   from sklearn.preprocessing import StandardScaler
def load_data(dataset, scale=False):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: The folder in ../data/ containing the training/testing numpy arrays
    '''

    print('... loading data')
    path = "../data/" + dataset + "/"
    
    #training set
    trainingData = numpy.load(path + "training.data.npy") 
    trainingIndices = numpy.load(path + "training.indices.npy")
    trainingIndptr = numpy.load(path + "training.indptr.npy")
    training_y = numpy.load(path + "training.labels.npy")
    training_X = scipy.sparse.csr_matrix((trainingData, trainingIndices, trainingIndptr))

    #testing set
    testingData = numpy.load(path + "testing.data.npy") 
    testingIndices = numpy.load(path + "testing.indices.npy")
    testingIndptr = numpy.load(path + "testing.indptr.npy")
    testing_y = numpy.load(path + "testing.labels.npy")
    testing_X = scipy.sparse.csr_matrix((testingData, testingIndices, testingIndptr))

    #scale the data 
    if scale:
        print "..training scaler"
        scaler = StandardScaler(with_mean=False)
        scaler.fit(training_X)
        print "..scaling features"
        training_X = scaler.transform(training_X)
        testing_X = scaler.transform(testing_X)
    
    return [(training_X, training_y),(testing_X, testing_y)]
Author: uci-cbcl | Project: DeepCADD | Lines: 36 | Source: sklearn_CADD_mlp.py
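
The with_mean=False in Example 14 is essential for sparse input: subtracting the mean would turn every zero into a nonzero and densify the matrix, so only the divide-by-std part is applied. A minimal sketch:

import numpy as np
import scipy.sparse
from sklearn.preprocessing import StandardScaler

X = scipy.sparse.csr_matrix(np.array([[0.0, 1.0], [0.0, 3.0], [2.0, 5.0]]))

scaler = StandardScaler(with_mean=False).fit(X)
X_scaled = scaler.transform(X)  # stays sparse; zero entries are untouched
print(type(X_scaled), X_scaled.toarray(), sep="\n")

# StandardScaler(with_mean=True) on sparse input raises a ValueError,
# precisely to avoid silently materializing a huge dense matrix.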

Example 15: sgc_test

# Required import: from sklearn.preprocessing import StandardScaler
# (all imports happen inside the function; note the pre-0.18 sklearn.cross_validation
#  module and SGDClassifier's old n_iter argument, replaced by max_iter in newer releases)
def sgc_test(X, y, weight):
    from sklearn.linear_model import SGDClassifier
    from sklearn import cross_validation
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import StandardScaler

    for i in range(0,1):
        X_train, X_test, y_train, y_test, weight_train, weight_test = cross_validation.train_test_split(
            X, y, weight, test_size=0.2, random_state=0)
        clf = SGDClassifier(loss="hinge", n_iter=100, n_jobs=-1, penalty="l2")  # n_iter is max_iter in sklearn >= 0.19
        #clf = LogisticRegression( max_iter=100)

        scaler = StandardScaler(with_mean=False)
        scaler.fit(X_train)  # Don't cheat - fit only on training data
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)  # apply same transformation to test data

        clf.fit(X_train, y_train, sample_weight=weight_train)

        y_pred = clf.predict(X_train)
        #print(confusion_matrix(y_train, y_pred))
        print(clf.score(X_train,y_train,weight_train))

        y_pred = clf.predict(X_test)

        #print(confusion_matrix(y_test, y_pred))
        print(clf.score(X_test,y_test,weight_test))
Author: organization-lab | Project: weibo-predict | Lines: 29 | Source: regressor.py


Note: The sklearn.preprocessing.StandardScaler.transform examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by various developers; copyright in the source code remains with the original authors, and any use or redistribution must follow the corresponding project's license. Please do not repost without permission.