This page collects typical usage examples of the StandardScaler.transform method from sklearn.preprocessing in Python. If you are wondering what StandardScaler.transform does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usages of its containing class, sklearn.preprocessing.StandardScaler.
The following 15 code examples of StandardScaler.transform are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
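Before diving into the examples, here is a minimal, self-contained sketch of the usual pattern (the data below is made up purely for illustration): fit the scaler on the training set so it learns the per-feature mean and standard deviation, then call transform on both the training and the test set with those same statistics.

import numpy as np
from sklearn.preprocessing import StandardScaler

# Illustrative data: four samples, two features
X_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0], [4.0, 40.0]])
X_test = np.array([[2.5, 25.0]])

scaler = StandardScaler()
scaler.fit(X_train)                      # learn per-feature mean and std from the training data
X_train_std = scaler.transform(X_train)  # standardize the training data
X_test_std = scaler.transform(X_test)    # reuse the same statistics for the test data
print(scaler.mean_, scaler.scale_)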
Example 1: process_data
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def process_data(train, test, features, features_non_numeric):
    train['StreetNo'] = train['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    test['StreetNo'] = test['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    train['Address'] = train['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    test['Address'] = test['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    train['hour'] = train['Dates'].apply(lambda x: x[11:13] if len(x) > 4 else 12)
    test['hour'] = test['Dates'].apply(lambda x: x[11:13] if len(x) > 4 else 12)
    # "dark" covers roughly 18:00-06:00; the hour substring must be cast to int before comparing
    train['dark'] = train['Dates'].apply(lambda x: 1 if (len(x) > 4 and (int(x[11:13]) >= 18 or int(x[11:13]) < 6)) else 0)
    test['dark'] = test['Dates'].apply(lambda x: 1 if (len(x) > 4 and (int(x[11:13]) >= 18 or int(x[11:13]) < 6)) else 0)
    features += ['hour', 'dark', 'StreetNo']
    print "Filling N/As: " + str(datetime.datetime.now())
    train = train.fillna(train.mode().iloc[0])
    test = test.fillna(test.mode().iloc[0])
    # Pre-processing non-numeric values
    print "Label Encoder: " + str(datetime.datetime.now())
    le = LabelEncoder()
    for col in features:
        # print col
        le.fit(list(train[col]) + list(test[col]))
        train[col] = le.transform(train[col])
        test[col] = le.transform(test[col])
    # Xgb requires the goal to be numeric...
    le.fit(list(train[goal]))
    train[goal] = le.transform(train[goal])
    # Neural networks and stochastic gradient descent are sensitive to feature scaling,
    # so it is highly recommended to scale the data.
    print "Standard Scaler: " + str(datetime.datetime.now())
    scaler = StandardScaler()
    for col in set(features):  # - set(features_non_numeric):
        # print col
        scaler.fit(list(train[col]) + list(test[col]))
        train[col] = scaler.transform(train[col])
        test[col] = scaler.transform(test[col])
    return (train, test, features)
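A caveat on this example: it passes plain Python lists (one column at a time) to StandardScaler, which newer scikit-learn releases reject because they expect a 2D array. Below is a small sketch of a column-wise variant with an explicit reshape; the function and argument names are illustrative, not part of the example above.

import numpy as np
from sklearn.preprocessing import StandardScaler

def scale_column(train_col, test_col):
    """Scale one feature column using statistics from train+test, as Example 1 does."""
    combined = np.asarray(list(train_col) + list(test_col), dtype=float).reshape(-1, 1)
    scaler = StandardScaler().fit(combined)  # newer sklearn requires 2D input
    train_scaled = scaler.transform(np.asarray(train_col, dtype=float).reshape(-1, 1)).ravel()
    test_scaled = scaler.transform(np.asarray(test_col, dtype=float).reshape(-1, 1)).ravel()
    return train_scaled, test_scaled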
Example 2: PCATransform
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
class PCATransform(BaseEstimator, TransformerMixin):
    """
    PCA with an argument that allows the user to skip the transform
    altogether.
    """
    def __init__(self, n_components=.1, skip=False, whiten=False, standard_scalar=True):
        print 'PCA!'
        self.n_components = n_components
        self.skip = skip
        self.whiten = whiten
        self.standard_scalar = standard_scalar

    def fit(self, X, y=None):
        if not self.skip:
            if self.standard_scalar:
                self.std_scalar = StandardScaler().fit(X)
                X = self.std_scalar.transform(X)
            self.pca = PCA(n_components=self.n_components, whiten=self.whiten).fit(X)
        return self

    def transform(self, X, y=None):
        if not self.skip:
            if self.standard_scalar:
                X = self.std_scalar.transform(X)
            return self.pca.transform(X)
        return X
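A hypothetical way to plug this transformer into a scikit-learn Pipeline is sketched below; the dataset and classifier are placeholders, and the Python 2 print statement in __init__ would need to become print(...) under Python 3.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

X, y = load_iris(return_X_y=True)
pipe = Pipeline([
    ("pca", PCATransform(n_components=2, whiten=True)),  # scales the data, then keeps 2 components
    ("clf", LogisticRegression(max_iter=1000)),
])
pipe.fit(X, y)
print(pipe.score(X, y))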
Example 3: generate_dataset
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=False):
    """Generate a regression dataset with the given parameters."""
    if verbose:
        print("generating dataset...")
    X, y, coef = make_regression(n_samples=n_train + n_test,
                                 n_features=n_features, noise=noise, coef=True)
    random_seed = 13
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=n_train, random_state=random_seed)
    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)
    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)
    y_scaler = StandardScaler()
    y_train = y_scaler.fit_transform(y_train[:, None])[:, 0]
    y_test = y_scaler.transform(y_test[:, None])[:, 0]
    gc.collect()
    if verbose:
        print("ok")
    return X_train, y_train, X_test, y_test
Example 4: feature_extraction_partialPCA
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def feature_extraction_partialPCA(X_grad_train, X_grad_test, X_mag_train, X_mag_test):
    # Flatten the data, center it, run PCA separately on the data from each sensor type
    # (grad & magn), then standardize the result (z-score).
    from sklearn.preprocessing import StandardScaler

    def flat_n_standartize(Xtrain, Xtest):
        # Flatten the times x channels arrays and center them on the training mean
        Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)  # flatten n_samples x n_time x n_channels to n_samples x n_features
        mean = Xtrain.mean(axis=0)
        Xtrain = Xtrain - mean
        Xtest = Xtest.reshape(Xtest.shape[0], -1)
        Xtest = Xtest - mean
        return Xtrain, Xtest  # data from the same sensor type keep the same scale

    X_grad_train, X_grad_test = flat_n_standartize(X_grad_train, X_grad_test)
    X_mag_train, X_mag_test = flat_n_standartize(X_mag_train, X_mag_test)
    effective_pca_num = 40  # number of PCA components
    # Whitening scales the variance to unit; without it the SVM would not work
    pca = PCA(n_components=effective_pca_num, whiten=True)
    X_grad_train = pca.fit_transform(X_grad_train)
    X_grad_test = pca.transform(X_grad_test)
    X_mag_train = pca.fit_transform(X_mag_train)
    X_mag_test = pca.transform(X_mag_test)
    Xtrain = np.hstack((X_grad_train, X_mag_train))
    Xtest = np.hstack((X_grad_test, X_mag_test))
    scaler = StandardScaler().fit(Xtrain)
    return scaler.transform(Xtrain), scaler.transform(Xtest)
Example 5: get_norm_nFoldData
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def get_norm_nFoldData(trainXY, testXY):
    trainX = trainXY[:, :-1]
    trainY = trainXY[:, -1]
    testX = testXY[:, :-1]
    testY = testXY[:, -1]
    # standardise only x values, not labels
    scaler = StandardScaler()
    scaler.fit(trainX)
    trainX = scaler.transform(trainX)
    scaler.fit(testX)
    testX = scaler.transform(testX)
    trainY = trainY.reshape((trainY.shape[0], 1))
    testY = testY.reshape((testY.shape[0], 1))
    train_X_Y = np.concatenate((trainX, trainY), axis=1)
    test_X_Y = np.concatenate((testX, testY), axis=1)
    folds_tr = []
    folds_te = []
    nfolds = 5
    for i in range(nfolds):
        xp = int(train_X_Y.shape[0] * .8)
        np.random.shuffle(train_X_Y)
        folds_tr.append(train_X_Y[:xp, :])
        folds_te.append(train_X_Y[xp:, :])
    return folds_tr, folds_te
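Note that this example re-fits the scaler on the test features, so train and test end up standardized with different statistics. Most of the other examples on this page fit on the training data only and reuse that scaler for the test data; a minimal sketch of that pattern (names are illustrative) follows.

from sklearn.preprocessing import StandardScaler

def scale_train_test(trainX, testX):
    """Fit the scaler on the training features only and reuse it for the test features."""
    scaler = StandardScaler().fit(trainX)
    return scaler.transform(trainX), scaler.transform(testX)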
Example 6: classify
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def classify(self):
    """Perform classification"""
    train_X = np.asarray(self.__rawtraindata)
    train_y = np.asarray(self.__trainlabels)
    test_X = np.asarray(self.__rawtestdata)
    train_feat_X = np.asarray(self.__traindata)
    test_feat_X = np.asarray(self.__testdata)
    # print train_feat_X.shape
    # print test_feat_X.shape
    scaler = StandardScaler().fit(np.r_[train_X, test_X])
    train_X = scaler.transform(train_X)
    test_X = scaler.transform(test_X)
    ## train a sparse filter on both train and test data
    sf = SparseFilter(n_features=20, n_iterations=1000)
    sf.fit(np.r_[train_X, test_X])
    train_sf_X = sf.transform(train_X)
    test_sf_X = sf.transform(test_X)
    print train_sf_X
    print test_sf_X
    ss = StandardScaler()
    train_combined_X = ss.fit_transform(np.c_[train_sf_X, train_feat_X])
    test_combined_X = ss.transform(np.c_[test_sf_X, test_feat_X])
    self.__clf.fit(train_combined_X, train_y.ravel())
    self.__y = self.__clf.predict(test_combined_X)
    feature_importance = self.__clf.feature_importances_
    feature_importance = 100.0 * (feature_importance / feature_importance.max())
    print feature_importance
Example 7: prepare_data
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def prepare_data():
    # prepare data
    from sklearn import datasets
    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]
    y = iris.target
    print('Class labels:', np.unique(y))
    print(X.shape, y.shape)
    # split train and test
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
    print(X_train.shape, X_test.shape)
    print('Labels counts in y:', np.bincount(y))
    print('Labels counts in y_train:', np.bincount(y_train))
    print('Labels counts in y_test:', np.bincount(y_test))
    # scaler
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    sc.fit(X_train)  # mean + sd of train data
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    return X_train_std, X_test_std, y_train, y_test
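The fit-then-transform pair at the end of this example can be collapsed into a single fit_transform call on the training data, as Example 3 does; the test data still reuses the training statistics. A small self-contained sketch (the arrays are made up):

import numpy as np
from sklearn.preprocessing import StandardScaler

X_train = np.array([[5.1, 1.4], [6.2, 4.5], [4.7, 1.3]])  # illustrative measurements
X_test = np.array([[5.9, 4.2]])

sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)  # same result as sc.fit(X_train) followed by sc.transform(X_train)
X_test_std = sc.transform(X_test)        # the test set reuses the training mean and std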
Example 8: __init__
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def __init__(self):
    """
    Constructs a SimulateData object.
    """
    # Read the simulated data.
    simulated = pd.read_csv("simulated.csv", index_col=0)
    predictors = np.asarray(simulated)[:, 0:-1]
    responses = np.asarray(simulated)[:, -1]
    # Divide the simulated data into training and test sets.
    predictors_training, predictors_test,\
        self.responses_training, self.responses_test =\
        train_test_split(predictors, responses, test_size=0.33)
    # Standardize the predictors, both training and test.
    scaler = StandardScaler()
    scaler.fit(predictors_training)
    self.predictors_training = scaler.transform(predictors_training)
    self.predictors_test = scaler.transform(predictors_test)
    # Keep track of the number of samples in the training and test sets,
    # and also the number of features.
    self.training_sample_count = len(self.responses_training)
    self.test_sample_count = len(self.responses_test)
    self.feature_count = np.size(predictors, 1)
    return None
Example 9: kfolds_cv
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def kfolds_cv(estimator, X, y):
    num_folds = 10
    kf = KFold(len(X), n_folds=num_folds, shuffle=True)
    yhat_train = np.zeros(len(y), dtype=y.dtype)
    yhat_test = np.zeros(len(y), dtype=y.dtype)
    train_err = []
    test_err = []
    for train_idx, test_idx in kf:
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        # Scale the data
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        # fit the estimator (estimator.__class__.__name__)
        estimator = estimator.fit(X_train_scaled, y_train)
        yhat_train = estimator.predict(X_train_scaled)
        yhat_test = estimator.predict(X_test_scaled)
        # store train and test error
        train_err.append(rmsle(y_train, yhat_train))
        test_err.append(rmsle(y_test, yhat_test))
    return {"Model Name": estimator.__class__.__name__,
            "Err Train": np.mean(train_err),
            "Err Test": np.mean(test_err)}
Example 10: testLogistic
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def testLogistic(lbda=1.0, n_components=20, kbest=4):
    # X = otto.data[:1000, :20]
    # y = otto.target[:1000]
    otto = load_otto()
    X = otto.data[:, :]
    y = otto.target[:]
    # n_components = 20
    # kbest = 4
    # print 'y.shape =', y.shape
    scalar = StandardScaler().fit(X)
    X = scalar.transform(X)
    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=kbest)
    combined_features = FeatureUnion(
        [("pca", pca), ('univ_select', selection)]
    )
    X_features = combined_features.fit(X, y).transform(X)
    logistic = LogisticRegression(C=1.0 / lbda)
    pipe = Pipeline(steps=[('features', combined_features), ('logistic', logistic)])
    trainData = X
    trainTarget = y
    pipe.fit(trainData, trainTarget)
    # print trainTarget
    test_otto = load_testotto()
    testData = test_otto.data
    testData = scalar.transform(testData)
    # logging.debug('lambda=%.3f: score is %.3f' % (lbda, pipe.score()))
    # save the prediction
    prediction = pipe.predict_proba(testData)
    proba = pipe.predict_proba(testData)
    save_submission(lbda, proba, prediction)
Example 11: exp1
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def exp1():
    train, y, test, idx = get_data_1()
    train = np.log1p(train.astype(float))
    test = np.log1p(test.astype(float))
    scaler = StandardScaler().fit(train)
    train = scaler.transform(train)
    test = scaler.transform(test)
    mtrain = pd.read_csv('meta_features_train.csv')
    mtest = pd.read_csv('meta_features_test.csv')
    scaler2 = StandardScaler().fit(mtrain)
    mtrain = scaler2.transform(mtrain)
    mtest = scaler2.transform(mtest)
    train = np.column_stack((train, mtrain))
    test = np.column_stack((test, mtest))
    rtrain_nn, rtest_nn = nn_features(train, y, test, model=build_nn2, random_state=1, n_folds=5, early_stop=50)
    rtrain_nn_total = rtrain_nn
    rtest_nn_total = rtest_nn
    for i in range(9):
        rand_seed = i * 113 + 9201
        rtrain_nn, rtest_nn = nn_features(train, y, test, model=build_nn2, random_state=rand_seed, n_folds=5, early_stop=50)
        rtrain_nn_total += rtrain_nn
        rtest_nn_total += rtest_nn
    pd.DataFrame(data=rtrain_nn_total).to_csv('rtrain_nn_last.csv', index=False)
    pd.DataFrame(data=rtest_nn_total).to_csv('rtest_nn_last.csv', index=False)
    pd.DataFrame(data=rtrain_nn_total / 10).to_csv('rtrain_nn_final.csv', index=False)
    pd.DataFrame(data=rtest_nn_total / 10).to_csv('rtest_nn_final.csv', index=False)
Example 12: _xgboost_transform
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def _xgboost_transform(self, X, X_new, y=None):
    for column_name in self._devided_features['class']:
        current_X_columns = copy(list(X.columns.values))
        current_X_columns.remove(column_name)
        current_X, _, _, X_test = self._get_X_and_y_by_column_name_with_imputs(X,
                                                                               current_X_columns,
                                                                               column_name)
        if X_test.empty is False:
            scaler = StandardScaler().fit(current_X)
            y_pred = self._classifiers[column_name].predict(xgb.DMatrix(scaler.transform(X_test)))
            y_pred = self._label_encoders[column_name].inverse_transform(y_pred.astype(int))
            self._set_pred_values_to_df(list(X_test.index.values), X_new, y_pred, column_name)
    for column_name in self._devided_features['regr']:
        current_X_columns = copy(list(X.columns.values))
        current_X_columns.remove(column_name)
        current_X, _, _, X_test = self._get_X_and_y_by_column_name_with_imputs(X,
                                                                               current_X_columns,
                                                                               column_name)
        if X_test.empty is False:
            scaler = StandardScaler().fit(current_X)
            y_pred = self._regressors[column_name].predict(xgb.DMatrix(scaler.transform(X_test)))
            y_pred = self._label_scalers[column_name].inverse_transform(y_pred)
            self._set_pred_values_to_df(list(X_test.index.values), X_new, y_pred, column_name)
Example 13: data_processing
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def data_processing(train, test, features):
    # train['StreetNo'] = train['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    # test['StreetNo'] = test['Address'].apply(lambda x: x.split(' ', 1)[0] if x.split(' ', 1)[0].isdigit() else 0)
    # train['Address'] = train['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    # test['Address'] = test['Address'].apply(lambda x: x.split(' ', 1)[1] if x.split(' ', 1)[0].isdigit() else x)
    # train['hour'] = train['Dates'].apply(lambda x: x[11:13] if len(x) > 4 else 12)
    # test['hour'] = test['Dates'].apply(lambda x: x[11:13] if len(x) > 4 else 12)
    # train['dark'] = train['Dates'].apply(lambda x: 1 if (len(x) > 4 and int(x[11:13]) >= 18 and int(x[11:13]) < 6) else 0)
    # test['dark'] = test['Dates'].apply(lambda x: 1 if (len(x) > 4 and int(x[11:13]) >= 18 and int(x[11:13]) < 6) else 0)
    # features += ['hour','dark','StreetNo']
    print("Filling NAs")
    # print(train.mode())
    train = train.fillna(train.median().iloc[0])
    test = test.fillna(test.median().iloc[0])
    print("Label Encoder")
    le = LabelEncoder()
    for col in features:
        le.fit(list(train[col]) + list(test[col]))
        train[col] = le.transform(train[col])
        test[col] = le.transform(test[col])
    le.fit(list(train[target]))
    train[target] = le.transform(train[target])
    print("Standard Scaler")
    scaler = StandardScaler()
    for col in features:
        scaler.fit(list(train[col]))
        train[col] = scaler.transform(train[col])
        test[col] = scaler.transform(test[col])
    return train, test, features
Example 14: load_data
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def load_data(dataset, scale=False):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: The folder in ../data/ containing the training/testing numpy arrays
    '''
    print '... loading data'
    path = "../data/" + dataset + "/"
    # training set
    trainingData = numpy.load(path + "training.data.npy")
    trainingIndices = numpy.load(path + "training.indices.npy")
    trainingIndptr = numpy.load(path + "training.indptr.npy")
    training_y = numpy.load(path + "training.labels.npy")
    training_X = scipy.sparse.csr_matrix((trainingData, trainingIndices, trainingIndptr))
    # testing set
    testingData = numpy.load(path + "testing.data.npy")
    testingIndices = numpy.load(path + "testing.indices.npy")
    testingIndptr = numpy.load(path + "testing.indptr.npy")
    testing_y = numpy.load(path + "testing.labels.npy")
    testing_X = scipy.sparse.csr_matrix((testingData, testingIndices, testingIndptr))
    # scale the data
    if scale:
        print "..training scaler"
        scaler = StandardScaler(with_mean=False)
        scaler.fit(training_X)
        print "..scaling features"
        training_X = scaler.transform(training_X)
        testing_X = scaler.transform(testing_X)
    return [(training_X, training_y), (testing_X, testing_y)]
Example 15: sgc_test
# Required module: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import transform [as alias]
def sgc_test(X, y, weight):
    from sklearn.linear_model import SGDClassifier
    from sklearn import cross_validation
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import StandardScaler
    for i in range(0, 1):
        X_train, X_test, y_train, y_test, weight_train, weight_test = cross_validation.train_test_split(
            X, y, weight, test_size=0.2, random_state=0)
        clf = SGDClassifier(loss="hinge", n_iter=100, n_jobs=-1, penalty="l2")
        # clf = LogisticRegression(max_iter=100)
        scaler = StandardScaler(with_mean=False)
        scaler.fit(X_train)  # Don't cheat - fit only on training data
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)  # apply same transformation to test data
        clf.fit(X_train, y_train, sample_weight=weight_train)
        y_pred = clf.predict(X_train)
        # print(confusion_matrix(y_train, y_pred))
        print(clf.score(X_train, y_train, weight_train))
        y_pred = clf.predict(X_test)
        # print(confusion_matrix(y_test, y_pred))
        print(clf.score(X_test, y_test, weight_test))
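Examples 14 and 15 construct the scaler with with_mean=False. For sparse input like Example 14's CSR matrices this is required: subtracting the mean would make the matrix dense, so only the per-feature scaling step is applied. A minimal sketch of that behaviour on made-up sparse data:

import scipy.sparse as sp
from sklearn.preprocessing import StandardScaler

X_sparse = sp.random(100, 20, density=0.1, format="csr", random_state=0)
scaler = StandardScaler(with_mean=False)   # centering a sparse matrix would densify it
X_scaled = scaler.fit_transform(X_sparse)  # divides each column by its std; the result stays sparse
print(type(X_scaled), X_scaled.shape)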