本文整理汇总了Python中sklearn.preprocessing.Scaler.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python Scaler.fit_transform方法的具体用法?Python Scaler.fit_transform怎么用?Python Scaler.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.Scaler
的用法示例。
在下文中一共展示了Scaler.fit_transform方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: run_svm
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
def run_svm(svc, X):
    """Standardize the feature matrix and return svc's predictions on it.

    NOTE(review): a fresh Scaler is fit on X itself, so the scaling
    statistics come from the data being predicted, not from the data
    the classifier was trained on — confirm this is intended.
    """
    features = Scaler().fit_transform(X.copy())
    return svc.predict(features)
示例2: data_to_kernels
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
def data_to_kernels(tr_data, te_data):
    """Standardize, power-normalize and L2-normalize both sets, then build linear kernels.

    The scaler is fit on the training data only (in place, copy=False) and
    its statistics are reused for the test data.  Returns
    (train_kernel, test_kernel): the Gram matrix of the training data and
    the test-versus-train dot-product matrix.
    """
    standardizer = Scaler(copy=False)
    # Fit on train, then apply the same transform to test (both in place).
    standardizer.fit_transform(tr_data)
    standardizer.transform(te_data)
    tr_data = L2_normalize(power_normalize(tr_data, 0.5))
    te_data = L2_normalize(power_normalize(te_data, 0.5))
    return np.dot(tr_data, tr_data.T), np.dot(te_data, tr_data.T)
示例3: process_data
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
def process_data(self):
    """Load train/test CSVs, build scaled + tree-reduced CV folds, and pickle them.

    Populates:
      self.cv_data            - 2-fold CV tuples after scaling AND feature reduction
      self.cv_data_nonreduced - the same folds after scaling only
      self.testMat / self.testMat_nonreduced - test matrix, reduced / unreduced
    All four objects are also dumped together to "allData.pkl".
    """
    test = pandas.read_csv("test.csv")
    testMat = test.as_matrix()
    train = pandas.read_csv("train.csv")
    trainMat = train.as_matrix()
    trainResult = trainMat[:, 0]  # first column holds the label
    trainMat = trainMat[:, 1:]
    cv = StratifiedKFold(trainResult, 2)
    reduceFeatures = ExtraTreesClassifier(
        compute_importances=True, random_state=1234, n_jobs=self.cpus,
        n_estimators=1000, criterion="gini"
    )
    reduceFeatures.fit(trainMat, trainResult)
    trainScaler = Scaler()
    self.cv_data = []
    self.cv_data_nonreduced = []
    for train_idx, test_idx in cv:
        X_train = trainMat[train_idx, :]
        X_test = trainMat[test_idx, :]
        # BUG FIX: trainResult is 1-D (a single column), so the original
        # trainResult[train, :] raised "too many indices"; index with the
        # fold indices only.
        Y_train = trainResult[train_idx]
        Y_test = trainResult[test_idx]
        # Fit the scaler on the training fold, reuse it for the held-out fold.
        X_train = trainScaler.fit_transform(X_train)
        X_test = trainScaler.transform(X_test)
        self.cv_data_nonreduced.append((X_train, X_test, Y_train, Y_test))
        X_train = reduceFeatures.transform(X_train)
        X_test = reduceFeatures.transform(X_test)
        self.cv_data.append((X_train, X_test, Y_train, Y_test))
    # NOTE(review): the scaler applied here carries statistics from the LAST
    # CV fold only — presumably intentional, but worth confirming.
    testMat = trainScaler.transform(testMat)
    self.testMat_nonreduced = testMat
    self.testMat = reduceFeatures.transform(testMat)
    allData = self.testMat, self.cv_data, self.testMat_nonreduced, self.cv_data_nonreduced
    # Binary mode + context manager so the pickle is written correctly and
    # the handle is closed even on error.
    with open("allData.pkl", "wb") as data_handle:
        pickle.dump(allData, data_handle)
示例4: get_sl_test_data
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
def get_sl_test_data(fileEvents, fileLabels, includedChannels, useMeans=False, parentIndices=None):
    """Extract the selected channels from fileEvents and standardize them.

    Parameters:
      fileEvents       - 2-D event matrix (rows = events, cols = channels)
      fileLabels       - per-event cluster labels, used only when useMeans
      includedChannels - column indices to keep
      useMeans         - if true, collapse standardized events to per-cluster
                         means via get_mean_matrix and return (clusterIds, X)
      parentIndices    - currently unused (the filtering code is disabled)

    Returns the standardized event matrix, or (clusterIds, mean_matrix)
    when useMeans is true.
    """
    X = fileEvents[:, includedChannels].copy()
    X = Scaler().fit_transform(X)
    # Idiom fix: truth-test the flag instead of comparing `== True`.
    if useMeans:
        clusterIds, X = get_mean_matrix(X, fileLabels)
        return clusterIds, X
    return X
示例5: run_svm_validation
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
def run_svm_validation(X1,y1,X2,y2,gammaRange=[0.5],cRange=[0.005],useLinear=False):
    """Grid-search an RBF SVC (class_weight {1: 100}) over the pooled data.

    X1/X2 are stacked and y1/y2 concatenated, features are standardized,
    and a 2-fold stratified grid search over log-spaced C and gamma ranges
    is run.  The best fitted estimator is returned.

    NOTE(review): gammaRange, cRange and useLinear are currently ignored —
    the search always uses the fixed ranges below.
    """
    features = Scaler().fit_transform(np.vstack((X1, X2)))
    labels = np.hstack((y1, y2))
    search = GridSearchCV(
        SVC(class_weight={1: 100}),
        param_grid={"gamma": 10.0 ** np.arange(-5, 4),
                    "C": 10.0 ** np.arange(-2, 9)},
        cv=StratifiedKFold(y=labels, k=2),
    )
    search.fit(features, labels)
    print("The best classifier is: ", search.best_estimator_)
    return search.best_estimator_
示例6: load_iris
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
from sklearn.datasets import load_iris
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV
# NOTE(review): np, Scaler and SVC are imported earlier in the original
# script (not shown in this excerpt).
iris_dataset = load_iris()
X, Y = iris_dataset.data, iris_dataset.target
# It is usually a good idea to scale the data for SVM training.
# We are cheating a bit in this example in scaling all of the data,
# instead of fitting the transformation on the trainingset and
# just applying it on the test set.
scaler = Scaler()
X = scaler.fit_transform(X)
# For an initial search, a logarithmic grid with basis
# 10 is often helpful. Using a basis of 2, a finer
# tuning can be achieved but at a much higher cost.
C_range = 10. ** np.arange(-5, 5)
gamma_range = 10. ** np.arange(-5, 5)
param_grid = dict(gamma=gamma_range, C=C_range)
# Exhaustive search over the C x gamma grid with 5-fold stratified CV.
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=StratifiedKFold(y=Y, k=5))
grid.fit(X, Y)
print("The best classifier is: ", grid.best_estimator_)
示例7: Scaler
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
# NOTE(review): `iris`, `X`, np, Scaler, SVC, GridSearchCV and
# StratifiedKFold are defined/imported earlier in the original script
# (not shown in this excerpt).
Y = iris.target
# dataset for decision function visualization: keep only the first two
# features, drop class 0, and remap labels {1, 2} -> {0, 1}
X_2d = X[:, :2]
X_2d = X_2d[Y > 0]
Y_2d = Y[Y > 0]
Y_2d -= 1
# It is usually a good idea to scale the data for SVM training.
# We are cheating a bit in this example in scaling all of the data,
# instead of fitting the transformation on the training set and
# just applying it on the test set.
scaler = Scaler()
X = scaler.fit_transform(X)
# NOTE(review): fit_transform here refits the scaler on the 2-feature
# subset, so X and X_2d end up standardized with different statistics.
X_2d = scaler.fit_transform(X_2d)
##############################################################################
# Train classifier
#
# For an initial search, a logarithmic grid with basis
# 10 is often helpful. Using a basis of 2, a finer
# tuning can be achieved but at a much higher cost.
C_range = 10.0 ** np.arange(-2, 9)
gamma_range = 10.0 ** np.arange(-5, 4)
param_grid = dict(gamma=gamma_range, C=C_range)
# Exhaustive C x gamma search with 3-fold stratified CV on the full data.
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=StratifiedKFold(y=Y, k=3))
grid.fit(X, Y)
示例8: list
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
from errorcurves import ErrorCurves
import numpy as np
from sklearn import mixture
import pandas
# NOTE(review): `Scaler` and `datetime` are imported earlier in the
# original script (not shown in this excerpt).
df = pandas.read_csv('TrainingDataset.csv')
df_test = pandas.read_csv('TestDataset.csv')
ids = df_test.pop('id')
outcomes = list()
train_sets = list()
# Quantitative feature columns are assumed to contain 'Q' in their name
# — TODO confirm against the dataset schema.
quants = [i for i in df.columns if 'Q' in i]
df_quants = df[quants]
# Standardize the quantitative columns (NaNs filled with 0 first), then
# cluster them with a Dirichlet-process GMM and attach the cluster id
# of each row as a new feature.
scaler = Scaler()
scaled = scaler.fit_transform(df_quants.fillna(0))
dpgmm = mixture.DPGMM(n_components = 75)
dpgmm.fit(scaled)
clusters = dpgmm.predict(scaled)
df['clusters'] = clusters
# Parse dates
jan1 = datetime(2000,1,1)
# Drop all rows where response variable == NaN
# One training set per monthly outcome: keep rows with a positive
# Outcome_M<i>, pop it as the target, and drop the other outcome columns.
# NOTE(review): the loop body continues beyond this excerpt.
for i in range(1,13):
    df_i = df[df['Outcome_M'+str(i)]>0]
    outcomes.append(df_i.pop('Outcome_M'+str(i)))
    # NOTE(review): in Python 2 this comprehension leaks its variable and
    # rebinds `i` to the last matching column name — verify nothing after
    # this line relies on `i` being the month number.
    [df_i.pop(i) for i in df_i.columns if 'Out' in i]
    #drop nas first
示例9: load_kernels
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
def load_kernels(
dataset, tr_norms=['std', 'sqrt', 'L2'], te_norms=['std', 'sqrt', 'L2'],
analytical_fim=False, pi_derivatives=False, sqrt_nr_descs=False,
only_train=False, verbose=0, do_plot=False, outfile=None):
tr_outfile = outfile % "train" if outfile is not None else outfile
# Load sufficient statistics.
samples, _ = dataset.get_data('train')
tr_data, tr_counts, tr_labels = load_video_data(
dataset, samples, outfile=tr_outfile, analytical_fim=analytical_fim,
pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs, verbose=verbose)
if verbose > 0:
print "Train data: %dx%d" % tr_data.shape
if do_plot:
plot_fisher_vector(tr_data[0], 'before')
scalers = []
for norm in tr_norms:
if norm == 'std':
scaler = Scaler()
tr_data = scaler.fit_transform(tr_data)
scalers.append(scaler)
elif norm == 'sqrt':
tr_data = power_normalize(tr_data, 0.5)
elif norm == 'sqrt_cnt':
tr_data = approximate_signed_sqrt(
tr_data, tr_counts, pi_derivatives=pi_derivatives)
elif norm == 'L2':
tr_data = L2_normalize(tr_data)
if do_plot:
plot_fisher_vector(tr_data[0], 'after_%s' % norm)
tr_kernel = np.dot(tr_data, tr_data.T)
if only_train:
return tr_kernel, tr_labels, scalers, tr_data
te_outfile = outfile % "test" if outfile is not None else outfile
# Load sufficient statistics.
samples, _ = dataset.get_data('test')
te_data, te_counts, te_labels = load_video_data(
dataset, samples, outfile=te_outfile, analytical_fim=analytical_fim,
pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs, verbose=verbose)
if verbose > 0:
print "Test data: %dx%d" % te_data.shape
ii = 0
for norm in te_norms:
if norm == 'std':
te_data = scalers[ii].transform(te_data)
ii += 1
elif norm == 'sqrt':
te_data = power_normalize(te_data, 0.5)
elif norm == 'sqrt_cnt':
te_data = approximate_signed_sqrt(
te_data, te_counts, pi_derivatives=pi_derivatives)
elif norm == 'L2':
te_data = L2_normalize(te_data)
te_kernel = np.dot(te_data, tr_data.T)
return tr_kernel, tr_labels, te_kernel, te_labels
示例10: KMPBase
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
#.........这里部分代码省略.........
else:
if self.init_components is None:
if self.verbose: print "Selecting components..."
self.components_ = select_components(X, y,
self.n_components,
random_state=random_state)
else:
self.components_ = self.init_components
n_components = self.components_.shape[0]
n_nonzero_coefs = self.n_nonzero_coefs
if 0 < n_nonzero_coefs and n_nonzero_coefs <= 1:
n_nonzero_coefs = int(n_nonzero_coefs * n_components)
n_nonzero_coefs = int(n_nonzero_coefs)
if n_nonzero_coefs > n_components:
raise AttributeError("n_nonzero_coefs cannot be bigger than "
"n_components.")
if self.verbose: print "Computing dictionary..."
start = time.time()
K = pairwise_kernels(X, self.components_, metric=self.metric,
filter_params=True, n_jobs=self.n_jobs,
**self._kernel_params())
if self.verbose: print "Done in", time.time() - start, "seconds"
if self.scale:
if self.verbose: print "Scaling dictionary"
start = time.time()
copy = True if self.metric == "precomputed" else False
self.scaler_ = Scaler(copy=copy)
K = self.scaler_.fit_transform(K)
if self.verbose: print "Done in", time.time() - start, "seconds"
# FIXME: this allocates a lot of intermediary memory
norms = np.sqrt(np.sum(K ** 2, axis=0))
return n_nonzero_coefs, K, y, norms
def _fit_multi(self, K, y, Y, n_nonzero_coefs, norms):
if self.verbose: print "Starting training..."
start = time.time()
coef = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(_run_iterator)(self._get_estimator(),
self._get_loss(),
K, Y[:, i], n_nonzero_coefs, norms,
self.n_refit, self.check_duplicates)
for i in xrange(Y.shape[1]))
self.coef_ = np.array(coef)
if self.verbose: print "Done in", time.time() - start, "seconds"
def _score(self, y_true, y_pred):
if self.score_func == "auc":
return auc(y_true, y_pred)
if hasattr(self, "lb_"):
y_pred = self.lb_.inverse_transform(y_pred, threshold=0.5)
if self.score_func is None:
return np.mean(y_true == y_pred)
else:
return self.score_func(y_true, y_pred)
else:
# FIXME: no need to ravel y_pred if y_true is 2d!
return -np.mean((y_true - y_pred.ravel()) ** 2)
示例11: normalize
# 需要导入模块: from sklearn.preprocessing import Scaler [as 别名]
# 或者: from sklearn.preprocessing.Scaler import fit_transform [as 别名]
def normalize(self, data, n=N_COMPONENTS):
    """Standardize `data` (zero mean, unit variance per column) and return it.

    NOTE(review): the `n` parameter (default N_COMPONENTS) is currently
    unused by this method.
    """
    raw = np.array(data, dtype='float')
    return Scaler().fit_transform(raw)