This article collects typical usage examples of the RobustScaler.transform method from Python's sklearn.preprocessing module. If you have been wondering what RobustScaler.transform does, how to call it, or what it looks like in real code, the curated examples below may help. You can also read further about its containing class, sklearn.preprocessing.RobustScaler.
The following shows 15 code examples of RobustScaler.transform, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
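Before the collected examples, here is a minimal, self-contained sketch of the fit-on-train / transform-on-test pattern that most of the examples below follow. The toy arrays and variable names are purely illustrative and do not come from any of the listed projects.

import numpy as np
from sklearn.preprocessing import RobustScaler

# Toy data: the last training row is an outlier. RobustScaler centers and
# scales using the median and IQR, so the outlier barely affects the result.
X_train = np.array([[1.0, 200.0],
                    [2.0, 210.0],
                    [3.0, 190.0],
                    [100.0, 5000.0]])
X_test = np.array([[2.5, 205.0]])

scaler = RobustScaler()
X_train_scl = scaler.fit_transform(X_train)  # fit on the training data only
X_test_scl = scaler.transform(X_test)        # reuse the fitted statistics
print(X_test_scl)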
Example 1: best_rp_nba
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def best_rp_nba(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    rp = GaussianRandomProjection(n_components=X_train_scl.shape[1])
    X_train_transformed = rp.fit_transform(X_train_scl, y_train)
    X_test_transformed = rp.transform(X_test_scl)
    ## top 2
    kurt = kurtosis(X_train_transformed)
    i = kurt.argsort()[::-1]
    X_train_transformed_sorted = X_train_transformed[:, i]
    X_train_transformed = X_train_transformed_sorted[:, 0:2]
    kurt = kurtosis(X_test_transformed)
    i = kurt.argsort()[::-1]
    X_test_transformed_sorted = X_test_transformed[:, i]
    X_test_transformed = X_test_transformed_sorted[:, 0:2]
    # save
    filename = './' + self.save_dir + '/nba_rp_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/nba_rp_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/nba_rp_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/nba_rp_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example 2: num_scaler
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def num_scaler(d_num, t_num):
    scl = RobustScaler()
    scl.fit(d_num)
    d_num = scl.transform(d_num)
    t_num = scl.transform(t_num)
    return d_num, t_num
Example 3: best_ica_wine
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def best_ica_wine(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    ica = FastICA(n_components=X_train_scl.shape[1])
    X_train_transformed = ica.fit_transform(X_train_scl, y_train)
    X_test_transformed = ica.transform(X_test_scl)
    ## top 2
    kurt = kurtosis(X_train_transformed)
    i = kurt.argsort()[::-1]
    X_train_transformed_sorted = X_train_transformed[:, i]
    X_train_transformed = X_train_transformed_sorted[:, 0:2]
    kurt = kurtosis(X_test_transformed)
    i = kurt.argsort()[::-1]
    X_test_transformed_sorted = X_test_transformed[:, i]
    X_test_transformed = X_test_transformed_sorted[:, 0:2]
    # save
    filename = './' + self.save_dir + '/wine_ica_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_ica_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_ica_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_ica_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example 4: processing
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def processing(df):
    dummies_df = pd.get_dummies(df["City Group"])
    def add_CG(name):
        return "CG_" + name
    dummies_df = dummies_df.rename(columns=add_CG)
    # print dummies_df.head()
    df = pd.concat([df, dummies_df.iloc[:, 0]], axis=1)
    dummies_df = pd.get_dummies(df["Type"])
    def add_Type(name):
        return "Type_" + name
    dummies_df = dummies_df.rename(columns=add_Type)
    df = pd.concat([df, dummies_df.iloc[:, 0:3]], axis=1)
    # try to put in age as a column
    def add_Age(string):
        age = datetime.datetime.now() - datetime.datetime.strptime(string, "%m/%d/%Y")
        return age.days
    df["Age"] = df["Open Date"].map(add_Age)
    df = df.drop(["Id", "Open Date", "City", "City Group", "Type", "revenue"], axis=1)
    # scaler = StandardScaler().fit(df)
    scaler = RobustScaler().fit(df)
    df = scaler.transform(df)
    # print df.head()
    return df
Example 5: _robust_scaler
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def _robust_scaler(self, input_df):
    """Uses scikit-learn's RobustScaler to scale the features using statistics that are robust to outliers

    Parameters
    ----------
    input_df: pandas.DataFrame {n_samples, n_features + ['class', 'group', 'guess']}
        Input DataFrame to scale

    Returns
    -------
    scaled_df: pandas.DataFrame {n_samples, n_features + ['guess', 'group', 'class']}
        Returns a DataFrame containing the scaled features
    """
    training_features = input_df.loc[input_df['group'] == 'training'].drop(['class', 'group', 'guess'], axis=1)
    if len(training_features.columns.values) == 0:
        return input_df.copy()
    # The scaler must be fit on only the training data
    scaler = RobustScaler()
    scaler.fit(training_features.values.astype(np.float64))
    scaled_features = scaler.transform(input_df.drop(['class', 'group', 'guess'], axis=1).values.astype(np.float64))
    for col_num, column in enumerate(input_df.drop(['class', 'group', 'guess'], axis=1).columns.values):
        input_df.loc[:, column] = scaled_features[:, col_num]
    return input_df.copy()
Example 6: ica_analysis
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    ##
    ## ICA
    ##
    ica = FastICA(n_components=X_train_scl.shape[1])
    X_ica = ica.fit_transform(X_train_scl)
    ##
    ## Plots
    ##
    ph = plot_helper()
    kurt = kurtosis(X_ica)
    print(kurt)
    title = 'Kurtosis (FastICA) for ' + data_set_name
    name = data_set_name.lower() + '_ica_kurt'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_simple_bar(np.arange(1, len(kurt)+1, 1),
                       kurt,
                       np.arange(1, len(kurt)+1, 1).astype('str'),
                       'Feature Index',
                       'Kurtosis',
                       title,
                       filename)
Example 7: nn_wine_orig
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def nn_wine_orig(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    self.part4.nn_analysis(X_train_scl, X_test_scl, y_train, y_test, 'Wine', 'Neural Network Original')
Example 8: lda_analysis
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    ##
    ## Plots
    ##
    ph = plot_helper()
    scores = []
    train_scores = []
    rng = range(1, X_train_scl.shape[1]+1)
    for i in rng:
        lda = LinearDiscriminantAnalysis(n_components=i)
        # 3-fold cross validation on the training set
        cv = KFold(n_splits=3, shuffle=True)
        cv_scores = []
        for train, test in cv.split(X_train_scl):
            lda.fit(X_train_scl[train], y_train[train])
            score = lda.score(X_train_scl[test], y_train[test])
            cv_scores.append(score)
        mean_score = np.mean(cv_scores)
        scores.append(mean_score)
        # train score
        lda = LinearDiscriminantAnalysis(n_components=i)
        lda.fit(X_train_scl, y_train)
        train_score = lda.score(X_train_scl, y_train)
        train_scores.append(train_score)
        print(i, mean_score)
    ##
    ## Score Plot
    ##
    title = 'Score Summary Plot (LDA) for ' + data_set_name
    name = data_set_name.lower() + '_lda_score'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(rng,
                   [scores, train_scores],
                   [None, None],
                   ['cross validation score', 'training score'],
                   cm.viridis(np.linspace(0, 1, 2)),
                   ['o', '*'],
                   title,
                   'n_components',
                   'Score',
                   filename)
Example 9: test_robustscaler_vs_sklearn
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def test_robustscaler_vs_sklearn():
    # Compare msmbuilder.preprocessing.RobustScaler
    # with sklearn.preprocessing.RobustScaler
    robustscalerr = RobustScalerR()
    robustscalerr.fit(np.concatenate(trajs))
    robustscaler = RobustScaler()
    robustscaler.fit(trajs)
    y_ref1 = robustscalerr.transform(trajs[0])
    y1 = robustscaler.transform(trajs)[0]
    np.testing.assert_array_almost_equal(y_ref1, y1)
Example 10: best_lda_cluster_wine
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def best_lda_cluster_wine(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data_lda_best()
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    ##
    ## K-Means
    ##
    km = KMeans(n_clusters=4, algorithm='full')
    X_train_transformed = km.fit_transform(X_train_scl)
    X_test_transformed = km.transform(X_test_scl)
    # save
    filename = './' + self.save_dir + '/wine_kmeans_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_kmeans_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_kmeans_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_kmeans_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
    ##
    ## GMM
    ##
    gmm = GaussianMixture(n_components=4, covariance_type='full')
    gmm.fit(X_train_scl)
    # GaussianMixture has no transform(); use the posterior cluster probabilities as features
    X_train_transformed = gmm.predict_proba(X_train_scl)
    X_test_transformed = gmm.predict_proba(X_test_scl)
    # save
    filename = './' + self.save_dir + '/wine_gmm_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_gmm_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_gmm_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_gmm_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example 11: best_pca_wine
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def best_pca_wine(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    pca = PCA(n_components=3)
    X_train_transformed = pca.fit_transform(X_train_scl, y_train)
    X_test_transformed = pca.transform(X_test_scl)
    # save
    filename = './' + self.save_dir + '/wine_pca_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_pca_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_pca_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_pca_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example 12: best_lda_nba
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
def best_lda_nba(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    lda = LinearDiscriminantAnalysis(n_components=2)
    X_train_transformed = lda.fit_transform(X_train_scl, y_train)
    X_test_transformed = lda.transform(X_test_scl)
    # save
    filename = './' + self.save_dir + '/nba_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/nba_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/nba_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/nba_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example 13: Learned
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
class Learned(Model):
    def __init__(self, *args, scale=False, center=False, **kwargs):
        """
        A machine learned model. Beyond :class:`revscoring.Model`, "Learned"
        models implement
        :func:`~revscoring.scoring.models.Learned.fit` and
        :func:`~revscoring.scoring.models.Learned.cross_validate`.
        """
        super().__init__(*args, **kwargs)
        self.trained = None
        if scale or center:
            self.scaler = RobustScaler(with_centering=center,
                                       with_scaling=scale)
        else:
            self.scaler = None
        self.params.update({
            'scale': scale,
            'center': center
        })

    def train(self, values_labels):
        """
        Fits the model using labeled data by learning its shape.

        :Parameters:
            values_labels : [( `<feature_values>`, `<label>` )]
                an iterable of labeled data where `<feature_values>` is an
                ordered collection of predictive values that correspond to
                the :class:`revscoring.Feature` s provided to the constructor
        """
        raise NotImplementedError()

    def fit_scaler_and_transform(self, fv_vectors):
        """
        Fits the internal scaler and returns the scaled feature vectors.

        :Parameters:
            fv_vectors : `iterable` ( `<feature_values>` )
                an iterable of feature value vectors that correspond to the
                `Feature` s provided to the constructor

        :Returns:
            The scaled feature vectors.
        """
        if self.scaler is not None:
            return self.scaler.fit_transform(fv_vectors)
        else:
            return fv_vectors

    def apply_scaling(self, fv_vector):
        if self.scaler is not None:
            if not hasattr(self.scaler, "center_") and \
                    not hasattr(self.scaler, "scale_"):
                raise RuntimeError("Cannot scale a vector before " +
                                   "training the scaler")
            fv_vector = self.scaler.transform([fv_vector])[0]
        return fv_vector

    def _clean_copy(self):
        raise NotImplementedError()

    def cross_validate(self, values_labels, folds=10, processes=1):
        """
        Trains and tests the model against folds of labeled data.

        :Parameters:
            values_labels : [( `<feature_values>`, `<label>` )]
                an iterable of labeled data where `<feature_values>` is an
                ordered collection of predictive values that correspond to
                the `Feature` s provided to the constructor
            folds : `int`
                the number of folds to split the labeled data into
            processes : `int`
                when set to 1, cross-validation will run in the parent
                thread; when set to 2 or greater, a
                :class:`multiprocessing.Pool` will be created
        """
        folds_i = KFold(n_splits=folds, shuffle=True,
                        random_state=0)
        if processes == 1:
            mapper = map
        else:
            pool = Pool(processes=processes or cpu_count())
            mapper = pool.map
        results = mapper(self._cross_score,
                         ((i, [values_labels[i] for i in train_i],
                           [values_labels[i] for i in test_i])
                          for i, (train_i, test_i) in enumerate(
                              folds_i.split(values_labels))))
        agg_score_labels = []
        for score_labels in results:
            agg_score_labels.extend(score_labels)
        self.info['statistics'].fit(agg_score_labels)
        return self.info['statistics']

    def _cross_score(self, i_train_test):
        # ... (the rest of this method is omitted here) ...
Example 14: RobustScaler
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
devtest = './exp/ivectors_semeval_devtest_NGMM_2048_W_2_DIM_200/feats.txt'
dev = './exp/ivectors_semeval_dev_NGMM_2048_W_2_DIM_200/feats.txt'
train = './exp/ivectors_semeval_train_NGMM_2048_W_2_DIM_200/feats.txt'
trainy, trainx = imdb_bag_of_word_libs.loadFeatsText(train)
trainy = imdb_bag_of_word_libs.kaldiID_2_LB(trainy)
evaly, evalx = imdb_bag_of_word_libs.loadFeatsText(dev)
evaly = imdb_bag_of_word_libs.kaldiID_2_LB(evaly)
evaly2, evalx2 = imdb_bag_of_word_libs.loadFeatsText(devtest)
evaly2 = imdb_bag_of_word_libs.kaldiID_2_LB(evaly2)
robust_scaler = RobustScaler()
trainx = robust_scaler.fit_transform(trainx)
evalx = robust_scaler.transform(evalx)
clf = LinearDiscriminantAnalysis()
clf.fit(trainx, trainy)
predictValue = clf.predict(evalx)
print semeval2016_libs.scoreSameOrder(predictValue, configure.SCORE_REF_DEV)
evalx2 = robust_scaler.transform(evalx2)
predictValue = clf.predict(evalx2)
print semeval2016_libs.scoreSameOrder(predictValue, configure.SCORE_REF_DEVTEST)
Example 15: RobustScaler
# Required import: from sklearn.preprocessing import RobustScaler [as alias]
# Or: from sklearn.preprocessing.RobustScaler import transform [as alias]
print 'done in', time.time()-ts, len(x), len(y)
y = imdb_bag_of_word_libs.kaldiID_2_LB(y)
print y[0], x[0]
x = np.array(x)
y = np.array(y)
trainx, trainy = x, y
robust_scaler = RobustScaler()
trainx = robust_scaler.fit_transform(trainx)
evalx = robust_scaler.transform(testx)
clf = LinearDiscriminantAnalysis()
clf.fit(trainx, trainy)
predictValue = clf.predict(evalx)
sdict = dict()
ptrue = list()
for id, score in zip(testy, predictValue):
    sdict[id] = score
    # print id, score
    truevalue = int(id.split('_')[2])
    if truevalue >= 5:
        ptrue.append('1')
    else:
        ptrue.append('0')