本文整理汇总了Python中sklearn.preprocessing.RobustScaler类的典型用法代码示例。如果您正苦于以下问题:Python RobustScaler类的具体用法?Python RobustScaler怎么用?Python RobustScaler使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了RobustScaler类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scale_feature_matrix
def scale_feature_matrix(feature_M, linear=False, outliers=False):
    """Scale the non-binary columns of a feature matrix.

    Columns with exactly two distinct values are treated as binary flags:
    they are excluded from scaling and re-appended, unscaled, on the right.

    Parameters
    ----------
    feature_M : pandas.DataFrame
        Feature matrix to scale.
    linear : bool
        Unused; kept for backward compatibility with existing callers.
    outliers : bool
        If True, use RobustScaler (median / IQR); otherwise StandardScaler
        (mean / unit variance).

    Returns
    -------
    (numpy.ndarray, scaler)
        The scaled matrix with binary columns appended, and the fitted
        scaler object (for transforming new data later).
    """
    from sklearn.preprocessing import StandardScaler, RobustScaler
    import numpy as np

    binary_fields = [col for col in feature_M.columns if len(set(feature_M[col])) == 2]
    if outliers:
        # RobustScaler centers on the median and scales by the IQR,
        # so it is not dominated by outliers.
        scaler_obj = RobustScaler()
        print('centering around median')
    else:
        # StandardScaler: zero mean, unit variance.
        scaler_obj = StandardScaler()
        print('centering around mean')
    print('found these binaries')
    print('-' * 10)
    print('\n'.join(binary_fields))
    X_scaled = scaler_obj.fit_transform(feature_M.drop(binary_fields, axis=1))
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    X_scaled_w_cats = np.c_[X_scaled, feature_M[binary_fields].values]
    return X_scaled_w_cats, scaler_obj
示例2: _robust_scaler
def _robust_scaler(self, input_df):
    """Scale features with scikit-learn's RobustScaler (robust to outliers).

    Parameters
    ----------
    input_df: pandas.DataFrame {n_samples, n_features+['class', 'group', 'guess']}
        Input DataFrame to scale

    Returns
    -------
    scaled_df: pandas.DataFrame {n_samples, n_features + ['guess', 'group', 'class']}
        Returns a DataFrame containing the scaled features
    """
    meta_cols = ['class', 'group', 'guess']
    train_mask = input_df['group'] == 'training'
    training_features = input_df.loc[train_mask].drop(meta_cols, axis=1)
    # Nothing to scale when no feature columns remain.
    if not len(training_features.columns.values):
        return input_df.copy()
    # Fit on the training rows only so test statistics never leak in.
    scaler = RobustScaler()
    scaler.fit(training_features.values.astype(np.float64))
    feature_df = input_df.drop(meta_cols, axis=1)
    scaled = scaler.transform(feature_df.values.astype(np.float64))
    # Write the scaled values back column by column (in place).
    for idx, col in enumerate(feature_df.columns.values):
        input_df.loc[:, col] = scaled[:, idx]
    return input_df.copy()
示例3: processing
def processing(df):
    """Build a numeric feature frame: one-hot 'City Group' and 'Type',
    add an 'Age' column (days since 'Open Date'), drop non-feature columns,
    and robust-scale everything.

    Returns the scaled feature array (as produced by RobustScaler.transform).
    """
    # One-hot encode 'City Group'; keep only the first dummy column.
    cg_dummies = pd.get_dummies(df["City Group"]).rename(columns=lambda n: "CG_" + n)
    df = pd.concat([df, cg_dummies.iloc[:, 0]], axis=1)
    # One-hot encode 'Type'; keep the first three dummy columns.
    type_dummies = pd.get_dummies(df["Type"]).rename(columns=lambda n: "Type_" + n)
    df = pd.concat([df, type_dummies.iloc[:, 0:3]], axis=1)

    def age_in_days(date_str):
        # Age of the restaurant, in days, relative to "now".
        delta = datetime.datetime.now() - datetime.datetime.strptime(date_str, "%m/%d/%Y")
        return delta.days

    df["Age"] = df["Open Date"].map(age_in_days)
    df = df.drop(["Id", "Open Date", "City", "City Group", "Type", "revenue"], axis=1)
    # RobustScaler chosen over StandardScaler for outlier resistance.
    scaler = RobustScaler().fit(df)
    return scaler.transform(df)
示例4: ica_analysis
def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    """Run FastICA on robust-scaled training data and plot the kurtosis
    of each independent component as a bar chart under self.out_dir."""
    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)  # computed for parity; not used below

    ##
    ## ICA: keep as many components as there are features.
    ##
    ica = FastICA(n_components=train_scaled.shape[1])
    components = ica.fit_transform(train_scaled)

    ##
    ## Plots
    ##
    plotter = plot_helper()
    kurt = kurtosis(components)
    print(kurt)
    title = 'Kurtosis (FastICA) for ' + data_set_name
    name = data_set_name.lower() + '_ica_kurt'
    filename = './' + self.out_dir + '/' + name + '.png'
    positions = np.arange(1, len(kurt) + 1, 1)
    plotter.plot_simple_bar(positions,
                            kurt,
                            positions.astype('str'),
                            'Feature Index',
                            'Kurtosis',
                            title,
                            filename)
示例5: best_ica_wine
def best_ica_wine(self):
    """Robust-scale the wine data, run FastICA, keep the two components
    with the highest kurtosis, and save the transformed splits to text
    files under self.save_dir."""
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()
    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    ica = FastICA(n_components=train_scaled.shape[1])
    train_ica = ica.fit_transform(train_scaled, y_train)
    test_ica = ica.transform(test_scaled)

    def top_two(X):
        # Sort components by descending kurtosis and keep the top 2.
        order = kurtosis(X).argsort()[::-1]
        return X[:, order][:, 0:2]

    train_top = top_two(train_ica)
    test_top = top_two(test_ica)

    # save
    for suffix, data in (('x_train', train_top),
                         ('x_test', test_top),
                         ('y_train', y_train),
                         ('y_test', y_test)):
        filename = './' + self.save_dir + '/wine_ica_' + suffix + '.txt'
        pd.DataFrame(data).to_csv(filename, header=False, index=False)
示例6: best_rp_nba
def best_rp_nba(self):
    """Robust-scale the NBA data, apply a Gaussian random projection, keep
    the two components with the highest kurtosis, and save the transformed
    splits to text files under self.save_dir."""
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()
    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    rp = GaussianRandomProjection(n_components=train_scaled.shape[1])
    train_rp = rp.fit_transform(train_scaled, y_train)
    test_rp = rp.transform(test_scaled)

    def top_two(X):
        # Sort components by descending kurtosis and keep the top 2.
        order = kurtosis(X).argsort()[::-1]
        return X[:, order][:, 0:2]

    train_top = top_two(train_rp)
    test_top = top_two(test_rp)

    # save
    for suffix, data in (('x_train', train_top),
                         ('x_test', test_top),
                         ('y_train', y_train),
                         ('y_test', y_test)):
        filename = './' + self.save_dir + '/nba_rp_' + suffix + '.txt'
        pd.DataFrame(data).to_csv(filename, header=False, index=False)
示例7: num_scaler
def num_scaler(d_num, t_num):
    """Fit a RobustScaler on d_num and apply it to both d_num and t_num.

    Returns the scaled (d_num, t_num) pair; the scaler statistics come
    from d_num only, so t_num is scaled consistently with it.
    """
    scaler = RobustScaler()
    scaled_train = scaler.fit_transform(d_num)
    scaled_test = scaler.transform(t_num)
    return scaled_train, scaled_test
示例8: rp_analysis
def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    """Average per-component kurtosis over 1000 Gaussian random projections
    of the robust-scaled training data and plot the result under
    self.out_dir."""
    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)

    kurt_samples = []
    for _ in range(1000):
        ##
        ## Random Projection: a fresh random matrix each iteration.
        ##
        projector = GaussianRandomProjection(n_components=train_scaled.shape[1])
        projected = projector.fit_transform(train_scaled)
        kurt_samples.append(kurtosis(projected))
    mean_kurt = np.mean(kurt_samples, 0)

    ##
    ## Plots
    ##
    plotter = plot_helper()
    title = 'Kurtosis (Randomized Projection) for ' + data_set_name
    name = data_set_name.lower() + '_rp_kurt'
    filename = './' + self.out_dir + '/' + name + '.png'
    positions = np.arange(1, len(mean_kurt) + 1, 1)
    plotter.plot_simple_bar(positions,
                            mean_kurt,
                            positions.astype('str'),
                            'Feature Index',
                            'Kurtosis',
                            title,
                            filename)
示例9: nn_wine_orig
def nn_wine_orig(self):
    """Robust-scale the wine data and run the part-4 neural-network
    analysis on the original (untransformed) feature space."""
    X_train, X_test, y_train, y_test = data_helper().get_wine_data()
    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)
    self.part4.nn_analysis(train_scaled, test_scaled, y_train, y_test, 'Wine', 'Neural Network Original')
示例10: standardize_columns
def standardize_columns(data):
    """
    Robust-scale the weather columns in place and return the frame.

    We decided to standardize the weather factors due to outliers, so
    RobustScaler (median / IQR) is used rather than mean/variance scaling.
    Each column is fitted and scaled independently.
    """
    columns_to_standardize = ['temp', 'atemp', 'humidity', 'windspeed']
    # Renamed from 'min_max_scaler': this is a RobustScaler, not MinMaxScaler.
    scaler = RobustScaler()
    for column in columns_to_standardize:
        # fit_transform requires a 2-D input; data[[column]] keeps a
        # single-column frame (a bare Series is rejected by modern sklearn).
        data[column] = scaler.fit_transform(data[[column]]).ravel()
    return data
示例11: lda_analysis
def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    """Sweep LDA n_components from 1 to the feature count on robust-scaled
    training data, recording 3-fold cross-validation and training scores,
    and save a score-vs-components plot under self.out_dir.

    X_test is scaled but not otherwise used; only the training split is
    scored here.
    """
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    ##
    ## Plots
    ##
    ph = plot_helper()
    scores = []        # mean CV score per n_components
    train_scores = []  # training score per n_components
    rng = range(1, X_train_scl.shape[1]+1)
    for i in rng:
        lda = LinearDiscriminantAnalysis(n_components=i)
        # NOTE(review): KFold(n_samples, n_folds, shuffle=True) iterated
        # directly is the pre-0.18 sklearn.cross_validation API — confirm
        # the installed scikit-learn version still supports it (modern
        # versions use KFold(n_splits=...).split(X)).
        cv = KFold(X_train_scl.shape[0], 3, shuffle=True)
        # cross validation
        cv_scores = []
        for (train, test) in cv:
            lda.fit(X_train_scl[train], y_train[train])
            score = lda.score(X_train_scl[test], y_train[test])
            cv_scores.append(score)
        mean_score = np.mean(cv_scores)
        scores.append(mean_score)
        # train score (fresh estimator fit on the full training split)
        lda = LinearDiscriminantAnalysis(n_components=i)
        lda.fit(X_train_scl, y_train)
        train_score = lda.score(X_train_scl, y_train)
        train_scores.append(train_score)
        print(i, mean_score)
    ##
    ## Score Plot
    ##
    title = 'Score Summary Plot (LDA) for ' + data_set_name
    name = data_set_name.lower() + '_lda_score'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(rng,
                   [scores, train_scores],
                   [None, None],
                   ['cross validation score', 'training score'],
                   cm.viridis(np.linspace(0, 1, 2)),
                   ['o', '*'],
                   title,
                   'n_components',
                   'Score',
                   filename)
示例12: demensionReduction
def demensionReduction(numFeatures, cateFeatures):
    """Robust-scale the numeric features, reduce them to 5 PCA components,
    and append the categorical features unchanged.

    :param numFeatures: numeric feature matrix to scale and reduce
    :param cateFeatures: categorical feature matrix, concatenated as-is
    :return: array of 5 PCA components followed by the categorical columns
    """
    scaled = RobustScaler().fit_transform(numFeatures)
    reduced = PCA(n_components=5).fit_transform(scaled)
    return np.concatenate((reduced, cateFeatures), axis=1)
示例13: test_robustscaler_vs_sklearn
def test_robustscaler_vs_sklearn():
    # Compare msmbuilder.preprocessing.RobustScaler
    # with sklearn.preprocessing.RobustScaler
    # Both are fit on the same data: sklearn's on the trajectories
    # concatenated into one array, msmbuilder's on the list of trajectories.
    robustscalerr = RobustScalerR()
    robustscalerr.fit(np.concatenate(trajs))
    robustscaler = RobustScaler()
    robustscaler.fit(trajs)
    # Transform the first trajectory with each and require (near-)equal output.
    y_ref1 = robustscalerr.transform(trajs[0])
    y1 = robustscaler.transform(trajs)[0]
    np.testing.assert_array_almost_equal(y_ref1, y1)
示例14: best_lda_cluster_wine
def best_lda_cluster_wine(self):
    """Cluster the LDA-best wine data with K-Means and a Gaussian mixture,
    saving the transformed splits to text files under self.save_dir."""
    dh = data_helper()  # duplicated construction removed
    X_train, X_test, y_train, y_test = dh.get_wine_data_lda_best()
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)
    ##
    ## K-Means: transform() gives distances to the 4 cluster centers.
    ##
    km = KMeans(n_clusters=4, algorithm='full')
    X_train_transformed = km.fit_transform(X_train_scl)
    X_test_transformed = km.transform(X_test_scl)
    # save
    filename = './' + self.save_dir + '/wine_kmeans_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_kmeans_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_kmeans_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_kmeans_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
    ##
    ## GMM
    ##
    # BUG FIX: the original built `gmm` but then re-ran `km`, so the "gmm"
    # files were just K-Means output again. Use the mixture model here;
    # GaussianMixture has no transform(), so save soft cluster assignments
    # (posterior probabilities) instead.
    gmm = GaussianMixture(n_components=4, covariance_type='full')
    gmm.fit(X_train_scl)
    X_train_transformed = gmm.predict_proba(X_train_scl)
    X_test_transformed = gmm.predict_proba(X_test_scl)
    # save
    filename = './' + self.save_dir + '/wine_gmm_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_gmm_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_gmm_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
    filename = './' + self.save_dir + '/wine_gmm_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
示例15: transform_dataframe
def transform_dataframe(dataframe):
    """
    Scale every column of the dataframe in place with RobustScaler.

    RobustScaler centers each column on its median and scales by the IQR
    (the old docstring's "mean 0 and unit variance" described
    StandardScaler, not this scaler).

    Parameters:
        dataframe : Input pandas dataframe
    Input types: pd.Dataframe
    Output types: pd.Dataframe
    """
    cols = [col for col in dataframe.columns]
    robust_scaler = RobustScaler()
    scaled = robust_scaler.fit_transform(dataframe[cols])
    # BUG FIX: the original did `dataframe.columns = df`, which replaced the
    # column *labels* with the scaled data array. Write the scaled values
    # back into the columns instead.
    dataframe[cols] = scaled
    return dataframe