本文整理汇总了Python中sklearn.preprocessing.RobustScaler.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python RobustScaler.fit_transform方法的具体用法?Python RobustScaler.fit_transform怎么用?Python RobustScaler.fit_transform使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.RobustScaler
的用法示例。
在下文中一共展示了RobustScaler.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: best_rp_nba
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def best_rp_nba(self):
    """Project the NBA data with a Gaussian random projection, keep the two
    components with the highest training-set kurtosis, and save the reduced
    train/test splits (plus labels) as CSV text files under ``self.save_dir``.
    """
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()

    # Robust-scale: fit on the training split only, reuse the fit for test.
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    rp = GaussianRandomProjection(n_components=X_train_scl.shape[1])
    X_train_transformed = rp.fit_transform(X_train_scl, y_train)
    X_test_transformed = rp.transform(X_test_scl)

    # Keep the top 2 components ranked by *training* kurtosis.
    # BUG FIX: the original re-ranked the test columns by the test set's own
    # kurtosis, so the same column position could hold different projection
    # components in train and test. Both splits must use the ordering
    # derived from the training data.
    order = kurtosis(X_train_transformed).argsort()[::-1]
    X_train_transformed = X_train_transformed[:, order][:, 0:2]
    X_test_transformed = X_test_transformed[:, order][:, 0:2]

    # save
    filename = './' + self.save_dir + '/nba_rp_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_rp_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_rp_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_rp_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
示例2: ica_analysis
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    """Run FastICA on the robust-scaled training data and plot the kurtosis
    of every independent component as a bar chart under ``self.out_dir``.
    """
    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)  # kept for parity with other analyses

    ##
    ## ICA
    ##
    ica = FastICA(n_components=train_scaled.shape[1])
    components = ica.fit_transform(train_scaled)

    ##
    ## Plots
    ##
    helper = plot_helper()

    k_vals = kurtosis(components)
    print(k_vals)

    indices = np.arange(1, len(k_vals) + 1, 1)
    plot_title = 'Kurtosis (FastICA) for ' + data_set_name
    base_name = data_set_name.lower() + '_ica_kurt'
    out_file = './' + self.out_dir + '/' + base_name + '.png'
    helper.plot_simple_bar(indices,
                           k_vals,
                           indices.astype('str'),
                           'Feature Index',
                           'Kurtosis',
                           plot_title,
                           out_file)
示例3: rp_analysis
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    """Repeat a Gaussian random projection 1000 times on the robust-scaled
    training data and plot the mean kurtosis of each projected feature.
    """
    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)

    kurt_runs = []
    for _ in range(1000):
        ##
        ## Random Projection
        ##
        projector = GaussianRandomProjection(n_components=train_scaled.shape[1])
        projector.fit(train_scaled)
        kurt_runs.append(kurtosis(projector.transform(train_scaled)))

    # Average the per-feature kurtosis over all random draws.
    avg_kurt = np.mean(kurt_runs, 0)

    ##
    ## Plots
    ##
    helper = plot_helper()

    indices = np.arange(1, len(avg_kurt) + 1, 1)
    plot_title = 'Kurtosis (Randomized Projection) for ' + data_set_name
    base_name = data_set_name.lower() + '_rp_kurt'
    out_file = './' + self.out_dir + '/' + base_name + '.png'
    helper.plot_simple_bar(indices,
                           avg_kurt,
                           indices.astype('str'),
                           'Feature Index',
                           'Kurtosis',
                           plot_title,
                           out_file)
示例4: scale_feature_matrix
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def scale_feature_matrix(feature_M, linear=False, outliers=False):
    """Scale the non-binary columns of *feature_M* and re-append the binary
    columns unscaled.

    Parameters:
        feature_M : pandas DataFrame of features.
        linear    : unused; kept for backward compatibility with callers.
        outliers  : if True use RobustScaler (median/IQR), otherwise
                    StandardScaler (mean/variance).

    Returns:
        (scaled ndarray with binary columns appended, fitted scaler object)
    """
    from sklearn.preprocessing import StandardScaler, RobustScaler
    import numpy as np

    # A column with exactly two distinct values is treated as binary.
    binary_fields = [col for col in feature_M.columns if len(set(feature_M[col])) == 2]

    # BUG FIX: the original used Python-2 `print` statements while the rest
    # of this file uses the print() function.
    if outliers:
        # Robust to outliers: centers on the median, scales by the IQR.
        scaler_obj = RobustScaler()
        print('centering around median')
    else:
        # Standard scaling: zero mean and unit variance.
        scaler_obj = StandardScaler()
        print('centering around mean')

    print('found these binaries')
    print('-' * 10)
    print('\n'.join(binary_fields))

    X_scaled = scaler_obj.fit_transform(feature_M.drop(binary_fields, axis=1))
    # BUG FIX: DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy().
    X_scaled_w_cats = np.c_[X_scaled, feature_M[binary_fields].to_numpy()]
    return X_scaled_w_cats, scaler_obj
示例5: best_ica_wine
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def best_ica_wine(self):
    """Run FastICA on the wine data, keep the two components with the
    highest training-set kurtosis, and save the reduced train/test splits
    (plus labels) as CSV text files under ``self.save_dir``.
    """
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()

    # Robust-scale: fit on the training split only, reuse the fit for test.
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ica = FastICA(n_components=X_train_scl.shape[1])
    X_train_transformed = ica.fit_transform(X_train_scl, y_train)
    X_test_transformed = ica.transform(X_test_scl)

    # Keep the top 2 components ranked by *training* kurtosis.
    # BUG FIX: the original re-ranked the test columns by the test set's own
    # kurtosis, so the same column position could hold different independent
    # components in train and test. Both splits must use the ordering
    # derived from the training data.
    order = kurtosis(X_train_transformed).argsort()[::-1]
    X_train_transformed = X_train_transformed[:, order][:, 0:2]
    X_test_transformed = X_test_transformed[:, order][:, 0:2]

    # save
    filename = './' + self.save_dir + '/wine_ica_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_ica_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_ica_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_ica_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
示例6: nn_wine_orig
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def nn_wine_orig(self):
    """Robust-scale the original (un-reduced) wine features and feed them to
    the part-4 neural-network analysis as a baseline.
    """
    helper = data_helper()
    X_train, X_test, y_train, y_test = helper.get_wine_data()

    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    self.part4.nn_analysis(train_scaled, test_scaled, y_train, y_test,
                           'Wine', 'Neural Network Original')
示例7: standardize_columns
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def standardize_columns(data):
    """
    Robust-scale the weather columns in place and return the frame.

    We decided to standardize the weather factors with RobustScaler because
    of outliers (it centers on the median and scales by the IQR).
    """
    columns_to_standardize = ['temp', 'atemp', 'humidity', 'windspeed']
    # BUG FIX: renamed from `min_max_scaler` — the object is a RobustScaler,
    # not a MinMaxScaler; the old name was actively misleading.
    scaler = RobustScaler()
    for column in columns_to_standardize:
        # BUG FIX: sklearn scalers require 2-D input; passing the raw 1-D
        # Series raises ValueError in modern sklearn. Select with a list to
        # get an (n, 1) frame, then flatten the result back to a column.
        data[column] = scaler.fit_transform(data[[column]]).ravel()
    return data
示例8: lda_analysis
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    """Sweep LDA ``n_components`` from 1 to the feature count, recording the
    3-fold cross-validation score and the training score for each setting,
    and save a summary plot under ``self.out_dir``.
    """
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## Plots
    ##
    ph = plot_helper()

    scores = []
    train_scores = []
    rng = range(1, X_train_scl.shape[1] + 1)
    for i in rng:
        lda = LinearDiscriminantAnalysis(n_components=i)
        # BUG FIX: the pre-0.18 `KFold(n_samples, n_folds, shuffle=True)`
        # constructor and direct iteration over the object were removed from
        # sklearn (sklearn.model_selection.KFold takes n_splits and yields
        # index pairs via .split()).
        cv = KFold(n_splits=3, shuffle=True)

        # cross validation
        cv_scores = []
        for train_idx, test_idx in cv.split(X_train_scl):
            lda.fit(X_train_scl[train_idx], y_train[train_idx])
            score = lda.score(X_train_scl[test_idx], y_train[test_idx])
            cv_scores.append(score)
        mean_score = np.mean(cv_scores)
        scores.append(mean_score)

        # train score (fresh estimator, fit on the full training split)
        lda = LinearDiscriminantAnalysis(n_components=i)
        lda.fit(X_train_scl, y_train)
        train_score = lda.score(X_train_scl, y_train)
        train_scores.append(train_score)

        print(i, mean_score)

    ##
    ## Score Plot
    ##
    title = 'Score Summary Plot (LDA) for ' + data_set_name
    name = data_set_name.lower() + '_lda_score'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(rng,
                   [scores, train_scores],
                   [None, None],
                   ['cross validation score', 'training score'],
                   cm.viridis(np.linspace(0, 1, 2)),
                   ['o', '*'],
                   title,
                   'n_components',
                   'Score',
                   filename)
示例9: demensionReduction
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def demensionReduction(numFeatures, cateFeatures):
    """Robust-scale the numeric features, reduce them to 5 principal
    components, and append the categorical features unchanged.

    :param numFeatures: numeric feature matrix to scale and reduce
    :param cateFeatures: categorical feature matrix, passed through as-is
    :return: array of the 5 PCA components concatenated with cateFeatures
    """
    robust = RobustScaler()
    scaled = robust.fit_transform(numFeatures)

    projector = PCA(n_components=5)
    reduced = projector.fit_transform(scaled)

    return np.concatenate((reduced, cateFeatures), axis=1)
示例10: best_lda_cluster_wine
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def best_lda_cluster_wine(self):
    """Cluster the LDA-reduced wine data with K-Means and a Gaussian mixture
    and save each cluster-space representation (plus labels) as CSV text
    files under ``self.save_dir``.
    """
    # BUG FIX: data_helper() was constructed twice; once is enough.
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data_lda_best()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## K-Means
    ##
    km = KMeans(n_clusters=4, algorithm='full')
    X_train_transformed = km.fit_transform(X_train_scl)
    X_test_transformed = km.transform(X_test_scl)

    # save
    filename = './' + self.save_dir + '/wine_kmeans_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_kmeans_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_kmeans_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_kmeans_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)

    ##
    ## GMM
    ##
    gmm = GaussianMixture(n_components=4, covariance_type='full')
    # BUG FIX: the original reused `km` below, so `gmm` was never fit and the
    # "GMM" files were just a second copy of the K-Means output.
    # GaussianMixture has no transform(); use the posterior probabilities of
    # each component as the cluster-space representation.
    gmm.fit(X_train_scl)
    X_train_transformed = gmm.predict_proba(X_train_scl)
    X_test_transformed = gmm.predict_proba(X_test_scl)

    # save
    filename = './' + self.save_dir + '/wine_gmm_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_gmm_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_gmm_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_gmm_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
示例11: transform_dataframe
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def transform_dataframe(dataframe):
    """
    Robust-scale every column of the dataframe in place.

    Note: RobustScaler centers each column on its median and scales by the
    interquartile range (the original docstring's "mean 0 and unit variance"
    described StandardScaler, not this scaler).

    Parameters:
    dataframe : Input pandas dataframe
    Input types: pd.Dataframe
    Output types: pd.Dataframe
    """
    cols = [col for col in dataframe.columns]
    robust_scaler = RobustScaler()
    df = robust_scaler.fit_transform(dataframe[cols])
    # BUG FIX: the original did `dataframe.columns = df`, which clobbered the
    # column labels with the scaled ndarray instead of storing the values.
    dataframe[cols] = df
    return dataframe
示例12: scale
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def scale(self, columns, categorical_cols, apply_list, target_column):
    """Robust-scale the numerical columns of ``self.dataset`` in place.

    Parameters:
        columns          : explicit column list used when apply_list is True
        categorical_cols : columns to exclude when deriving numericals
        apply_list       : if True scale exactly `columns`, else every
                           non-categorical column of the dataset
        target_column    : label column, always excluded from scaling
    Returns the mutated dataset.
    """
    from sklearn.preprocessing import RobustScaler
    scaler = RobustScaler()

    if apply_list:
        # BUG FIX: copy instead of aliasing — the original bound `columns`
        # directly, so the later remove() mutated the caller's list.
        numerical_cols = list(columns)
    else:
        numerical_cols = [col for col in self.dataset.columns.values
                          if col not in categorical_cols]

    # We don't want to scale the target variable, as it is already binary.
    # The target column uses the same value as target_value from Split Data
    # section in the settings popup.
    # BUG FIX: guard the removal — list.remove() raises ValueError when the
    # target is not among the numerical columns (e.g. it is categorical).
    if target_column in numerical_cols:
        numerical_cols.remove(target_column)

    # Scale, fit and transform all the numerical columns
    scaled_data = scaler.fit_transform(self.dataset[numerical_cols])
    self.dataset[numerical_cols] = scaled_data
    return self.dataset
示例13: best_lda_nba
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def best_lda_nba(self):
    """Reduce the NBA data to 2 LDA components and save the reduced
    train/test splits plus their labels as CSV text files.
    """
    helper = data_helper()
    X_train, X_test, y_train, y_test = helper.get_nba_data()

    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    lda = LinearDiscriminantAnalysis(n_components=2)
    train_reduced = lda.fit_transform(train_scaled, y_train)
    test_reduced = lda.transform(test_scaled)

    # save each array under ./<save_dir>/nba_lda_<suffix>.txt
    outputs = (('x_train', train_reduced),
               ('x_test', test_reduced),
               ('y_train', y_train),
               ('y_test', y_test))
    for suffix, payload in outputs:
        filename = './' + self.save_dir + '/nba_lda_' + suffix + '.txt'
        pd.DataFrame(payload).to_csv(filename, header=False, index=False)
示例14: best_pca_wine
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def best_pca_wine(self):
    """Reduce the wine data to 3 PCA components and save the reduced
    train/test splits plus their labels as CSV text files.
    """
    helper = data_helper()
    X_train, X_test, y_train, y_test = helper.get_wine_data()

    scaler = RobustScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    pca = PCA(n_components=3)
    train_reduced = pca.fit_transform(train_scaled, y_train)
    test_reduced = pca.transform(test_scaled)

    # save each array under ./<save_dir>/wine_pca_<suffix>.txt
    outputs = (('x_train', train_reduced),
               ('x_test', test_reduced),
               ('y_train', y_train),
               ('y_test', y_test))
    for suffix, payload in outputs:
        filename = './' + self.save_dir + '/wine_pca_' + suffix + '.txt'
        pd.DataFrame(payload).to_csv(filename, header=False, index=False)
示例15: detect_bad_channels
# 需要导入模块: from sklearn.preprocessing import RobustScaler [as 别名]
# 或者: from sklearn.preprocessing.RobustScaler import fit_transform [as 别名]
def detect_bad_channels(inst, pick_types=None, threshold=.2):
    """Return the names of channels whose robust covariance profile looks
    atypically flat (distance below *threshold*).

    Parameters
    ----------
    inst : MNE-like object exposing pick_types(), _data and ch_names
        — assumed shape of _data is (n_channels, n_times); TODO confirm.
    pick_types : dict | None
        Channel selection passed to inst.pick_types(); defaults to magnetometers.
    threshold : float
        Channels with summary distance below this value are flagged as bad.
    """
    from sklearn.preprocessing import RobustScaler
    from sklearn.covariance import EmpiricalCovariance
    from jr.stats import median_abs_deviation

    if pick_types is None:
        pick_types = dict(meg='mag')
    inst = inst.pick_types(copy=True, **pick_types)

    estimator = EmpiricalCovariance()
    estimator.fit(inst._data.T)
    cov = estimator.covariance_

    # center: robust-scale the covariance matrix, then normalize by MAD
    cov = RobustScaler().fit_transform(cov).T
    cov /= median_abs_deviation(cov)
    cov -= np.median(cov)

    # compute robust summary metrics per channel
    mu = np.median(cov, axis=0)
    sigma = median_abs_deviation(cov, axis=0)
    mu /= median_abs_deviation(mu)
    sigma /= median_abs_deviation(sigma)
    distance = np.sqrt(mu ** 2 + sigma ** 2)

    flagged = np.where(distance < threshold)[0]
    return [inst.ch_names[idx] for idx in flagged]