本文整理汇总了Python中sklearn.preprocessing.data.StandardScaler.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python StandardScaler.fit_transform方法的具体用法?Python StandardScaler.fit_transform怎么用?Python StandardScaler.fit_transform使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.data.StandardScaler的用法示例。
在下文中一共展示了StandardScaler.fit_transform方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: read_file
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def read_file():
    """Load ``train.csv`` / ``test.csv`` and build feature matrices.

    Steps performed:

    1. Read both CSV files from the current working directory.
    2. Drop ``Id`` (and ``Response`` for the training file) from the features.
    3. Replace missing values with the sentinel ``-1``.
    4. Standardize every non-categorical column with a ``StandardScaler``
       fitted on the training and test rows together.
    5. Vectorize the rows with one ``DictVectorizer`` fitted on the training
       rows and re-used for the test rows, so both matrices share the same
       column layout.

    Returns:
        tuple: ``(train_datas, train_lables, test_ids, test_datas)`` where
        ``train_datas`` and ``test_datas`` are dense arrays produced by the
        vectorizer, ``train_lables`` is the ``Response`` column of the
        training file and ``test_ids`` is the ``Id`` column of the test file.
    """
    file_content = pd.read_csv('train.csv')
    test_file = pd.read_csv('test.csv')

    exc_cols = [u'Id', u'Response']
    train_cols = [c for c in file_content.columns if c not in exc_cols]
    test_cols = [c for c in test_file.columns if c not in [u'Id']]

    train_lables = file_content['Response'].values
    test_ids = test_file['Id'].values

    # Fill missing values with the sentinel -1 (not with the column mean).
    # ``.loc`` replaces the removed ``.ix`` indexer; selection is label-based
    # in both cases.
    train_datas = file_content.loc[:, train_cols].fillna(-1)
    test_datas = test_file.loc[:, test_cols].fillna(-1)
    n_train = train_datas.shape[0]

    all_datas = pd.concat([train_datas, test_datas], axis=0)

    # Split the columns into categorical and numeric groups.
    categoricalVariables = [
        "Product_Info_1", "Product_Info_2", "Product_Info_3",
        "Product_Info_5", "Product_Info_6", "Product_Info_7",
        "Employment_Info_2", "Employment_Info_3", "Employment_Info_5",
        "InsuredInfo_1", "InsuredInfo_2", "InsuredInfo_3", "InsuredInfo_4",
        "InsuredInfo_5", "InsuredInfo_6", "InsuredInfo_7",
        "Insurance_History_1", "Insurance_History_2", "Insurance_History_3",
        "Insurance_History_4", "Insurance_History_7", "Insurance_History_8",
        "Insurance_History_9", "Family_Hist_1",
        "Medical_History_2", "Medical_History_3", "Medical_History_4",
        "Medical_History_5", "Medical_History_6", "Medical_History_7",
        "Medical_History_8", "Medical_History_9", "Medical_History_10",
        "Medical_History_11", "Medical_History_12", "Medical_History_13",
        "Medical_History_14", "Medical_History_16", "Medical_History_17",
        "Medical_History_18", "Medical_History_19", "Medical_History_20",
        "Medical_History_21", "Medical_History_22", "Medical_History_23",
        "Medical_History_25", "Medical_History_26", "Medical_History_27",
        "Medical_History_28", "Medical_History_29", "Medical_History_30",
        "Medical_History_31", "Medical_History_33", "Medical_History_34",
        "Medical_History_35", "Medical_History_36", "Medical_History_37",
        "Medical_History_38", "Medical_History_39", "Medical_History_40",
        "Medical_History_41",
    ]
    categorical_set = set(categoricalVariables)
    numeric_cols = [c for c in all_datas.columns if c not in categorical_set]
    all_file_data = all_datas.loc[:, numeric_cols]
    all_file_cate = all_datas.loc[:, categoricalVariables]

    # Standardize the numeric columns.  ``fit_transform`` returns a new array
    # and leaves its input untouched, so the result has to be stored;
    # discarding it (as the previous version did) means no scaling at all.
    scalar_this = StandardScaler()
    all_file_data = pd.DataFrame(
        scalar_this.fit_transform(all_file_data),
        index=all_file_data.index,
        columns=all_file_data.columns,
    )

    # Re-assemble train and test frames; the first ``n_train`` rows of the
    # concatenated frame are the training rows.
    train_datas = pd.concat(
        [all_file_data.iloc[:n_train], all_file_cate.iloc[:n_train]], axis=1)
    test_datas = pd.concat(
        [all_file_data.iloc[n_train:], all_file_cate.iloc[n_train:]], axis=1)

    # Vectorize.  One vectorizer is fitted on the training rows and re-used
    # for the test rows: two independently fitted vectorizers can produce
    # different column sets when a category value appears in only one file.
    # 'records' is passed positionally because the keyword was renamed from
    # ``outtype`` to ``orient`` across pandas versions.
    vectorizer = DictVectorizer()
    train_datas = vectorizer.fit_transform(
        train_datas.to_dict('records')).toarray()
    test_datas = vectorizer.transform(
        test_datas.to_dict('records')).toarray()
    return (train_datas, train_lables, test_ids, test_datas)
示例2: test_scaler_without_centering
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def test_scaler_without_centering():
    """Exercise ``StandardScaler(with_mean=False)`` on dense, CSR and CSC data.

    The test builds a small random matrix whose first column is all zeros and
    checks that:

    * a default ``StandardScaler`` raises ``ValueError`` when fitted on the
      CSR matrix;
    * a scaler with both centering and scaling disabled leaves the sparse
      data unchanged in both directions;
    * dense, CSR and CSC scalers agree on ``mean_`` / ``std_`` and produce no
      NaNs (the all-zero column included);
    * ``transform`` / ``inverse_transform`` with ``copy=True`` return new
      objects and round-trip back to the original values.
    """
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    # The default scaler (centering enabled) is expected to reject sparse input.
    assert_raises(ValueError, StandardScaler().fit, X_csr)

    # With both options off the scaler must act as the identity.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    scaler = StandardScaler(with_mean=False).fit(X)
    X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
    # Use the CSC-fitted scaler here; the previous version called
    # ``scaler_csr.transform`` and therefore never exercised ``scaler_csc``.
    X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    # NOTE(review): going by its name, ``mean_variance_axis0`` presumably
    # returns a variance; comparing it with a standard deviation only holds
    # because the expected values are 0 or 1 -- confirm against the helper.
    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # Round-trip the CSC data through the CSC-fitted scaler as well.
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
示例3: test_scalar
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def test_scalar():
    """Standardize the training feature columns and apply the same scaling to test.

    Reads up to 200000 rows from ``TRAIN_FEATURES_CSV`` and all rows from
    ``TEST_FEATURES_CSV``.  For every column in
    ``TOTAL_TRAINING_FEATURE_COLUMNS`` the scaler is re-fitted on the training
    column and the fitted parameters are then applied to the matching test
    column.  Both frames are modified in place; nothing is returned.
    """
    from sklearn.preprocessing.data import StandardScaler

    scalar = StandardScaler()
    training = pd.read_csv(TRAIN_FEATURES_CSV, nrows=200000)
    test = pd.read_csv(TEST_FEATURES_CSV)

    # normalize the values
    for column in TOTAL_TRAINING_FEATURE_COLUMNS:
        # Scalers expect 2-D input of shape (n_samples, n_features); select
        # the column as a one-column frame (double brackets) instead of a
        # 1-D Series, then flatten the (n, 1) result for the assignment.
        training[column] = scalar.fit_transform(training[[column]]).ravel()
        test[column] = scalar.transform(test[[column]]).ravel()
示例4: test_scaler_int
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def test_scaler_int():
    """Check that ``StandardScaler`` handles integer input, dense and sparse.

    Mirrors the float-valued scaler test but starts from an integer matrix
    whose first column is all zeros.  Fitting and transforming are wrapped in
    ``warnings.catch_warnings(record=True)`` so that warnings emitted for the
    integer input do not leak out of the test.
    """
    # test that scaler converts integer input to floating
    # for both sparse and dense matrices
    rng = np.random.RandomState(42)
    X = rng.randint(20, size=(4, 5))
    X[:, 0] = 0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    # With centering and scaling both off the scaler must act as the identity.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    with warnings.catch_warnings(record=True):
        X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    with warnings.catch_warnings(record=True):
        scaler = StandardScaler(with_mean=False).fit(X)
        X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    with warnings.catch_warnings(record=True):
        scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
        X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    with warnings.catch_warnings(record=True):
        scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
        # Use the CSC-fitted scaler; the previous version called
        # ``scaler_csr.transform`` and never exercised ``scaler_csc``.
        X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0),
        [0., 1.109, 1.856, 21., 1.559], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists in current NumPy, so the builtin is used directly.
    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(
        X_csr_scaled.astype(float))
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # Round-trip the CSC data through the CSC-fitted scaler as well.
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
示例5: SkRanker
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
class SkRanker(Ranker, SkLearner):
'''
Basic ranker wrapping scikit-learn functions
'''
def train(self, dataset_filename,
          scale=True,
          feature_selector=None,
          feature_selection_params={},
          feature_selection_threshold=.25,
          learning_params={},
          optimize=True,
          optimization_params={},
          scorers=['f1_score'],
          attribute_set=None,
          class_name=None,
          metaresults_prefix="./0-",
          **kwargs):
    """Load a dataset, optionally scale it, select features and fit the learner.

    Args:
        dataset_filename: passed to ``dataset_to_instances`` to load the data.
        scale: when true, fit a ``StandardScaler`` on the data and keep it in
            ``self.scaler``; when false, ``self.scaler`` is set to ``None``.
        feature_selector: forwarded to ``self.run_feature_selection``.
        feature_selection_params: forwarded to ``self.run_feature_selection``;
            a string value is evaluated as a Python expression first.
        feature_selection_threshold: forwarded to
            ``self.run_feature_selection``.
        learning_params: forwarded to ``self.initialize_learning_method``.
        optimize: forwarded to ``self.initialize_learning_method``.
        optimization_params: forwarded to
            ``self.initialize_learning_method``.
        scorers: forwarded to ``self.initialize_learning_method``.
        attribute_set: forwarded to ``dataset_to_instances`` and remembered
            on the instance.
        class_name: forwarded to ``dataset_to_instances`` and remembered on
            the instance.
        metaresults_prefix: prefix of the feature-selection plot filename
            (``"featureselection.pdf"`` is appended).
        **kwargs: forwarded to ``dataset_to_instances``.

    Returns:
        The ``metadata`` value returned by ``self.run_feature_selection``.

    Note:
        The mutable default arguments are kept for interface compatibility;
        this method itself never mutates them.
    """
    plot_filename = "{}{}".format(metaresults_prefix, "featureselection.pdf")
    data, labels = dataset_to_instances(dataset_filename, attribute_set, class_name, **kwargs)
    learner = self.learner

    # the class must remember the attribute_set and the class_name in order
    # to reproduce the vectors
    self.attribute_set = attribute_set
    self.class_name = class_name

    # scale data to the mean
    if scale:
        log.info("Scaling datasets...")
        log.debug("Data shape before scaling: {}".format(data.shape))
        self.scaler = StandardScaler()
        data = self.scaler.fit_transform(data)
        log.debug("Data shape after scaling: {}".format(data.shape))
        log.debug("Mean: {} , Std: {}".format(self.scaler.mean_, self.scaler.std_))
    else:
        # get_model_description() reads self.scaler unconditionally, so make
        # sure the attribute exists and that a scaler left over from an
        # earlier scaled training run is not silently re-used.
        self.scaler = None

    # avoid any NaNs and Infs that may have occurred due to the scaling
    data = np.nan_to_num(data)

    # feature selection
    if isinstance(feature_selection_params, basestring):
        # SECURITY(review): eval() executes arbitrary code.  Only pass
        # trusted strings here; if the value is always a plain literal,
        # ast.literal_eval would be the safer replacement.
        feature_selection_params = eval(feature_selection_params)

    self.featureselector, data, metadata = self.run_feature_selection(data, labels, feature_selector, feature_selection_params, feature_selection_threshold, plot_filename)

    # initialize learning method and scoring functions and optimize
    self.learner, self.scorers = self.initialize_learning_method(learner, data, labels, learning_params, optimize, optimization_params, scorers)
    log.info("Data shape before fitting: {}".format(data.shape))

    self.learner.fit(data, labels)
    self.fit = True
    return metadata
def get_model_description(self):
    """Return a dictionary describing the trained model.

    The result depends on what the wrapped learner exposes:

    * ``kernel == "rbf"``: the scaler parameters (if a scaler is set) plus
      ``gamma``, ``C`` and one ``n_<i>`` entry per item of ``n_support_``.
    * ``kernel == "linear"``: a mapping from the names returned by
      ``self.attribute_set.get_names_pairwise()`` to the first row of
      ``coef_``.
    * otherwise: an ``OrderedDict`` with every entry of
      ``learner.get_params()`` whose value can be converted to ``float``.

    Returns:
        dict: the description, or an empty dict when nothing could be
        extracted.
    """
    params = {}
    # The scaler attribute may be missing (never trained) or None.
    scaler = getattr(self, "scaler", None)
    if scaler:
        params = scaler.get_params(deep=True)

    try:  # these are for SVC
        if self.learner.kernel == "rbf":
            params["gamma"] = self.learner.gamma
            params["C"] = self.learner.C
            for i, n_support in enumerate(self.learner.n_support_):
                params["n_{}".format(i)] = n_support
            log.debug(len(self.learner.dual_coef_))
            return params
        elif self.learner.kernel == "linear":
            coefficients = self.learner.coef_
            names = self.attribute_set.get_names_pairwise()
            return dict(zip(names, coefficients[0]))
    except AttributeError:
        # Not an SVC-like learner; fall through to the generic description.
        pass

    try:  # adaboost etc
        params = self.learner.get_params()
        numeric_params = OrderedDict()
        for key, value in params.items():
            try:
                numeric_params[key] = float(value)
            except (TypeError, ValueError):
                # float(None) / float(<object>) raise TypeError and
                # float("text") raises ValueError.  Skip only the offending
                # entry instead of losing the whole description.
                continue
        return numeric_params
    except Exception:
        # Deliberate best effort: a learner that cannot report its
        # parameters simply yields an empty description.
        pass
    return {}
def get_ranked_sentence(self, parallelsentence, critical_attribute="rank_predicted",
new_rank_name="rank_hard",
del_orig_class_att=False,
bidirectional_pairs=False,
ties=True,
reconstruct='hard'):
"""
"""
if type(self.learner) == str:
if self.classifier:
#.........这里部分代码省略.........