本文整理汇总了Python中sklearn.preprocessing.data.StandardScaler.transform方法的典型用法代码示例。如果您正苦于以下问题:Python StandardScaler.transform方法的具体用法?Python StandardScaler.transform怎么用?Python StandardScaler.transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.data.StandardScaler
的用法示例。
在下文中一共展示了StandardScaler.transform方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_scaler_without_centering
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import transform [as 别名]
def test_scaler_without_centering():
    """Check StandardScaler(with_mean=False) on dense, CSR and CSC input.

    Scaling without centering must preserve sparsity, learn identical
    statistics on all three representations, and round-trip exactly
    through inverse_transform.
    """
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    # Centering sparse input would densify it, so fitting must raise.
    assert_raises(ValueError, StandardScaler().fit, X_csr)

    # A no-op scaler must leave the data untouched in both directions.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    scaler = StandardScaler(with_mean=False).fit(X)
    X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
    # BUGFIX: the original transformed X_csc with scaler_csr, leaving
    # scaler_csc unexercised; use the scaler actually fitted on CSC data.
    # (Result is numerically identical since the statistics match below.)
    X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    # All three scalers must have learned the same statistics.
    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)
    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # BUGFIX: likewise use scaler_csc (not scaler_csr) for the CSC round-trip.
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
示例2: test_scalar
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import transform [as 别名]
def test_scalar():
    """Standardize each feature column of the training set and apply the
    same (per-column) scaling to the test set.

    Reads TRAIN_FEATURES_CSV / TEST_FEATURES_CSV (module-level constants)
    and overwrites every column listed in TOTAL_TRAINING_FEATURE_COLUMNS.
    """
    # BUGFIX: import from the public package path; sklearn.preprocessing.data
    # is a private module (removed in scikit-learn 0.24). The unused
    # MinMaxScaler import is dropped.
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    training = pd.read_csv(TRAIN_FEATURES_CSV, nrows=200000)
    test = pd.read_csv(TEST_FEATURES_CSV)
    # normalize the values, one column at a time (the scaler is re-fitted on
    # each training column and reused for the matching test column)
    for column in TOTAL_TRAINING_FEATURE_COLUMNS:
        # BUGFIX: scalers require 2-D input — pass a single-column DataFrame
        # instead of a 1-D Series, which raises in modern scikit-learn.
        training[column] = scaler.fit_transform(training[[column]])
        test[column] = scaler.transform(test[[column]])
示例3: test_center_kernel
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import transform [as 别名]
def test_center_kernel():
    """Test that KernelCenterer is equivalent to StandardScaler
    in feature space"""
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))

    # Reference: center the fit data explicitly in feature space.
    feature_centerer = StandardScaler(with_std=False)
    feature_centerer.fit(X_fit)
    X_fit_centered = feature_centerer.transform(X_fit)
    K_fit = np.dot(X_fit, X_fit.T)

    # Centering the kernel matrix directly must agree at fit time...
    kernel_centerer = KernelCenterer()
    K_fit_centered = np.dot(X_fit_centered, X_fit_centered.T)
    assert_array_almost_equal(K_fit_centered,
                              kernel_centerer.fit_transform(K_fit))

    # ...and at predict time, on previously unseen rows.
    X_pred = rng.random_sample((2, 4))
    K_pred = np.dot(X_pred, X_fit.T)
    X_pred_centered = feature_centerer.transform(X_pred)
    assert_array_almost_equal(np.dot(X_pred_centered, X_fit_centered.T),
                              kernel_centerer.transform(K_pred))
示例4: test_scale_sparse_with_mean_raise_exception
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import transform [as 别名]
def test_scale_sparse_with_mean_raise_exception():
    """Requesting mean-centering on sparse input must raise ValueError,
    whether via scale(), fit(), transform() or inverse_transform()."""
    rng = np.random.RandomState(42)
    X_dense = rng.randn(4, 5)
    X_sparse = sparse.csr_matrix(X_dense)

    # check scaling and fit with direct calls on sparse data
    assert_raises(ValueError, scale, X_sparse, with_mean=True)
    assert_raises(ValueError, StandardScaler(with_mean=True).fit, X_sparse)

    # check transform and inverse_transform after a fit on a dense array
    fitted_scaler = StandardScaler(with_mean=True).fit(X_dense)
    assert_raises(ValueError, fitted_scaler.transform, X_sparse)

    X_transformed_sparse = sparse.csr_matrix(fitted_scaler.transform(X_dense))
    assert_raises(ValueError, fitted_scaler.inverse_transform,
                  X_transformed_sparse)
示例5: test_scaler_int
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import transform [as 别名]
def test_scaler_int():
    # test that scaler converts integer input to floating
    # for both sparse and dense matrices
    rng = np.random.RandomState(42)
    X = rng.randint(20, size=(4, 5))
    X[:, 0] = 0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    # A no-op scaler must leave integer data untouched in both directions.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    with warnings.catch_warnings(record=True):
        X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    with warnings.catch_warnings(record=True):
        scaler = StandardScaler(with_mean=False).fit(X)
        X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    with warnings.catch_warnings(record=True):
        scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
        X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    with warnings.catch_warnings(record=True):
        scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
        # BUGFIX: the original transformed X_csc with scaler_csr, leaving
        # scaler_csc unexercised; use the scaler actually fitted on CSC data.
        X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    # All three scalers must have learned the same statistics.
    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)
    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0),
        [0., 1.109, 1.856, 21., 1.559], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    # BUGFIX: np.float is a deprecated alias of the builtin float (removed in
    # NumPy 1.24); use the explicit np.float64 dtype.
    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(
        X_csr_scaled.astype(np.float64))
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # BUGFIX: likewise use scaler_csc (not scaler_csr) for the CSC round-trip.
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
示例6: SkRanker
# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import transform [as 别名]
class SkRanker(Ranker, SkLearner):
'''
Basic ranker wrapping scikit-learn functions
'''
def train(self, dataset_filename,
scale=True,
feature_selector=None,
feature_selection_params={},
feature_selection_threshold=.25,
learning_params={},
optimize=True,
optimization_params={},
scorers=['f1_score'],
attribute_set=None,
class_name=None,
metaresults_prefix="./0-",
**kwargs):
plot_filename = "{}{}".format(metaresults_prefix, "featureselection.pdf")
data, labels = dataset_to_instances(dataset_filename, attribute_set, class_name, **kwargs)
learner = self.learner
#the class must remember the attribute_set and the class_name in order to reproduce the vectors
self.attribute_set = attribute_set
self.class_name = class_name
#scale data to the mean
if scale:
log.info("Scaling datasets...")
log.debug("Data shape before scaling: {}".format(data.shape))
self.scaler = StandardScaler()
data = self.scaler.fit_transform(data)
log.debug("Data shape after scaling: {}".format(data.shape))
log.debug("Mean: {} , Std: {}".format(self.scaler.mean_, self.scaler.std_))
#avoid any NaNs and Infs that may have occurred due to the scaling
data = np.nan_to_num(data)
#feature selection
if isinstance(feature_selection_params, basestring):
feature_selection_params = eval(feature_selection_params)
self.featureselector, data, metadata = self.run_feature_selection(data, labels, feature_selector, feature_selection_params, feature_selection_threshold, plot_filename)
#initialize learning method and scoring functions and optimize
self.learner, self.scorers = self.initialize_learning_method(learner, data, labels, learning_params, optimize, optimization_params, scorers)
log.info("Data shape before fitting: {}".format(data.shape))
self.learner.fit(data, labels)
self.fit = True
return metadata
def get_model_description(self):
params = {}
if self.scaler:
params = self.scaler.get_params(deep=True)
try: #these are for SVC
if self.learner.kernel == "rbf":
params["gamma"] = self.learner.gamma
params["C"] = self.learner.C
for i, n_support in enumerate(self.learner.n_support_):
params["n_{}".format(i)] = n_support
log.debug(len(self.learner.dual_coef_))
return params
elif self.learner.kernel == "linear":
coefficients = self.learner.coef_
att_coefficients = {}
for attname, coeff in zip(self.attribute_set.get_names_pairwise(), coefficients[0]):
att_coefficients[attname] = coeff
return att_coefficients
except AttributeError:
pass
try: #adaboost etc
params = self.learner.get_params()
numeric_params = OrderedDict()
for key, value in params.iteritems():
try:
value = float(value)
except ValueError:
continue
numeric_params[key] = value
return numeric_params
except:
pass
return {}
def get_ranked_sentence(self, parallelsentence, critical_attribute="rank_predicted",
new_rank_name="rank_hard",
del_orig_class_att=False,
bidirectional_pairs=False,
ties=True,
reconstruct='hard'):
"""
"""
if type(self.learner) == str:
if self.classifier:
#.........这里部分代码省略.........