This page collects typical usage examples of the Python method sklearn.utils.sparsefuncs.mean_variance_axis. If you have been wondering what sparsefuncs.mean_variance_axis does, how to call it, or what real code that uses it looks like, the hand-picked examples below should help. You can also explore the containing module, sklearn.utils.sparsefuncs, for related helpers.
Nine code examples of sparsefuncs.mean_variance_axis are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
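Before diving into the examples, here is a minimal sketch of what the function computes (the small matrix below is made up for illustration): given a CSR or CSC matrix, mean_variance_axis returns the means and population variances along the requested axis without densifying the data.

import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import mean_variance_axis

# A small made-up matrix: 3 samples x 4 features.
X = sp.csr_matrix(np.array([[1.0, 0.0, 2.0, 0.0],
                            [0.0, 3.0, 0.0, 0.0],
                            [4.0, 0.0, 5.0, 6.0]]))

# Per-feature (axis=0) means and population variances (ddof=0),
# computed directly on the sparse representation.
means, variances = mean_variance_axis(X, axis=0)
assert np.allclose(means, X.toarray().mean(axis=0))
assert np.allclose(variances, X.toarray().var(axis=0))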
Example 1: test_scale_function_without_centering
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def test_scale_function_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero
    X_csr = sparse.csr_matrix(X)

    X_scaled = scale(X, with_mean=False)
    assert not np.any(np.isnan(X_scaled))
    X_csr_scaled = scale(X_csr, with_mean=False)
    assert not np.any(np.isnan(X_csr_scaled.data))

    # test csc has same outcome
    X_csc_scaled = scale(X_csr.tocsc(), with_mean=False)
    assert_array_almost_equal(X_scaled, X_csc_scaled.toarray())

    # raises value error on axis != 0
    assert_raises(ValueError, scale, X_csr, with_mean=False, axis=1)

    assert_array_almost_equal(X_scaled.mean(axis=0),
                              [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # Check that X has not been copied
    assert X_scaled is not X

    # mean_variance_axis returns variances; after scaling to unit variance
    # they coincide with the stds (each entry is 0 or 1 here).
    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis(X_csr_scaled, 0)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # null scale
    X_csr_scaled = scale(X_csr, with_mean=False, with_std=False, copy=True)
    assert_array_almost_equal(X_csr.toarray(), X_csr_scaled.toarray())
Example 2: test_mean_var_sparse
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def test_mean_var_sparse():
    from sklearn.utils.sparsefuncs import mean_variance_axis

    csr64 = sp.random(10000, 1000, format="csr", dtype=np.float64)
    csc64 = csr64.tocsc()

    # Test that we're equivalent for 64 bit
    for mtx, ax in product((csr64, csc64), (0, 1)):
        scm, scv = sc.pp._utils._get_mean_var(mtx, axis=ax)
        skm, skv = mean_variance_axis(mtx, ax)
        # mean_variance_axis returns the population variance; rescale it
        # to the sample variance that _get_mean_var reports.
        skv *= mtx.shape[ax] / (mtx.shape[ax] - 1)
        assert np.allclose(scm, skm)
        assert np.allclose(scv, skv)

    csr32 = csr64.astype(np.float32)
    csc32 = csc64.astype(np.float32)

    # Test whether ours is more accurate for 32 bit
    for mtx32, mtx64 in [(csc32, csc64), (csr32, csr64)]:
        scm32, scv32 = sc.pp._utils._get_mean_var(mtx32)
        scm64, scv64 = sc.pp._utils._get_mean_var(mtx64)
        skm32, skv32 = mean_variance_axis(mtx32, 0)
        skm64, skv64 = mean_variance_axis(mtx64, 0)
        # Use the loop variables here rather than the stale `mtx`
        # left over from the previous loop.
        skv32 *= mtx32.shape[0] / (mtx32.shape[0] - 1)
        skv64 *= mtx64.shape[0] / (mtx64.shape[0] - 1)

        m_resid_sc = np.mean(np.abs(scm64 - scm32))
        m_resid_sk = np.mean(np.abs(skm64 - skm32))
        v_resid_sc = np.mean(np.abs(scv64 - scv32))
        v_resid_sk = np.mean(np.abs(skv64 - skv32))

        assert m_resid_sc < m_resid_sk
        assert v_resid_sc < v_resid_sk
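The shape[ax] / (shape[ax] - 1) factor above is Bessel's correction: mean_variance_axis returns the population variance (ddof=0), while scanpy's _get_mean_var reports the sample variance (ddof=1). A short self-contained illustration of the relationship, using a made-up random matrix:

import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import mean_variance_axis

X = sp.random(100, 20, density=0.3, format="csr",
              dtype=np.float64, random_state=0)
n = X.shape[0]

# mean_variance_axis yields the population variance (ddof=0) ...
_, pop_var = mean_variance_axis(X, axis=0)
# ... which Bessel's correction turns into the sample variance (ddof=1).
sample_var = pop_var * n / (n - 1)
assert np.allclose(sample_var, X.toarray().var(axis=0, ddof=1))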
Example 3: _tolerance
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def _tolerance(X, tol):
    """Return a tolerance which is independent of the dataset's scale."""
    if sp.issparse(X):
        variances = mean_variance_axis(X, axis=0)[1]
    else:
        variances = np.var(X, axis=0)
    return np.mean(variances) * tol
Example 4: _tolerance
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def _tolerance(X, rtol):
    """Compute an absolute tolerance from the relative tolerance."""
    if rtol == 0.0:
        return rtol
    if sp.issparse(X):
        variances = mean_variance_axis(X, axis=0)[1]
        mean_var = np.mean(variances)
    else:
        mean_var = _daal_mean_var(X)
    return mean_var * rtol
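Examples 3 and 4 implement the same idea: multiply a relative tolerance by the mean per-feature variance, so the convergence threshold adapts to the scale of the data. A standalone sketch of the pattern (the helper name absolute_tolerance is ours, not from either library):

import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import mean_variance_axis

def absolute_tolerance(X, rtol):
    """Scale the relative tolerance by the mean per-feature variance."""
    if sp.issparse(X):
        variances = mean_variance_axis(X, axis=0)[1]
    else:
        variances = np.var(X, axis=0)
    return np.mean(variances) * rtol

X = sp.random(50, 5, density=0.5, format="csr", random_state=0)
tol = absolute_tolerance(X, 1e-4)  # e.g. a k-means convergence threshold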
Example 5: compute_scoring_func
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def compute_scoring_func(self, func):
    if func == 'variance':
        features = self.instances.features.get_values()
        annotations = self.instances.annotations.get_labels()
        if isinstance(features, spmatrix):
            variance = mean_variance_axis(features, axis=0)[1]
        else:
            variance = features.var(axis=0)
        return variance, None

    features = self.annotated_instances.features.get_values()
    annotations = self.annotated_instances.annotations.get_supervision(
        self.multiclass)
    if func == 'f_classif':
        return f_classif(features, annotations)
    elif func == 'mutual_info_classif':
        if isinstance(features, spmatrix):
            discrete_indexes = True
        else:
            features_types = self.instances.features.info.types
            discrete_indexes = [i for i, t in enumerate(features_types)
                                if t == FeatureType.binary]
            if not discrete_indexes:
                discrete_indexes = False
        return (mutual_info_classif(features, annotations,
                                    discrete_features=discrete_indexes),
                None)
    elif func == 'chi2':
        return chi2(features, annotations)
    else:
        assert False  # unknown scoring function
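The 'variance' branch above shows a dispatch pattern that recurs in several of these examples: mean_variance_axis for scipy sparse input, plain NumPy for dense input. A minimal standalone version (the helper name feature_variances is ours):

import numpy as np
from scipy.sparse import csr_matrix, issparse
from sklearn.utils.sparsefuncs import mean_variance_axis

def feature_variances(features):
    """Per-feature variances for dense arrays or sparse matrices."""
    if issparse(features):
        return mean_variance_axis(features, axis=0)[1]
    return np.var(features, axis=0)

dense = np.array([[0., 1.], [2., 3.], [4., 5.]])
# Both paths agree on the same data.
assert np.allclose(feature_variances(dense),
                   feature_variances(csr_matrix(dense)))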
Example 6: _display_dataset
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def _display_dataset(self, dataset):
    eps = 0.00001
    linewidth = dataset.linewidth
    delta = self.max_value - self.min_value
    density_delta = 1.2 * delta
    if delta > 0:
        x = np.arange(self.min_value - 0.1 * delta,
                      self.max_value + 0.1 * delta,
                      density_delta / self.num_points)
    else:
        x = np.array([self.min_value - 2 * eps, self.max_value + 2 * eps])
    if isinstance(dataset.values, spmatrix):
        variance = mean_variance_axis(dataset.values, axis=0)[1]
    else:
        variance = np.var(dataset.values)
    if variance < eps:
        # Near-constant data: draw a spike at the mean instead of a KDE.
        linewidth += 2
        mean = np.mean(dataset.values)
        x = np.sort(np.append(x, [mean, mean - eps, mean + eps]))
        density = [1 if v == mean else 0 for v in x]
    else:
        self.kde.fit(dataset.values)
        x_density = [[y] for y in x]
        # kde.score_samples returns the 'log' of the density
        log_density = self.kde.score_samples(x_density).tolist()
        density = list(map(math.exp, log_density))
    self.ax.plot(x, density, label=dataset.label, color=dataset.color,
                 linewidth=linewidth, linestyle=dataset.linestyle)
Example 7: test_scale_function_without_centering
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def test_scale_function_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero
    X_csr = sparse.csr_matrix(X)

    X_scaled = scale(X, with_mean=False)
    assert_false(np.any(np.isnan(X_scaled)))
    X_csr_scaled = scale(X_csr, with_mean=False)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    # test csc has same outcome
    X_csc_scaled = scale(X_csr.tocsc(), with_mean=False)
    assert_array_almost_equal(X_scaled, X_csc_scaled.toarray())

    # raises value error on axis != 0
    assert_raises(ValueError, scale, X_csr, with_mean=False, axis=1)

    assert_array_almost_equal(X_scaled.mean(axis=0),
                              [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # Check that X has not been copied
    assert_true(X_scaled is not X)

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis(X_csr_scaled, 0)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # null scale
    X_csr_scaled = scale(X_csr, with_mean=False, with_std=False, copy=True)
    assert_array_almost_equal(X_csr.toarray(), X_csr_scaled.toarray())
Example 8: fit
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def fit(self, Z):
    """Learn empirical variances from the data.

    Parameters
    ----------
    Z : ArrayRDD or DictRDD of (X, y) pairs
        X - {array-like, sparse matrix}, shape (n_samples, n_features)
            Sample vectors from which to compute variances.
        y - any
            Ignored. This parameter exists only for compatibility with
            sklearn.pipeline.Pipeline.

    Returns
    -------
    self
    """
    X = Z[:, 'X'] if isinstance(Z, DictRDD) else Z
    check_rdd(X, (np.ndarray, sp.spmatrix))

    def mapper(X):
        """Calculate statistics for every numpy or scipy block."""
        X = check_array(X, ('csr', 'csc'), dtype=np.float64)
        if hasattr(X, "toarray"):  # sparse matrix
            mean, var = mean_variance_axis(X, axis=0)
        else:
            mean, var = np.mean(X, axis=0), np.var(X, axis=0)
        return X.shape[0], mean, var

    def reducer(a, b):
        """Combine the statistics of two blocks."""
        n_a, mean_a, var_a = a
        n_b, mean_b, var_b = b
        n_ab = n_a + n_b
        mean_ab = ((mean_a * n_a) + (mean_b * n_b)) / n_ab
        # Pooled variance: weighted within-block variances plus a
        # between-block term from the difference of the means.
        var_ab = (((n_a * var_a) + (n_b * var_b)) / n_ab) + \
                 ((n_a * n_b) * ((mean_b - mean_a) / n_ab) ** 2)
        return (n_ab, mean_ab, var_ab)

    _, _, self.variances_ = X.map(mapper).treeReduce(reducer)
    if np.all(self.variances_ <= self.threshold):
        msg = "No feature in X meets the variance threshold {0:.5f}"
        if X.shape[0] == 1:
            msg += " (X contains only one sample)"
        raise ValueError(msg.format(self.threshold))
    return self
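The mapper/reducer pair above implements the standard parallel formula for merging per-block counts, means, and population variances (the pairwise update of Chan et al.). A small self-contained check of the combination step against NumPy, with made-up blocks:

import numpy as np

def combine(a, b):
    """Merge (count, mean, population variance) of two data blocks."""
    n_a, mean_a, var_a = a
    n_b, mean_b, var_b = b
    n_ab = n_a + n_b
    mean_ab = (mean_a * n_a + mean_b * n_b) / n_ab
    var_ab = ((n_a * var_a + n_b * var_b) / n_ab
              + n_a * n_b * ((mean_b - mean_a) / n_ab) ** 2)
    return n_ab, mean_ab, var_ab

rng = np.random.RandomState(0)
block_a, block_b = rng.randn(40, 3), rng.randn(60, 3)
merged = combine((40, block_a.mean(axis=0), block_a.var(axis=0)),
                 (60, block_b.mean(axis=0), block_b.var(axis=0)))
full = np.vstack([block_a, block_b])
assert np.allclose(merged[1], full.mean(axis=0))
assert np.allclose(merged[2], full.var(axis=0))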
Example 9: fit
# Required import: from sklearn.utils import sparsefuncs [as alias]
# or alternatively: from sklearn.utils.sparsefuncs import mean_variance_axis [as alias]
def fit(self, Z):
    """Compute the mean and std to be used for later scaling.

    Parameters
    ----------
    Z : DictRDD containing (X, y) pairs
        X - Training vector.
            {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.
        y - Target labels
            Passthrough for ``Pipeline`` compatibility.
    """
    # Reset internal state before fitting
    self._reset()
    X = Z[:, 'X'] if isinstance(Z, DictRDD) else Z
    check_rdd(X, (np.ndarray, sp.spmatrix))

    def mapper(X):
        """Calculate statistics for every numpy or scipy block."""
        X = check_array(X, ('csr', 'csc'), dtype=np.float64)
        if hasattr(X, "toarray"):  # sparse matrix
            mean, var = mean_variance_axis(X, axis=0)
        else:
            mean, var = np.mean(X, axis=0), np.var(X, axis=0)
        return X.shape[0], mean, var

    def reducer(a, b):
        """Calculate the combined statistics."""
        n_a, mean_a, var_a = a
        n_b, mean_b, var_b = b
        n_ab = n_a + n_b
        mean_ab = ((mean_a * n_a) + (mean_b * n_b)) / n_ab
        var_ab = (((n_a * var_a) + (n_b * var_b)) / n_ab) + \
                 ((n_a * n_b) * ((mean_b - mean_a) / n_ab) ** 2)
        return (n_ab, mean_ab, var_ab)

    if check_rdd_dtype(X, sp.spmatrix):
        if self.with_mean:
            raise ValueError(
                "Cannot center sparse matrices: pass `with_mean=False` "
                "instead. See docstring for motivation and alternatives.")

    self.n_samples_seen_, self.mean_, self.var_ = \
        X.map(mapper).treeReduce(reducer)
    if self.with_std:
        self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_))
    else:
        self.scale_ = None
    return self