This article collects typical usage examples of the sklearn.utils.gen_batches method in Python. If you are unsure what utils.gen_batches does or how to call it, the curated code examples below may help. You can also explore the other utilities in the sklearn.utils
module.
The following shows 10 code examples of utils.gen_batches, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
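Before the examples, a minimal illustration of what gen_batches itself returns: slice objects that cover range(n) in contiguous chunks, each usable directly as a row index (expected sizes shown in the comments):

import numpy as np
from sklearn.utils import gen_batches

X = np.arange(14).reshape(7, 2)
for batch in gen_batches(7, 3):   # slice(0, 3), slice(3, 6), slice(6, 7)
    print(X[batch].shape[0])      # 3, 3, 1 rows per batch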
Example 1: test_standard_scaler_trasform_with_partial_fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def test_standard_scaler_trasform_with_partial_fit():
    # Check some postconditions after applying partial_fit and transform
    X = X_2d[:100, :]

    scaler_incr = StandardScaler()
    for i, batch in enumerate(gen_batches(X.shape[0], 1)):
        X_sofar = X[:(i + 1), :]
        chunks_copy = X_sofar.copy()
        scaled_batch = StandardScaler().fit_transform(X_sofar)

        scaler_incr = scaler_incr.partial_fit(X[batch])
        scaled_incr = scaler_incr.transform(X_sofar)

        assert_array_almost_equal(scaled_batch, scaled_incr)
        assert_array_almost_equal(X_sofar, chunks_copy)  # no input mutation
        right_input = scaler_incr.inverse_transform(scaled_incr)
        assert_array_almost_equal(X_sofar, right_input)

        zero = np.zeros(X.shape[1])
        epsilon = np.finfo(float).eps
        assert_array_less(zero, scaler_incr.var_ + epsilon)  # var_ >= 0
        assert_array_less(zero, scaler_incr.scale_ + epsilon)  # scale_ >= 0
        # (i + 1) because the scaler has already been fitted on i + 1 samples
        assert_equal((i + 1), scaler_incr.n_samples_seen_)
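The pattern this test exercises, stripped to its essentials: feed row batches to partial_fit, then transform once all statistics have been accumulated. A minimal sketch, assuming X is any 2-D float array:

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.utils import gen_batches

X = np.random.RandomState(0).randn(100, 3)
scaler = StandardScaler()
for batch in gen_batches(X.shape[0], 10):  # batches of 10 rows
    scaler.partial_fit(X[batch])           # update mean_/var_ incrementally
X_scaled = scaler.transform(X)             # matches fit_transform(X) up to rounding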
Example 2: partial_fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def partial_fit(self, X, sample_indices=None):
    """Update the factorization using rows from X.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Input data.
    sample_indices : ndarray of int, optional
        Index of each row of X. If None, row i is assumed to have index i
        (useful when passing the whole dataset to the function).

    Returns
    -------
    self
    """
    X = check_array(X, dtype=[np.float32, np.float64], order='C')
    n_samples, n_features = X.shape
    batches = gen_batches(n_samples, self.batch_size)
    for batch in batches:
        this_X = X[batch]
        these_sample_indices = get_sub_slice(sample_indices, batch)
        self._single_batch_fit(this_X, these_sample_indices)
    return self
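get_sub_slice is a helper from the surrounding project, not part of sklearn. A plausible minimal version, shown only as an assumption about its contract based on the docstring above ("if None, row i is assumed to have index i"):

import numpy as np

def get_sub_slice(sample_indices, batch):
    # Hypothetical sketch: with no explicit indices, a batch slice maps to
    # the matching range of row positions; otherwise slice the given indices.
    if sample_indices is None:
        return np.arange(batch.start, batch.stop)
    return sample_indices[batch]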
Example 3: test_standard_scaler_trasform_with_partial_fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def test_standard_scaler_trasform_with_partial_fit():
    # Check some postconditions after applying partial_fit and transform
    X = X_2d[:100, :]

    scaler_incr = StandardScaler()
    for i, batch in enumerate(gen_batches(X.shape[0], 1)):
        X_sofar = X[:(i + 1), :]
        chunks_copy = X_sofar.copy()
        scaled_batch = StandardScaler().fit_transform(X_sofar)

        scaler_incr = scaler_incr.partial_fit(X[batch])
        scaled_incr = scaler_incr.transform(X_sofar)

        assert_array_almost_equal(scaled_batch, scaled_incr)
        assert_array_almost_equal(X_sofar, chunks_copy)  # no input mutation
        right_input = scaler_incr.inverse_transform(scaled_incr)
        assert_array_almost_equal(X_sofar, right_input)

        zero = np.zeros(X.shape[1])
        epsilon = np.nextafter(0, 1)
        assert_array_less(zero, scaler_incr.var_ + epsilon)  # var_ >= 0
        assert_array_less(zero, scaler_incr.scale_ + epsilon)  # scale_ >= 0
        # (i + 1) because the scaler has already been fitted on i + 1 samples
        assert_equal((i + 1), scaler_incr.n_samples_seen_)
Example 4: random_feature_subsets
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def random_feature_subsets(array, batch_size, random_state=1234):
    """Generate random, disjoint subsets of the feature indices of `array`."""
    random_state = check_random_state(random_state)
    features = list(range(array.shape[1]))  # list() so shuffle works on Python 3
    random_state.shuffle(features)
    for batch in gen_batches(len(features), batch_size):
        yield features[batch]
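A quick usage sketch (the data here is illustrative, only the column count matters): with 10 features and batch_size=4 the generator yields shuffled, disjoint index subsets of sizes 4, 4 and 2.

import numpy as np

data = np.zeros((5, 10))
for subset in random_feature_subsets(data, batch_size=4):
    print(subset)  # e.g. [2, 9, 4, 0], then [8, 7, 1, 6], then [5, 3]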
Example 5: test_minmax_scaler_partial_fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def test_minmax_scaler_partial_fit():
    # Check that partial_fit run over many batches of various sizes
    # gives the same results as a single fit
    X = X_2d
    n = X.shape[0]

    for chunk_size in [1, 2, 50, n, n + 42]:
        # Test the attributes at the end of the process
        scaler_batch = MinMaxScaler().fit(X)

        scaler_incr = MinMaxScaler()
        for batch in gen_batches(n, chunk_size):
            scaler_incr = scaler_incr.partial_fit(X[batch])

        assert_array_almost_equal(scaler_batch.data_min_,
                                  scaler_incr.data_min_)
        assert_array_almost_equal(scaler_batch.data_max_,
                                  scaler_incr.data_max_)
        assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_)
        assert_array_almost_equal(scaler_batch.data_range_,
                                  scaler_incr.data_range_)
        assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_)
        assert_array_almost_equal(scaler_batch.min_, scaler_incr.min_)

        # Test the attributes after a single partial_fit step
        batch0 = slice(0, chunk_size)
        scaler_batch = MinMaxScaler().fit(X[batch0])
        scaler_incr = MinMaxScaler().partial_fit(X[batch0])

        assert_array_almost_equal(scaler_batch.data_min_,
                                  scaler_incr.data_min_)
        assert_array_almost_equal(scaler_batch.data_max_,
                                  scaler_incr.data_max_)
        assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_)
        assert_array_almost_equal(scaler_batch.data_range_,
                                  scaler_incr.data_range_)
        assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_)
        assert_array_almost_equal(scaler_batch.min_, scaler_incr.min_)

        # Test n_samples_seen_ bookkeeping through all the partial fits
        scaler_batch = MinMaxScaler().fit(X)
        scaler_incr = MinMaxScaler()  # Clean estimator
        for i, batch in enumerate(gen_batches(n, chunk_size)):
            scaler_incr = scaler_incr.partial_fit(X[batch])
            assert_correct_incr(i, batch_start=batch.start,
                                batch_stop=batch.stop, n=n,
                                chunk_size=chunk_size,
                                n_samples_seen=scaler_incr.n_samples_seen_)
Example 6: test_standard_scaler_partial_fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def test_standard_scaler_partial_fit():
    # Check that partial_fit run over many batches of various sizes
    # gives the same results as a single fit
    X = X_2d
    n = X.shape[0]

    for chunk_size in [1, 2, 50, n, n + 42]:
        # Test the mean at the end of the process
        scaler_batch = StandardScaler(with_std=False).fit(X)

        scaler_incr = StandardScaler(with_std=False)
        for batch in gen_batches(n, chunk_size):
            scaler_incr = scaler_incr.partial_fit(X[batch])
        assert_array_almost_equal(scaler_batch.mean_, scaler_incr.mean_)
        assert_equal(scaler_batch.var_, scaler_incr.var_)  # both None when with_std=False
        assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_)

        # Test the std after a single partial_fit step
        batch0 = slice(0, chunk_size)
        scaler_incr = StandardScaler().partial_fit(X[batch0])
        if chunk_size == 1:
            assert_array_almost_equal(np.zeros(n_features, dtype=np.float64),
                                      scaler_incr.var_)
            assert_array_almost_equal(np.ones(n_features, dtype=np.float64),
                                      scaler_incr.scale_)
        else:
            assert_array_almost_equal(np.var(X[batch0], axis=0),
                                      scaler_incr.var_)
            assert_array_almost_equal(np.std(X[batch0], axis=0),
                                      scaler_incr.scale_)  # no constants

        # Test the std through all the partial fits
        scaler_batch = StandardScaler().fit(X)
        scaler_incr = StandardScaler()  # Clean estimator
        for i, batch in enumerate(gen_batches(n, chunk_size)):
            scaler_incr = scaler_incr.partial_fit(X[batch])
            assert_correct_incr(i, batch_start=batch.start,
                                batch_stop=batch.stop, n=n,
                                chunk_size=chunk_size,
                                n_samples_seen=scaler_incr.n_samples_seen_)
        assert_array_almost_equal(scaler_batch.var_, scaler_incr.var_)
        assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_)
Example 7: test_maxabs_scaler_partial_fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def test_maxabs_scaler_partial_fit():
    # Check that partial_fit run over many batches of various sizes
    # gives the same results as a single fit
    X = X_2d[:100, :]
    n = X.shape[0]

    for chunk_size in [1, 2, 50, n, n + 42]:
        # Test the attributes at the end of the process
        scaler_batch = MaxAbsScaler().fit(X)

        scaler_incr = MaxAbsScaler()
        scaler_incr_csr = MaxAbsScaler()
        scaler_incr_csc = MaxAbsScaler()
        for batch in gen_batches(n, chunk_size):
            scaler_incr = scaler_incr.partial_fit(X[batch])
            X_csr = sparse.csr_matrix(X[batch])
            scaler_incr_csr = scaler_incr_csr.partial_fit(X_csr)
            X_csc = sparse.csc_matrix(X[batch])
            scaler_incr_csc = scaler_incr_csc.partial_fit(X_csc)

        assert_array_almost_equal(scaler_batch.max_abs_, scaler_incr.max_abs_)
        assert_array_almost_equal(scaler_batch.max_abs_,
                                  scaler_incr_csr.max_abs_)
        assert_array_almost_equal(scaler_batch.max_abs_,
                                  scaler_incr_csc.max_abs_)
        assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_)
        assert_equal(scaler_batch.n_samples_seen_,
                     scaler_incr_csr.n_samples_seen_)
        assert_equal(scaler_batch.n_samples_seen_,
                     scaler_incr_csc.n_samples_seen_)
        assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_)
        assert_array_almost_equal(scaler_batch.scale_, scaler_incr_csr.scale_)
        assert_array_almost_equal(scaler_batch.scale_, scaler_incr_csc.scale_)
        assert_array_almost_equal(scaler_batch.transform(X),
                                  scaler_incr.transform(X))

        # Test the attributes after a single partial_fit step
        batch0 = slice(0, chunk_size)
        scaler_batch = MaxAbsScaler().fit(X[batch0])
        scaler_incr = MaxAbsScaler().partial_fit(X[batch0])

        assert_array_almost_equal(scaler_batch.max_abs_, scaler_incr.max_abs_)
        assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_)
        assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_)
        assert_array_almost_equal(scaler_batch.transform(X),
                                  scaler_incr.transform(X))

        # Test n_samples_seen_ bookkeeping through all the partial fits
        scaler_batch = MaxAbsScaler().fit(X)
        scaler_incr = MaxAbsScaler()  # Clean estimator
        for i, batch in enumerate(gen_batches(n, chunk_size)):
            scaler_incr = scaler_incr.partial_fit(X[batch])
            assert_correct_incr(i, batch_start=batch.start,
                                batch_stop=batch.stop, n=n,
                                chunk_size=chunk_size,
                                n_samples_seen=scaler_incr.n_samples_seen_)
Example 8: fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def fit(self, X, y=None):
    """Learn a dictionary from the sparse matrix X.

    Parameters
    ----------
    X : csr-matrix, shape (n_samples, n_features)
        Dataset to learn the dictionary from.
    """
    if not sp.issparse(X):
        X = sp.csr_matrix(X)
    X = check_array(X, accept_sparse='csr',
                    dtype=[np.float32, np.float64], copy=True)
    dtype = X.dtype
    n_samples, n_features = X.shape

    self.random_state = check_random_state(self.random_state)

    if self.detrend:
        self.row_mean_, self.col_mean_ = compute_biases(X,
                                                        beta=self.beta,
                                                        inplace=False)
        for i in range(X.shape[0]):
            X.data[X.indptr[i]:X.indptr[i + 1]] -= self.row_mean_[i]
        X.data -= self.col_mean_.take(X.indices, mode='clip')

    # Initialize the dictionary with random unit-norm rows
    self.components_ = self.random_state.randn(self.n_components,
                                               n_features).astype(dtype)
    S = np.sqrt(np.sum(self.components_ ** 2, axis=1))
    self.components_ /= S[:, np.newaxis]
    self.code_ = np.zeros((n_samples, self.n_components), dtype=dtype)

    self._refit(X)

    self.feature_freq_ = np.bincount(X.indices) / n_samples
    self.feature_n_iter_ = np.zeros(n_features, dtype=int)

    # Default batch size scales inversely with the fraction of non-zeros
    sparsity = X.nnz / n_samples / n_features
    if self.batch_size is None:
        batch_size = int(ceil(1. / sparsity))
    else:
        batch_size = self.batch_size

    self.comp_norm_ = np.zeros(self.n_components, dtype=dtype)
    self.C_ = np.zeros((self.n_components, self.n_components), dtype=dtype)
    self.B_ = np.zeros((self.n_components, n_features), dtype=dtype)
    self.n_iter_ = 0

    if self.verbose:
        log_lim = log(n_samples * self.n_epochs / batch_size, 10)
        self.verbose_iter_ = (np.logspace(0, log_lim, self.verbose,
                                          base=10) - 1) * batch_size
        self.verbose_iter_ = self.verbose_iter_.tolist()

    for i in range(self.n_epochs):
        permutation = self.random_state.permutation(n_samples)
        batches = gen_batches(n_samples, batch_size)
        for batch in batches:
            self._single_batch_fit(X, permutation[batch])
        self._refit(X)
    return self
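The default batch size here is a heuristic: it grows as the matrix gets sparser, so each mini-batch touches a roughly constant number of non-zero entries. A worked instance of the arithmetic:

from math import ceil

nnz, n_samples, n_features = 10_000, 1_000, 1_000
sparsity = nnz / n_samples / n_features  # 0.01: 1% of entries are non-zero
batch_size = int(ceil(1. / sparsity))    # -> 100 rows per mini-batch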
Example 9: transform
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def transform(self, X):
    """Compute the codes associated with the input matrix X, decomposing
    it onto the dictionary.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)

    Returns
    -------
    code : ndarray, shape (n_samples, n_components)
    """
    check_is_fitted(self, 'components_')

    dtype = self.components_.dtype
    X = check_array(X, order='C', dtype=dtype.type)
    if X.flags['WRITEABLE'] is False:
        X = X.copy()
    n_samples, n_features = X.shape
    if not hasattr(self, 'G_agg') or self.G_agg != 'full':
        G = self.components_.dot(self.components_.T)
    else:
        G = self.G_
    Dx = X.dot(self.components_.T)
    code = np.ones((n_samples, self.n_components), dtype=dtype)
    sample_indices = np.arange(n_samples)

    # Split the samples evenly across the available threads
    size_job = ceil(n_samples / self.n_threads)
    batches = list(gen_batches(n_samples, size_job))

    par_func = lambda batch: _enet_regression_single_gram(
        G, Dx[batch], X[batch], code,
        get_sub_slice(sample_indices, batch),
        self.code_l1_ratio, self.code_alpha, self.code_pos,
        self.tol, self.max_iter)
    if self.n_threads > 1:
        res = self._pool.map(par_func, batches)
        _ = list(res)  # consume the iterator so all jobs complete
    else:
        _enet_regression_single_gram(
            G, Dx, X, code,
            sample_indices,
            self.code_l1_ratio, self.code_alpha, self.code_pos,
            self.tol, self.max_iter)
    return code
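The per-thread split above amounts to dividing n_samples into n_threads nearly equal contiguous jobs; a small illustration of how that comes out:

from math import ceil
from sklearn.utils import gen_batches

n_samples, n_threads = 10, 3
size_job = ceil(n_samples / n_threads)
print(list(gen_batches(n_samples, size_job)))
# [slice(0, 4, None), slice(4, 8, None), slice(8, 10, None)]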
Example 10: _fit
# Required import: from sklearn import utils [as alias]
# Alternatively: from sklearn.utils import gen_batches [as alias]
def _fit(self, X, y=None):
    """Fit the model with X, using minibatches of size batch_size.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples and
        n_features is the number of features.
    y : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    # Reset the fitted state
    self.components_ = None
    self.n_samples_seen_ = 0
    self.mean_ = 0.0
    self.var_ = 0.0
    self.squared_sum_ = 0.0
    self.sum_ = 0.0
    self.singular_values_ = None
    self.explained_variance_ = None
    self.explained_variance_ratio_ = None
    self.noise_variance_ = None

    X = check_array(
        X,
        accept_sparse=["csr", "csc", "lil"],
        copy=self.copy,
        dtype=[np.float64, np.float32],
        accept_multiple_blocks=True,
    )
    n_samples, n_features = X.shape

    if self.batch_size is None:
        self.batch_size_ = 5 * n_features
    else:
        self.batch_size_ = self.batch_size

    # min_batch_size keeps every batch at least n_components rows long,
    # which the incremental update requires
    for batch in gen_batches(
        n_samples, self.batch_size_, min_batch_size=self.n_components or 0
    ):
        X_batch = X[batch]
        if sparse.issparse(X_batch):
            X_batch = X_batch.toarray()
        self.partial_fit(X_batch, check_input=False)
    return self
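The min_batch_size argument is what keeps the trailing batch usable here: rather than emit a final chunk smaller than the minimum, gen_batches folds it into the previous batch. For example:

from sklearn.utils import gen_batches

print(list(gen_batches(7, 3)))
# [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
print(list(gen_batches(7, 3, min_batch_size=2)))
# [slice(0, 3, None), slice(3, 7, None)]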