This article collects typical usage examples of the Python method sklearn.preprocessing.StandardScaler.partial_fit. If you are wondering what StandardScaler.partial_fit does, or how to use it, the curated code examples below may help. You can also look at the class it belongs to, sklearn.preprocessing.StandardScaler, for further usage examples.
A total of 8 code examples of StandardScaler.partial_fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
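Before the examples, here is a minimal, self-contained sketch of the pattern they all share: StandardScaler.partial_fit updates the scaler's running mean and variance one batch at a time, so the full dataset never has to fit in memory. The array and batch size below are invented for illustration.

import numpy as np
from sklearn.preprocessing import StandardScaler

# Toy data standing in for a dataset too large to hold in memory at once.
rng = np.random.RandomState(0)
data = rng.normal(loc=5.0, scale=2.0, size=(10000, 3))

scaler = StandardScaler()
batch_size = 1024  # arbitrary batch size for this sketch

# Feed the data to the scaler one batch at a time; partial_fit keeps
# running estimates of the mean and variance across all batches seen so far.
for start in range(0, data.shape[0], batch_size):
    scaler.partial_fit(data[start:start + batch_size])

print(scaler.mean_, scaler.var_)           # statistics accumulated over all batches
standardized = scaler.transform(data[:5])  # standardize new data with those statistics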
Example 1: fit_scaler
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def fit_scaler(data_dir, word2vec_model, batch_size=1024, persist_to_path=None):
    """ Get all the word2vec vectors in a 2D matrix and fit the scaler on it.
    This scaler can be used afterwards for normalizing feature matrices. """
    if type(word2vec_model) == str:
        word2vec_model = Word2Vec.load(word2vec_model)
    doc_generator = get_documents(data_dir)
    scaler = StandardScaler(copy=False)
    no_more_samples = False
    while not no_more_samples:
        batch = []
        for i in range(batch_size):
            try:
                batch.append(six.next(doc_generator))
            except StopIteration:
                no_more_samples = True
                break
        vectors = []
        for doc in batch:
            for word in doc.get_all_words():
                if word in word2vec_model:
                    vectors.append(word2vec_model[word])
        matrix = np.array(vectors)
        print("Fitted to {} vectors".format(matrix.shape[0]))
        scaler.partial_fit(matrix)
    if persist_to_path:
        save_to_disk(persist_to_path, scaler)
    return scaler
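Once fitted, the returned scaler standardizes any word2vec feature matrix built from the same model via scaler.transform. A hypothetical call might look like the following (the paths and the feature_matrix variable are placeholders, not taken from the original project):

scaler = fit_scaler('data/train', word2vec_model='models/word2vec.gensim')
normalized = scaler.transform(feature_matrix)  # feature_matrix: any (n_samples, vector_dim) array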
Example 2: out_of_core_x_normalisation
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def out_of_core_x_normalisation(data_dir=HEP_TRAIN_PATH, batch_size=1024,
                                persist=False):
    """ Get all the word2vec vectors in a 2D matrix and fit the scaler on it.
    This scaler can be used afterwards for normalizing feature matrices. """
    doc_generator = get_documents(data_dir=data_dir)
    word2vec_model = Word2Vec.load(WORD2VEC_MODELPATH)
    scaler = StandardScaler(copy=False)
    no_more_samples = False
    while not no_more_samples:
        batch = []
        for i in range(batch_size):
            try:
                batch.append(next(doc_generator))
            except StopIteration:
                no_more_samples = True
                break
        vectors = []
        for doc in batch:
            for word in doc.get_all_words():
                if word in word2vec_model:
                    vectors.append(word2vec_model[word])
        matrix = np.array(vectors)
        print("Matrix shape: {}".format(matrix.shape))
        scaler.partial_fit(matrix)
    if persist:
        save_to_disk(SCALER_PATH, scaler)
    return scaler
Example 3: fit_scaler
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def fit_scaler(data_dir, word2vec_model=WORD2VEC_MODELPATH, batch_size=1024,
               persist_to_path=SCALER_PATH):
    if type(word2vec_model) == str:
        word2vec_model = Word2Vec.load(word2vec_model)
    doc_generator = get_documents(data_dir)
    scaler = StandardScaler(copy=False)
    no_more_samples = False
    while not no_more_samples:
        batch = []
        for i in range(batch_size):
            try:
                batch.append(next(doc_generator))
            except StopIteration:
                no_more_samples = True
                break
        vectors = []
        for doc in batch:
            for word in doc.get_all_words():
                if word in word2vec_model:
                    vectors.append(word2vec_model[word])
        matrix = np.array(vectors)
        print("Fitted to {} vectors".format(matrix.shape[0]))
        scaler.partial_fit(matrix)
    if persist_to_path:
        save_to_disk(persist_to_path, scaler)
    return scaler
Example 4: run_features
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def run_features(args):
    """Run image feature computation.

    Parameters
    ----------
    args : argparse.Namespace
        The arguments parsed by the argparse library.
    """
    if args.global_threshold:
        images = map(io.imread, args.images)
        thresholds = pre.global_threshold(images, args.random_seed)
    else:
        thresholds = None
    images = map(io.imread, args.images)
    screen_info = screens.d[args.screen]
    index_function, fmap = screen_info['index'], screen_info['fmap']
    fmap = tz.partial(fmap, threshold=thresholds,
                      sample_size=args.sample_size,
                      random_seed=args.random_seed)
    indices = list(map(index_function, args.images))
    f0, feature_names = fmap(next(images))
    feature_vectors = tz.cons(f0, (fmap(im)[0] for im in images))
    online_scaler = StandardScaler()
    online_pca = cluster.OnlineIncrementalPCA(n_components=args.n_components,
                                              batch_size=args.pca_batch_size)
    nimages, nfeatures = len(args.images), len(f0)
    emit = io.emitter_function(args.emitter)
    with temporary_hdf5_dataset((nimages, nfeatures), 'float') as dset:
        # First pass: compute the features, compute the mean and SD,
        # compute the PCA
        for i, (idx, v) in enumerate(zip(indices, feature_vectors)):
            emit({'_id': idx, 'feature_vector': list(v)})
            dset[i] = v
            # partial_fit expects a 2D array, hence reshape(1, -1) for a single sample
            online_scaler.partial_fit(v.reshape(1, -1))
            online_pca.add_sample(v)
        # Second pass: standardise the feature vectors, compute PCA-transform
        for i, (idx, v) in enumerate(zip(indices, dset)):
            v_std = online_scaler.transform(v.reshape(1, -1))[0]
            v_pca = online_pca.transform(v)
            dset[i] = v_std
            emit({'_id': idx, 'feature_vector_std': list(v_std),
                  'pca_vector': list(v_pca)})
            online_pca.transform(v)
        # Third pass: Compute the nearest neighbors graph.
        # THIS ANNOYINGLY INSTANTIATES FULL ARRAY -- no out-of-core
        # solution that I'm aware of...
        ng = neighbors.kneighbors_graph(dset, args.num_neighbours,
                                        include_self=False, mode='distance')
        for idx, row in zip(indices, ng):
            emit({'_id': idx, 'neighbours': [indices[i] for i in row.indices]})
Example 5: FeatureExtraction
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
class FeatureExtraction(object):
    """
    Data processing from pandas data frame
    """
    def __init__(self, num_col, cat_col, col_types):
        self.df = None
        self.X_cat = None
        self.X_num = None
        self.num_col = num_col
        self.cat_col = cat_col
        self.col_types = col_types
        self.h = FeatureHasher(n_features=10,
                               input_type='string',
                               non_negative=True)
        self.s = StandardScaler()
        self.init_standard_scaler()

    def init_standard_scaler(self):
        # Stream the csv one row at a time and update the scaler incrementally
        reader = pd.read_csv('test.csv', chunksize=1, usecols=self.num_col,
                             dtype=self.col_types)
        for row in reader:
            print(row.as_matrix())
            self.s.partial_fit(row.as_matrix())

    def data_cleaning(self):
        self.df['gender'].replace('N', 'M', inplace=True)

    def get_features(self, df):
        """
        :param df: pandas data frame
        :return: x and y numpy arrays
        """
        y = df['click'].as_matrix()
        self.df = df.drop('click', 1)
        self.data_cleaning()
        self.X_num = self.s.transform(self.df[self.num_col].as_matrix())
        self.X_cat = self.h.transform(np.asarray(
            self.df[self.cat_col].astype(str))).toarray()
        return np.concatenate((self.X_num, self.X_cat), axis=1), y
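A hypothetical way to wire this class up is shown below (the column names, dtypes and CSV paths are invented for illustration; the class itself additionally assumes a 'click' target column, a 'gender' column, and a 'test.csv' file that it reads in chunks to pre-fit the scaler):

num_col = ['age', 'income']                       # numeric columns (placeholders)
cat_col = ['gender', 'device']                    # categorical columns (placeholders)
col_types = {'age': np.float64, 'income': np.float64}
fe = FeatureExtraction(num_col, cat_col, col_types)
X, y = fe.get_features(pd.read_csv('train.csv'))  # 'train.csv' is a placeholder path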
Example 6: preprocess_data
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def preprocess_data(X, scaler=None):
    if not scaler:
        scaler = StandardScaler()
        scaler.partial_fit(X)
    X = scaler.transform(X)
    return X, scaler
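Note that partial_fit is called here only once, on the full array, which gives the same statistics as scaler.fit(X); the incremental behaviour matters when the same scaler object is reused across calls. A hypothetical usage (X_train and X_test are placeholders):

X_train_std, scaler = preprocess_data(X_train)   # fits a new scaler on the training data
X_test_std, _ = preprocess_data(X_test, scaler)  # reuses the training statistics without refitting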
Example 7: fit
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def fit(self):
    best_valid_loss = np.inf
    best_train_loss = np.inf
    train_history = []
    standard_scaler = StandardScaler(copy=False)
    # train standardizer
    for Xb, yb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
        standard_scaler.partial_fit(yb.reshape(Xb.shape[0], -1))
    for epoch in range(0, self.max_epochs):
        t0 = time()
        train_losses = []
        valid_losses = []
        for Xb, yb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = standard_scaler.transform(yb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            loss = self.train_fn(Xb, yb)
            train_losses.append(loss)
        for Xb, yb, filename in tqdm(self.batch_iterator_test, total=self.n_val_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = standard_scaler.transform(yb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            loss, prediction = self.val_fn(Xb, yb)
            valid_losses.append(loss)
        # visualize sample
        for j in range(10):
            plt.clf()
            plt.imshow(np.concatenate((Xb[j], np.ones((Xb.shape[1], 1)), yb[j], np.ones((Xb.shape[1], 1)), prediction[j]), axis=1), aspect='auto')
            plt.axis('off')
            plt.title('real/ target/ reconstruction')
            plt.savefig('visualizations/' + 'sample_'+str(j)+'.png')
        avg_train_loss = np.mean(train_losses)
        avg_valid_loss = np.mean(valid_losses)
        if avg_train_loss > best_train_loss * 0.999:
            self.update_learning_rate.set_value(self.update_learning_rate.get_value() * np.float32(0.99))
            print('new learning rate: ', self.update_learning_rate.get_value())
        if avg_train_loss < best_train_loss:
            best_train_loss = avg_train_loss
        if avg_valid_loss < best_valid_loss:
            best_valid_loss = avg_valid_loss
        info = {
            'epoch': epoch,
            'train_loss': avg_train_loss,
            'train_loss_best': best_train_loss,
            'valid_loss': avg_valid_loss,
            'valid_loss_best': best_valid_loss,
            'valid_accuracy': 'N/A',
            'duration': time() - t0,
        }
        train_history.append(info)
        self.print_progress(train_history)
        # Save to disk
        vals = lasagne.layers.get_all_param_values(self.net['prob'])
        with open('models/' + str(epoch) + '.pkl', 'wb') as f:
            pickle.dump(vals, f, -1)
    print('Saving denoised files to disk')
    for Xb, yb, filename in tqdm(self.batch_iterator_total, total=self.n_batches):
        Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
        yb = standard_scaler.transform(yb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
        loss, prediction = self.val_fn(Xb, yb)
        # untransform before saving
        prediction = standard_scaler.inverse_transform(prediction.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
        for j in range(Xb.shape[0]):
            with open('aurora2/train_denoised' + '/'+filename[j]+'.npy', 'wb') as f:
                np.save(f, prediction[j])
Example 8: fit
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def fit(self):
    best_valid_loss = np.inf
    best_train_loss = np.inf
    train_history = []
    standard_scaler = StandardScaler(copy=False)
    # train standardizer
    for Xb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
        standard_scaler.partial_fit(Xb.reshape(Xb.shape[0], -1))
    for epoch in range(0, self.max_epochs):
        t0 = time()
        train_losses = []
        valid_losses = []
        valid_accuracy = []
        for Xb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
            loss = self.train_fn(Xb, yb)
            train_losses.append(loss)
        for Xb, filename in tqdm(self.batch_iterator_test, total=self.n_val_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
            loss, prediction = self.val_fn(Xb, yb)
            acc = np.mean(np.argmax(prediction, axis=1)==yb)
            valid_accuracy.append(acc)
            valid_losses.append(loss)
        # visualize sample
        for j in range(10):
            plt.clf()
            plt.imshow(Xb[j], aspect='auto')
            plt.axis('off')
            plt.title('real')
            plt.savefig('visualizations/' + 'sample_'+str(j)+'.png')
        avg_train_loss = np.mean(train_losses)
        avg_valid_loss = np.mean(valid_losses)
        avg_valid_acc = np.mean(valid_accuracy)
        if avg_train_loss > best_train_loss * 0.999:
            self.update_learning_rate.set_value(self.update_learning_rate.get_value() * np.float32(0.99))
            print('new learning rate: ', self.update_learning_rate.get_value())
        if avg_train_loss < best_train_loss:
            best_train_loss = avg_train_loss
        if avg_valid_loss < best_valid_loss:
            best_valid_loss = avg_valid_loss
        info = {
            'epoch': epoch,
            'train_loss': avg_train_loss,
            'train_loss_best': best_train_loss,
            'valid_loss': avg_valid_loss,
            'valid_loss_best': best_valid_loss,
            'valid_accuracy': avg_valid_acc,
            'duration': time() - t0,
        }
        train_history.append(info)
        self.print_progress(train_history)
        # Save to disk
        vals = lasagne.layers.get_all_param_values(self.net['prob'])
        with open('models/' + str(epoch) + '.pkl', 'wb') as f:
            pickle.dump(vals, f, -1)
    print('Calculating validation denoised clean accuracy')
    # to check how good denoising was of the clean signal!
    total_acc = 0
    for Xb, filename in tqdm(self.batch_iterator_test_denoised, total=self.n_batches):
        Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
        yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
        loss, prediction = self.val_fn(Xb, yb)
        total_acc += np.sum(yb==np.argmax(prediction, axis=1))
    print(' Denoised clean accuracy: ', total_acc/float(len(self.batch_iterator_test_denoised.X)))
    print('Calculating final test accuracy')
    total_acc = 0
    for Xb, filename in tqdm(self.batch_iterator_total, total=self.n_batches):
        Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
        yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
        loss, prediction = self.val_fn(Xb, yb)
        total_acc += np.sum(yb==np.argmax(prediction, axis=1))
    print(' Denoised multi accuracy: ', total_acc/float(len(self.batch_iterator_total.X)))
    for j in range(4):
        total_acc = 0
        X = [s for s in self.batch_iterator_total.X if ('SNR' + str((j + 1) * 5)) in s]
        batch_iterator = ParallelBatchIterator(X, par.BATCH_SIZE, 'train_denoised')
        for Xb, filename in tqdm(batch_iterator, total=self.n_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
            loss, prediction = self.val_fn(Xb, yb)
            total_acc += np.sum(yb == np.argmax(prediction, axis=1))
        print(' Denoised multi accuracy for '+'SNR' + str((j + 1) * 5)+': ', total_acc / float(len(batch_iterator.X)))
        print(' Datasize: ', len(X))
#......... the rest of the code is omitted here .........