

Python StandardScaler.partial_fit Method Code Examples

This article collects typical usage examples of the sklearn.preprocessing.StandardScaler.partial_fit method in Python. If you are wondering how StandardScaler.partial_fit is called and what it looks like in real code, the curated examples below should help. You can also browse further usage examples for the containing class, sklearn.preprocessing.StandardScaler.


Eight code examples of the StandardScaler.partial_fit method are shown below, sorted by popularity by default.
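Before the project-specific examples, here is a minimal sketch of the underlying pattern: partial_fit updates the scaler's running mean and variance one batch at a time, so a dataset that does not fit in memory can still be standardized. The array sizes and the batch loop are illustrative assumptions, not code from any of the projects listed below.

import numpy as np
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Feed the data in chunks; each call updates the running mean and variance.
for batch in np.array_split(np.random.rand(10000, 50), 10):
    scaler.partial_fit(batch)

# Once all batches have been seen, transform behaves as if fit() had been
# called on the full matrix.
X_scaled = scaler.transform(np.random.rand(5, 50))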

Example 1: fit_scaler

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def fit_scaler(data_dir, word2vec_model, batch_size=1024, persist_to_path=None):
    """ Get all the word2vec vectors in a 2D matrix and fit the scaler on it.
     This scaler can be used afterwards for normalizing feature matrices. """
    if type(word2vec_model) == str:
        word2vec_model = Word2Vec.load(word2vec_model)

    doc_generator = get_documents(data_dir)
    scaler = StandardScaler(copy=False)

    no_more_samples = False
    while not no_more_samples:
        batch = []
        for i in range(batch_size):
            try:
                batch.append(six.next(doc_generator))
            except StopIteration:
                no_more_samples = True
                break

        vectors = []
        for doc in batch:
            for word in doc.get_all_words():
                if word in word2vec_model:
                    vectors.append(word2vec_model[word])

        matrix = np.array(vectors)
        print("Fitted to {} vectors".format(matrix.shape[0]))

        scaler.partial_fit(matrix)

    if persist_to_path:
        save_to_disk(persist_to_path, scaler)

    return scaler
Developer: netrasys, Project: magpie, Lines of code: 36, Source file: word2vec.py
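A brief usage sketch for fit_scaler above; the corpus directory, model path, and scaler path are hypothetical placeholders, not files from the magpie project.

# Hypothetical paths -- substitute your own corpus directory and model files.
scaler = fit_scaler('data/hep-train', 'models/word2vec.gensim',
                    batch_size=2048, persist_to_path='models/scaler.pickle')

# The fitted scaler can then standardize any word2vec feature matrix:
# X_scaled = scaler.transform(feature_matrix)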

Example 2: out_of_core_x_normalisation

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def out_of_core_x_normalisation(data_dir=HEP_TRAIN_PATH, batch_size=1024,
                                persist=False):
    """ Get all the word2vec vectors in a 2D matrix and fit the scaler on it.
     This scaler can be used afterwards for normalizing feature matrices. """
    doc_generator = get_documents(data_dir=data_dir)
    word2vec_model = Word2Vec.load(WORD2VEC_MODELPATH)
    scaler = StandardScaler(copy=False)

    no_more_samples = False
    while not no_more_samples:
        batch = []
        for i in range(batch_size):
            try:
                batch.append(next(doc_generator))
            except StopIteration:
                no_more_samples = True
                break

        vectors = []
        for doc in batch:
            for word in doc.get_all_words():
                if word in word2vec_model:
                    vectors.append(word2vec_model[word])

        matrix = np.array(vectors)
        print "Matrix shape: {}".format(matrix.shape)

        scaler.partial_fit(matrix)

    if persist:
        save_to_disk(SCALER_PATH, scaler)

    return scaler
Developer: eamonnmag, Project: magpie, Lines of code: 35, Source file: word2vec.py

Example 3: fit_scaler

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def fit_scaler(data_dir, word2vec_model=WORD2VEC_MODELPATH, batch_size=1024,
               persist_to_path=SCALER_PATH):
    if type(word2vec_model) == str:
        word2vec_model = Word2Vec.load(word2vec_model)

    doc_generator = get_documents(data_dir)

    scaler = StandardScaler(copy=False)

    no_more_samples = False
    while not no_more_samples:
        batch = []
        for i in range(batch_size):
            try:
                batch.append(next(doc_generator))
            except StopIteration:
                no_more_samples = True
                break

        vectors = []
        for doc in batch:
            for word in doc.get_all_words():
                if word in word2vec_model:
                    vectors.append(word2vec_model[word])

        matrix = np.array(vectors)
        print "Fitted to {} vectors".format(matrix.shape[0])

        scaler.partial_fit(matrix)

    if persist_to_path:
        save_to_disk(persist_to_path, scaler)

    return scaler
Developer: Rowl1ng, Project: magpie, Lines of code: 36, Source file: word2vec.py

Example 4: run_features

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def run_features(args):
    """Run image feature computation.

    Parameters
    ----------
    args : argparse.Namespace
        The arguments parsed by the argparse library.
    """
    if args.global_threshold:
        images = map(io.imread, args.images)
        thresholds = pre.global_threshold(images, args.random_seed)
    else:
        thresholds = None
    images = map(io.imread, args.images)
    screen_info = screens.d[args.screen]
    index_function, fmap = screen_info['index'], screen_info['fmap']
    fmap = tz.partial(fmap, threshold=thresholds,
                            sample_size=args.sample_size,
                            random_seed=args.random_seed)
    indices = list(map(index_function, args.images))
    f0, feature_names = fmap(next(images))
    feature_vectors = tz.cons(f0, (fmap(im)[0] for im in images))
    online_scaler = StandardScaler()
    online_pca = cluster.OnlineIncrementalPCA(n_components=args.n_components,
                                              batch_size=args.pca_batch_size)
    nimages, nfeatures = len(args.images), len(f0)
    emit = io.emitter_function(args.emitter)
    with temporary_hdf5_dataset((nimages, nfeatures), 'float') as dset:
        # First pass: compute the features, compute the mean and SD,
        # compute the PCA
        for i, (idx, v) in enumerate(zip(indices, feature_vectors)):
            emit({'_id': idx, 'feature_vector': list(v)})
            dset[i] = v
            online_scaler.partial_fit(v.reshape(1, -1))
            online_pca.add_sample(v)
        # Second pass: standardise the feature vectors, compute PCA-transform
        for i, (idx, v) in enumerate(zip(indices, dset)):
            v_std = online_scaler.transform(v.reshape(1, -1))[0]
            v_pca = online_pca.transform(v)
            dset[i] = v_std
            emit({'_id': idx, 'feature_vector_std': list(v_std),
                              'pca_vector': list(v_pca)})
        # Third pass: Compute the nearest neighbors graph.
        # THIS ANNOYINGLY INSTANTIATES FULL ARRAY -- no out-of-core
        # solution that I'm aware of...
        ng = neighbors.kneighbors_graph(dset, args.num_neighbours,
                                        include_self=False, mode='distance')
        for idx, row in zip(indices, ng):
            emit({'_id': idx, 'neighbours': [indices[i] for i in row.indices]})
Developer: microscopium, Project: microscopium, Lines of code: 52, Source file: main.py
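The first two passes above (accumulate scaling statistics and the PCA basis batch by batch, then standardize and project) can also be written with scikit-learn's IncrementalPCA in place of the project's cluster.OnlineIncrementalPCA. The sketch below is an assumed equivalent of that pattern, not code from microscopium.

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import IncrementalPCA

def fit_scaler_and_pca(batches, n_components=10):
    """batches: a list of 2D arrays of shape (n_samples, n_features)."""
    scaler = StandardScaler()
    ipca = IncrementalPCA(n_components=n_components)
    # First pass: accumulate the mean/variance and the PCA basis.
    # Note: IncrementalPCA.partial_fit needs >= n_components samples per batch.
    for X in batches:
        scaler.partial_fit(X)
        ipca.partial_fit(X)
    return scaler, ipca

# Second pass, as in run_features above: standardize and project each batch.
batches = [np.random.rand(64, 30) for _ in range(5)]
scaler, ipca = fit_scaler_and_pca(batches)
standardized = [scaler.transform(X) for X in batches]
projected = [ipca.transform(X) for X in batches]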

Example 5: FeatureExtraction

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
class FeatureExtraction(object):
    """
    Data processing from pandas data frame
    """
    def __init__(self, num_col, cat_col, col_types):
        self.df = None
        self.X_cat = None
        self.X_num = None
        self.num_col = num_col
        self.cat_col = cat_col
        self.col_types = col_types
        self.h = FeatureHasher(n_features=10,
                               input_type='string',
                               non_negative=True)
        self.s = StandardScaler()
        self.init_standard_scaler()

    def init_standard_scaler(self):
        reader = pd.read_csv('test.csv', chunksize=1, usecols=self.num_col,
                             dtype=self.col_types)
        for row in reader:
            print(row.as_matrix())
            self.s.partial_fit(row.as_matrix())

    def data_cleaning(self):
        self.df['gender'].replace('N', 'M', inplace=True)

    def get_features(self, df):
        """
        :param df: pandas data frame
        :return: x and y numpy arrays
        """
        y = df['click'].as_matrix()
        self.df = df.drop('click', 1)
        self.data_cleaning()
        self.X_num = self.s.transform(self.df[self.num_col].as_matrix())
        self.X_cat = self.h.transform(np.asarray(
                self.df[self.cat_col].astype(str))).toarray()
        return np.concatenate((self.X_num, self.X_cat), axis=1), y
Developer: DataScientistsUB, Project: CTR_predictor, Lines of code: 41, Source file: FeatureExtraction_prototype.py
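Reading test.csv one row at a time (chunksize=1) gives correct statistics but is slow; partial_fit accepts any number of rows per call, so a larger chunk size yields the same result with far fewer iterations. A minimal sketch, assuming hypothetical numeric column names:

import pandas as pd
from sklearn.preprocessing import StandardScaler

num_col = ['age', 'price']          # hypothetical numeric columns
scaler = StandardScaler()

# Same running statistics as chunksize=1, with vastly fewer partial_fit calls.
for chunk in pd.read_csv('test.csv', chunksize=10000, usecols=num_col):
    scaler.partial_fit(chunk.to_numpy())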

Example 6: preprocess_data

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
def preprocess_data(X, scaler=None):
    if not scaler:
        scaler = StandardScaler()
    scaler.partial_fit(X)
    X = scaler.transform(X)
    return X, scaler
Developer: aikiselev, Project: GradientReversal, Lines of code: 8, Source file: data.py
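Because partial_fit updates the scaler passed to it in place, the returned scaler should be fed back in on later batches so the statistics accumulate over the whole stream; note that early batches are transformed with statistics that are still converging. A short usage sketch with hypothetical batches:

import numpy as np

scaler = None
for batch in (np.random.rand(256, 8) for _ in range(4)):   # hypothetical batches
    X_scaled, scaler = preprocess_data(batch, scaler)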

Example 7: fit

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
    def fit(self):
        best_valid_loss = np.inf
        best_train_loss = np.inf
        train_history = []
        standard_scaler = StandardScaler(copy=False)
        # train standardizer
        for Xb, yb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
            standard_scaler.partial_fit(yb.reshape(Xb.shape[0], -1))

        for epoch in range(0, self.max_epochs):
            t0 = time()

            train_losses = []
            valid_losses = []

            for Xb, yb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
                Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
                yb = standard_scaler.transform(yb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
                loss = self.train_fn(Xb, yb)
                train_losses.append(loss)

            for Xb, yb, filename in tqdm(self.batch_iterator_test, total=self.n_val_batches):
                Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
                yb = standard_scaler.transform(yb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
                loss, prediction = self.val_fn(Xb, yb)
                valid_losses.append(loss)

            # visualize sample
            for j in range(10):
                plt.clf()
                plt.imshow(np.concatenate((Xb[j], np.ones((Xb.shape[1], 1)), yb[j], np.ones((Xb.shape[1], 1)), prediction[j]), axis=1), aspect='auto')
                plt.axis('off')
                plt.title('real/ target/ reconstruction')
                plt.savefig('visualizations/' + 'sample_'+str(j)+'.png')

            avg_train_loss = np.mean(train_losses)
            avg_valid_loss = np.mean(valid_losses)


            if avg_train_loss > best_train_loss * 0.999:
                self.update_learning_rate.set_value(self.update_learning_rate.get_value() * np.float32(0.99))
                print('new learning rate: ', self.update_learning_rate.get_value())
            if avg_train_loss < best_train_loss:
                best_train_loss = avg_train_loss
            if avg_valid_loss < best_valid_loss:
                best_valid_loss = avg_valid_loss

            info = {
                'epoch': epoch,
                'train_loss': avg_train_loss,
                'train_loss_best': best_train_loss,
                'valid_loss': avg_valid_loss,
                'valid_loss_best': best_valid_loss,
                'valid_accuracy': 'N/A',
                'duration': time() - t0,
            }

            train_history.append(info)

            self.print_progress(train_history)

            # Save to disk
            vals = lasagne.layers.get_all_param_values(self.net['prob'])
            with open('models/' + str(epoch) + '.pkl', 'wb') as f:
                pickle.dump(vals, f, -1)

        print('Saving denoised files to disk')
        for Xb, yb, filename in tqdm(self.batch_iterator_total, total=self.n_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = standard_scaler.transform(yb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            loss, prediction = self.val_fn(Xb, yb)
            # untransform before saving
            prediction = standard_scaler.inverse_transform(prediction.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            for j in range(Xb.shape[0]):
                with open('aurora2/train_denoised' + '/'+filename[j]+'.npy', 'wb') as f:
                    np.save(f, prediction[j])
Developer: StevenReitsma, Project: speech-denoiser, Lines of code: 78, Source file: neuralnet.py
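The reshape/transform/reshape round trip used throughout fit above flattens each batch to 2D so StandardScaler sees one row per sample, restores the original shape afterwards, and applies inverse_transform to bring predictions back to the original scale before saving. The same round trip is shown in isolation below, with assumed illustrative shapes.

import numpy as np
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
Xb = np.random.rand(32, 80, 100)                     # assumed (batch, freq, time) shape

flat = Xb.reshape(Xb.shape[0], -1)                   # (32, 8000): one row per sample
scaler.partial_fit(flat)

Xb_std = scaler.transform(flat).reshape(Xb.shape)    # standardized, original shape
restored = scaler.inverse_transform(
    Xb_std.reshape(Xb.shape[0], -1)).reshape(Xb.shape)

assert np.allclose(restored, Xb)                     # the round trip recovers the input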

Example 8: fit

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import partial_fit [as alias]
    def fit(self):
        best_valid_loss = np.inf
        best_train_loss = np.inf
        train_history = []
        standard_scaler = StandardScaler(copy=False)
        # train standardizer
        for Xb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
            standard_scaler.partial_fit(Xb.reshape(Xb.shape[0], -1))

        for epoch in range(0, self.max_epochs):
            t0 = time()

            train_losses = []
            valid_losses = []
            valid_accuracy = []

            for Xb, filename in tqdm(self.batch_iterator_train, total=self.n_batches):
                Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
                yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
                loss = self.train_fn(Xb, yb)
                train_losses.append(loss)

            for Xb, filename in tqdm(self.batch_iterator_test, total=self.n_val_batches):
                Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
                yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
                loss, prediction = self.val_fn(Xb, yb)
                acc = np.mean(np.argmax(prediction, axis=1)==yb)
                valid_accuracy.append(acc)
                valid_losses.append(loss)

            # visualize sample
            for j in range(10):
                plt.clf()
                plt.imshow(Xb[j], aspect='auto')
                plt.axis('off')
                plt.title('real')
                plt.savefig('visualizations/' + 'sample_'+str(j)+'.png')

            avg_train_loss = np.mean(train_losses)
            avg_valid_loss = np.mean(valid_losses)
            avg_valid_acc = np.mean(valid_accuracy)


            if avg_train_loss > best_train_loss * 0.999:
                self.update_learning_rate.set_value(self.update_learning_rate.get_value() * np.float32(0.99))
                print('new learning rate: ', self.update_learning_rate.get_value())
            if avg_train_loss < best_train_loss:
                best_train_loss = avg_train_loss
            if avg_valid_loss < best_valid_loss:
                best_valid_loss = avg_valid_loss

            info = {
                'epoch': epoch,
                'train_loss': avg_train_loss,
                'train_loss_best': best_train_loss,
                'valid_loss': avg_valid_loss,
                'valid_loss_best': best_valid_loss,
                'valid_accuracy': avg_valid_acc,
                'duration': time() - t0,
            }

            train_history.append(info)

            self.print_progress(train_history)

            # Save to disk
            vals = lasagne.layers.get_all_param_values(self.net['prob'])
            with open('models/' + str(epoch) + '.pkl', 'wb') as f:
                pickle.dump(vals, f, -1)

        print('Calculating validation denoised clean accuracy')
        #to check how good denoising was of the clean signal!
        total_acc = 0
        for Xb, filename in tqdm(self.batch_iterator_test_denoised, total=self.n_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
            loss, prediction = self.val_fn(Xb, yb)
            total_acc += np.sum(yb==np.argmax(prediction, axis=1))
        print(' Denoised clean accuracy: ', total_acc/float(len(self.batch_iterator_test_denoised.X)))

        print('Calculating final test accuracy')
        total_acc = 0
        for Xb, filename in tqdm(self.batch_iterator_total, total=self.n_batches):
            Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
            yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
            loss, prediction = self.val_fn(Xb, yb)
            total_acc += np.sum(yb==np.argmax(prediction, axis=1))
        print(' Denoised multi accuracy: ', total_acc/float(len(self.batch_iterator_total.X)))

        for j in range(4):
            total_acc = 0
            X = [s for s in self.batch_iterator_total.X if ('SNR' + str((j + 1) * 5)) in s]
            batch_iterator = ParallelBatchIterator(X, par.BATCH_SIZE, 'train_denoised')
            for Xb, filename in tqdm(batch_iterator, total=self.n_batches):
                Xb = standard_scaler.transform(Xb.reshape(Xb.shape[0], -1)).reshape(Xb.shape)
                yb = np.array([s[-5] if s[-5] != 'Z' and s[-5] != 'O' else 0 if s[-5] != 'O' else 10 for s in filename]).astype(np.int32)
                loss, prediction = self.val_fn(Xb, yb)
                total_acc += np.sum(yb == np.argmax(prediction, axis=1))
            print(' Denoised multi accuracy for '+'SNR' + str((j + 1) * 5)+': ', total_acc / float(len(batch_iterator.X)))
            print(' Datasize: ', len(X))
#......... part of the code omitted here .........
Developer: StevenReitsma, Project: speech-denoiser, Lines of code: 103, Source file: neuralnet.py


Note: The sklearn.preprocessing.StandardScaler.partial_fit examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use should follow the corresponding project's License. Do not reproduce without permission.