Python preprocessing.scale方法代碼示例

本文整理匯總了Python中sklearn.preprocessing.scale方法的典型用法代碼示例。如果您正苦於以下問題：Python preprocessing.scale方法的具體用法？Python preprocessing.scale怎麽用？Python preprocessing.scale使用的例子？那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.preprocessing的用法示例。

在下文中一共展示了preprocessing.scale方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: violin_jitter

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def violin_jitter(X, genes, gene, labels, focus, background=None,
                  xlabels=None):
    gidx = list(genes).index(gene)

    focus_idx = focus == labels
    if background is None:
        background_idx = focus != labels
    else:
        background_idx = background == labels

    if xlabels is None:
        xlabels = [ 'Background', 'Focus' ]

    x_gene = X[:, gidx].toarray().flatten()
    x_focus = x_gene[focus_idx]
    x_background = x_gene[background_idx]
    
    plt.figure()
    sns.violinplot(data=[ x_focus, x_background ], scale='width', cut=0)
    sns.stripplot(data=[ x_focus, x_background ], jitter=True, color='black', size=1)
    plt.xticks([0, 1], xlabels)
    plt.savefig('{}_violin_{}.png'.format(NAMESPACE, gene))

開發者ID:brianhie，項目名稱:geosketch，代碼行數:24，代碼來源:umbilical.py

示例2: train_FFM_model_demo

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def train_FFM_model_demo():

    # Step1: 導入數據
    x_train, y_train, x_test, y_test, feature2field = load_dataset()
    x_train = preprocessing.scale(x_train, with_mean=True, with_std=True)
    x_test = preprocessing.scale(x_test, with_mean=True, with_std=True)
    class_num = len(set([y for y in y_train] + [y for y in y_test]))

    # FFM模型
    ffm = FFM_layer(field_map_dict=feature2field, fea_num=x_train.shape[1], reg_l1=0.01, reg_l2=0.01,
                    class_num=class_num, latent_factor_dim=10).to(DEVICE)

    # 定義損失函數還有優化器
    optm = torch.optim.Adam(ffm.parameters())

    train_loader = get_batch_loader(x_train, y_train, BATCH_SIZE, shuffle=True)
    test_loader = get_batch_loader(x_test, y_test, BATCH_SIZE, shuffle=False)

    for epoch in range(1, EPOCHS + 1):
        train(ffm, DEVICE, train_loader, optm, epoch)
        test(ffm, DEVICE, test_loader)

開發者ID:JianzhouZhan，項目名稱:Awesome-RecSystem-Models，代碼行數:23，代碼來源:FFM_Multi_PyTorch.py

示例3: train_FM_model_demo

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def train_FM_model_demo():

    # Step1: 導入數據
    x_train, y_train, x_test, y_test = load_dataset()
    x_train = preprocessing.scale(x_train, with_mean=True, with_std=True)
    x_test = preprocessing.scale(x_test, with_mean=True, with_std=True)
    class_num = len(set([y for y in y_train] + [y for y in y_test]))

    # FM模型
    fm = FM_layer(class_num=class_num, feature_num=x_train.shape[1], latent_factor_dim=40).to(DEVICE)

    # 定義損失函數還有優化器
    optm = torch.optim.Adam(fm.parameters())

    train_loader = get_batch_loader(x_train, y_train, BATCH_SIZE, shuffle=True)
    test_loader = get_batch_loader(x_test, y_test, BATCH_SIZE, shuffle=False)

    for epoch in range(1, EPOCHS + 1):
        train(fm, DEVICE, train_loader, optm, epoch)
        test(fm, DEVICE, test_loader)

開發者ID:JianzhouZhan，項目名稱:Awesome-RecSystem-Models，代碼行數:22，代碼來源:FM_Multi_PyTorch.py

示例4: test_elastic_net_versus_sgd

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def test_elastic_net_versus_sgd(C, l1_ratio):
    # Compare elasticnet penalty in LogisticRegression() and SGD(loss='log')
    n_samples = 500
    X, y = make_classification(n_samples=n_samples, n_classes=2, n_features=5,
                               n_informative=5, n_redundant=0, n_repeated=0,
                               random_state=1)
    X = scale(X)

    sgd = SGDClassifier(
        penalty='elasticnet', random_state=1, fit_intercept=False, tol=-np.inf,
        max_iter=2000, l1_ratio=l1_ratio, alpha=1. / C / n_samples, loss='log')
    log = LogisticRegression(
        penalty='elasticnet', random_state=1, fit_intercept=False, tol=1e-5,
        max_iter=1000, l1_ratio=l1_ratio, C=C, solver='saga')

    sgd.fit(X, y)
    log.fit(X, y)
    assert_array_almost_equal(sgd.coef_, log.coef_, decimal=1)

開發者ID:PacktPublishing，項目名稱:Mastering-Elasticsearch-7.0，代碼行數:20，代碼來源:test_logistic.py

示例5: run_pca

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def run_pca(self, whiten=True):
        # Normalize
        for_pca_df = self.features_df.T
        for_pca_df_scaled = pd.DataFrame(preprocessing.scale(for_pca_df), columns=for_pca_df.columns)

        # Run PCA
        self.num_components = min(len(for_pca_df.T.columns), len(for_pca_df.T.index))
        pca = PCA(n_components=self.num_components, whiten=whiten)
        pca_fit = pca.fit_transform(for_pca_df_scaled)
        self.pc_names_list = ['PC{} ({:.0%})'.format(x + 1, pca.explained_variance_ratio_[x]) for x in
                                  range(self.num_components)]
        self.pc_names_dict = {k.split(' ')[0]: k for k in self.pc_names_list}
        principal_df = pd.DataFrame(data=pca_fit, columns=self.pc_names_list, index=for_pca_df.index)
        principal_df.index.name = 'strain'

        self.principal_df = principal_df
        self.pca = pca
        # self.principal_observations_df = self.principal_df.join(self.observations_df, how='inner')
        #
        # # Make iterable list of markers
        # mks = itertools.cycle(["<", "+", "o", 'D', 'x', '^', '*', '8', 's', 'p', 'v', 'X', '_', 'h'])
        # self.markers = [next(mks) for i in range(len(self.principal_observations_df[self.observation_colname].unique()))]

開發者ID:SBRG，項目名稱:ssbio，代碼行數:24，代碼來源:atlas3.py

示例6: get_ind_return

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def get_ind_return(data):
    '''
    將從xlsx中讀取出來按列拚接好的數據進行重組，計算出每個行業每個月的收益率
    :param [DataFrame] data: 從xlsx文件中讀取的月份-交易數據
    :return: [DataFrame] ind_ret: 月份*行業 每個行業每個月的收益率
    '''
    # 讀入stk_ind_pair.xlsx，用作股票和其所屬行業的對照表
    stk_ind = pd.read_excel('E:\\QuantProject2\\temp_data\\stk_ind_pair.xlsx')
    # 把stk_ind裏麵股票代碼數字部分後麵的字母去掉
    stk_ind.Stkcd = stk_ind.Stkcd.apply(lambda x: x[:6])
    # 對stk_ind和data進行merge操作，將行業信息插入data
    data = pd.merge(data, stk_ind, on='Stkcd')
    # 按照月份和行業分組
    groups = data.groupby(['Trdmnt', 'ind'])
    # 分組計算每個月每個行業的總市值
    total_Ms = groups['Msmvttl'].sum()
    # 分組計算每個月每個行業按照市值加權的收益率
    total_Mr=groups['total_Mr'].sum()
    # 相除得到每個月每個行業的平均收益率
    ind_ret=total_Mr/total_Ms
    # 將ind_ret的內層level轉換為列
    ind_ret=ind_ret.unstack()
    #將ind_ret標準化
    ind_ret=pd.DataFrame(scale(ind_ret),columns=ind_ret.columns)
    return ind_ret

開發者ID:icezerowjj，項目名稱:MultipleFactorRiskModel，代碼行數:27，代碼來源:Get_flow_ev.py

示例7: do_pca

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def do_pca(X, c=3):
    """Do PCA"""

    from sklearn import preprocessing
    from sklearn.decomposition.pca import PCA, RandomizedPCA
    #do PCA
    #S = standardize_data(X)
    S = pd.DataFrame(preprocessing.scale(X),columns = X.columns)
    pca = PCA(n_components=c)
    pca.fit(S)
    print (pca.explained_variance_ratio_)
    #print pca.components_
    w = pd.DataFrame(pca.components_,columns=S.columns)#,index=['PC1','PC2'])
    #print w.T.max(1).sort_values()
    pX = pca.fit_transform(S)
    pX = pd.DataFrame(pX,index=X.index)
    return pX

開發者ID:dmnfarrell，項目名稱:smallrnaseq，代碼行數:19，代碼來源:analysis.py

示例8: train

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def train(train_data, outfile):
        """
        :param train_data: A Batcher object that delivers batches of train data.
        :param outfile: (str) Where to print results.
        """
        outfile.write('day user red loss\n')
        mat = train_data.next_batch()
        while mat is not None:
            datadict = {'features': mat[:, 3:], 'red': mat[:,2], 'user': mat[:,1], 'day': mat[:,0]}
            batch = scale(datadict['features'])
            pca = PCA(n_components=1)
            pca.fit(batch)
            data_reduced = np.dot(batch, pca.components_.T) # pca transform
            data_original = np.dot(data_reduced, pca.components_) # inverse_transform
            pointloss = np.mean(np.square(batch - data_original), axis=1)
            loss = np.mean(pointloss)
            for d, u, t, l, in zip(datadict['day'].tolist(), datadict['user'].tolist(),
                                   datadict['red'].tolist(), pointloss.flatten().tolist()):
                outfile.write('%s %s %s %s\n' % (d, u, t, l))
            print('loss: %.4f' % loss)
            mat = train_data.next_batch()

開發者ID:pnnl，項目名稱:safekit，代碼行數:23，代碼來源:pca_autoencoder.py

示例9: is_log_scale_needed

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def is_log_scale_needed(x_org):
        x = np.array(x_org[~pd.isnull(x_org)])
        # first scale on raw data
        x = preprocessing.scale(x)
        # second scale on log data
        x_log = preprocessing.scale(np.log(x - np.min(x) + 1))

        # the old approach, let's check how new approach will work
        # original_skew = np.abs(stats.skew(x))
        # log_skew = np.abs(stats.skew(x_log))
        # return log_skew < original_skew
        ########################################################################
        # p is probability of being normal distributions
        k2, p1 = stats.normaltest(x)
        k2, p2 = stats.normaltest(x_log)

        return p2 > p1

開發者ID:mljar，項目名稱:mljar-supervised，代碼行數:19，代碼來源:preprocessing_utils.py

示例10: setUpClass

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def setUpClass(cls):
        cls.X, cls.y = datasets.make_regression(
            n_samples=100, n_features=5, n_informative=4, shuffle=False, random_state=0
        )

        cls.params = {
            "dense_layers": 2,
            "dense_1_size": 8,
            "dense_2_size": 4,
            "dropout": 0,
            "learning_rate": 0.01,
            "momentum": 0.9,
            "decay": 0.001,
            "ml_task": "regression"
        }

        cls.y = preprocessing.scale(cls.y)

開發者ID:mljar，項目名稱:mljar-supervised，代碼行數:19，代碼來源:test_nn.py

示例11: train

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def train(self, df, shuffle=True, preprocess=False, *args, **kwargs):
        """
        Takes a dataframe of features + a 'label' column and trains the lobe
        """
        if self._trained:
            logger.warning('Overwriting an already trained brain!')
            self._trained = False

        # shuffle data for good luck
        if shuffle:
            df = shuffleDataFrame(df)
        # scale train data and fit lobe
        x = df.drop('label', axis=1).values
        y = df['label'].values
        del df
        if preprocess:
            x = preprocessing.scale(x)
        logger.info('Training with %d samples', len(x))
        self.lobe.fit(x, y)
        self._trained = True

開發者ID:s4w3d0ff，項目名稱:marconibot，代碼行數:22，代碼來源:__init__.py

示例12: pre_processing

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def pre_processing(dataset_file_list, pre_process_paras):
    """ pre-processing of multiple datasets
    Args:
        dataset_file_list: list of filenames of datasets
        pre_process_paras: dict, parameters for pre-processing
    Returns:
        dataset_list: list of datasets
    """
    # parameters
    take_log = pre_process_paras['take_log']
    standardization = pre_process_paras['standardization']
    scaling = pre_process_paras['scaling']

    dataset_list = []
    for data_file in dataset_file_list:
        dataset = read_csv(data_file, take_log)
        if standardization:
            scale(dataset['gene_exp'], axis=1, with_mean=True, with_std=True, copy=False)
        if scaling:  # scale to [0,1]
            minmax_scale(dataset['gene_exp'], feature_range=(0, 1), axis=1, copy=False)
        dataset_list.append(dataset)
    dataset_list = intersect_dataset(dataset_list)  # retain intersection of gene symbols

    return dataset_list

開發者ID:txWang，項目名稱:BERMUDA，代碼行數:26，代碼來源:pre_processing.py

示例13: estimate_k

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def estimate_k(data):
    """
    Estimate number of groups k:
        based on random matrix theory (RTM), borrowed from SC3
        input data is (p,n) matrix, p is feature, n is sample
    """
    p, n = data.shape
    if type(data) is not np.ndarray:
        data = data.toarray()
    x = scale(data)
    muTW = (np.sqrt(n-1) + np.sqrt(p)) ** 2
    sigmaTW = (np.sqrt(n-1) + np.sqrt(p)) * (1/np.sqrt(n-1) + 1/np.sqrt(p)) ** (1/3)
    sigmaHatNaive = x.T.dot(x)

    bd = np.sqrt(p) * sigmaTW + muTW
    evals = np.linalg.eigvalsh(sigmaHatNaive)

    k = 0
    for i in range(len(evals)):
        if evals[i] > bd:
            k += 1
    return k

開發者ID:jsxlei，項目名稱:SCALE，代碼行數:24，代碼來源:utils.py

示例14: kmeans_elbow

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def kmeans_elbow(data):
    bin_ = Bin(0, 0)
    # processed_data = scale(data)
    data = np.array(data)
    bin_.fit(data)
    processed_data = bin_.transform(data)
    # processed_data = scale(data)

    inertias = []
    for k in K_RANGE:
        kmeans = KMeans(init='k-means++', n_clusters=k)
        kmeans.fit(processed_data)
        inertias.append(kmeans.inertia_)

    fig = plt.figure()
    plt.scatter(K_RANGE, inertias)
    plt.plot(K_RANGE, inertias)
    fig.savefig('kmeans-elbow.png')

開發者ID:cmu-db，項目名稱:cmdbac，代碼行數:20，代碼來源:cluster.py

示例15: scale

# 需要導入模塊: from sklearn import preprocessing [as 別名]
# 或者: from sklearn.preprocessing import scale [as 別名]
def scale(self, scale_val=100.):
        """ Scale all values such that they are on the range [0, scale_val],
            via grand-mean scaling. This is NOT global-scaling/intensity
            normalization. This is useful for ensuring that data is on a
            common scale (e.g. good for multiple runs, participants, etc)
            and if the default value of 100 is used, can be interpreted as
            something akin to (but not exactly) "percent signal change."
            This is consistent with default behavior in AFNI and SPM.
            Change this value to 10000 to make consistent with FSL.

        Args:
            scale_val: (int/float) what value to send the grand-mean to;
                        default 100

        """

        out = deepcopy(self)
        out.data = out.data / out.data.mean() * scale_val

        return out

開發者ID:cosanlab，項目名稱:nltools，代碼行數:22，代碼來源:brain_data.py

注：本文中的sklearn.preprocessing.scale方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。