本文整理汇总了Python中sklearn.preprocessing.scale方法的典型用法代码示例。如果您正苦于以下问题：Python preprocessing.scale方法的具体用法是什么？Python preprocessing.scale怎么用？有哪些Python preprocessing.scale的使用示例？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.preprocessing的用法示例。
在下文中一共展示了preprocessing.scale方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: violin_jitter
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def violin_jitter(X, genes, gene, labels, focus, background=None,
                  xlabels=None):
    """Save a violin plot with overlaid strip plot for one gene in two groups.

    The figure is written to ``'<NAMESPACE>_violin_<gene>.png'`` and closed
    afterwards so that repeated calls do not accumulate open figures.

    Args:
        X: Expression matrix; ``X[:, j]`` must support ``.toarray()``
            (presumably a scipy sparse matrix with one row per sample --
            TODO confirm against the caller).
        genes: Iterable of gene names; the position of ``gene`` in it selects
            the column of ``X``.
        gene: Name of the gene to plot. ``list.index`` raises ``ValueError``
            if it is not present in ``genes``.
        labels: Per-row group labels, compared elementwise against ``focus``
            and ``background`` (assumed to be a numpy array).
        focus: Label value identifying the focus group.
        background: Label value identifying the background group. When
            ``None``, every row whose label differs from ``focus`` is used.
        xlabels: Two tick labels in plot order, i.e. focus first and
            background second. Defaults to ``['Focus', 'Background']``.
    """
    gidx = list(genes).index(gene)

    focus_idx = focus == labels
    if background is None:
        background_idx = focus != labels
    else:
        background_idx = background == labels

    if xlabels is None:
        # The data below is plotted as [focus, background], so the default
        # tick labels must follow that order (the previous default was
        # reversed and mislabelled both violins).
        xlabels = ['Focus', 'Background']

    x_gene = X[:, gidx].toarray().flatten()
    x_focus = x_gene[focus_idx]
    x_background = x_gene[background_idx]

    fig = plt.figure()
    try:
        sns.violinplot(data=[x_focus, x_background], scale='width', cut=0)
        sns.stripplot(data=[x_focus, x_background], jitter=True,
                      color='black', size=1)
        plt.xticks([0, 1], xlabels)
        plt.savefig('{}_violin_{}.png'.format(NAMESPACE, gene))
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)
示例2: train_FFM_model_demo
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def train_FFM_model_demo():
    """Train and evaluate the FFM model on the data from ``load_dataset``.

    Features are standardised with statistics fitted on the training split
    only; the same transform is then applied to the test split so both
    splits live in the same feature space. After every training epoch the
    model is evaluated on the test loader.
    """
    # Step 1: load the data
    x_train, y_train, x_test, y_test, feature2field = load_dataset()

    # Fit mean/std on the training data and reuse them for the test data.
    # Scaling the test split with its own statistics would apply a different
    # transform from the one the model was trained on.
    scaler = preprocessing.StandardScaler(with_mean=True, with_std=True)
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Number of distinct labels seen across both splits
    class_num = len(set(y_train) | set(y_test))

    # FFM model
    ffm = FFM_layer(field_map_dict=feature2field, fea_num=x_train.shape[1],
                    reg_l1=0.01, reg_l2=0.01,
                    class_num=class_num, latent_factor_dim=10).to(DEVICE)

    # Optimizer (the loss itself is handled inside train/test)
    optm = torch.optim.Adam(ffm.parameters())

    train_loader = get_batch_loader(x_train, y_train, BATCH_SIZE, shuffle=True)
    test_loader = get_batch_loader(x_test, y_test, BATCH_SIZE, shuffle=False)

    for epoch in range(1, EPOCHS + 1):
        train(ffm, DEVICE, train_loader, optm, epoch)
        test(ffm, DEVICE, test_loader)
示例3: train_FM_model_demo
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def train_FM_model_demo():
    """Train and evaluate the FM model on the data from ``load_dataset``.

    Features are standardised with statistics fitted on the training split
    only; the same transform is then applied to the test split so both
    splits live in the same feature space. After every training epoch the
    model is evaluated on the test loader.
    """
    # Step 1: load the data
    x_train, y_train, x_test, y_test = load_dataset()

    # Fit mean/std on the training data and reuse them for the test data.
    # Scaling the test split with its own statistics would apply a different
    # transform from the one the model was trained on.
    scaler = preprocessing.StandardScaler(with_mean=True, with_std=True)
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Number of distinct labels seen across both splits
    class_num = len(set(y_train) | set(y_test))

    # FM model
    fm = FM_layer(class_num=class_num, feature_num=x_train.shape[1],
                  latent_factor_dim=40).to(DEVICE)

    # Optimizer (the loss itself is handled inside train/test)
    optm = torch.optim.Adam(fm.parameters())

    train_loader = get_batch_loader(x_train, y_train, BATCH_SIZE, shuffle=True)
    test_loader = get_batch_loader(x_test, y_test, BATCH_SIZE, shuffle=False)

    for epoch in range(1, EPOCHS + 1):
        train(fm, DEVICE, train_loader, optm, epoch)
        test(fm, DEVICE, test_loader)
示例4: test_elastic_net_versus_sgd
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def test_elastic_net_versus_sgd(C, l1_ratio):
    """Elastic-net LogisticRegression (saga) and SGDClassifier must agree.

    Both estimators are fitted on the same standardised synthetic binary
    problem without an intercept; their coefficients are then required to
    match to one decimal place. ``C`` and ``l1_ratio`` are supplied by the
    caller (presumably a pytest parametrisation outside this snippet).
    """
    n_samples = 500
    features, target = make_classification(
        n_samples=n_samples, n_classes=2, n_features=5,
        n_informative=5, n_redundant=0, n_repeated=0,
        random_state=1)
    features = scale(features)

    # Settings shared by both estimators
    common = dict(penalty='elasticnet', random_state=1, fit_intercept=False,
                  l1_ratio=l1_ratio)

    # alpha is the SGD counterpart of C: alpha = 1 / (C * n_samples)
    sgd = SGDClassifier(tol=-np.inf, max_iter=2000,
                        alpha=1. / C / n_samples, loss='log', **common)
    log = LogisticRegression(tol=1e-5, max_iter=1000, C=C, solver='saga',
                             **common)

    sgd.fit(features, target)
    log.fit(features, target)

    assert_array_almost_equal(sgd.coef_, log.coef_, decimal=1)
示例5: run_pca
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def run_pca(self, whiten=True):
    """Standardise the transposed feature table and project it with PCA.

    Reads ``self.features_df`` and stores the results on the instance:
    ``num_components``, ``pc_names_list``, ``pc_names_dict``,
    ``principal_df`` (index named ``'strain'``) and the fitted ``pca``.

    Args:
        whiten: Forwarded to ``PCA(whiten=...)``.
    """
    # Rows of the transposed table are the observations fed to PCA
    samples_df = self.features_df.T
    scaled_df = pd.DataFrame(preprocessing.scale(samples_df),
                             columns=samples_df.columns)

    # Use as many components as the smaller dimension of the table allows
    self.num_components = min(samples_df.shape)
    pca = PCA(n_components=self.num_components, whiten=whiten)
    projected = pca.fit_transform(scaled_df)

    # Human-readable component labels such as 'PC1 (42%)'
    self.pc_names_list = [
        'PC{} ({:.0%})'.format(number, ratio)
        for number, ratio in enumerate(pca.explained_variance_ratio_, start=1)
    ]
    # Map the short name ('PC1') to its full label
    self.pc_names_dict = {}
    for label in self.pc_names_list:
        self.pc_names_dict[label.split(' ')[0]] = label

    principal_df = pd.DataFrame(data=projected, columns=self.pc_names_list,
                                index=samples_df.index)
    principal_df.index.name = 'strain'

    self.principal_df = principal_df
    self.pca = pca
# self.principal_observations_df = self.principal_df.join(self.observations_df, how='inner')
#
# # Make iterable list of markers
# mks = itertools.cycle(["<", "+", "o", 'D', 'x', '^', '*', '8', 's', 'p', 'v', 'X', '_', 'h'])
# self.markers = [next(mks) for i in range(len(self.principal_observations_df[self.observation_colname].unique()))]
示例6: get_ind_return
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def get_ind_return(data, stk_ind_path='E:\\QuantProject2\\temp_data\\stk_ind_pair.xlsx'):
    '''
    Regroup the column-concatenated trading data read from xlsx and compute
    the standardised monthly return of every industry.

    :param [DataFrame] data: month/trading data read from the xlsx files;
        must contain the columns 'Stkcd', 'Trdmnt', 'Msmvttl' and 'total_Mr'
    :param [str] stk_ind_path: path of the stock -> industry lookup workbook
        (must provide the columns 'Stkcd' and 'ind'); defaults to the
        location that was previously hard-coded
    :return: [DataFrame] ind_ret: month * industry, the per-column
        standardised return of every industry in every month, indexed by
        month ('Trdmnt')
    '''
    # Lookup table mapping each stock to its industry
    stk_ind = pd.read_excel(stk_ind_path)
    # Keep only the six leading characters of the stock code, dropping the
    # trailing letters
    stk_ind.Stkcd = stk_ind.Stkcd.apply(lambda code: code[:6])
    # Attach the industry to every trading record
    data = pd.merge(data, stk_ind, on='Stkcd')
    # Group by month and industry
    groups = data.groupby(['Trdmnt', 'ind'])
    # Total market value per month and industry
    total_Ms = groups['Msmvttl'].sum()
    # Sum of the 'total_Mr' column per month and industry (presumably the
    # market-value-weighted returns prepared by the caller -- TODO confirm)
    total_Mr = groups['total_Mr'].sum()
    # Ratio gives the average return; move the industry level into columns
    ind_ret = (total_Mr / total_Ms).unstack()
    # Standardise each industry column. scale() returns a bare ndarray, so
    # the month index is passed back explicitly; otherwise the result would
    # silently lose its month labels.
    ind_ret = pd.DataFrame(scale(ind_ret), index=ind_ret.index,
                           columns=ind_ret.columns)
    return ind_ret
示例7: do_pca
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def do_pca(X, c=3):
    """Standardise ``X`` column-wise and project it onto ``c`` principal components.

    The explained variance ratio of the fitted components is printed.

    Args:
        X: DataFrame of observations (rows) by features (columns).
        c: Number of principal components to keep.

    Returns:
        DataFrame of the projected data with the same index as ``X`` and one
        integer-labelled column per component.
    """
    from sklearn import preprocessing
    # Import from the public package: the private ``sklearn.decomposition.pca``
    # module path and ``RandomizedPCA`` no longer exist in current releases.
    from sklearn.decomposition import PCA

    S = pd.DataFrame(preprocessing.scale(X), columns=X.columns)
    pca = PCA(n_components=c)
    # Fit once and reuse the same fit for both the report and the projection
    pX = pca.fit_transform(S)
    print(pca.explained_variance_ratio_)
    return pd.DataFrame(pX, index=X.index)
示例8: train
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def train(train_data, outfile):
    """
    Fit a one-component PCA on every batch and log the reconstruction error.

    :param train_data: A Batcher object that delivers batches of train data
        via ``next_batch()`` and returns ``None`` when exhausted. Each batch
        is indexed as a 2-D array whose columns are day, user, red, followed
        by the feature columns.
    :param outfile: Writable file-like object the results are written to
        (only ``write`` is used).
    """
    outfile.write('day user red loss\n')

    mat = train_data.next_batch()
    while mat is not None:
        days = mat[:, 0]
        users = mat[:, 1]
        reds = mat[:, 2]
        batch = scale(mat[:, 3:])

        pca = PCA(n_components=1)
        pca.fit(batch)
        axes = pca.components_

        # Manual transform / inverse transform on the standardised batch
        projected = np.dot(batch, axes.T)
        restored = np.dot(projected, axes)

        # Mean squared reconstruction error per row, then over the batch
        pointloss = np.mean(np.square(batch - restored), axis=1)
        loss = np.mean(pointloss)

        rows = zip(days.tolist(), users.tolist(), reds.tolist(),
                   pointloss.flatten().tolist())
        for row in rows:
            outfile.write('%s %s %s %s\n' % row)

        print('loss: %.4f' % loss)
        mat = train_data.next_batch()
示例9: is_log_scale_needed
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def is_log_scale_needed(x_org):
    """Tell whether a log transform makes the values look more normal.

    Missing values are dropped, the data is standardised, and a second
    series is built by shifting the standardised data so its minimum is 1,
    taking the log and standardising again. Both series are passed through
    ``stats.normaltest``; the function returns True when the log series gets
    the larger p-value.
    """
    present = ~pd.isnull(x_org)

    # Standardised raw values
    x = preprocessing.scale(np.array(x_org[present]))

    # Standardised log values; the shift keeps the log argument >= 1
    shifted = x - np.min(x) + 1
    x_log = preprocessing.scale(np.log(shifted))

    # A higher p-value means the normality hypothesis is rejected less strongly
    p_raw = stats.normaltest(x)[1]
    p_log = stats.normaltest(x_log)[1]
    return p_log > p_raw
示例10: setUpClass
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def setUpClass(cls):
    """Create the shared regression fixture: features, scaled target, params.

    Sets ``cls.X`` and ``cls.y`` from a seeded ``make_regression`` call (the
    target is standardised) and ``cls.params`` with the hyper-parameters for
    a two-layer dense network on a regression task.
    """
    features, target = datasets.make_regression(
        n_samples=100, n_features=5, n_informative=4, shuffle=False, random_state=0
    )
    cls.X = features
    # The target is stored already standardised
    cls.y = preprocessing.scale(target)

    params = {}
    params["dense_layers"] = 2
    params["dense_1_size"] = 8
    params["dense_2_size"] = 4
    params["dropout"] = 0
    params["learning_rate"] = 0.01
    params["momentum"] = 0.9
    params["decay"] = 0.001
    params["ml_task"] = "regression"
    cls.params = params
示例11: train
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def train(self, df, shuffle=True, preprocess=False, *args, **kwargs):
    """
    Fit ``self.lobe`` on a dataframe of features plus a 'label' column.

    :param df: dataframe whose 'label' column is the target; every other
        column is used as a feature
    :param shuffle: pass the dataframe through ``shuffleDataFrame`` first
    :param preprocess: standardise the features with ``preprocessing.scale``
    :param args: accepted but not used here
    :param kwargs: accepted but not used here
    """
    if self._trained:
        logger.warning('Overwriting an already trained brain!')
        self._trained = False

    # Optionally shuffle the rows before splitting features and labels
    if shuffle:
        df = shuffleDataFrame(df)

    features = df.drop('label', axis=1).values
    targets = df['label'].values
    # Drop the local reference to the dataframe before fitting
    del df

    if preprocess:
        features = preprocessing.scale(features)

    logger.info('Training with %d samples', len(features))
    self.lobe.fit(features, targets)
    self._trained = True
示例12: pre_processing
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def pre_processing(dataset_file_list, pre_process_paras):
    """ pre-processing of multiple datasets

    Each file is read with ``read_csv``; its 'gene_exp' entry is optionally
    standardised and/or min-max scaled along axis 1, and finally all
    datasets are reduced to their shared gene symbols.

    Args:
        dataset_file_list: list of filenames of datasets
        pre_process_paras: dict, parameters for pre-processing; must contain
            the keys 'take_log', 'standardization' and 'scaling'
    Returns:
        dataset_list: list of datasets
    """
    # parameters
    take_log = pre_process_paras['take_log']
    standardization = pre_process_paras['standardization']
    scaling = pre_process_paras['scaling']

    dataset_list = []
    for data_file in dataset_file_list:
        dataset = read_csv(data_file, take_log)
        # copy=False only *allows* in-place operation; sklearn may still
        # return a new array (e.g. for integer or non-ndarray input), so the
        # result is stored back instead of relying on a side effect.
        # NOTE(review): assumes dataset supports item assignment (dict-like).
        if standardization:
            dataset['gene_exp'] = scale(dataset['gene_exp'], axis=1,
                                        with_mean=True, with_std=True,
                                        copy=False)
        if scaling:  # scale to [0,1]
            dataset['gene_exp'] = minmax_scale(dataset['gene_exp'],
                                               feature_range=(0, 1), axis=1,
                                               copy=False)
        dataset_list.append(dataset)

    dataset_list = intersect_dataset(dataset_list)  # retain intersection of gene symbols
    return dataset_list
示例13: estimate_k
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def estimate_k(data):
    """
    Estimate number of groups k:
    based on random matrix theory (RMT), borrowed from SC3
    input data is (p,n) matrix, p is feature, n is sample

    Counts the eigenvalues of ``x.T.dot(x)`` (``x`` being the column-wise
    standardised data) that exceed a Tracy-Widom style bound computed from
    ``p`` and ``n``.

    Args:
        data: 2-D numpy array, or an object exposing ``toarray()`` (e.g. a
            sparse matrix) that is densified first.
    Returns:
        int, the number of eigenvalues above the bound.
    """
    p, n = data.shape
    # isinstance also accepts ndarray subclasses, which have no toarray()
    if not isinstance(data, np.ndarray):
        data = data.toarray()
    x = scale(data)

    sqrt_n = np.sqrt(n - 1)
    sqrt_p = np.sqrt(p)
    muTW = (sqrt_n + sqrt_p) ** 2
    # 1.0 / 3 keeps the cube root intact even under integer division
    sigmaTW = (sqrt_n + sqrt_p) * (1 / sqrt_n + 1 / sqrt_p) ** (1.0 / 3)
    sigmaHatNaive = x.T.dot(x)
    bd = sqrt_p * sigmaTW + muTW

    evals = np.linalg.eigvalsh(sigmaHatNaive)
    # Vectorised count of eigenvalues above the bound, returned as plain int
    return int(np.count_nonzero(evals > bd))
示例14: kmeans_elbow
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def kmeans_elbow(data):
    """Save an elbow plot of k-means inertia over ``K_RANGE``.

    The data is converted to a numpy array and transformed with ``Bin(0, 0)``
    (fit, then transform); one ``KMeans`` model is fitted for every ``k`` in
    ``K_RANGE``. The inertias are drawn as a scatter plus line plot and
    written to 'kmeans-elbow.png'. The figure is closed afterwards so that
    repeated calls do not accumulate open figures.

    Args:
        data: Array-like accepted by ``np.array``.
    """
    bin_ = Bin(0, 0)
    data = np.array(data)
    bin_.fit(data)
    processed_data = bin_.transform(data)

    inertias = []
    for k in K_RANGE:
        kmeans = KMeans(init='k-means++', n_clusters=k)
        kmeans.fit(processed_data)
        inertias.append(kmeans.inertia_)

    fig = plt.figure()
    try:
        plt.scatter(K_RANGE, inertias)
        plt.plot(K_RANGE, inertias)
        fig.savefig('kmeans-elbow.png')
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)
示例15: scale
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import scale [as 别名]
def scale(self, scale_val=100.):
    """ Return a grand-mean-scaled copy of this object.

    Every value in ``data`` is divided by the mean over all of ``data`` and
    multiplied by ``scale_val``, so the grand mean of the result equals
    ``scale_val``. This is NOT global-scaling/intensity normalization. It
    puts data on a common scale (e.g. across multiple runs or participants);
    with the default of 100 the values can be read as something akin to
    (but not exactly) "percent signal change". The default is consistent
    with AFNI and SPM; use 10000 to be consistent with FSL.

    The original object is left untouched; a deep copy is modified.

    Args:
        scale_val: (int/float) what value to send the grand-mean to;
            default 100

    Returns:
        A deep copy of ``self`` whose ``data`` has been rescaled.
    """
    scaled = deepcopy(self)
    grand_mean = scaled.data.mean()
    scaled.data = scaled.data / grand_mean * scale_val
    return scaled