本文整理汇总了Python中Preprocess.normalize_features_all方法的典型用法代码示例。如果您正苦于以下问题：Python Preprocess.normalize_features_all方法的具体用法？Python Preprocess.normalize_features_all怎么用？Python Preprocess.normalize_features_all使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Preprocess的用法示例。
在下文中一共展示了Preprocess.normalize_features_all方法的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
def main():
    """Build a linear regression model on the housing data and log its error.

    Loads the training and testing datasets, normalizes the features of both
    with ``Preprocess.normalize_features_all``, builds an
    ``rm.LinearRegression`` model on the training data, prints the error
    computed with ``util.mse`` for both datasets and writes the result
    through ``util.write_result_to_file``.
    """
    # training parameters
    result_path = 'results/housingLiR_1.mse'
    model_name = 'housing_shiftAndScale'
    # alternative normalization: Preprocess.zero_mean_unit_var
    normalization = Preprocess.shift_and_scale
    # presumably the feature columns to leave un-normalized, e.g. (0, 7, 12);
    # empty here -- TODO confirm against Preprocess.normalize_features_all
    cols_not_norm = []

    # load and preprocess the data; index 0 is passed as features and
    # index 1 as targets below
    training_data = loader.load_dataset('data/housing_train.txt')
    testing_data = loader.load_dataset('data/housing_test.txt')
    Preprocess.normalize_features_all(normalization, training_data[0], testing_data[0], cols_not_norm)

    # start training
    model = rm.LinearRegression()
    model.build(training_data[0], training_data[1])
    training_mse = model.test(training_data[0], training_data[1], util.mse)
    testing_mse = model.test(testing_data[0], testing_data[1], util.mse)

    # print() with a single argument behaves the same on Python 2 and 3
    print('Error for training data is:')
    print(training_mse)
    print('Error for testing data is:')
    print(testing_mse)

    result = {
        'TrainingMSE': str(training_mse),
        'TestingMSE': str(testing_mse),
        'Theta': str(model.theta),
    }
    # log the training result to file
    util.write_result_to_file(result_path, model_name, result)
示例2: main
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
def main():
    """Evaluate the project kNN classifier with the cosine kernel on digits.

    Loads the pickled training and testing data, normalizes the features
    with ``Preprocess.zero_mean_unit_var`` and, for each value of ``r``,
    fits a ``kNN.kNN`` classifier and prints the training and testing
    accuracy.
    """
    kernel = c.COSINE
    # NOTE: Windows-style relative paths, kept exactly as in the original
    tr_data_path = 'data\\digits\\tr_f_l_10.pickle'
    te_data_path = 'data\\digits\\te_f_l_10.pickle'

    # load and preprocess the data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)
    # transpose the labels and keep the first row
    # (presumably turns a column vector into a flat label array)
    tr_data[1] = np.transpose(tr_data[1])[0]
    te_data[1] = np.transpose(te_data[1])[0]
    # NOTE(review): the two splits are normalized by separate calls, whereas
    # other callers pass training and testing features to a single call --
    # confirm this is intended.
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, tr_data[0])
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, te_data[0])

    # start training
    st = time.time()
    print('{:.2f} Start training.'.format(time.time() - st))
    for r in (0.15, 0.1):
        clf = kNN.kNN(kernel=kernel, dataset=c.DS_DIGITS)
        clf.fit(tr_data[0], tr_data[1])
        tr_pred = clf.predict(tr_data[0], r=r)
        te_pred = clf.predict(te_data[0], r=r)
        # divide by a float so the accuracy is not truncated to 0 or 1 by
        # integer division under Python 2
        tr_acc = (tr_data[1] == tr_pred).sum() / float(tr_data[0].shape[0])
        te_acc = (te_data[1] == te_pred).sum() / float(te_data[0].shape[0])
        print('{} Final results with kernel {} and r={}. Train acc: {}, Test acc: {}'.format(time.time() - st, kernel, r, tr_acc, te_acc))
示例3: main
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
def main():
    """Evaluate kNN on the digits data for k in (1, 3, 7).

    Loads the pickled training and testing data, normalizes the features
    with ``Preprocess.zero_mean_unit_var`` and, for each ``k``, fits either
    the project ``kNN.kNN`` classifier (default) or scikit-learn's
    ``KNeighborsClassifier`` (when ``is_sklearn`` is True), then prints the
    training and testing accuracy.
    """
    is_sklearn = False
    # alternative kernels: c.COSINE, c.GAUSSIAN
    kernel = c.POLY
    # NOTE: Windows-style relative paths, kept exactly as in the original;
    # the full data set lives in tr_f_l.pickle / te_f_l.pickle
    tr_data_path = 'data\\digits\\tr_f_l_10.pickle'
    te_data_path = 'data\\digits\\te_f_l_10.pickle'

    # load and preprocess the data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)
    # transpose the labels and keep the first row
    # (presumably turns a column vector into a flat label array)
    tr_data[1] = np.transpose(tr_data[1])[0]
    te_data[1] = np.transpose(te_data[1])[0]
    # NOTE(review): the two splits are normalized by separate calls, whereas
    # other callers pass training and testing features to a single call --
    # confirm this is intended.
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, tr_data[0])
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, te_data[0])

    # start training
    models = []
    st = time.time()
    print('{:.2f} Start training.'.format(time.time() - st))
    for k in (1, 3, 7):
        if not is_sklearn:
            clf = kNN.kNN(kernel=kernel)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0], k=k)
            te_pred = clf.predict(te_data[0], k=k)
        else:
            # scikit-learn calls a callable metric with two 1-D vectors, which
            # a pairwise function such as cosine_distances (presumably
            # sklearn.metrics.pairwise.cosine_distances) does not accept, so
            # the built-in 'cosine' metric is used with brute-force search.
            clf = KNeighborsClassifier(n_neighbors=k, metric='cosine', algorithm='brute')
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0])
            te_pred = clf.predict(te_data[0])
        # divide by a float so the accuracy is not truncated to 0 or 1 by
        # integer division under Python 2
        tr_acc = (tr_data[1] == tr_pred).sum() / float(tr_data[0].shape[0])
        te_acc = (te_data[1] == te_pred).sum() / float(te_data[0].shape[0])
        models.append(clf)
        print('{} Final results with kernel {} and k={}. Train acc: {}, Test acc: {}'.format(time.time() - st, kernel, k, tr_acc, te_acc))
示例4: main
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
def main():
    """Evaluate the project kNN classifier on one fold of the spam data.

    Loads the pickled spam data, normalizes the features with
    ``Preprocess.zero_mean_unit_var``, prepares ``k`` folds and, for fold 1
    and each value of ``r``, fits a ``kNN.kNN`` classifier with the
    Euclidean kernel and prints the training and testing accuracy.
    """
    k = 8  # number of folds
    data_path = 'data/spam/data.pickle'

    # load and preprocess the data
    training_data = loader.load_pickle_file(data_path)
    # TODO convert labels from {0, 1} to {-1, 1}
    # util.replace_zero_label_with_neg_one(training_data)
    # alternative normalization: Preprocess.shift_and_scale
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, training_data[0])

    testing_accs = []
    print('Preparing k fold data.')
    k_folds = Preprocess.prepare_k_folds(training_data, k)
    kernel = c.EUCLIDEAN
    # only fold 1 is evaluated
    for i in (1,):
        st = time.time()
        tr_data, te_data = Preprocess.get_i_fold(k_folds, i)
        # start training
        print('{:.2f} Start training.'.format(time.time() - st))
        for r in (2.5, 2.7):
            clf = kNN.kNN(kernel=kernel)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0], r=r)
            te_pred = clf.predict(te_data[0], r=r)
            # divide by a float so the accuracy is not truncated to 0 or 1
            # by integer division under Python 2
            tr_acc = (tr_data[1] == tr_pred).sum() / float(tr_data[0].shape[0])
            te_acc = (te_data[1] == te_pred).sum() / float(te_data[0].shape[0])
            testing_accs.append(te_acc)
            print('{} {}-fold results with kernel {}, r={}. Train acc: {}, Test acc: {}'.format(time.time() - st, i, kernel, r, tr_acc, te_acc))
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
def main():
    """Train and evaluate an ``svm.SVC`` classifier on one fold of spam data.

    Loads the pickled spam data, replaces zero labels through
    ``util.replace_zero_label_with_neg_one``, normalizes the features with
    ``Preprocess.zero_mean_unit_var``, prepares ``k`` folds and, for the
    first fold, fits an ``svm.SVC`` and prints the training and testing
    accuracy.
    """
    # training parameters
    k = 10  # number of folds
    data_path = "data/spam/data.pickle"
    # alternative kernels: 'poly', 'rbf'
    kernel = "linear"
    verbose = False
    tol = 0.01
    # named svm_c rather than c so it cannot be confused with the
    # ``Consts as c`` alias used elsewhere in this file
    svm_c = 0.1

    # load and preprocess the data
    training_data = loader.load_pickle_file(data_path)
    # convert labels from {0, 1} to {-1, 1}
    util.replace_zero_label_with_neg_one(training_data)
    # normalize
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, training_data[0])

    print("Preparing k fold data.")
    k_folds = Preprocess.prepare_k_folds(training_data, k)
    # only the first fold (index 0) is evaluated
    for i in range(1):
        st = time.time()
        tr_data, te_data = Preprocess.get_i_fold(k_folds, i)
        # start training
        print("{:3f} Start training. Kernel: {}".format(time.time() - st, kernel))
        # alternative classifier: svm.NuSVC(kernel=kernel, tol=tol, verbose=verbose)
        clf = svm.SVC(C=svm_c, kernel=kernel, tol=tol, verbose=verbose)
        clf.fit(tr_data[0], tr_data[1])
        tr_pred = clf.predict(tr_data[0])
        te_pred = clf.predict(te_data[0])
        # divide by a float so the accuracy is not truncated to 0 or 1 by
        # integer division under Python 2
        tr_acc = (tr_data[1] == tr_pred).sum() / float(tr_data[0].shape[0])
        te_acc = (te_data[1] == te_pred).sum() / float(te_data[0].shape[0])
        print("{:3f} Final results. Train acc: {}, Test acc: {}".format(time.time() - st, tr_acc, te_acc))
示例6: open
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
from perceptron_dual import PerceptronDual
import csv
import Utilities as util
import numpy as np
import Consts as c
import Preprocess
data_file = 'data/twoSpirals.txt'

# Read the tab-separated file: in every row the last column is the label
# and all preceding columns are features.
features = []
labels = []
with open(data_file) as f:
    for line in csv.reader(f, delimiter='\t'):
        sign = 1
        cur_f = [sign * float(value) for value in line[:-1]]
        # the label may be written as a float (e.g. "1.0"), hence float -> int
        cur_l = int(float(line[-1]))
        labels.append([cur_l])
        features.append(cur_f)

# Each label was stored as a one-element row; take the first row of the
# transposed array to get a flat label array.
labels = np.array(labels).transpose()[0]

features = np.array(features)
# alternative normalization: Preprocess.shift_and_scale
Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, features)

# create the perceptron (alternative kernel: c.LINEAR)
kernel = c.GAUSSIAN
model = PerceptronDual(kernel_fun=kernel)
model.fit(features, labels)
示例7: print
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
# Script fragment: continues training a saved logistic regression model with
# gradient descent and reports the final accuracy.
# NOTE(review): st, train_data_path, test_data_path and model_path are used
# below but defined outside this fragment.
# parameters passed to model.build below
lamda = 0.5
tol = 0.92
normalize_method = prep.zero_mean_unit_var
term_method = util.acc_higher_than_ridge
# load the training and testing data
tr_data = loader.load_pickle_file(train_data_path)
te_data = loader.load_pickle_file(test_data_path)
print("{:.2f} Data loaded!".format(time.time() - st))
# convert the features (index 0) to plain lists before normalizing
tr_data[0] = tr_data[0].tolist()
te_data[0] = te_data[0].tolist()
# normalize the training and testing features in a single call
prep.normalize_features_all(normalize_method, tr_data[0], te_data[0])
print("{:.2f} Features normalized!".format(time.time() - st))
saved_model = loader.load_pickle_file(model_path) # load the model
# start from the parameters of the previously saved model
theta = saved_model.theta
is_batch = True
penalty = "l2" # l2 for RIDGE
alpha = 0.05
model = gd.LogisticRegressionGD(theta, penalty, alpha)
# variant without the testing data:
# model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch)
model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch, te_data[0], te_data[1])
# evaluate on both splits with util.acc
training_acc = model.test(tr_data[0], tr_data[1], util.acc)
testing_acc = model.test(te_data[0], te_data[1], util.acc)
print("{} Final results. Train acc: {}, Test acc: {}".format(time.time() - st, training_acc, testing_acc))
示例8:
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
# Script fragment: builds a gradient-descent linear regression model on the
# housing data and prints the error on the training and testing data.

# training parameters
result_path = 'results/housingLiRGD_1.mse'
model_name = 'housing'
lamda = 0.0001  # previously tried: 0.000015
is_batch = False
# alternative normalization: Preprocess.zero_mean_unit_var
normalization = Preprocess.shift_and_scale
term_fun = util.mse_less_than
term_thresh = 25
# passed as not_norm below; presumably the feature columns to leave
# un-normalized -- TODO confirm against Preprocess.normalize_features_all
cols_not_norm = [0, 7]

# load and preprocess the data
training_data = loader.load_dataset('data/housing_train.txt')
testing_data = loader.load_dataset('data/housing_test.txt')
Preprocess.normalize_features_all(normalization, training_data[0], testing_data[0], not_norm=cols_not_norm)

# start training
model = gd.LinearRegressionGD()
try:
    # The build runs inside the try block so that Ctrl-C during training is
    # actually caught; previously the try body was empty and the handler
    # below could never run.
    model.build(training_data[0], training_data[1], lamda, term_fun, term_thresh, is_batch)
except KeyboardInterrupt:
    print('Interrupted')
finally:
    # report the errors even when training was interrupted
    training_mse = model.test(training_data[0], training_data[1], util.mse)
    testing_mse = model.test(testing_data[0], testing_data[1], util.mse)
    # print() with a single argument behaves the same on Python 2 and 3
    print('Error for training data is:')
    print(training_mse)
    print('Error for testing data is:')
    print(testing_mse)
示例9: str
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
import numpy as np
import Utilities as util
import RegressionModel as rm
import Consts as c
# Script fragment: k-fold evaluation of linear regression on the spam data.
# The loop body is cut off after the first evaluation call.

# training parameters
k = 50  # number of folds
result_path = "results/spamLiR_5.acc"
model_name = "spam_{}fold_zeroMean".format(k)
# alternative normalization: Preprocess.zero_mean_unit_var
normalization = Preprocess.shift_and_scale

# load and preprocess the data
training_data = loader.load_dataset("data/spambase.data")
Preprocess.normalize_features_all(normalization, training_data[0])

# accumulators for the per-fold results
training_accs, training_cms = [], []
testing_accs, testing_cms = [], []
roc = []
auc = 0.0

# start training: one model per fold, fold numbers passed as 1..k
for i in range(k):
    tr_data, te_data = Preprocess.prepare_k_fold_data(training_data, k, i + 1)
    model = rm.LinearRegression()
    model.build(tr_data[0], tr_data[1])
    training_test_res = model.test(tr_data[0], tr_data[1], util.compute_acc_confusion_matrix)
示例10: main
# 需要导入模块: import Preprocess [as 别名]
# 或者: from Preprocess import normalize_features_all [as 别名]
def main(config_path):
    """Build and test a classifier as described by a configuration file.

    Loads the configuration from ``config_path``, loads the training and
    (optional) testing datasets it names, normalizes the features when a
    normalization method is configured, generates the thresholds file when
    one is configured but missing, and then validates the model either by
    k-fold validation or against the given testing dataset. The measured
    errors are printed, added to the configuration, and the configuration
    is written to the configured output path.

    Args:
        config_path: path handed to ``loader.load_config``.
    """
    config = loader.load_config(config_path)
    training_data = None
    testing_data = None

    # load training and testing data from files, normalize if necessary
    if c.TRAINING_D in config:
        training_data = loader.load_dataset(config[c.TRAINING_D])
    if c.TESTING_D in config:
        testing_data = loader.load_dataset(config[c.TESTING_D])
    if c.NORM_METHOD in config:
        # NOTE(review): method stays None when the configured value matches
        # neither known option, and is then passed on as-is.
        method = None
        if config[c.NORM_METHOD] == c.SHIFT_SCALE:
            method = Preprocess.shift_and_scale
        elif config[c.NORM_METHOD] == c.ZERO_MEAN_UNIT_VAR:
            method = Preprocess.zero_mean_unit_var
        if c.TESTING_D in config:
            Preprocess.normalize_features_all(method, training_data[0], testing_data[0])
        else:
            Preprocess.normalize_features_all(method, training_data[0])

    # generate thresholds file if needed
    if c.THRESHS in config and not os.path.isfile(config[c.THRESHS]):
        Preprocess.generate_thresholds(training_data[0], config[c.THRESHS])

    # get path to store models and output results
    # (model_path is not used below, but the lookup still fails early when
    # the key is missing from the configuration)
    model_path = config[c.MODEL_PATH]
    output_path = config[c.OUTPUT_PATH]

    # use a different validation method based on the config
    match = re.match(c.K_FOLD_RE, config[c.VALID_METHOD])
    if match:
        # perform k-fold validation
        k = int(match.group(c.K_GROUP))
        training_errs = []
        testing_errs = []
        for i in range(k):
            # fold numbers are passed as 1..k
            tr_data, te_data = Preprocess.prepare_k_fold_data(training_data, k, i + 1)
            model = builder.build_model(tr_data, config)
            training_errs.append(model.test(tr_data[0], tr_data[1], Utilities.get_test_method(config)))
            testing_errs.append(model.test(te_data[0], te_data[1], Utilities.get_test_method(config)))
        mean_training_err = np.mean(training_errs)
        mean_testing_err = np.mean(testing_errs)
        # print() with a single argument behaves the same on Python 2 and 3
        print(str(k) + '-fold validation done. Training errors are:')
        print(training_errs)
        print('Mean training error is:')
        print(mean_training_err)
        print('Testing errors are:')
        print(testing_errs)
        print('Mean testing error is:')
        print(mean_testing_err)
        config['TrainingErrs'] = str(training_errs)
        config['MeanTrainingErr'] = str(mean_training_err)
        config['TestingErrs'] = str(testing_errs)
        config['MeanTestingErr'] = str(mean_testing_err)
    elif config[c.VALID_METHOD] == c.HAS_TESTING_DATA:
        # perform testing with the given testing dataset
        model = builder.build_model(training_data, config)
        training_err = model.test(training_data[0], training_data[1], Utilities.get_test_method(config))
        testing_err = model.test(testing_data[0], testing_data[1], Utilities.get_test_method(config))
        print('Error for training data is:')
        print(training_err)
        print('Error for testing data is:')
        print(testing_err)
        config['TrainingErr'] = str(training_err)
        config['TestingErr'] = str(testing_err)

    # log the configuration together with the recorded errors; the with
    # block closes the file even if the write fails
    with open(output_path, 'w+') as f:
        f.write(str(config))