This article collects typical usage examples of the Python method DataLoader.save. If you have been wondering what exactly DataLoader.save does and how to use it, the curated code examples below may help. You can also explore the DataLoader class for more context.
The following presents 15 code examples of DataLoader.save, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: get_cs
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def get_cs(data_path, cs_path):
    # load the cached cheat sheet if it exists, otherwise compute it via dynamic programming
    cs = None
    if os.path.isfile(cs_path):
        cs = loader.load_pickle_file(cs_path)
        print('CS loaded.')
    else:
        print('Start computing CS.')
        data = loader.load_pickle_file(data_path)
        cs = dp_compute_cs(data[0])
        loader.save(cs_path, cs)
        print('CS saved.')
    return cs
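A minimal usage sketch: the paths below are hypothetical, and it assumes DataLoader is imported as loader and dp_compute_cs is defined as in the surrounding project.

import os
import DataLoader as loader  # the alias assumed throughout these examples

# First call computes the cheat sheet and caches it; later calls load the pickle.
cs = get_cs('data/digits/tr_data.pickle', 'data/digits/tr_data.cs')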
Example 2: random_select_data
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def random_select_data(tr_save_path, sel_tr_save_path, percent):
    all_tr = loader.load_pickle_file(tr_save_path)
    tr_l_ind_dict = {}
    selected_tr_data = [[], []]
    # group training indices by label (digits 0-9)
    for i in range(10):
        tr_l_ind_dict[i] = [l_ind for l_ind, l in enumerate(all_tr[1]) if l == i]
    # randomly pick the requested fraction from each class
    for i in range(10):
        i_n = len(tr_l_ind_dict[i])
        pick_n = int(percent * i_n)
        cur_pick_ind = np.random.choice(tr_l_ind_dict[i], pick_n, replace=False).tolist()
        selected_tr_data[0].extend([x for x_ind, x in enumerate(all_tr[0]) if x_ind in cur_pick_ind])
        selected_tr_data[1].extend([y for y_ind, y in enumerate(all_tr[1]) if y_ind in cur_pick_ind])
    loader.save(sel_tr_save_path, selected_tr_data)
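For example, to keep a stratified 20% sample of the training set (hypothetical paths; the per-digit label distribution is preserved because each label bucket is sampled separately):

import numpy as np
import DataLoader as loader

# Keep 20% of each digit class, preserving the class ratios.
random_select_data('data/digits/tr_data.pickle',
                   'data/digits/tr_data_20.pickle',
                   0.2)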
Example 3: compute_feature_mean
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def compute_feature_mean(features, save_path):
    n, d = np.shape(features)
    means = []
    for i in range(d):
        cur_f = features[:, i]
        means.append(np.nanmean(cur_f))
        # cur_mean = 0
        # for f in features:
        #     if not np.isnan(f[i]):
        #         cur_mean += f[i]
        # means.append(cur_mean / n)
    means = np.array(means)
    loader.save(save_path, means)
    return means
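Note that the per-column loop is equivalent to a single vectorized call, which Example 10 below also uses. A quick sketch with toy data:

import numpy as np

features = np.array([[1.0, np.nan], [3.0, 4.0]])
means = np.nanmean(features, axis=0)  # per-column mean, ignoring NaNs
print(means)  # [2. 4.]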
Example 4: abstract_features
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def abstract_features(data_path, cs_path, rects_path, res_path):
    # get the cheat sheet
    cs = get_cs(data_path, cs_path)
    rects = loader.load_pickle_file(rects_path)
    # 2 features for each rectangle
    features = []
    for i, ccs in enumerate(cs):
        f = []
        for rect in rects:
            f.extend(compute_feature_with_cs(rect, ccs))
        features.append(f)
        print('{} images finished.'.format(i + 1))
    # combine with labels
    label = loader.load_pickle_file(data_path)[1]
    f_l = [np.array(features), label]
    loader.save(res_path, f_l)
    return f_l
Example 5: get_ecoc
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def get_ecoc(ecoc_path, num_ecoc, class_num):
    if path.isfile(ecoc_path):
        print('Loading the ecoc...')
        best_ecoc = loader.load_pickle_file(ecoc_path)
    else:
        print('Creating the ecoc...')
        best_ecoc = [0, [], []]  # distance, ecoc for training, ecoc for predicting
        for i in range(100):
            n = int(math.pow(2, num_ecoc))
            codes = choice(n, class_num)
            ecoc_func_codes = []
            for j in range(num_ecoc):
                ecoc_func_codes.append([])
            c_ecoc = []
            for c in codes:
                # zero-padded binary codeword of length num_ecoc
                bin_s = ('{0:0' + str(num_ecoc) + 'b}').format(c)
                bin_s = [int(ss) for ss in bin_s]
                c_ecoc.append(bin_s)
                for j in range(num_ecoc):
                    ecoc_func_codes[j].append(bin_s[j])
            # total pairwise Hamming distance; reject candidates with duplicate codewords
            c_hamming_dist = 0
            has_same_code = False
            for j in range(len(c_ecoc)):
                for k in range(len(c_ecoc)):
                    if j != k:
                        c_hd = hamming(c_ecoc[j], c_ecoc[k])
                        if c_hd == 0:
                            has_same_code = True
                        c_hamming_dist += c_hd
            if has_same_code:
                continue
            if c_hamming_dist > best_ecoc[0]:
                best_ecoc[0] = c_hamming_dist
                best_ecoc[1] = ecoc_func_codes
                best_ecoc[2] = c_ecoc
        # serialize the best ecoc
        loader.save(ecoc_path, best_ecoc)
    return best_ecoc
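A usage sketch with 10-bit codes for 10 classes and a hypothetical cache path. The imports mirror what the example's body appears to rely on (assumptions, not confirmed by the source): best_ecoc[1] holds one bit-list per ECOC function, used to train one binary classifier each, and best_ecoc[2] holds one codeword per class, used for decoding.

import math
from os import path
from numpy.random import choice              # assumed source of choice()
from scipy.spatial.distance import hamming   # assumed source of hamming()
import DataLoader as loader

best_ecoc = get_ecoc('data/digits/best.ecoc', num_ecoc=10, class_num=10)
print(len(best_ecoc[1]), 'ECOC functions,', len(best_ecoc[2]), 'class codewords')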
Example 6: random_select_rectangle
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def random_select_rectangle(h, w, n, pl, ph, save_path=None):
    '''
    :param h: height of the image in pixels
    :param w: width of the image in pixels
    :param n: number of rectangles to select
    :param pl: min area (in pixels) of each rectangle
    :param ph: max area (in pixels) of each rectangle
    :return: list of rectangles as ((top, left), (bottom, right)) corner pairs
    '''
    sel_rects = []
    for i in range(n):
        a = -1
        # resample corner pairs until the rectangle area falls within [pl, ph]
        while a < pl or a > ph:
            p1 = (random.randint(0, h - 1), random.randint(0, w - 1))
            p2 = (random.randint(0, h - 1), random.randint(0, w - 1))
            a = rect_area(p1, p2)
        sel_rects.append(((min(p1[0], p2[0]), min(p1[1], p2[1])), (max(p1[0], p2[0]), max(p1[1], p2[1]))))
    if save_path is not None:
        loader.save(save_path, sel_rects)
    return sel_rects
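For 28x28 MNIST images, one might draw 100 rectangles with areas between 130 and 170 pixels. A sketch; rect_area is not shown in the source, so the helper below is an assumed implementation:

import random
import DataLoader as loader

def rect_area(p1, p2):
    # assumed helper: pixel area of the axis-aligned rectangle spanned by two corners
    return (abs(p1[0] - p2[0]) + 1) * (abs(p1[1] - p2[1]) + 1)

rects = random_select_rectangle(28, 28, 100, 130, 170,
                                save_path='data/digits/random_rects.pickle')
print(len(rects))  # 100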
Example 7: ecoc
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def ecoc():
    # training parameters
    c = 0.001
    tol = 0.01
    epsilon = 0.001
    # kernel = 'rbf'
    kernel = 'linear'
    # load and preprocess training data
    print('Loading data...')
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)
    # randomly generate an ECOC of 10 functions
    num_ecoc = 10
    class_num = 10
    best_ecoc = util.get_ecoc(ecoc_path, num_ecoc, class_num)
    # train 10 svms, one per ECOC function
    print('Begin training...')
    svms = []  # list of svm classifiers
    function_tr_err = []
    sst = time.time()
    for ind, c_ecoc in enumerate(best_ecoc[1]):
        st = time.time()
        # prepare labels: ECOC bit 0 -> -1, bit 1 -> +1
        c_label = [-1 if c_ecoc[l] == 0 else 1 for l in tr_data[1]]
        clf = svm.SVM(C=c, tol=tol, epsilon=epsilon, kernel=kernel)
        clf.fit(tr_data[0], c_label)
        tr_pred = clf.predict(tr_data[0])  # predict on the feature matrix
        tr_acc = (c_label == tr_pred).sum() / tr_data[0].shape[0]
        print('{} Function {} done. Final results. Train acc: {}'.format(time.time() - st, ind, tr_acc))
        svms.append(clf)
    print('{} Training finished.'.format(time.time() - sst))
    loader.save(model_path, svms)
Example 8: open
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
# (excerpt begins mid-function: the part of load_mnist that opens and reads the label file is omitted)
    flbl.close()
    fimg = open(fname_img, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
    img = pyarray("B", fimg.read())
    fimg.close()
    # keep only the samples whose label is in the requested digit set
    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for i in range(len(ind)):
        images[i] = array(img[ind[i]*rows*cols : (ind[i]+1)*rows*cols]).reshape((rows, cols))
        labels[i] = lbl[ind[i]]
    return images, labels

if __name__ == '__main__':
    # load and store the training data
    data_path = 'data\\digits'
    tr_save_path = 'data\\digits\\tr_data.pickle'
    te_save_path = 'data\\digits\\te_data.pickle'
    save_path = tr_save_path
    # images, labels = load_mnist('testing', path=data_path)
    images, labels = load_mnist('training', path=data_path)
    loader.save(save_path, (images, labels))
    # imshow(images.mean(axis=0), cmap=cm.gray)
    # show()
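The excerpt relies on fname_img, flbl, lbl, size, and digits being set up by the omitted opening of load_mnist. Based on the standard MNIST IDX file layout, that opening presumably looks roughly like this (a sketch, not the author's original code; the file names and parameter defaults are assumptions):

import os
import struct
from array import array as pyarray
from numpy import array, int8, uint8, zeros

def load_mnist(dataset='training', digits=range(10), path='.'):
    # Resolve the standard MNIST file names for the requested split.
    if dataset == 'training':
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    else:
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    # Read the label file header (magic number, item count), then the labels.
    flbl = open(fname_lbl, 'rb')
    magic_nr, size = struct.unpack(">II", flbl.read(8))
    lbl = pyarray("b", flbl.read())
    # ... continues as in the excerpt above, from flbl.close() onward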
Example 9: main
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
# ......... some of the code is omitted here .........
    # TODO convert labels from {0, 1} to {-1, 1}
    util.replace_zero_label_with_neg_one(tr_data)
    util.replace_zero_label_with_neg_one(te_data)
    print('{:.2f} Label converted!'.format(time.time() - st))
    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)
    print('{:.2f} Thresholds loaded!'.format(time.time() - st))
    # start training
    training_errs = []
    testing_errs = []
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    te_roc_1st_boost = None
    ranked_f = None
    roc = []
    auc = 0.0
    thresh_cs = None
    tr_n, f_d = np.shape(tr_data[0])
    te_n, = np.shape(te_data[1])
    # TODO prepare distribution
    d = util.init_distribution(len(tr_data[0]))
    # TODO compute thresholds cheat sheet (not a solution due to huge thresh_cs table)
    # thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
    # print('{:.2f} Thresholds cheat sheet computed!'.format(time.time() - st))
    boost = b.Boosting(d)
    testing_predict = np.zeros((1, te_n)).tolist()[0]
    training_predict = np.zeros((1, tr_n)).tolist()[0]
    round_tr_err = []
    round_te_err = []
    round_model_err = []
    round_te_auc = []
    converged = False
    tol = 1e-5
    te_auc = 2.
    round = 0
    while round < round_limit:  # and not converged:
        round += 1
        boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
        boost.update_predict(tr_data[0], training_predict)
        boost.update_predict(te_data[0], testing_predict)
        c_model_err = boost.model[-1].w_err
        round_model_err.append(c_model_err)
        c_f_ind = boost.model[-1].f_ind
        c_thresh = boost.model[-1].thresh
        c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
        c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
        # TODO calculate the AUC for testing results
        # c_te_auc = util.get_auc_from_predict(testing_predict, te_data[1])
        round_tr_err.append(c_tr_err)
        round_te_err.append(c_te_err)
        # round_te_auc.append(c_te_auc)
        print('{:.2f} Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {:.12f}'.format(time.time() - st, round, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err, 0))
        # converged = abs(c_te_auc - te_auc) / te_auc <= tol
        # te_auc = c_te_auc
    training_errs.append(round_tr_err[-1])
    testing_errs.append(round_te_err[-1])
    # TODO get feature ranking from the predictions
    ranked_f = util.get_f_ranking_from_predictions(boost, threshes)
    round_err_1st_boost = round_model_err
    tr_errs_1st_boost = round_tr_err
    te_errs_1st_boost = round_te_err
    # te_auc_1st_boost = round_te_auc
    # _, te_roc_1st_boost = util.get_auc_from_predict(testing_predict, te_data[1], True)
    # break  # for testing
    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)
    print('Final results. Mean Train err: {}, Mean Test err: {}'.format(mean_training_err, mean_testing_err))
    print('Top 10 features: ')
    # print(ranked_f[:10])
    result = {}
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['MeanTestingAcc'] = mean_testing_err
    result['1stBoostTrainingError'] = tr_errs_1st_boost
    result['1stBoostTestingError'] = te_errs_1st_boost
    result['1stBoostModelError'] = round_err_1st_boost
    result['1stBoostTestingAUC'] = te_auc_1st_boost
    result['1stBoostTestingROC'] = te_roc_1st_boost
    result['rankedFeatures'] = ranked_f
    # result['ROC'] = str(roc)
    result['AUC'] = auc
    # store the model
    loader.save(model_path, boost)
    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
Example 10: range
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
    means = []
    for i in range(d):
        cur_f = features[:, i]
        means.append(np.nanmean(cur_f))
        # cur_mean = 0
        # for f in features:
        #     if not np.isnan(f[i]):
        #         cur_mean += f[i]
        # means.append(cur_mean / n)
    means = np.array(means)
    loader.save(save_path, means)
    return means

if __name__ == '__main__':
    # generate means for the features, missing
    path = 'data/spam_polluted_missing/train/data.pickle'
    mean_path = 'data/spam_polluted_missing/train/f_mean.pickle'
    features = loader.load_pickle_file(path)[0]
    means = np.nanmean(features, axis=0)
    loader.save(mean_path, means)
    # generate means for the features, polluted
    # path = 'data/spam_polluted/train/data.pickle'
    # mean_path = 'data/spam_polluted/train/f_mean.pickle'
    # features = loader.load_pickle_file(path)[0]
    # means = np.nanmean(features, axis=0)
    # loader.save(mean_path, means)
Example 11: convert_to_np_array
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def convert_to_np_array(path):
    data = loader.load_pickle_file(path)
    # convert labels and features to numpy arrays, then save back in place
    np_label = np.array(data[1])
    np_features = np.array(data[0])
    loader.save(path, [np_features, np_label])
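For instance, to convert a previously pickled [features, labels] pair in place (hypothetical path):

import numpy as np
import DataLoader as loader

convert_to_np_array('data/digits/tr_data.pickle')
features, labels = loader.load_pickle_file('data/digits/tr_data.pickle')
print(type(features), type(labels))  # both numpy.ndarray now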
Example 12: range
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
    for j in range(len(c_ecoc)):
        for k in range(len(c_ecoc)):
            if j != k:
                c_hd = hamming(c_ecoc[j], c_ecoc[k])
                if c_hd == 0:
                    has_same_code = True
                c_hamming_dist += c_hd
    if has_same_code:
        continue
    if c_hamming_dist > best_ecoc[0]:
        best_ecoc[0] = c_hamming_dist
        best_ecoc[1] = ecoc_func_codes
        best_ecoc[2] = c_ecoc

# serialize the best ecoc
loader.save(ecoc_path, best_ecoc)
print('Init ecoc done!')

# train 50 boosts
print('Begin training...')
boosts = []
function_tr_err = []
max_round = 200
if wl_type == 'random_':
    max_round = 2000
for ind, c_ecoc in enumerate(best_ecoc[1]):
    print('Training function {}...'.format(ind))
    # TODO preprocess labels, so that labels match ecoc, {0, 1} -> {-1, 1}
Example 13: print
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
print('{:.2f} Data loaded!'.format(time.time() - st))
tr_data[0] = tr_data[0].tolist()
te_data[0] = te_data[0].tolist()
# normalize features
prep.normalize_features_all(normalize_method, tr_data[0], te_data[0])
print('{:.2f} Features normalized!'.format(time.time() - st))
theta = None
is_batch = True
penalty = 'l2' # l2 for RIDGE
alpha = 0.05
model = gd.LogisticRegressionGD(theta, penalty, alpha)
# model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch)
model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch, te_data[0], te_data[1])
training_acc = model.test(tr_data[0], tr_data[1], util.acc)
testing_acc = model.test(te_data[0], te_data[1], util.acc)
print('{} Final results. Train acc: {}, Test acc: {}'.format(time.time() - st, training_acc, testing_acc))
result = {}
result['TrainingAcc'] = training_acc
result['TestingAcc'] = testing_acc
# log the training result to file
util.write_result_to_file(result_path, model_name, result, True)
# save the model
loader.save(model_path, model)
print('{} Model saved.'.format(time.time() - st))
Example 14: main
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
def main():
    # training parameters
    c = 0.1
    tol = 0.01
    epsilon = 0.001
    # kernel = 'rbf'
    kernel = 'linear'
    # load and preprocess training data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)
    # transpose label
    # tr_data[1] = np.transpose(tr_data[1])[0]
    # te_data[1] = np.transpose(te_data[1])[0]
    # load thresholds
    # threshes = loader.load_pickle_file(threshes_path)
    # start training
    tr_n = len(tr_data[0])
    te_n = len(te_data[1])
    # train 45 svms, one per unordered pair of the 10 classes
    print('Begin training...')
    svm_dict = {}  # dict of svm classifiers, keyed by class pair (i, j)
    function_tr_err = []
    # test the svms
    test_pred_dict = {}
    st = time.time()
    # prepare 45 datasets
    fn_count = 0
    for i in range(9):
        svm_dict[i] = {}
        test_pred_dict[i] = {}
        for j in range(i + 1, 10):  # j > i always, so no i == j check is needed
            # get training data for this class pair
            c_tr_f, c_tr_y = data_i_j(tr_data[0], tr_data[1], i, j)
            # train svm
            print('{:.2f} Start training.'.format(time.time() - st))
            clf = svm.SVM(C=c, tol=tol, epsilon=epsilon, kernel=kernel)
            clf.fit(c_tr_f, c_tr_y)
            tr_pred = clf.predict(c_tr_f)
            tr_acc = (c_tr_y == tr_pred).sum() / c_tr_f.shape[0]
            fn_count += 1
            print('{} Function {} done. Final results. Train acc: {}'.format(time.time() - st, fn_count, tr_acc))
            svm_dict[i][j] = clf
            te_pred = clf.predict(te_data[0])
            test_pred_dict[i][j] = te_pred
    print('{} Training finished.'.format(time.time() - st))
    loader.save(model_path, svm_dict)
    loader.save(te_pred_dict_path, test_pred_dict)
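The example saves the 45 pairwise predictions but not the decoding step. A minimal one-vs-one majority-vote sketch, under the assumption (not confirmed by the source) that each te_pred holds -1 for class i and +1 for class j:

import numpy as np

def vote_one_vs_one(test_pred_dict, te_n, class_num=10):
    # Tally one vote per test sample from each pairwise classifier.
    votes = np.zeros((te_n, class_num))
    for i in test_pred_dict:
        for j, te_pred in test_pred_dict[i].items():
            for s, p in enumerate(te_pred):
                votes[s, i if p == -1 else j] += 1
    return votes.argmax(axis=1)  # predicted class per test sample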
Example 15: generate_thresholds
# Required import: import DataLoader [as alias]
# Or: from DataLoader import save [as alias]
# generate thresholds for spambase polluted train data
# data_path = 'data/spam_polluted/train/data.pickle'
# features_path = 'data/spam_polluted/train/train_feature.txt'
# thresh_path = 'data/spambase_polluted.threshes'
# label_path = 'data/spam_polluted/train/train_label.txt'
# features = loader.load_dataset(features_path, False)
# generate_thresholds(features, thresh_path)
# label = loader.load_label(label_path)
# loader.save(data_path, [features, label])
# load and save spam polluted test data
# data_save_path = 'data/spam_polluted/test/data.pickle'
# features_path = 'data/spam_polluted/test/test_feature.txt'
# label_path = 'data/spam_polluted/test/test_label.txt'
# features = loader.load_dataset(features_path, False)
# label = loader.load_label(label_path)
# loader.save(data_save_path, [features, label])
# load and save spam polluted missing test data
# data_save_path = 'data/spam_polluted_missing/test/data.pickle'
# data_path = 'data/spam_polluted_missing/test/20_percent_missing_test.txt'
# data = loader.load_dataset(data_path)
# loader.save(data_save_path, data)
# load and save spam polluted missing train data
data_save_path = 'data/spam_polluted_missing/train/data.pickle'
data_path = 'data/spam_polluted_missing/train/20_percent_missing_train.txt'
data = loader.load_dataset(data_path)
loader.save(data_save_path, data)