Python DataLoader.save方法代码示例

本文整理汇总了Python中DataLoader.save方法的典型用法代码示例。


示例1: get_cs

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def get_cs(data_path, cs_path):
    """Return the DP cheat sheet, using the pickled cache when it exists.

    When ``cs_path`` is an existing file the cheat sheet is loaded from it.
    Otherwise it is computed by ``dp_compute_cs`` from element 0 of the
    pickle stored at ``data_path`` and then saved to ``cs_path``.

    :param data_path: path of the pickled data set; only ``data[0]`` is used
    :param cs_path: path of the cached cheat-sheet pickle (read or written)
    :return: the cheat sheet object
    """
    if os.path.isfile(cs_path):
        cs = loader.load_pickle_file(cs_path)
        print('CS loaded.')
    else:
        # No cache yet: compute once and persist for later runs.
        print('Start compute cs.')
        data = loader.load_pickle_file(data_path)
        cs = dp_compute_cs(data[0])
        loader.save(cs_path, cs)
        print('CS saved.')
    return cs

示例2: random_select_data

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def random_select_data(tr_save_path, sel_tr_save_path, percent):
    """Save a per-class random subset of a pickled training set.

    The pickle at ``tr_save_path`` is indexed as ``[samples, labels]``. For
    each label value 0..9, ``int(percent * count)`` samples are drawn without
    replacement; the selected samples and labels are written to
    ``sel_tr_save_path`` as ``[selected_samples, selected_labels]``. Within
    each class the original sample order is preserved.

    :param tr_save_path: path of the pickled full training set
    :param sel_tr_save_path: path the selected subset is saved to
    :param percent: fraction of each class to keep (multiplied by the class size)
    """
    all_tr = loader.load_pickle_file(tr_save_path)

    tr_l_ind_dict = {}
    selected_tr_data = [[], []]
    for i in range(10):
        tr_l_ind_dict[i] = [l_ind for l_ind, l in enumerate(all_tr[1]) if l == i]
    for i in range(10):
        i_n = len(tr_l_ind_dict[i])
        pick_n = int(percent * i_n)
        # A set makes each membership test O(1); testing against the list made
        # the scans below quadratic in the data size.
        cur_pick_ind = set(np.random.choice(tr_l_ind_dict[i], pick_n, replace=False).tolist())
        selected_tr_data[0].extend([x for x_ind, x in enumerate(all_tr[0]) if x_ind in cur_pick_ind])
        selected_tr_data[1].extend([y for y_ind, y in enumerate(all_tr[1]) if y_ind in cur_pick_ind])
    loader.save(sel_tr_save_path, selected_tr_data)

示例3: compute_feature_mean

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def compute_feature_mean(features, save_path):
    """Compute the NaN-ignoring mean of every feature column and save it.

    The previous loop never appended anything to ``means``, so an empty array
    was always saved and returned. The mean is now computed with
    ``np.nanmean(features, axis=0)``, the same call the script at the bottom
    of this example set uses.

    :param features: 2-D array-like of shape (samples, features); may contain NaN
    :param save_path: path the array of means is saved to via ``loader.save``
    :return: 1-D ``numpy`` array with one mean per feature column
    :raises ValueError: if ``features`` is not 2-dimensional
    """
    # Unpacking keeps the original requirement that the input is 2-D.
    _n_samples, _n_features = np.shape(features)

    means = np.nanmean(features, axis=0)
    loader.save(save_path, means)
    return means

示例4: abstract_features

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def abstract_features(data_path, cs_path, rects_path, res_path):
    """Extract rectangle features for every sample and save them with labels.

    For each cheat sheet returned by ``get_cs`` the values produced by
    ``compute_feature_with_cs`` for every rectangle are concatenated into one
    feature vector. The previous version built each vector but never stored
    it, so the saved feature matrix was always empty.

    :param data_path: path of the pickled data set; element 1 holds the labels
    :param cs_path: path of the cached cheat-sheet pickle (see ``get_cs``)
    :param rects_path: path of the pickled list of rectangles
    :param res_path: path the ``[features, labels]`` result is saved to
    :return: ``[np.array(features), labels]``
    """
    # get cs
    cs = get_cs(data_path, cs_path)
    rects = loader.load_pickle_file(rects_path)

    # per the original note: 2 features for each rectangle
    features = []
    for i, ccs in enumerate(cs):
        f = []
        for rect in rects:
            f.extend(compute_feature_with_cs(rect, ccs))
        features.append(f)
        print('{} rects finished.'.format(i))

    # combine with labels
    label = loader.load_pickle_file(data_path)[1]
    f_l = [np.array(features), label]
    loader.save(res_path, f_l)

    return f_l

示例5: get_ecoc

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def get_ecoc(ecoc_path, num_ecoc, class_num):
    """Load a cached ECOC table, or randomly search for one and cache it.

    The result is a three-element list ``[distance, func_codes, class_codes]``:

    * ``distance`` - summed pairwise Hamming distance of the chosen codes
    * ``func_codes`` - ``num_ecoc`` lists, each giving one bit per class
      (the layout used for training: ``func_codes[f][class]``)
    * ``class_codes`` - ``class_num`` lists of ``num_ecoc`` bits
      (the layout used for predicting: ``class_codes[class][f]``)

    NOTE(review): the published snippet had lost several lines (``else:``,
    loop bodies, ``continue``) and was not valid Python. The missing parts
    were reconstructed from how the result is consumed by callers - confirm
    against the original project before relying on it.

    :param ecoc_path: path of the pickled ECOC table (read if present, else written)
    :param num_ecoc: number of ECOC functions, i.e. bits per class code
    :param class_num: number of classes to assign codes to
    :return: the ``[distance, func_codes, class_codes]`` list
    """
    if path.isfile(ecoc_path):
        print('Loading the ecoc...')
        best_ecoc = loader.load_pickle_file(ecoc_path)
    else:
        print('Creating the ecoc...')
        best_ecoc = [0, [], []]     # distance, ecoc for training, ecoc for predicting
        n = int(math.pow(2, num_ecoc))
        # Zero-padded binary of width num_ecoc. The old expression applied
        # .format() to the last string fragment only.
        bin_fmt = '{0:0' + str(num_ecoc) + 'b}'
        for _ in range(100):
            codes = choice(n, class_num)
            # one bit list per class
            c_ecoc = [[int(ss) for ss in bin_fmt.format(int(c))] for c in codes]
            # transpose: one list per ECOC function, indexed by class
            ecoc_func_codes = [[code[b] for code in c_ecoc] for b in range(num_ecoc)]

            c_hamming_dist = 0
            has_same_code = False
            for j in range(len(c_ecoc)):
                for k in range(len(c_ecoc)):
                    if j != k:
                        c_hd = hamming(c_ecoc[j], c_ecoc[k])
                        if c_hd == 0:
                            has_same_code = True
                        c_hamming_dist += c_hd
            if has_same_code:
                # two classes share a code word: unusable candidate
                continue
            if c_hamming_dist > best_ecoc[0]:
                best_ecoc[0] = c_hamming_dist
                best_ecoc[1] = ecoc_func_codes
                best_ecoc[2] = c_ecoc

        # serialize the best ecoc
        loader.save(ecoc_path, best_ecoc)
    return best_ecoc

示例6: random_select_rectangle

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def random_select_rectangle(h, w, n, pl, ph, save_path=None):
    """Randomly pick ``n`` rectangles whose area lies within ``[pl, ph]``.

    Two corner points are drawn repeatedly until ``rect_area`` of the pair is
    between ``pl`` and ``ph``; the pair is then stored as
    ``((min0, min1), (max0, max1))``, where coordinate 0 is drawn from the
    height range and coordinate 1 from the width range.

    :param h: height of the image in pixel
    :param w: width of the image in pixel
    :param n: number of rectangle
    :param pl: min pixels of each rectangle
    :param ph: max pixels of each rectangle
    :param save_path: if not None, the rectangle list is saved to this path
    :return: list of ``n`` rectangles as pairs of corner points
    """
    sel_rects = []
    for i in range(n):
        a = -1
        # resample until the area falls inside the requested range
        while a < pl or a > ph:
            p1 = (random.randint(0, h - 1), random.randint(0, w - 1))
            p2 = (random.randint(0, h - 1), random.randint(0, w - 1))
            a = rect_area(p1, p2)
        sel_rects.append(((min(p1[0], p2[0]), min(p1[1], p2[1])), (max(p1[0], p2[0]), max(p1[1], p2[1]))))

    if save_path is not None:
        loader.save(save_path, sel_rects)

    return sel_rects

示例7: ecoc

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def ecoc():
    """Train one SVM per ECOC function and pickle the list of classifiers.

    Reads the module-level ``tr_data_path``, ``te_data_path``, ``ecoc_path``
    and ``model_path``. For every function code in ``best_ecoc[1]`` the
    training labels are mapped to -1/+1 through that code, an SVM is fitted,
    and its training accuracy is printed. The fitted classifiers are saved to
    ``model_path``.

    Fixes over the published snippet: each fitted classifier is now appended
    to ``svms`` (an empty list was being saved), and the training prediction
    is made on the feature matrix ``tr_data[0]`` rather than on the whole
    ``[features, labels]`` pair.
    """
    # training parameter
    c = 0.001
    tol = 0.01
    epsilon = 0.001
    # kernel = 'rbf'
    kernel = 'linear'

    # load and preprocess training data
    print('Loading data...')
    tr_data = loader.load_pickle_file(tr_data_path)
    # NOTE(review): the test set is loaded but not used in this function.
    te_data = loader.load_pickle_file(te_data_path)

    # randomly generate ECOC with num_ecoc functions
    num_ecoc = 10
    class_num = 10
    best_ecoc = util.get_ecoc(ecoc_path, num_ecoc, class_num)

    # train one svm per ECOC function
    print('Begin training...')
    svms = []  # list of svm classifiers
    sst = time.time()
    for ind, c_ecoc in enumerate(best_ecoc[1]):
        st = time.time()
        # prepare label: bit 0 -> -1, bit 1 -> +1
        c_label = [-1 if c_ecoc[l] == 0 else 1 for l in tr_data[1]]
        clf = svm.SVM(C=c, tol=tol, epsilon=epsilon, kernel=kernel)
        clf.fit(tr_data[0], c_label)
        # assumes clf.predict returns a NumPy array so the comparison is
        # element-wise - TODO confirm
        tr_pred = clf.predict(tr_data[0])
        tr_acc = (c_label == tr_pred).sum() / tr_data[0].shape[0]
        svms.append(clf)
        print('{} Function {} done. Final results. Train acc: {}'.format(time.time() - st, ind, tr_acc))

    print('{} Training finished.'.format(time.time() - sst))
    loader.save(model_path, svms)

示例8: load_mnist

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]

    fimg = open(fname_img, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
    img = pyarray("B", fimg.read())

    ind = [ k for k in range(size) if lbl[k] in digits ]
    N = len(ind)

    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for i in range(len(ind)):
        images[i] = array(img[ ind[i]*rows*cols : (ind[i]+1)*rows*cols ]).reshape((rows, cols))
        labels[i] = lbl[ind[i]]

    return images, labels

if __name__ == '__main__':
    import os

    # Load the MNIST training split and store it as one pickle.
    # Paths are built with os.path.join so they also work on systems that do
    # not use the Windows backslash separator.
    data_path = os.path.join('data', 'digits')
    tr_save_path = os.path.join(data_path, 'tr_data.pickle')
    te_save_path = os.path.join(data_path, 'te_data.pickle')
    # switch to te_save_path (and 'testing' below) to convert the test split
    save_path = tr_save_path
    # images, labels = load_mnist('testing', path=data_path)
    images, labels = load_mnist('training', path=data_path)
    loader.save(save_path, (images, labels))
    # imshow(images.mean(axis=0), cmap=cm.gray)
    # show()

示例9: main

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]

    # TODO convert labels from {0, 1} to {-1, 1}
    print('{:.2f} Label converted!'.format(time.time() - st))

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)
    print('{:.2f} Thresholds loaded!'.format(time.time() - st))
    # start training
    training_errs = []
    testing_errs = []
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    te_roc_1st_boost = None
    ranked_f = None
    roc = []
    auc = 0.0
    thresh_cs = None

    tr_n, f_d = np.shape(tr_data[0])
    te_n, = np.shape(te_data[1])
    # TODO prepare distribution
    d = util.init_distribution(len(tr_data[0]))

    # TODO compute thresholds cheat sheet (not a solution due to huge thresh_cs table)
    # thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
    # print('{:.2f} Thresholds cheat sheet computed!'.format(time.time() - st))

    boost = b.Boosting(d)
    testing_predict = np.zeros((1, te_n)).tolist()[0]
    training_predict = np.zeros((1, tr_n)).tolist()[0]
    round_tr_err = []
    round_te_err = []
    round_model_err = []
    round_te_auc = []
    converged = False
    tol = 1e-5
    te_auc = 2.
    round = 0
    while round < round_limit:  # and not converged:
        round += 1
        boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
        boost.update_predict(tr_data[0], training_predict)
        boost.update_predict(te_data[0], testing_predict)
        c_model_err = boost.model[-1].w_err
        c_f_ind = boost.model[-1].f_ind
        c_thresh = boost.model[-1].thresh
        c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
        c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
        # TODO calculate the AUC for testing results
        # c_te_auc = util.get_auc_from_predict(testing_predict, te_data[1])
        # round_te_auc.append(c_te_auc)
        print('{:.2f} Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {:.12f}'.format(time.time() - st, round, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err, 0))
        # converged =  abs(c_te_auc - te_auc) / te_auc <= tol
        # te_auc = c_te_auc

    # TODO get feature ranking from the predictions
    ranked_f = util.get_f_ranking_from_predictions(boost, threshes)
    round_err_1st_boost = round_model_err
    tr_errs_1st_boost = round_tr_err
    te_errs_1st_boost = round_te_err
    # te_auc_1st_boost = round_te_auc

    # _, te_roc_1st_boost = util.get_auc_from_predict(testing_predict, te_data[1], True)

        # break      # for testing

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)

    print('Final results. Mean Train err: {}, Mean Test err: {}'.format(mean_training_err, mean_testing_err))
    print('Top 10 features: ')
    # print(ranked_f[:10])

    result = {}
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['MeanTestingAcc'] = mean_testing_err
    result['1stBoostTrainingError'] = tr_errs_1st_boost
    result['1stBoostTestingError'] = te_errs_1st_boost
    result['1stBoostModelError'] = round_err_1st_boost
    result['1stBoostTestingAUC'] = te_auc_1st_boost
    result['1stBoostTestingROC'] = te_roc_1st_boost
    result['rankedFeatures'] = ranked_f

    # result['ROC'] = str(roc)
    result['AUC'] = auc

    # store the model
    loader.save(model_path, boost)
    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)

示例10: compute_feature_mean

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
    means = []
    for i in range(d):

        cur_f = features[:, i]

        # cur_mean = 0
        # for f in features:
        #     if not np.isnan(f[i]):
        #         cur_mean += f[i]
        # means.append(cur_mean / n)
    means = np.array(means)
    loader.save(save_path, means)
    return means

if __name__ == '__main__':
    # Feature means for the training set with missing values:
    # load element 0 of the data pickle, average along axis 0 while ignoring
    # NaN entries, and store the result next to the data.
    mean_path = 'data/spam_polluted_missing/train/f_mean.pickle'
    path = 'data/spam_polluted_missing/train/data.pickle'
    features = loader.load_pickle_file(path)[0]
    means = np.nanmean(features, axis=0)
    loader.save(mean_path, means)

    # Same procedure for the polluted set (disabled):
    # path = 'data/spam_polluted/train/data.pickle'
    # mean_path = 'data/spam_polluted/train/f_mean.pickle'
    # features = loader.load_pickle_file(path)[0]
    # means = np.nanmean(features, axis=0)
    # loader.save(mean_path, means)

示例11: convert_to_np_array

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def convert_to_np_array(path):
    """Rewrite a pickled ``[features, labels]`` pair as NumPy arrays in place.

    The pickle at ``path`` is loaded, elements 0 and 1 are each converted with
    ``np.array``, and the pair is saved back to the same path.

    :param path: path of the pickle to convert; it is overwritten
    """
    raw = loader.load_pickle_file(path)
    labels_arr = np.array(raw[1])
    features_arr = np.array(raw[0])
    converted = [features_arr, labels_arr]
    loader.save(path, converted)

示例12: range

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
        for j in range(len(c_ecoc)):
            for k in range(len(c_ecoc)):
                if j != k:
                    c_hd = hamming(c_ecoc[j], c_ecoc[k])
                    if c_hd == 0:
                        has_same_code = True
                    c_hamming_dist += c_hd
        if has_same_code:
        if c_hamming_dist > best_ecoc[0]:
            best_ecoc[0] = c_hamming_dist
            best_ecoc[1] = ecoc_func_codes
            best_ecoc[2] = c_ecoc

    # serialize the best ecoc
    loader.save(ecoc_path, best_ecoc)

print('Init ecoc done!')

# train 50 boosts
print('Begin training...')
boosts = []
function_tr_err = []

max_round = 200
if wl_type == 'random_':
    max_round = 2000

for ind, c_ecoc in enumerate(best_ecoc[1]):
    print('Training function {}...'.format(ind))
    # TODO preprocess labels, so that labels match ecoc, {0, 1} -> {-1, 1}

示例13: print

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
# Script excerpt: train a logistic-regression model by gradient descent, report
# accuracy, log the result and pickle the model. Names such as st, tr_data,
# te_data, normalize_method, lamda, term_method, tol, result_path, model_name
# and model_path are defined outside this excerpt.
print('{:.2f} Data loaded!'.format(time.time() - st))

# convert the feature containers (element 0 of each data set) to plain lists
tr_data[0] = tr_data[0].tolist()
te_data[0] = te_data[0].tolist()

# normalize features
# NOTE(review): the return value is discarded, so this presumably normalizes
# both feature lists in place - confirm against prep.normalize_features_all
prep.normalize_features_all(normalize_method, tr_data[0], te_data[0])
print('{:.2f} Features normalized!'.format(time.time() - st))

# model hyper-parameters
theta = None
is_batch = True
penalty = 'l2'  # l2 for RIDGE
alpha = 0.05
model = gd.LogisticRegressionGD(theta, penalty, alpha)
# alternative call without the test set:
# model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch)
model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch, te_data[0], te_data[1])
# evaluate on both splits using util.acc as the metric
training_acc = model.test(tr_data[0], tr_data[1], util.acc)
testing_acc = model.test(te_data[0], te_data[1], util.acc)

print('{} Final results. Train acc: {}, Test acc: {}'.format(time.time() - st, training_acc, testing_acc))

# collect the metrics that are written to the result log
result = {}
result['TrainingAcc'] = training_acc
result['TestingAcc'] = testing_acc

# log the training result to file
util.write_result_to_file(result_path, model_name, result, True)

# save the model
loader.save(model_path, model)
print('{} Model saved.'.format(time.time() - st))

示例14: main

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
def main():
    """Train one-vs-one SVMs for every pair of the 10 classes and save them.

    Reads the module-level ``tr_data_path``, ``te_data_path``, ``model_path``
    and ``te_pred_dict_path``. For each class pair ``(i, j)`` with ``i < j``
    an SVM is fitted on the samples selected by ``data_i_j``, its training
    accuracy is printed, and its predictions on the full test set are kept.
    The classifiers and the test predictions are saved as nested dicts keyed
    ``[i][j]``.

    The published snippet contained an ``if i == j:`` with no body, which is
    a syntax error; the check is dropped because ``j`` starts at ``i + 1`` and
    can never equal ``i``.
    """
    # training parameter
    c = 0.1
    tol = 0.01
    epsilon = 0.001
    # kernel = 'rbf'
    kernel = 'linear'

    # load and preprocess training data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)

    # transpose label
    # tr_data[1] = np.transpose(tr_data[1])[0]
    # te_data[1] = np.transpose(te_data[1])[0]

    # train 45 svm (one per unordered pair of the 10 classes)
    print('Begin training...')
    svm_dict = {}  # svm classifiers, keyed [i][j]

    # predictions of every pairwise svm on the test set, keyed [i][j]
    test_pred_dict = {}

    st = time.time()
    # prepare 45 datasets
    fn_count = 0
    for i in range(9):
        svm_dict[i] = {}
        test_pred_dict[i] = {}
        for j in range(i + 1, 10):
            # get training data for this class pair
            c_tr_f, c_tr_y = data_i_j(tr_data[0], tr_data[1], i, j)
            # train svm
            print('{:.2f} Start training.'.format(time.time() - st))
            clf = svm.SVM(C=c, tol=tol, epsilon=epsilon, kernel=kernel)
            clf.fit(c_tr_f, c_tr_y)
            tr_pred = clf.predict(c_tr_f)

            tr_acc = (c_tr_y == tr_pred).sum() / c_tr_f.shape[0]

            fn_count += 1
            print('{} Function {} done. Final results. Train acc: {}'.format(time.time() - st, fn_count, tr_acc))

            svm_dict[i][j] = clf

            te_pred = clf.predict(te_data[0])
            test_pred_dict[i][j] = te_pred

    print('{} Training finished.'.format(time.time() - st))
    loader.save(model_path, svm_dict)
    loader.save(te_pred_dict_path, test_pred_dict)

示例15: generate_thresholds

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import save [as 别名]
    # generate thresholds for spambase polluted train data
    # data_path = 'data/spam_polluted/train/data.pickle'
    # features_path = 'data/spam_polluted/train/train_feature.txt'
    # thresh_path = 'data/spambase_polluted.threshes'
    # label_path = 'data/spam_polluted/train/train_label.txt'
    # features = loader.load_dataset(features_path, False)
    # generate_thresholds(features, thresh_path)
    # label = loader.load_label(label_path)
    # loader.save(data_path, [features, label])

    # load and save spam polluted test data
    # data_save_path = 'data/spam_polluted/test/data.pickle'
    # features_path = 'data/spam_polluted/test/test_feature.txt'
    # label_path = 'data/spam_polluted/test/test_label.txt'
    # features = loader.load_dataset(features_path, False)
    # label = loader.load_label(label_path)
    # loader.save(data_save_path, [features, label])

    # load and save spam polluted missing test data
    # data_save_path = 'data/spam_polluted_missing/test/data.pickle'
    # data_path = 'data/spam_polluted_missing/test/20_percent_missing_test.txt'
    # data = loader.load_dataset(data_path)
    # loader.save(data_save_path, data)

    # load and save spam polluted missing train data
    data_save_path = 'data/spam_polluted_missing/train/data.pickle'
    data_path = 'data/spam_polluted_missing/train/20_percent_missing_train.txt'
    data = loader.load_dataset(data_path)
    loader.save(data_save_path, data)
