

Python DataLoader Class Code Examples

This article collects typical usage examples of the Python DataLoader class. If you have been wondering what exactly the DataLoader class does, how to use it, or what real code that uses it looks like, the hand-picked class examples below should help.


Fifteen DataLoader class code examples are shown below, sorted by popularity by default.
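A note before the examples: DataLoader here is not a class from one common library. Each project ships its own DataLoader helper module (imported directly, or aliased as loader or dl), and the snippets were collected from unrelated repositories. As a rough sketch only, the interface the examples below rely on looks approximately like this; every signature is inferred from the call sites, and the bodies are illustrative stubs, not any project's actual implementation:

import os
import pickle


def get_all_files(directory):
    """List the paths of the entries under `directory` (cf. Example 1)."""
    return [os.path.join(directory, name) for name in os.listdir(directory)]


def load_pickle_file(path):
    """Unpickle and return the object stored at `path` (cf. Examples 4-12)."""
    with open(path, 'rb') as f:
        return pickle.load(f)


def save(path, obj):
    """Pickle `obj` to `path` (cf. Examples 10, 11 and 14)."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)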

Example 1: get_data

import pandas as pd
import DataLoader  # project-local helper module (import form assumed)


def get_data():
    # build (text, label) lists from every CSV file under 'Data'
    file_paths = DataLoader.get_all_files('Data')

    X_list, y_list = [], []

    for file_path in file_paths:
        data_frame = pd.read_csv(file_path)

        abstract_text, abstract_labels = DataLoader.extract_abstract_and_labels(data_frame)
        mesh_terms, title = DataLoader.extract_mesh_and_title(data_frame)

        X = []
        y = []

        for i in range(abstract_text.shape[0]):
            abstract_str = abstract_text[i]
            mesh_str = mesh_terms[i]
            title_str = title[i]
            label = abstract_labels[i]

            text = "".join([abstract_str, " ", mesh_str, " ", title_str])

            X.append(text)
            y.append(label)
        X_list.append(X)
        y_list.append(y)

    return X_list, y_list
Developer ID: ericrincon, Project: DeepAbstractRelevance, Lines of code: 28, Source file: BaselineExperiment.py

Example 2: predict

import pickle

import theano
import DataLoader  # project-local helper module (import form assumed)


def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """
    # load the saved model
    classifier = pickle.load(open("best_model.p", "rb"))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset = 'mnist_train.csv'
    datasets = DataLoader.load_kaggle_mnist(dataset)

    test_set_x, test_set_y = datasets[2]
    print(type(test_set_x))
    print(type(test_set_y))
    test_set_x = test_set_x.get_value()
    test_set_y = test_set_y.eval()


    predicted_values = predict_model(test_set_x[20:30])
    print("Sample Neural Prediction")
    print("Predicted values for examples 20-29 of the test set:")
    print(predicted_values)
    print("The actual values are:")
    print(test_set_y[20:30])
Developer ID: hariravi, Project: KaggleMLYH, Lines of code: 30, Source file: NeuralNet.py

Example 3: predict_main

import pickle

import numpy
import DataLoader  # project-local modules (import form assumed)
import MLutil


def predict_main(classifier_pickle):
    data = DataLoader.load_kaggle_mnist("mnist_train.csv", neural=False)
    X = numpy.array(data[2][0])
    X = X / 255.0 * 2 - 1  # rescale pixel values from [0, 255] to [-1, 1]
    Y = numpy.array(data[2][1])
    predictor = MLutil.Predictor(classifier_pickle, 'SVM')
    predicted_values = predictor.make_prediction(X)

    predAnalysis = MLutil.PredictionAccuracies(predicted_values, Y)
    print(predAnalysis.get_misclass_rate())
    print(predAnalysis.get_indicies_misclassifications())

    pickle.dump(predAnalysis.get_indicies_misclassifications(), open("svm_indicies.p", "wb"))
    return predAnalysis.get_indicies_misclassifications()
Developer ID: hariravi, Project: KaggleMLYH, Lines of code: 14, Source file: svm.py

Example 4: main

import time

# Assumed project-local imports: loader is the DataLoader module;
# m (model definitions) and util (result-logging helpers) are other
# modules from the same project, not shown here
import DataLoader as loader


def main():
    st = time.time()
    # training parameter
    result_path = 'results/PB2_A_spam_polluted_NB_Gaussian.acc'
    model_name = 'spam_'
    train_data_path = 'data/spam_polluted/train/data.pickle'
    test_data_path = 'data/spam_polluted/test/data.pickle'

    tr_data = loader.load_pickle_file(train_data_path)
    te_data = loader.load_pickle_file(test_data_path)
    print('{:.2f} Data loaded!'.format(time.time() - st))

    # start training
    print('{:.2f} Building model...'.format(time.time() - st))
    model = m.NBGaussian()
    model.build(tr_data[0], tr_data[1])

    print('{:.2f} Predicting...'.format(time.time() - st))
    tr_pred = model.predict(tr_data[0])
    te_pred = model.predict(te_data[0])

    print('{:.2f} Calculating results...'.format(time.time() - st))
    tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
    te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]

    print('{} Final results. Train acc: {}, Test acc: {}'.format(time.time() - st, tr_acc, te_acc))

    result = {}
    result['TrainingAcc'] = tr_acc
    result['TestingAcc'] = te_acc

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
Developer ID: Juncai, Project: CS6140, Lines of code: 34, Source file: PB2_A_polluted_NB_Gaussian.py

Example 5: build_model

def build_model(training_data, config):
    '''
    Build a model from the config and training data.
    Assumes project-local modules: c (constants), loader (DataLoader),
    Tree, utils, and rmodel (regression models).
    '''
    m_type = config[c.CLSFR_TYPE]
    if m_type == c.DT_WITH_IG:
        # for decision tree
        # load thresholds
        threshs = loader.load_arrays(config[c.THRESHS])

        tree = Tree.Tree()
        tree.build(utils.split_on_ig, training_data[0],
                   training_data[1], threshs, config[c.TERM_CON], int(config[c.TERM_THRESH]))
        return tree
    elif m_type == c.REGRESSION_TREE:
        # for regression tree
        # load thresholds
        threshs = loader.load_arrays(config[c.THRESHS])

        tree = Tree.Tree()
        tree.build(utils.split_on_mse, training_data[0],
                   training_data[1], threshs, config[c.TERM_CON], float(config[c.TERM_THRESH]))
        return tree
    elif m_type == c.REGRESSION:
        # for linear regression
        reg_model = rmodel.Regression()
        reg_model.build(training_data[0], training_data[1])
        return reg_model
    # any other classifier type falls through and returns None
Developer ID: Juncai, Project: CS6140, Lines of code: 28, Source file: ModelBuilder.py

Example 6: main

def main():
    # Assumes numpy as np and project-local modules: c (constants),
    # loader (DataLoader), Preprocess, and kNN
    kernel = c.COSINE
    # training parameter
    result_path = 'results/PB2_spam.acc'
    model_name = 'digits_' + kernel

    tr_data_path = 'data\\digits\\tr_f_l_10.pickle'
    te_data_path = 'data\\digits\\te_f_l_10.pickle'
    # load and preprocess training data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)

    # transpose label
    tr_data[1] = np.transpose(tr_data[1])[0]
    te_data[1] = np.transpose(te_data[1])[0]

    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, tr_data[0])
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, te_data[0])
    # start training
    st = time.time()
    print('{:.2f} Start training.'.format(time.time() - st))

    for r in (0.15, 0.1):
        clf = kNN.kNN(kernel=kernel, dataset=c.DS_DIGITS)
        clf.fit(tr_data[0], tr_data[1])
        tr_pred = clf.predict(tr_data[0], r=r)
        te_pred = clf.predict(te_data[0], r=r)

        tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
        te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]

        print('{} Final results with kernel {} and r={}. Train acc: {}, Test acc: {}'.format(time.time() - st, kernel, r, tr_acc, te_acc))
Developer ID: Juncai, Project: CS6140, Lines of code: 35, Source file: PB2_A_digits.py

Example 7: main

def main():
    # Assumes project-local modules: loader (DataLoader), Preprocess,
    # and dt (DecisionTree)
    target = 'v2'
    # training parameter
    k = 10  # fold
    layer_thresh = 2
    T = 50
    threshes_path = 'data/spambase.threshes'

    # load and preprocess training data
    training_data = loader.load_dataset('data/spambase.data')

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    k_folds = Preprocess.prepare_k_folds(training_data, k)
    tr_data, te_data = Preprocess.get_i_fold(k_folds, 0)
    f_cur = [x[0] for x in tr_data[0]]

    t = dt.DecisionTree()
    if target == 'v1':
        for i in range(100):
            h_y = t.compute_entropy(tr_data[1])
            thresh = threshes[0][30]
            ig = t.compute_ig(f_cur, tr_data[1], thresh, h_y)
    else:
        h_y = t.compute_entropy_v2(tr_data[1])
        thresh = threshes[0][0]
        ig = t.compute_ig_v2(f_cur, tr_data[1], thresh, h_y)
Developer ID: Juncai, Project: CS6140, Lines of code: 30, Source file: PB6_test.py

Example 8: main

def main():
    # Assumes project-local modules: Preprocess, loader (DataLoader),
    # rm (regression models), and util (helpers)

    # training parameter
    result_path = 'results/housingLiR_1.mse'
    model_name = 'housing_shiftAndScale'
    # normalization = Preprocess.zero_mean_unit_var
    normalization = Preprocess.shift_and_scale
    # cols_not_norm = (0,7,12)
    cols_not_norm = []

    # load and preprocess training data
    training_data = loader.load_dataset('data/housing_train.txt')
    testing_data = loader.load_dataset('data/housing_test.txt')
    Preprocess.normalize_features_all(normalization, training_data[0], testing_data[0], cols_not_norm)

    # start training
    model = rm.LinearRegression()
    model.build(training_data[0], training_data[1])
    training_mse = model.test(training_data[0], training_data[1], util.mse)
    testing_mse = model.test(testing_data[0], testing_data[1], util.mse)
    print('Error for training data is:')
    print(training_mse)
    print('Error for testing data is:')
    print(testing_mse)

    result = {}
    result['TrainingMSE'] = str(training_mse)
    result['TestingMSE'] = str(testing_mse)
    result['Theta'] = str(model.theta)

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result)
Developer ID: Juncai, Project: CS6140, Lines of code: 32, Source file: train_housing_LiR.py

Example 9: test

def test():
    # load the test data and the trained one-vs-one model dictionary
    # (te_data_path and model_path are module-level names; assumes
    # numpy as np and loader, the project's DataLoader module)
    te_data = loader.load_pickle_file(te_data_path)
    model = loader.load_pickle_file(model_path)

    test_pred_dict = {}
    for i in range(9):
        test_pred_dict[i] = {}
        for j in range(i + 1, 10):
            # predict with the pairwise classifier for classes (i, j)
            clf = model[i][j]
            te_pred = clf.predict(te_data[0])
            test_pred_dict[i][j] = te_pred

    te_n = len(te_data[1])
    te_pred = np.zeros(te_n)

    for i in range(te_n):
        votes = np.zeros((10,), dtype=int)  # vote tally for the 10 digit classes
        for j in range(9):
            for k in range(j):
                votes[j] += 1 if test_pred_dict[k][j][i] == -1 else 0
            for kk in test_pred_dict[j]:
                votes[j] += 1 if test_pred_dict[j][kk][i] == 1 else 0
        count = np.bincount(votes)
        if count[-1] == 1:
            # unique winner: take the class with the most votes
            te_pred[i] = votes.argmax()
        else:
            # tie: find the first two tied classes, then let their
            # pairwise classifier break the tie
            tie_ind = [votes.argmax()]
            cc = 0
            for ind_v, v in enumerate(votes):
                if v == votes.max():
                    if cc == 1:
                        tie_ind.append(ind_v)
                        break
                    else:
                        cc += 1
            te_pred[i] = tie_ind[0] if test_pred_dict[tie_ind[0]][tie_ind[1]][i] == 1 else tie_ind[1]
            print('{} Tie! {} wins.'.format(count[-1], te_pred[i]))


    acc = 0
    for ind_l, l in enumerate(te_data[1]):
        acc += 1 if l == te_pred[ind_l] else 0
    acc /= te_n
    # equivalent vectorized form: acc = (te_data[1] == te_pred).sum() / te_n

    print('Acc: {}'.format(acc))
Developer ID: Juncai, Project: CS6140, Lines of code: 57, Source file: PB3_digit_mySVM.py

Example 10: get_cs

def get_cs(data_path, cs_path):
    # load the dp cheat sheet from disk if it is cached, otherwise
    # compute and cache it (uses os and loader, the project's
    # DataLoader module)
    cs = None
    if os.path.isfile(cs_path):
        cs = loader.load_pickle_file(cs_path)
        print('CS loaded.')
    else:
        print('Start computing cs.')
        data = loader.load_pickle_file(data_path)
        cs = dp_compute_cs(data[0])
        loader.save(cs_path, cs)
        print('CS saved.')
    return cs
Developer ID: Juncai, Project: CS6140, Lines of code: 13, Source file: PB5_prepare_data.py

Example 11: random_select_data

def random_select_data(tr_save_path, sel_tr_save_path, percent):
    # draw a stratified random sample: `percent` of the examples from
    # each digit class (assumes numpy as np and loader, the project's
    # DataLoader module)
    all_tr = loader.load_pickle_file(tr_save_path)

    tr_l_ind_dict = {}
    selected_tr_data = [[], []]
    for i in range(10):
        tr_l_ind_dict[i] = [l_ind for l_ind, l in enumerate(all_tr[1]) if l == i]
    for i in range(10):
        i_n = len(tr_l_ind_dict[i])
        pick_n = int(percent * i_n)
        cur_pick_ind = np.random.choice(tr_l_ind_dict[i], pick_n, replace=False).tolist()
        selected_tr_data[0].extend([x for x_ind, x in enumerate(all_tr[0]) if x_ind in cur_pick_ind])
        selected_tr_data[1].extend([y for y_ind, y in enumerate(all_tr[1]) if y_ind in cur_pick_ind])
    loader.save(sel_tr_save_path, selected_tr_data)
Developer ID: Juncai, Project: CS6140, Lines of code: 14, Source file: PB5_prepare_data.py

Example 12: main

def main():
    # Assumes numpy as np, sklearn's KNeighborsClassifier and
    # cosine_distances, and project-local modules c, loader (DataLoader),
    # Preprocess, and kNN
    is_sklearn = False
    # kernel = c.COSINE
    # kernel = c.GAUSSIAN
    kernel = c.POLY
    # training parameter
    result_path = 'results/PB2_spam.acc'
    model_name = 'digits_' + kernel
    model_path = 'data/PB1_B_digits_sk_Gaussian_1.model'

    # tr_data_path = 'data\\digits\\tr_f_l.pickle'
    # te_data_path = 'data\\digits\\te_f_l.pickle'
    tr_data_path = 'data\\digits\\tr_f_l_10.pickle'
    te_data_path = 'data\\digits\\te_f_l_10.pickle'
    # load and preprocess training data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)

    # transpose label
    tr_data[1] = np.transpose(tr_data[1])[0]
    te_data[1] = np.transpose(te_data[1])[0]

    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, tr_data[0])
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, te_data[0])

    # start training
    models = []
    st = time.time()
    print('{:.2f} Start training.'.format(time.time() - st))

    for k in (1, 3, 7):
        if not is_sklearn:
            clf = kNN.kNN(kernel=kernel)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0], k=k)
            te_pred = clf.predict(te_data[0], k=k)
        else:
            clf = KNeighborsClassifier(n_neighbors=k, metric=cosine_distances)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0])
            te_pred = clf.predict(te_data[0])

        tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
        te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]
        models.append(clf)
        print('{} Final results with kernel {} and k={}. Train acc: {}, Test acc: {}'.format(time.time() - st, kernel, k, tr_acc, te_acc))
Developer ID: Juncai, Project: CS6140, Lines of code: 49, Source file: PB1_B_digits_kNN.py

Example 13: get_distance

def get_distance(directory):
    '''Return the distance in metres (dl is the project's DataLoader module).'''
    info_file = directory + '/info.dat'
    info = dl.load(info_file)[0]
    distancestr = info['Distance']
    distance = 0.01 * float(distancestr.replace('cm', ''))  # convert cm to m
    return distance
Developer ID: fillchen, Project: litoralis-synchronization, Lines of code: 7, Source file: simple-plot-transferfunction-date.py

Example 14: compute_feature_mean

def compute_feature_mean(features, save_path):
    # column-wise mean of each feature, ignoring NaN entries
    # (assumes numpy as np and loader, the project's DataLoader module)
    n, d = np.shape(features)

    means = []
    for i in range(d):
        cur_f = features[:, i]
        means.append(np.nanmean(cur_f))
    means = np.array(means)
    loader.save(save_path, means)
    return means
Developer ID: Juncai, Project: CS6140, Lines of code: 17, Source file: prepare_data.py

Example 15: train

    def train(self, X, y, model, batch_generator, n_epochs=50, optim_algo='adam',
              criterion='categorical_crossentropy', save_model=True, verbose=2,
              plot=True, batch_size=64):
        # Method of a CNN wrapper class; uses the Keras 1.x API (Adam,
        # SGD, Adagrad; fit(..., nb_epoch=...)) and sklearn.metrics as
        # `metrics`. Batches come from batch_generator, so the X and y
        # arguments are shadowed by the minibatch loop below.

        if optim_algo == 'adam':
            optim_algo = Adam()
        elif optim_algo == 'sgd':
            optim_algo = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
        elif optim_algo == 'adagrad':
            optim_algo = Adagrad()

        self.model.compile(optimizer=optim_algo, loss=criterion)

        loss_train_history = []
        loss_val_history = []
        batch_history = {'f1': [], 'recall': [], 'precision': []}

        for epoch in range(1, n_epochs + 1):
            batch_f1_history = []
            batch_precision_history = []
            batch_recall_history = []

            for X, y in batch_generator.next_batch():
                history = self.model.fit(X, y, nb_epoch=1, batch_size=batch_size,
                                         validation_split=0.2, verbose=0)

                val_loss, loss = history.history['val_loss'][0], history.history['loss'][0]

                loss_train_history.append(loss)
                loss_val_history.append(val_loss)

                truth = self.model.validation_data[3]
                truth = dl.onehot2list(truth)
                batch_prediction = self.predict_classes(self.model.validation_data[0:3])

                batch_f1 = metrics.f1_score(truth, batch_prediction)
                batch_recall = metrics.recall_score(truth, batch_prediction)
                batch_precision = metrics.precision_score(truth, batch_prediction)

                batch_f1_history.append(batch_f1)
                batch_recall_history.append(batch_recall)
                batch_precision_history.append(batch_precision)

            batch_history['f1'].append(batch_f1_history)
            batch_history['recall'].append(batch_recall_history)
            batch_history['precision'].append(batch_precision_history)

            print('Epoch: {} | Train loss: {} | Valid loss: {}'.format(epoch, loss, val_loss))
            print("Epoch Metrics | F1: {} | Recall {} | Precision: {}".format(np.mean(batch_history['f1'][epoch - 1]),
                                                                              np.mean(batch_history['recall'][epoch - 1]),
                                                                              np.mean(batch_history['precision'][epoch - 1])))
            a_max = np.argmax(batch_history['f1'][epoch - 1])
            print("Best F1 at Epoch {} Minibatch {}: {}\n".format(epoch, a_max, batch_history['f1'][epoch-1][a_max]))


        if save_model:
            self.model.save_weights(self.model_name + '.h5', overwrite=True)
Developer ID: ericrincon, Project: DeepAbstractRelevance, Lines of code: 57, Source file: CNN.py


Note: The DataLoader class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and any use or redistribution should follow the corresponding project's license. Please do not republish without permission.