This article collects typical usage examples of the Python DataLoader class. If you have been wondering what exactly the DataLoader class does, how to use it, or what DataLoader code looks like in practice, the hand-picked class examples below may help.
Fifteen code examples of the DataLoader class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: get_data
def get_data():
    # gather every data file under Data/ and build one (X, y) pair per file
    file_paths = DataLoader.get_all_files('Data')
    X_list, y_list = [], []
    for file_path in file_paths:
        data_frame = pd.read_csv(file_path)
        abstract_text, abstract_labels = DataLoader.extract_abstract_and_labels(data_frame)
        mesh_terms, title = DataLoader.extract_mesh_and_title(data_frame)
        X = []
        y = []
        for i in range(abstract_text.shape[0]):
            abstract_str = abstract_text[i]
            mesh_str = mesh_terms[i]
            title_str = title[i]
            label = abstract_labels[i]
            # concatenate abstract, MeSH terms, and title into one document string
            text = " ".join([abstract_str, mesh_str, title_str])
            X.append(text)
            y.append(label)
        X_list.append(X)
        y_list.append(y)
    return X_list, y_list
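A hedged usage sketch: assuming a Data/ directory of CSVs in the format DataLoader expects, each entry of X_list holds the combined abstract/MeSH/title strings for one file, aligned with the labels in y_list.

X_list, y_list = get_data()
# one (X, y) pair per CSV file found under Data/
print(len(X_list), len(y_list))
print(X_list[0][0][:80], y_list[0][0])  # first document string and its label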
Example 2: predict
def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """
    # load the saved model
    classifier = pickle.load(open("best_model.p", "rb"))
    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)
    # test it on some examples from the test set
    dataset = 'mnist_train.csv'
    datasets = DataLoader.load_kaggle_mnist(dataset)
    test_set_x, test_set_y = datasets[2]
    print(type(test_set_x))
    print(type(test_set_y))
    test_set_x = test_set_x.get_value()
    test_set_y = test_set_y.eval()
    predicted_values = predict_model(test_set_x[20:30])
    print("Sample Neural Prediction")
    print("Predicted values for examples 20-29 in the test set:")
    print(predicted_values)
    print("The actual values are")
    print(test_set_y[20:30])
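For this example to run, a trained classifier must already have been pickled under the same file name. A minimal sketch of that saving step, where classifier stands for whatever object the training script produced (an assumption, since the training code is not shown here):

import pickle
# persist the trained classifier so predict() can reload it later
with open("best_model.p", "wb") as f:
    pickle.dump(classifier, f)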
Example 3: predict_main
def predict_main(classifier_pickle):
    data = DataLoader.load_kaggle_mnist("mnist_train.csv", neural=False)
    X = numpy.array(data[2][0])
    X = X / 255.0 * 2 - 1
    Y = numpy.array(data[2][1])
    predictor = MLutil.Predictor(classifier_pickle, 'SVM')
    predicted_values = predictor.make_prediction(X)
    predAnalysis = MLutil.PredictionAccuracies(predicted_values, Y)
    print(predAnalysis.get_misclass_rate())
    print(predAnalysis.get_indicies_misclassifications())
    pickle.dump(predAnalysis.get_indicies_misclassifications(), open("svm_indicies.p", "wb"))
    return predAnalysis.get_indicies_misclassifications()
Example 4: main
def main():
    st = time.time()
    # training parameters
    result_path = 'results/PB2_A_spam_polluted_NB_Gaussian.acc'
    model_name = 'spam_'
    train_data_path = 'data/spam_polluted/train/data.pickle'
    test_data_path = 'data/spam_polluted/test/data.pickle'
    tr_data = loader.load_pickle_file(train_data_path)
    te_data = loader.load_pickle_file(test_data_path)
    print('{:.2f} Data loaded!'.format(time.time() - st))
    # start training
    print('{:.2f} Building model...'.format(time.time() - st))
    model = m.NBGaussian()
    model.build(tr_data[0], tr_data[1])
    print('{:.2f} Predicting...'.format(time.time() - st))
    tr_pred = model.predict(tr_data[0])
    te_pred = model.predict(te_data[0])
    print('{:.2f} Calculating results...'.format(time.time() - st))
    tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
    te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]
    print('{:.2f} Final results. Train acc: {}, Test acc: {}'.format(time.time() - st, tr_acc, te_acc))
    result = {}
    result['TrainingAcc'] = tr_acc
    result['TestingAcc'] = te_acc
    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
Example 5: build_model
def build_model(training_data, config):
    '''
    Build a model from the config and training data.
    '''
    m_type = config[c.CLSFR_TYPE]
    if m_type == c.DT_WITH_IG:
        # decision tree: load candidate split thresholds, then split on information gain
        threshs = loader.load_arrays(config[c.THRESHS])
        tree = Tree.Tree()
        tree.build(utils.split_on_ig, training_data[0],
                   training_data[1], threshs, config[c.TERM_CON], int(config[c.TERM_THRESH]))
        return tree
    elif m_type == c.REGRESSION_TREE:
        # regression tree: same thresholds, but split on MSE
        threshs = loader.load_arrays(config[c.THRESHS])
        tree = Tree.Tree()
        tree.build(utils.split_on_mse, training_data[0],
                   training_data[1], threshs, config[c.TERM_CON], float(config[c.TERM_THRESH]))
        return tree
    elif m_type == c.REGRESSION:
        # linear regression
        reg_model = rmodel.Regression()
        reg_model.build(training_data[0], training_data[1])
        return reg_model
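A hedged sketch of a config that would take the decision-tree branch. The key constants come from the module c as used above, but the concrete values (the thresholds path, termination criterion, and threshold) are illustrative assumptions:

# hypothetical config; the actual key constants live in the module c
config = {
    c.CLSFR_TYPE: c.DT_WITH_IG,           # select the information-gain tree
    c.THRESHS: 'data/spambase.threshes',  # assumed path to split thresholds
    c.TERM_CON: 'layer',                  # assumed termination criterion name
    c.TERM_THRESH: '2',                   # parsed with int() in this branch
}
tree = build_model(training_data, config)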
Example 6: main
def main():
    kernel = c.COSINE
    # training parameters
    result_path = 'results/PB2_spam.acc'
    model_name = 'digits_' + kernel
    tr_data_path = 'data\\digits\\tr_f_l_10.pickle'
    te_data_path = 'data\\digits\\te_f_l_10.pickle'
    # load and preprocess training data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)
    # flatten the label column vectors
    tr_data[1] = np.transpose(tr_data[1])[0]
    te_data[1] = np.transpose(te_data[1])[0]
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, tr_data[0])
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, te_data[0])
    # start training
    st = time.time()
    print('{:.2f} Start training.'.format(time.time() - st))
    for r in (0.15, 0.1):
        clf = kNN.kNN(kernel=kernel, dataset=c.DS_DIGITS)
        clf.fit(tr_data[0], tr_data[1])
        tr_pred = clf.predict(tr_data[0], r=r)
        te_pred = clf.predict(te_data[0], r=r)
        tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
        te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]
        print('{:.2f} Final results with kernel {} and r={}. Train acc: {}, Test acc: {}'.format(time.time() - st, kernel, r, tr_acc, te_acc))
Example 7: main
def main():
    target = 'v2'
    # training parameters
    k = 10  # number of folds
    layer_thresh = 2
    T = 50
    threshes_path = 'data/spambase.threshes'
    # load and preprocess training data
    training_data = loader.load_dataset('data/spambase.data')
    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)
    # split into k folds and take fold 0 as the held-out set
    k_folds = Preprocess.prepare_k_folds(training_data, k)
    tr_data, te_data = Preprocess.get_i_fold(k_folds, 0)
    f_cur = [x[0] for x in tr_data[0]]  # values of the first feature
    t = dt.DecisionTree()
    if target == 'v1':
        for i in range(100):
            h_y = t.compute_entropy(tr_data[1])
            thresh = threshes[0][30]
            ig = t.compute_ig(f_cur, tr_data[1], thresh, h_y)
    else:
        h_y = t.compute_entropy_v2(tr_data[1])
        thresh = threshes[0][0]
        ig = t.compute_ig_v2(f_cur, tr_data[1], thresh, h_y)
Example 8: main
def main():
    # training parameters
    result_path = 'results/housingLiR_1.mse'
    model_name = 'housing_shiftAndScale'
    # normalization = Preprocess.zero_mean_unit_var
    normalization = Preprocess.shift_and_scale
    # cols_not_norm = (0, 7, 12)
    cols_not_norm = []
    # load and preprocess training data
    training_data = loader.load_dataset('data/housing_train.txt')
    testing_data = loader.load_dataset('data/housing_test.txt')
    Preprocess.normalize_features_all(normalization, training_data[0], testing_data[0], cols_not_norm)
    # start training
    model = rm.LinearRegression()
    model.build(training_data[0], training_data[1])
    training_mse = model.test(training_data[0], training_data[1], util.mse)
    testing_mse = model.test(testing_data[0], testing_data[1], util.mse)
    print('Error for training data is:')
    print(training_mse)
    print('Error for testing data is:')
    print(testing_mse)
    result = {}
    result['TrainingMSE'] = str(training_mse)
    result['TestingMSE'] = str(testing_mse)
    result['Theta'] = str(model.theta)
    # log the training result to file
    util.write_result_to_file(result_path, model_name, result)
Example 9: test
def test():
    # load and preprocess training data
    # tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)
    model = loader.load_pickle_file(model_path)
    # te_pred_dict = loader.load_pickle_file(te_pred_dict_path)
    # collect the predictions of every pairwise (one-vs-one) classifier
    test_pred_dict = {}
    for i in range(9):
        test_pred_dict[i] = {}
        for j in range(i + 1, 10):
            clf = model[i][j]  # pairwise classifier for classes i and j
            te_pred = clf.predict(te_data[0])
            test_pred_dict[i][j] = te_pred
    te_n = len(te_data[1])
    te_pred = np.zeros((1, te_n))[0]
    for i in range(te_n):
        # tally one vote per pairwise classifier: +1 favours the first
        # class of the pair, -1 favours the second
        votes = np.zeros((10,), dtype=int)
        for j in range(10):
            for k in range(j):
                votes[j] += 1 if test_pred_dict[k][j][i] == -1 else 0
            if j in test_pred_dict:
                for kk in test_pred_dict[j]:
                    votes[j] += 1 if test_pred_dict[j][kk][i] == 1 else 0
        count = np.bincount(votes)
        if count[-1] == 1:
            # a unique winner
            te_pred[i] = votes.argmax()
        else:
            # tie: find the first two classes sharing the top vote count
            tie_ind = [votes.argmax()]
            cc = 0
            for ind_v, v in enumerate(votes):
                if v == votes.max():
                    if cc == 1:
                        tie_ind.append(ind_v)
                        break
                    else:
                        cc += 1
            # settle the tie with the head-to-head classifier of the two
            te_pred[i] = tie_ind[0] if test_pred_dict[tie_ind[0]][tie_ind[1]][i] == 1 else tie_ind[1]
            print('{} Tie! {} wins.'.format(count[-1], te_pred[i]))
    acc = 0
    for ind_l, l in enumerate(te_data[1]):
        acc += 1 if l == te_pred[ind_l] else 0
    acc /= te_n
    # acc = (te_data[1] == te_pred).sum() / te_n
    print('Acc: {}'.format(acc))
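The loop above reconstructs multi-class labels from pairwise (one-vs-one) classifiers: classifier (i, j) votes for class i when it outputs +1 and for class j when it outputs -1, and an exact tie between the two most-voted classes is settled by their head-to-head classifier. A minimal sketch of the voting core, with hypothetical pairwise outputs for a single sample:

import numpy as np

# hypothetical outputs of three pairwise classifiers for one sample
pairwise = {(0, 1): +1, (0, 2): -1, (1, 2): -1}
votes = np.zeros(3, dtype=int)
for (i, j), pred in pairwise.items():
    votes[i if pred == 1 else j] += 1  # +1 favours i, -1 favours j
print(votes.argmax())  # class 2 wins with two pairwise victories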
Example 10: get_cs
def get_cs(data_path, cs_path):
    # load the DP cheat sheet if it is cached; otherwise compute and cache it
    cs = None
    if os.path.isfile(cs_path):
        cs = loader.load_pickle_file(cs_path)
        print('CS loaded.')
    else:
        print('Start computing cs.')
        data = loader.load_pickle_file(data_path)
        cs = dp_compute_cs(data[0])
        loader.save(cs_path, cs)
        print('CS saved.')
    return cs
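Usage is a single call per path pair; a sketch with illustrative paths, where the second call returns immediately from the cached pickle:

cs = get_cs('data/digits/tr_f_l.pickle', 'data/digits/tr.cs')  # computes, then saves
cs = get_cs('data/digits/tr_f_l.pickle', 'data/digits/tr.cs')  # loads the cached copy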
Example 11: random_select_data
def random_select_data(tr_save_path, sel_tr_save_path, percent):
    all_tr = loader.load_pickle_file(tr_save_path)
    tr_l_ind_dict = {}
    selected_tr_data = [[], []]
    # group sample indices by label (digits 0-9)
    for i in range(10):
        tr_l_ind_dict[i] = [l_ind for l_ind, l in enumerate(all_tr[1]) if l == i]
    # sample the same fraction from each class without replacement
    for i in range(10):
        i_n = len(tr_l_ind_dict[i])
        pick_n = int(percent * i_n)
        cur_pick_ind = np.random.choice(tr_l_ind_dict[i], pick_n, replace=False).tolist()
        selected_tr_data[0].extend([x for x_ind, x in enumerate(all_tr[0]) if x_ind in cur_pick_ind])
        selected_tr_data[1].extend([y for y_ind, y in enumerate(all_tr[1]) if y_ind in cur_pick_ind])
    loader.save(sel_tr_save_path, selected_tr_data)
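A hedged usage sketch that keeps a random 20% of each digit class; the paths are illustrative:

# stratified subsample: 20% of each of the 10 label groups
random_select_data('data/digits/tr_f_l.pickle',
                   'data/digits/tr_f_l_20pct.pickle',
                   percent=0.2)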
Example 12: main
def main():
    is_sklearn = False
    # kernel = c.COSINE
    # kernel = c.GAUSSIAN
    kernel = c.POLY
    # training parameters
    result_path = 'results/PB2_spam.acc'
    model_name = 'digits_' + kernel
    model_path = 'data/PB1_B_digits_sk_Gaussian_1.model'
    # tr_data_path = 'data\\digits\\tr_f_l.pickle'
    # te_data_path = 'data\\digits\\te_f_l.pickle'
    tr_data_path = 'data\\digits\\tr_f_l_10.pickle'
    te_data_path = 'data\\digits\\te_f_l_10.pickle'
    # load and preprocess training data
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)
    # flatten the label column vectors
    tr_data[1] = np.transpose(tr_data[1])[0]
    te_data[1] = np.transpose(te_data[1])[0]
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, tr_data[0])
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, te_data[0])
    # start training
    models = []
    st = time.time()
    print('{:.2f} Start training.'.format(time.time() - st))
    for k in (1, 3, 7):
        if not is_sklearn:
            clf = kNN.kNN(kernel=kernel)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0], k=k)
            te_pred = clf.predict(te_data[0], k=k)
        else:
            clf = KNeighborsClassifier(n_neighbors=k, metric=cosine_distances)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0])
            te_pred = clf.predict(te_data[0])
        tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
        te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]
        models.append(clf)
        print('{:.2f} Final results with kernel {} and k={}. Train acc: {}, Test acc: {}'.format(time.time() - st, kernel, k, tr_acc, te_acc))
Example 13: get_distance
def get_distance(directory):
    '''Return the distance in metres.'''
    file = directory + '/info.dat'
    info = dl.load(file)[0]
    distancestr = info['Distance']
    # strip the 'cm' suffix and convert centimetres to metres
    distance = 0.01 * float(distancestr.replace('cm', ''))
    return distance
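Given the 'cm' handling above, a record such as {'Distance': '150cm'} in info.dat would come back as 1.5; a sketch with an assumed directory name:

d = get_distance('run01')  # reads run01/info.dat
print(d)                   # e.g. 1.5 for a stored value of '150cm'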
Example 14: compute_feature_mean
def compute_feature_mean(features, save_path):
    n, d = np.shape(features)
    means = []
    for i in range(d):
        cur_f = features[:, i]
        means.append(np.nanmean(cur_f))
        # cur_mean = 0
        # for f in features:
        #     if not np.isnan(f[i]):
        #         cur_mean += f[i]
        # means.append(cur_mean / n)
    means = np.array(means)
    loader.save(save_path, means)
    return means
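Since the loop is just a NaN-aware column mean, numpy can produce the same array in one vectorized call; a sketch assuming features is a 2-D float array:

means = np.nanmean(features, axis=0)  # column-wise means, NaNs ignored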
Example 15: train
def train(self, X, y, model, batch_generator, n_epochs=50, optim_algo='adam',
          criterion='categorical_crossentropy', save_model=True, verbose=2,
          plot=True, batch_size=64):
    # resolve the optimizer name into a configured Keras optimizer
    if optim_algo == 'adam':
        optim_algo = Adam()
    elif optim_algo == 'sgd':
        optim_algo = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    elif optim_algo == 'adagrad':
        optim_algo = Adagrad()
    self.model.compile(optimizer=optim_algo, loss=criterion)
    loss_train_history = []
    loss_val_history = []
    batch_history = {'f1': [], 'recall': [], 'precision': []}
    for epoch in range(1, n_epochs + 1):
        batch_f1_history = []
        batch_precision_history = []
        batch_recall_history = []
        for X, y in batch_generator.next_batch():
            history = self.model.fit(X, y, nb_epoch=1, batch_size=batch_size,
                                     validation_split=0.2, verbose=0)
            val_loss, loss = history.history['val_loss'][0], history.history['loss'][0]
            loss_train_history.append(loss)
            loss_val_history.append(val_loss)
            # score this minibatch's validation split
            truth = self.model.validation_data[3]
            truth = dl.onehot2list(truth)
            batch_prediction = self.predict_classes(self.model.validation_data[0:3])
            batch_f1 = metrics.f1_score(truth, batch_prediction)
            batch_recall = metrics.recall_score(truth, batch_prediction)
            batch_precision = metrics.precision_score(truth, batch_prediction)
            batch_f1_history.append(batch_f1)
            batch_recall_history.append(batch_recall)
            batch_precision_history.append(batch_precision)
        batch_history['f1'].append(batch_f1_history)
        batch_history['recall'].append(batch_recall_history)
        batch_history['precision'].append(batch_precision_history)
        print('Epoch: {} | Train loss: {} | Valid loss: {}'.format(epoch, loss, val_loss))
        print('Epoch Metrics | F1: {} | Recall: {} | Precision: {}'.format(
            np.mean(batch_history['f1'][epoch - 1]),
            np.mean(batch_history['recall'][epoch - 1]),
            np.mean(batch_history['precision'][epoch - 1])))
        a_max = np.argmax(batch_history['f1'][epoch - 1])
        print('Best F1 at Epoch {} Minibatch {}: {}\n'.format(epoch, a_max, batch_history['f1'][epoch - 1][a_max]))
    if save_model:
        self.model.save_weights(self.model_name + '.h5', overwrite=True)
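A hedged usage sketch: the trainer instance, generator, and data names below are assumptions, and the method expects batch_generator to expose next_batch() as used above:

# hypothetical call; X_train and y_train only seed the signature, since
# training actually iterates over batch_generator.next_batch()
trainer.train(X_train, y_train, model=None, batch_generator=gen,
              n_epochs=10, optim_algo='sgd', batch_size=32)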