This article collects typical usage examples of the Python method dataset.load_data. If you are unsure what dataset.load_data does or how to call it, the curated code examples below should help; you can also explore the other members of the dataset module.
The following presents 8 code examples of dataset.load_data, sorted by popularity by default.
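Before the examples, note that load_data has a different signature in each project below: some variants take a split name ('train', 'validation'), one takes parsed JSON, and others take a config object or a path. For orientation only, here is a minimal hypothetical stub of the split-name variant; the module layout, splits, and shapes are placeholders rather than any project's real code.

# dataset.py -- hypothetical minimal stub, NOT taken from any project below
import numpy as np

def load_data(split):
    """Return (features, labels) arrays for the requested split."""
    if split not in ('train', 'validation'):
        raise ValueError('unknown split: %s' % split)
    rng = np.random.RandomState(0 if split == 'train' else 1)
    features = rng.rand(100, 8).astype('float32')  # 100 samples, 8 features
    labels = rng.randint(0, 2, size=100)           # binary labels
    return features, labels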
Example 1: __init__
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def __init__(self, model_folder, checkpoint_file):
    # Make the model folder importable, then pull in its model/dataset modules.
    sys.path.append(model_folder)
    from model import get_model
    from dataset import load_data
    # Build the validation set and the policy network, then restore the weights.
    self.dataset = load_data('validation')
    self.sess = tf.InteractiveSession()
    self.model = get_model('policy')
    saver = tf.train.Saver()
    saver.restore(self.sess, checkpoint_file)
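Example 1 works by appending the model folder to sys.path so that the model.py and dataset.py files inside it become importable. A self-contained sketch of that dynamic-import pattern; the function and folder names here are illustrative, not from the original source.

import importlib
import sys

def import_from_folder(folder, module_name):
    """Import module_name from folder by extending sys.path.

    Mirrors the sys.path.append pattern above; returns the module object.
    """
    if folder not in sys.path:
        sys.path.append(folder)
    return importlib.import_module(module_name)

# e.g. dataset = import_from_folder('trained_models/run1', 'dataset')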
Example 2: reload_state
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def reload_state(checkpoint, training_state, config, args):
"""
Reload state when resuming training.
"""
model, id_to_token, id_to_char = BidafModel.from_checkpoint(
config['bidaf'], checkpoint)
if torch.cuda.is_available() and args.cuda:
model.cuda()
model.train()
optimizer = get_optimizer(model, config, training_state)
token_to_id = {tok: id_ for id_, tok in id_to_token.items()}
char_to_id = {char: id_ for id_, char in id_to_char.items()}
len_tok_voc = len(token_to_id)
len_char_voc = len(char_to_id)
with open(args.data) as f_o:
data, _ = load_data(json.load(f_o),
span_only=True, answered_only=True)
limit_passage = config.get('training', {}).get('limit')
data = tokenize_data(data, token_to_id, char_to_id, limit_passage)
data = get_loader(data, config)
assert len(token_to_id) == len_tok_voc
assert len(char_to_id) == len_char_voc
return model, id_to_token, id_to_char, optimizer, data
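Example 2 rebuilds token_to_id and char_to_id by inverting the checkpoint's id-to-symbol maps, then asserts that tokenization did not grow either vocabulary, since new symbols at resume time would invalidate the restored embedding matrices. A standalone sketch of that invert-then-check pattern (the toy vocabulary is illustrative):

# Invert an id-to-token map and verify the vocabulary stays frozen.
id_to_token = {0: '<pad>', 1: 'the', 2: 'cat'}      # toy example
token_to_id = {tok: id_ for id_, tok in id_to_token.items()}
len_tok_voc = len(token_to_id)

# ... tokenization would reuse token_to_id here ...

# If tokenization had silently added unseen tokens, this would fail.
assert len(token_to_id) == len_tok_voc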
Example 3: __init__
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def __init__(self, config, **opt):
    # Load the config used for training and merge in test-time options;
    # on conflicting keys, the test-time values win.
    with open(config, "r") as f:
        self.config = yaml.safe_load(f)
    self.config = Namespace(**{**self.config, **opt})
    # Load training data.pkl for the src and tgt vocabs
    self.data = load_data(self.config)
    # Load the trained model checkpoint
    device, device_ids = misc_utils.set_cuda(self.config)
    self.model, _ = build_model(None, self.config, device)
    self.model.eval()
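The merge above unpacks the saved training config and the test-time overrides into one Namespace, with the right-hand dict winning on duplicate keys. A self-contained sketch of that pattern; the keys batch_size, beam_size, and checkpoint are illustrative assumptions:

from argparse import Namespace
import yaml

train_yaml = "batch_size: 32\nbeam_size: 5\n"                 # stands in for the saved config file
overrides = {'beam_size': 10, 'checkpoint': 'model_best.pt'}  # stands in for **opt

base = yaml.safe_load(train_yaml)
merged = Namespace(**{**base, **overrides})  # right-hand dict wins on conflicts

print(merged.batch_size, merged.beam_size)   # -> 32 10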
Example 4: train
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def train(args):
    device = args.device
    load_path = args.load_path
    # load data
    train_data = load_data('train')
    val_data = load_data('validation')
    # load model
    with tf.device('/gpu:%d' % device):
        model = get_model('train')
    # trainer init
    optimizer = Config.optimizer
    train_step = optimizer.minimize(model.loss)
    # init session and saver
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    if load_path is None:
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, load_path)
        print("Model restored from %s" % load_path)
    # accuracy: compare argmax over the flattened 9*10*16 output
    pred = tf.reshape(model.pred, [-1, 9 * 10 * 16])
    label = tf.reshape(model.label, [-1, 9 * 10 * 16])
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    logging.basicConfig(filename='log.txt', level=logging.DEBUG)
    # train steps
    for i in range(Config.n_epoch):
        # training step
        batch_data, batch_label = train_data.next_batch(Config.minibatch_size)
        input_dict = {model.label: batch_label}
        for var, data in zip(model.inputs, batch_data):
            input_dict[var] = data
        sess.run(train_step, feed_dict=input_dict)
        # evaluation step
        if (i + 1) % Config.evalue_point == 0:
            batch_data, batch_label = val_data.next_batch(Config.minibatch_size)
            val_dict = {model.label: batch_label}
            for var, data in zip(model.inputs, batch_data):
                val_dict[var] = data
            score = accuracy.eval(feed_dict=val_dict)
            print("epoch %d, accuracy is %.2f" % (i, score))
            logging.info("epoch %d, accuracy is %.2f" % (i, score))
        # save step
        if (i + 1) % Config.check_point == 0:
            save_path = saver.save(sess, "%s/epoch-%d" % (Config.save_path, i))
            print("Model saved in file: %s" % save_path)
            logging.info("Model saved in file: %s" % save_path)
Example 5: train
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def train(args):
    device = args.device
    load_path = args.load_path
    # load data
    train_data = load_data('train')
    val_data = load_data('validation')
    # load model
    with tf.device('/gpu:%d' % device):
        model = get_model('train')
    # trainer init
    optimizer = Config.optimizer
    train_step = optimizer.minimize(model.loss)
    # init session and saver
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    if load_path is None:
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, load_path)
        print("Model restored from %s" % load_path)
    # accuracy: compare argmax over the flattened 9*10*16 output
    pred = tf.reshape(model.pred, [-1, 9 * 10 * 16])
    label = tf.reshape(model.label, [-1, 9 * 10 * 16])
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # train steps
    for i in range(Config.n_epoch):
        # training step
        batch_data, batch_label = train_data.next_batch(Config.minibatch_size)
        input_dict = {model.label: batch_label}
        for var, data in zip(model.inputs, batch_data):
            input_dict[var] = data
        sess.run(train_step, feed_dict=input_dict)
        # evaluation step
        if (i + 1) % Config.evalue_point == 0:
            batch_data, batch_label = val_data.next_batch(Config.minibatch_size)
            val_dict = {model.label: batch_label}
            for var, data in zip(model.inputs, batch_data):
                val_dict[var] = data
            score = accuracy.eval(feed_dict=val_dict)
            print("epoch %d, accuracy is %.2f" % (i, score))
        # save step
        if (i + 1) % Config.check_point == 0:
            save_path = saver.save(sess, "%s/epoch-%d" % (Config.save_path, i))
            print("Model saved in file: %s" % save_path)
Example 6: train
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def train(args):
    device = args.device
    load_path = args.load_path
    # load data
    train_data = load_data('train')
    val_data = load_data('validation')
    # load model
    with tf.device('/gpu:%d' % device):
        model = get_model('policy')
    # trainer init
    optimizer = Config.optimizer
    train_step = optimizer.minimize(model.loss)
    # init session and saver
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    if load_path is None:
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, load_path)
        print("Model restored from %s" % load_path)
    # accuracy: compare argmax over the flattened 9*10*16 output
    pred = tf.reshape(model.pred, [-1, 9 * 10 * 16])
    label = tf.reshape(model.label, [-1, 9 * 10 * 16])
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    logging.basicConfig(filename='log.txt', level=logging.DEBUG)
    # train steps
    for i in range(Config.n_epoch):
        # training step
        batch_data, batch_label = train_data.next_batch(Config.minibatch_size)
        input_dict = {model.label: batch_label}
        for var, data in zip(model.inputs, batch_data):
            input_dict[var] = data
        sess.run(train_step, feed_dict=input_dict)
        # evaluation step
        if (i + 1) % Config.evalue_point == 0:
            batch_data, batch_label = val_data.next_batch(Config.minibatch_size)
            val_dict = {model.label: batch_label}
            for var, data in zip(model.inputs, batch_data):
                val_dict[var] = data
            score = accuracy.eval(feed_dict=val_dict)
            print("epoch %d, accuracy is %.2f" % (i, score))
            logging.info("epoch %d, accuracy is %.2f" % (i, score))
        # save step
        if (i + 1) % Config.check_point == 0:
            save_path = saver.save(sess, "%s/epoch-%d" % (Config.save_path, i))
            print("Model saved in file: %s" % save_path)
            logging.info("Model saved in file: %s" % save_path)
Example 7: init_state
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def init_state(config, args):
token_to_id = {'': 0}
char_to_id = {'': 0}
print('Loading data...')
with open(args.data) as f_o:
data, _ = load_data(json.load(f_o), span_only=True, answered_only=True)
print('Tokenizing data...')
data = tokenize_data(data, token_to_id, char_to_id)
data = get_loader(data, config)
id_to_token = {id_: tok for tok, id_ in token_to_id.items()}
id_to_char = {id_: char for char, id_ in char_to_id.items()}
print('Creating model...')
model = BidafModel.from_config(config['bidaf'], id_to_token, id_to_char)
if args.word_rep:
print('Loading pre-trained embeddings...')
with open(args.word_rep) as f_o:
pre_trained = SymbolEmbSourceText(
f_o,
set(tok for id_, tok in id_to_token.items() if id_ != 0))
mean, cov = pre_trained.get_norm_stats(args.use_covariance)
rng = np.random.RandomState(2)
oovs = SymbolEmbSourceNorm(mean, cov, rng, args.use_covariance)
model.embedder.embeddings[0].embeddings.weight.data = torch.from_numpy(
symbol_injection(
id_to_token, 0,
model.embedder.embeddings[0].embeddings.weight.data.numpy(),
pre_trained, oovs))
else:
pass # No pretraining, just keep the random values.
# Char embeddings are already random, so we don't need to update them.
if torch.cuda.is_available() and args.cuda:
model.cuda()
model.train()
optimizer = get_optimizer(model, config, state=None)
return model, id_to_token, id_to_char, optimizer, data
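Example 7 initializes word embeddings from a pre-trained text source where possible and samples the out-of-vocabulary rows from a normal distribution fitted to the pre-trained vectors (the SymbolEmbSourceNorm helper). A hedged NumPy sketch of that OOV-sampling idea, independent of the project's helper classes:

import numpy as np

def sample_oov_rows(pretrained, n_oov, rng, use_covariance=False):
    """Draw n_oov embedding rows matching the pre-trained matrix's stats.

    A simplified stand-in for SymbolEmbSourceNorm: fit a mean (and optionally
    a covariance) to the known vectors, then sample the unseen rows from it.
    """
    mean = pretrained.mean(axis=0)
    if use_covariance:
        cov = np.cov(pretrained, rowvar=False)
        return rng.multivariate_normal(mean, cov, size=n_oov)
    std = pretrained.std(axis=0)
    return rng.normal(mean, std, size=(n_oov, pretrained.shape[1]))

# e.g. oov = sample_oov_rows(np.random.rand(1000, 300), 5, np.random.RandomState(2))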
Example 8: main
# Required import: import dataset [as alias]
# Or: from dataset import load_data [as alias]
def main():
    print('Loading data...')
    train, valid, test = load_data(args.dataset_path, valid_portion=args.valid_portion)
    train_data = RecSysDataset(train)
    valid_data = RecSysDataset(valid)
    test_data = RecSysDataset(test)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    # Item counts are fixed per dataset; the directory name selects them.
    if args.dataset_path.split('/')[-2] == 'diginetica':
        n_items = 43098
    elif args.dataset_path.split('/')[-2] in ['yoochoose1_64', 'yoochoose1_4']:
        n_items = 37484
    else:
        raise Exception('Unknown Dataset!')
    model = NARM(n_items, args.hidden_size, args.embed_dim, args.batch_size).to(device)
    if args.test:
        # Evaluation-only path: restore the latest checkpoint and report metrics.
        ckpt = torch.load('latest_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        recall, mrr = validate(test_loader, model)
        print("Test: Recall@{}: {:.4f}, MRR@{}: {:.4f}".format(args.topk, recall, args.topk, mrr))
        return
    optimizer = optim.Adam(model.parameters(), args.lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)
    for epoch in tqdm(range(args.epoch)):
        # train for one epoch
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr=200)
        recall, mrr = validate(valid_loader, model)
        print('Epoch {} validation: Recall@{}: {:.4f}, MRR@{}: {:.4f} \n'.format(epoch, args.topk, recall, args.topk, mrr))
        # save the latest model checkpoint after every epoch
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')
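main() relies on a collate_fn that batches variable-length sessions, but its definition is not shown above. A minimal hedged version, assuming each dataset element is a (session_item_ids, target_item) pair; the real project's collate_fn may sort, transpose, or lay out tensors differently:

import torch

def collate_fn(batch):
    """Pad variable-length sessions to the batch's max length.

    Returns (padded_seqs, lengths, targets); this interface is a guess at
    what the DataLoader calls above expect.
    """
    batch.sort(key=lambda x: len(x[0]), reverse=True)  # longest sessions first
    seqs, targets = zip(*batch)
    lengths = [len(s) for s in seqs]
    padded = torch.zeros(len(seqs), max(lengths), dtype=torch.long)
    for i, s in enumerate(seqs):
        padded[i, :len(s)] = torch.tensor(s, dtype=torch.long)
    return padded, torch.tensor(lengths), torch.tensor(targets)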