本文整理汇总了Python中utils.Dataset方法的典型用法代码示例。如果您正苦于以下问题：Python utils.Dataset方法的具体用法？Python utils.Dataset怎么用？Python utils.Dataset使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils的用法示例。
在下文中一共展示了utils.Dataset方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _gather_rollouts
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def _gather_rollouts(self, policy, num_rollouts):
    """Run `policy` in the environment and collect every transition.

    Executes `num_rollouts` episodes. Each episode starts from
    `self._env.reset()` and ends when the environment reports done or the
    step counter has reached `self._max_rollout_length`. Rendering, action
    selection and environment stepping are each wrapped in
    `timeit.start` / `timeit.stop` calls.

    Returns:
        The `utils.Dataset` holding one
        (state, action, next_state, reward, done) entry per step.
    """
    rollouts = utils.Dataset()

    for _ in range(num_rollouts):
        state = self._env.reset()
        step = 0
        finished = False

        while not finished:
            if self._render:
                timeit.start('render')
                self._env.render()
                timeit.stop('render')

            timeit.start('get action')
            action = policy.get_action(state)
            timeit.stop('get action')

            timeit.start('env step')
            next_state, reward, env_done, _ = self._env.step(action)
            timeit.stop('env step')

            # The length cap is tested before the counter is incremented, and
            # the stored done flag includes that cap, not only the env signal.
            finished = env_done or (step >= self._max_rollout_length)
            rollouts.add(state, action, next_state, reward, finished)

            state = next_state
            step += 1

    return rollouts
示例2: get_ax
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib Axes used by the notebook's visualizations.

    Acts as the single place that controls graph sizes: the figure is
    `size * cols` wide and `size * rows` tall. Change the default `size`
    to change the size of rendered images.

    Returns:
        Whatever `plt.subplots` returns as its axes for a `rows` x `cols` grid.
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes
# ## Dataset
#
# Create a synthetic dataset
#
# Extend the Dataset class and add a method to load the shapes dataset, `load_shapes()`, and override the following methods:
#
# * load_image()
# * load_mask()
# * image_reference()
# In[4]:
示例3: eval_bs
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
    """Evaluate `model` on `test_set` one example at a time via `eval_bs_batch`.

    Draws `int(params.test_sample_ratio * len(test_set.pairs))` batches of
    size 1 from `test_set.generator`, scores each with `eval_bs_batch`, and
    shows the running averages of the '1_f', '2_f', 'l_f' and 'su4_f' scores
    (times 100) on the progress bar.

    When both `params.test_save_results` and `params.model_path_prefix` are
    set, the per-example text returned by `eval_bs_batch` is stored as
    `NNNNNN.txt` members of `<model_path_prefix>.results.tgz`.

    Returns:
        None.
    """
    test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
    n_samples = int(params.test_sample_ratio * len(test_set.pairs))

    if params.test_save_results and params.model_path_prefix:
        result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
    else:
        result_file = None

    # The archive must be closed explicitly (also on error); otherwise the
    # gzip-compressed tar is not reliably finalised on disk.
    try:
        model.eval()
        r1, r2, rl, rsu4 = 0, 0, 0, 0
        prog_bar = tqdm(range(1, n_samples + 1))
        for i in prog_bar:
            batch = next(test_gen)
            scores, file_content = eval_bs_batch(batch, model, vocab, pack_seq=params.pack_seq,
                                                 beam_size=params.beam_size,
                                                 min_out_len=params.min_out_len,
                                                 max_out_len=params.max_out_len,
                                                 len_in_words=params.out_len_in_words,
                                                 details=result_file is not None)
            if file_content:
                # TarInfo.size must be the encoded byte length, not the
                # character count, so encode before measuring.
                file_content = file_content.encode('utf-8')
                file_info = tarfile.TarInfo(name='%06d.txt' % i)
                file_info.size = len(file_content)
                result_file.addfile(file_info, fileobj=BytesIO(file_content))
            if scores:
                r1 += scores[0]['1_f']
                r2 += scores[0]['2_f']
                rl += scores[0]['l_f']
                rsu4 += scores[0]['su4_f']
            # Averages divide by the number of examples seen so far (i is 1-based).
            prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100), R2='%.4g' % (r2 / i * 100),
                                 RL='%.4g' % (rl / i * 100), RSU4='%.4g' % (rsu4 / i * 100))
    finally:
        if result_file is not None:
            result_file.close()
示例4: read_data_coco
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def read_data_coco(datajson, config, add_gt=False, load_coco_class=False):
    """Build a `Dataset` of image paths and ids from a COCO-style JSON file.

    Args:
        datajson: path of the JSON annotation file; its "images" list is read.
        config: object providing `imgpath` and `coco2014_to_2017`.
        add_gt: when true, the data dict also carries an (empty) "gt" list
            and the flag is forwarded to `Dataset`.
        load_coco_class: when true, `add_coco(config, datajson)` is called
            after the JSON file has been loaded.

    Returns:
        `Dataset(data, add_gt=add_gt)` where `data["imgs"]` holds image file
        paths and `data["ids"]` the matching integer image ids.
    """
    with open(datajson, "r") as json_file:
        annotations = json.load(json_file)

    if load_coco_class:
        add_coco(config, datajson)

    data = {"imgs": [], "ids": []}
    if add_gt:
        data["gt"] = []

    # read coco annotation file
    for image_entry in annotations["images"]:
        image_id = int(image_entry["id"])

        file_name = image_entry["file_name"]
        if config.coco2014_to_2017:
            # Keep only the part after the last underscore of the file name.
            file_name = file_name.split("_")[-1]
        image_path = os.path.join(config.imgpath, file_name)

        data["imgs"].append(image_path)
        data["ids"].append(image_id)
        # Ground truth (bounding boxes and so on) is not loaded here, even
        # when add_gt is set.

    return Dataset(data, add_gt=add_gt)
# for testing, dataset -> {"imgs":[],"ids":[]}, imgs is the image file path,
示例5: main
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def main(_):
    """Begins the execution of the program.

    Depending on `FLAGS` this creates dataset records and exits, builds and
    tests the network graph and exits, trains the network, or tests it on
    `FLAGS.test_source`.

    Args:
        _ : Tensorflow flags app instance
    """
    if FLAGS.create != "":
        dataset = utils.Dataset(FLAGS)
        dataset.create_records(FLAGS.create)
        exit()

    # Run-directory setup and config logging are skipped when testing.
    if not FLAGS.test:
        priliminary_checks(FLAGS)
        idx = get_runid(FLAGS)
        create_rundirs(FLAGS, idx)
        dump_model_params(FLAGS)
        log_config(idx, FLAGS.__flags)

    if FLAGS.archi:
        net = nnet.Model(FLAGS, is_training=False)
        net.test_graph()
        exit()

    # Square images: 600 for the 'maps' dataset, 256 otherwise.
    FLAGS.h = 600 if FLAGS.dataset == 'maps' else 256
    FLAGS.w = FLAGS.h

    if FLAGS.train or FLAGS.resume:
        net = nnet.Model(FLAGS, is_training=True)
        net.train()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Done training the network...')
    else:
        print(' - Testing the model...')
        net = nnet.Model(FLAGS, is_training=False)
        net.test(FLAGS.test_source)
示例6: __init__
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def __init__(self, rootDir):
    """Initialise the base `utils.Dataset` and remember the root directory.

    Args:
        rootDir: value stored unchanged on the instance as `ROOT_DIR`.
    """
    # The base initialiser runs first, then ROOT_DIR is set on the instance.
    utils.Dataset.__init__(self)
    self.ROOT_DIR = rootDir
示例7: evaluate_coco
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.

    dataset: A Dataset object with validation data
    coco: COCO API object used to load the results and as ground truth
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    image_ids: optional sequence of dataset image ids to evaluate; when None
        or empty, all of dataset.image_ids are used

    Prints the COCO summary plus total and per-image prediction time.
    Returns None.
    """
    # Pick COCO images from the dataset.
    # An explicit None/empty check is used instead of `image_ids or ...`,
    # because the truth value of a multi-element NumPy array raises ValueError.
    if image_ids is None or len(image_ids) == 0:
        image_ids = dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[image_id]["id"] for image_id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection (only this call counts towards the prediction time)
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format; the slice passes a one-element
        # list holding this image's COCO id.
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)
############################################################
# Training
############################################################
示例8: evaluate_coco
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.

    dataset: A Dataset object with validation data
    coco: COCO API object used to load the results and as ground truth
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    image_ids: optional sequence of dataset image ids to evaluate; when None
        or empty, all of dataset.image_ids are used

    Prints the COCO summary plus total and per-image prediction time.
    Returns None.
    """
    # Pick COCO images from the dataset.
    # An explicit None/empty check is used instead of `image_ids or ...`,
    # because the truth value of a multi-element NumPy array raises ValueError.
    if image_ids is None or len(image_ids) == 0:
        image_ids = dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[image_id]["id"] for image_id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection (only this call counts towards the prediction time)
        t = time.time()
        r = model.detect([image])[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format; the slice passes a one-element
        # list holding this image's COCO id.
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)
############################################################
# Training
############################################################
示例9: read_data
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def read_data(config, datatype, loadExistModelShared=False):
    """Load the preprocessed `datatype` split and wrap it in a `Dataset`.

    Reads `<datatype>_data.p` and `<datatype>_shared.p` from
    `config.prepropath`. The word/char index tables are either loaded from
    `<config.outpath>/shared.p` (when `loadExistModelShared` is true) or
    built from the counters in the shared dict and written to that file.
    Words that have a vector in `shared['word2vec']` but no entry in
    `shared['word2idx']` get their own index table and embedding matrix.

    Args:
        config: object providing `prepropath`, `outpath`, `word_count_thres`,
            `char_count_thres` and `image_feat_dim`.
        datatype: split name used in the file names and passed to `Dataset`.
        loadExistModelShared: load the existing model-side shared file
            instead of creating a new one.

    Returns:
        `Dataset(data, datatype, shared=shared, valid_idxs=valid_idxs)`.

    Raises:
        AssertionError: if the first feature in `shared['pid2feat']` does not
            have `config.image_feat_dim` entries along its first axis.
    """
    data_path = os.path.join(config.prepropath, "%s_data.p" % datatype)
    shared_path = os.path.join(config.prepropath, "%s_shared.p" % datatype)

    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # paths must only ever point at trusted, locally produced files.
    with open(data_path, "rb") as f:
        data = pickle.load(f)
    with open(shared_path, "rb") as f:
        # word ids are added to this dict below, either new or loaded from an existing file
        shared = pickle.load(f)

    num_examples = len(data['q'])
    # Materialised as a list so it is a real list on Python 3 as well.
    valid_idxs = list(range(num_examples))

    print("loaded %s/%s data points for %s" % (len(valid_idxs), num_examples, datatype))

    # this is the file for the model's training, with word ids and so on; if
    # loading is requested it is read from disk, otherwise a new one is written
    model_shared_path = os.path.join(config.outpath, "shared.p")
    if loadExistModelShared:
        with open(model_shared_path, "rb") as f:
            model_shared = pickle.load(f)
        for key in model_shared:
            shared[key] = model_shared[key]
    else:
        # no fine tuning of word vectors:
        # only words counted more than word_count_thres times that are NOT in
        # the pretrained word2vec get a new index; indices start at 2 because
        # 0 and 1 are reserved for <NULL> and <UNK> below.
        new_words = [word for word, count in shared['wordCounter'].items()
                     if count > config.word_count_thres and word not in shared['word2vec']]
        shared['word2idx'] = {word: idx + 2 for idx, word in enumerate(new_words)}

        new_chars = [char for char, count in shared['charCounter'].items()
                     if count > config.char_count_thres]
        shared['char2idx'] = {char: idx + 2 for idx, char in enumerate(new_chars)}

        NULL = "<NULL>"
        UNK = "<UNK>"
        shared['word2idx'][NULL] = 0
        shared['char2idx'][NULL] = 0
        shared['word2idx'][UNK] = 1
        shared['char2idx'][UNK] = 1

        # existing words in word2vec will be put after len(new word)+2
        # A context manager closes the output file deterministically.
        with open(model_shared_path, "wb") as f:
            pickle.dump({"word2idx": shared['word2idx'], 'char2idx': shared['char2idx']}, f)

    # load the word embedding for words in word2vec that have no new index;
    # sorting makes the index assignment deterministic
    existing_words = [word for word in sorted(shared['word2vec'].keys())
                      if word not in shared['word2idx']]
    shared['existing_word2idx'] = {word: idx for idx, word in enumerate(existing_words)}

    # idx -> vector
    idx2vec = {idx: shared['word2vec'][word] for word, idx in shared['existing_word2idx'].items()}

    # load all these vectors into a matrix so word -> idx -> vector works;
    # iterating 0..len-1 keeps row i equal to the vector of index i, so an
    # embedding lookup with that idx returns the right row
    shared['existing_emb_mat'] = np.array([idx2vec[idx] for idx in range(len(idx2vec))], dtype="float32")

    # Check the feature dimension on the first stored feature.
    first_pid = next(iter(shared['pid2feat']))
    feat_dim = shared['pid2feat'][first_pid].shape[0]
    assert config.image_feat_dim == feat_dim, ("image dim is not %s, it is %s" % (config.image_feat_dim, feat_dim))

    return Dataset(data, datatype, shared=shared, valid_idxs=valid_idxs)
示例10: test
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def test():
    """Run the saved model over the test file and write summaries to disk.

    Loads the embedding and the word-to-id map named in `args`, reads one
    JSON example per line from `args.test_dir`, restores the checkpoint at
    `args.load_dir`, and for every document writes the reference summary to
    `args.ref/<n>.txt` and the selected sentences to `args.hyp/<n>.txt`
    (n counts documents from 1). Finally prints the throughput, measured
    over the forward passes only.
    """
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.test_dir) as f:
        examples = [json.loads(line) for line in f]
    test_dataset = utils.Dataset(examples)
    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)

    if use_gpu:
        checkpoint = torch.load(args.load_dir)
    else:
        checkpoint = torch.load(args.load_dir, map_location=lambda storage, loc: storage)
        # checkpoint['args'].device holds the device used at train time;
        # when testing on a CPU it must be overridden with None
        checkpoint['args'].device = None

    saved_args = checkpoint['args']
    net = getattr(models, saved_args.model)(saved_args)
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()

    doc_num = len(test_dataset)
    time_cost = 0
    file_id = 1
    for batch in tqdm(test_iter):
        features, _, summaries, doc_lens = vocab.make_features(batch)

        inputs = Variable(features).cuda() if use_gpu else Variable(features)
        t1 = time()
        probs = net(inputs, doc_lens)
        t2 = time()
        time_cost += t2 - t1

        # probs is consumed in consecutive slices of doc_len entries, one
        # slice per document of the batch
        start = 0
        for doc_id, doc_len in enumerate(doc_lens):
            stop = start + doc_len
            doc_probs = probs[start:stop]

            # select at most args.topk sentences, then sort the indices so
            # the sentences are emitted in ascending index order
            k = min(args.topk, doc_len)
            chosen = doc_probs.topk(k)[1].cpu().data.numpy()
            chosen.sort()

            sentences = batch['doc'][doc_id].split('\n')[:doc_len]
            hyp = [sentences[index] for index in chosen]
            ref = summaries[doc_id]

            out_name = str(file_id) + '.txt'
            with open(os.path.join(args.ref, out_name), 'w') as f:
                f.write(ref)
            with open(os.path.join(args.hyp, out_name), 'w') as f:
                f.write('\n'.join(hyp))

            start = stop
            file_id = file_id + 1

    print('Speed: %.2f docs / s' % (doc_num / time_cost))
示例11: predict
# 需要导入模块: import utils [as 别名]
# 或者: from utils import Dataset [as 别名]
def predict(examples):
    """Summarise `examples` with the saved model and write results to disk.

    Loads the embedding and the word-to-id map named in `args`, restores the
    checkpoint at `args.load_dir`, and for every document writes the selected
    sentences, joined with '. ', to `args.hyp/<n>.txt` (n counts documents
    from 1). Finally prints the throughput, measured over the forward passes
    only.

    Args:
        examples: passed straight to `utils.Dataset`; each batch item is
            split on '. ' to obtain its sentences.
    """
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    pred_dataset = utils.Dataset(examples)
    pred_iter = DataLoader(dataset=pred_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)

    if use_gpu:
        checkpoint = torch.load(args.load_dir)
    else:
        checkpoint = torch.load(args.load_dir, map_location=lambda storage, loc: storage)
        # checkpoint['args'].device holds the device used at train time;
        # when predicting on a CPU it must be overridden with None
        checkpoint['args'].device = None

    saved_args = checkpoint['args']
    net = getattr(models, saved_args.model)(saved_args)
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()

    doc_num = len(pred_dataset)
    time_cost = 0
    file_id = 1
    for batch in tqdm(pred_iter):
        features, doc_lens = vocab.make_predict_features(batch)

        inputs = Variable(features).cuda() if use_gpu else Variable(features)
        t1 = time()
        probs = net(inputs, doc_lens)
        t2 = time()
        time_cost += t2 - t1

        # probs is consumed in consecutive slices of doc_len entries, one
        # slice per document of the batch
        start = 0
        for doc_id, doc_len in enumerate(doc_lens):
            stop = start + doc_len
            doc_probs = probs[start:stop]

            # select at most args.topk sentences, then sort the indices so
            # the sentences are emitted in ascending index order
            k = min(args.topk, doc_len)
            chosen = doc_probs.topk(k)[1].cpu().data.numpy()
            chosen.sort()

            sentences = batch[doc_id].split('. ')[:doc_len]
            hyp = [sentences[index] for index in chosen]

            with open(os.path.join(args.hyp, str(file_id) + '.txt'), 'w') as f:
                f.write('. '.join(hyp))

            start = stop
            file_id = file_id + 1

    print('Speed: %.2f docs / s' % (doc_num / time_cost))