This article collects typical usage examples of Python's torch.utils.data.SequentialSampler. If you are wondering what data.SequentialSampler does and how to use it, the curated examples below should help; they are also a good way to get familiar with the surrounding torch.utils.data module.
The following shows 15 code examples of data.SequentialSampler, drawn from real projects and ordered by popularity by default.
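Before the project examples, here is a minimal, self-contained sketch (not taken from any project below) of the pattern they all share: SequentialSampler yields dataset indices 0, 1, 2, ... in order, which makes it the usual choice for deterministic evaluation and prediction loops.

import torch
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset

# Toy dataset: ten one-element feature vectors.
features = torch.arange(10, dtype=torch.float32).unsqueeze(1)
dataset = TensorDataset(features)

# SequentialSampler iterates indices in order; no shuffling, fully deterministic.
sampler = SequentialSampler(dataset)
loader = DataLoader(dataset, sampler=sampler, batch_size=4)

for (batch,) in loader:
    print(batch.squeeze(1).tolist())  # [0.0..3.0], [4.0..7.0], [8.0, 9.0]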
Example 1: _pre_process
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def _pre_process(self, input):
    # Record the time spent in the prediction functions
    self.start_time = time.time()
    # Convert the raw input strings into model features
    test_examples = [InputExample(guid=i, text_a=x, labels=[]) for i, x in enumerate(input)]
    test_features = convert_examples_to_features(test_examples, self.max_seq_length, self.tokenizer)
    all_input_ids = torch.tensor([f.input_ids for f in test_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in test_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in test_features], dtype=torch.long)
    # Turn the input examples into sequential batches
    test_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids)
    test_sampler = SequentialSampler(test_data)
    self.test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=self.eval_batch_size)
    return test_examples
Example 2: prepare_data_loader
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def prepare_data_loader(self, dataset, batch_size, rand_flag=True):
    # Random sampling for training, sequential sampling otherwise
    if rand_flag:
        data_sampler = RandomSampler(dataset)
    else:
        data_sampler = SequentialSampler(dataset)
    if self.custom_collate_fn is None:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler)
    else:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler,
                                collate_fn=self.custom_collate_fn)
    return dataloader
Example 3: auto_add_sampler
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def auto_add_sampler(self, dataloader: DataLoader, train: bool) -> DataLoader:
    # Don't do anything if it's not a DataLoader
    is_dataloader = isinstance(dataloader, DataLoader)
    # Don't manipulate iterable datasets
    is_iterable_ds = _has_iterable_dataset(dataloader)
    if not is_dataloader or is_iterable_ds:
        return dataloader

    need_dist_sampler = (self.use_ddp or self.use_ddp2 or self.use_horovod or self.use_tpu)
    if self.replace_sampler_ddp and need_dist_sampler:
        if not isinstance(dataloader.sampler, (SequentialSampler, RandomSampler)):
            raise MisconfigurationException(
                'You seem to have configured a sampler in your DataLoader. This will be replaced'
                ' by `DistributedSampler` since `replace_sampler_ddp` is True and you are using'
                ' distributed training. Either remove the sampler from your DataLoader or set'
                ' `replace_sampler_ddp`=False if you want to use your custom sampler.')
        # Replace the default sampler with a distributed one
        sampler = self._get_distributed_sampler(dataloader)
        dataloader = self.replace_sampler(dataloader, sampler)
    return dataloader
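For context, here is a rough sketch of the replacement step performed above: building a DistributedSampler from the current process-group state and rebuilding the loader around it. The helper name make_distributed_loader and the num_replicas/rank wiring are illustrative assumptions, not PyTorch Lightning's actual internals.

import torch.distributed as dist
from torch.utils.data import DataLoader, DistributedSampler

def make_distributed_loader(dataloader: DataLoader) -> DataLoader:
    # Shard the dataset so that each process (rank) sees a disjoint slice of it.
    sampler = DistributedSampler(
        dataloader.dataset,
        num_replicas=dist.get_world_size(),  # assumes the default process group is initialized
        rank=dist.get_rank(),
    )
    # Rebuild the loader with the new sampler; `shuffle` must stay unset
    # because ordering is now the sampler's responsibility.
    return DataLoader(dataloader.dataset,
                      batch_size=dataloader.batch_size,
                      sampler=sampler,
                      num_workers=dataloader.num_workers)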
Example 4: read_eval_data
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def read_eval_data(args, tokenizer, logger):
    eval_path = os.path.join(args.data_dir, args.predict_file)
    eval_set = read_absa_data(eval_path)
    eval_examples = convert_absa_data(dataset=eval_set, verbose_logging=args.verbose_logging)
    eval_features = convert_examples_to_features(eval_examples, tokenizer, args.max_seq_length,
                                                 args.verbose_logging, logger)
    logger.info("Num orig examples = %d", len(eval_examples))
    logger.info("Num split features = %d", len(eval_features))
    logger.info("Batch size = %d", args.predict_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
    # Sequential sampling on a single process, distributed sampling otherwise
    if args.local_rank == -1:
        eval_sampler = SequentialSampler(eval_data)
    else:
        eval_sampler = DistributedSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size)
    return eval_examples, eval_features, eval_dataloader
Example 5: get_dl_from_texts
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def get_dl_from_texts(self, texts):
    # Wrap raw texts in unlabeled InputExamples for prediction
    test_examples = []
    input_data = []
    for index, text in enumerate(texts):
        test_examples.append(InputExample(index, text, label=None))
        input_data.append({"id": index, "text": text})
    test_dataset = self.get_dataset_from_examples(
        test_examples, "test", is_test=True, no_cache=True
    )
    test_sampler = SequentialSampler(test_dataset)
    return DataLoader(
        test_dataset, sampler=test_sampler, batch_size=self.batch_size_per_gpu
    )
Example 6: _predict_features
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def _predict_features(self, features, tokens):
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    predict_data = TensorDataset(all_input_ids, all_input_mask)
    predict_sampler = SequentialSampler(predict_data)
    predict_dataloader = DataLoader(predict_data, sampler=predict_sampler, batch_size=self._batch_size)
    self._model.eval()
    predict_ids = []
    for batch in predict_dataloader:
        batch = tuple(t.to(self._device) for t in batch)
        input_ids, input_mask = batch
        logits = self._model(input_ids, input_mask)
        logits = logits.detach().cpu().numpy()
        predict_ids.extend(np.argmax(logits, -1).tolist())
    predictions = []
    for token_line, predict_line in zip(tokens, predict_ids):
        # Skip the leading [CLS] position and align label ids with the original tokens
        predictions.append([self._label_list[label_id] for label_id in predict_line[1:1 + len(token_line)]])
    return predictions
Example 7: test
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def test(model, tokenizer, test_data, args):
    logger.info("Test starts!")
    model_load(args.model_dir, model)
    model = model.to(device)
    test_dataset = QueryDataset(test_data)
    test_data_loader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset),
                                  batch_size=args.bsz, num_workers=args.num_workers,
                                  collate_fn=lambda x: collate_fn(x, tokenizer, args.sample, args.max_seq_len))
    test_loss, test_str = evaluate(model, test_data_loader)
    logger.info(f"| test | {test_str}")
Example 8: get_data_loader
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def get_data_loader(dataset, batch_size, evaluation=False,
                    custom_dataset=False, num_worker=6, local_rank=-1):
    if evaluation:
        sampler = SequentialSampler(dataset)
    else:
        if not custom_dataset:
            # Use DistributedSampler to partition the dataset across processes
            sampler = RandomSampler(dataset) if local_rank == -1 else DistributedSampler(dataset)
        else:
            sampler = None
    print(f'get_data_loader: training:{not evaluation}; sampler:{sampler}')
    data_loader = DataLoader(dataset, sampler=sampler, batch_size=batch_size, num_workers=num_worker)
    return data_loader
Example 9: predict
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def predict(
    self, text_tuples: Union[List[Tuple[str, str, str]], TripletTextDataset]
) -> List[Tuple[str, float]]:
    if isinstance(text_tuples, Dataset):
        data = text_tuples
    else:
        text_a_list, text_b_list, text_c_list = [list(i) for i in zip(*text_tuples)]
        data = TripletTextDataset(text_a_list, text_b_list, text_c_list, None)
    sampler = SequentialSampler(data)
    collate_fn = get_collator(
        self.max_length, self.device, self.tokenizer, self.model_class
    )
    dataloader = DataLoader(
        data, sampler=sampler, batch_size=8, collate_fn=collate_fn
    )
    final_results = []
    for batch in dataloader:
        with torch.no_grad():
            predict_results = self.model(*batch, mode="prob").cpu().numpy()
        # For each sample, report the most probable class and its probability
        cata_indexes = np.argmax(predict_results, axis=1)
        for i_sample, cata_index in enumerate(cata_indexes):
            prob = predict_results[i_sample][cata_index]
            label = "B" if cata_index == 0 else "C"
            final_results.append((str(label), float(prob)))
    return final_results
Example 10: test
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def test(args):
    # Load a trained model that you have fine-tuned
    processor = data_utils.AscProcessor()
    label_list = processor.get_labels()
    tokenizer = BertTokenizer.from_pretrained(modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    eval_examples = processor.get_test_examples(args.data_dir)
    eval_features = data_utils.convert_examples_to_features(eval_examples, label_list, args.max_seq_length, tokenizer, "asc")
    logger.info("***** Running evaluation *****")
    logger.info(" Num examples = %d", len(eval_examples))
    logger.info(" Batch size = %d", args.eval_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask, all_label_ids)
    # Run prediction for the full dataset in original order
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)
    model = torch.load(os.path.join(args.output_dir, "model.pt"))
    model.cuda()
    model.eval()
    full_logits = []
    full_label_ids = []
    for step, batch in enumerate(eval_dataloader):
        batch = tuple(t.cuda() for t in batch)
        input_ids, segment_ids, input_mask, label_ids = batch
        with torch.no_grad():
            logits = model(input_ids, segment_ids, input_mask)
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.cpu().numpy()
        full_logits.extend(logits.tolist())
        full_label_ids.extend(label_ids.tolist())
    output_eval_json = os.path.join(args.output_dir, "predictions.json")
    with open(output_eval_json, "w") as fw:
        json.dump({"logits": full_logits, "label_ids": full_label_ids}, fw)
Example 11: test
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def test(self, args, test_file):
    processor = getattr(data_util, args.task.upper() + "Processor")(args)
    label_list = processor.get_labels()
    config_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path, do_lower_case=args.do_lower_case)
    eval_examples = processor.get_test_examples(test_file)
    eval_features = self._convert_examples_to_features(args, eval_examples, tokenizer, args.max_seq_length, label_list, args.model_type)
    logger.info("***** Running evaluation *****")
    logger.info(" Num examples = %d", len(eval_examples))
    logger.info(" Batch size = %d", args.eval_batch_size)
    eval_dataset = data_util.build_dataset(eval_features)
    # Run prediction for the full dataset
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)
    model = torch.load(os.path.join(args.output_dir, "model.pt"))
    model.to(args.device)
    model.eval()
    self._predict(args, model, eval_examples, eval_dataloader, label_list)
Example 12: evaluate
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def evaluate(self, args, eval_dataset, eval_masker, model, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, drop_last=True)
    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info(" Num examples = %d", len(eval_dataset))
    logger.info(" Batch size = %d", args.eval_batch_size)
    model.eval()
    result = self._eval(args, eval_dataloader, eval_masker, model)
    output_eval_file = os.path.join(eval_output_dir, prefix, "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info(" %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
    return result
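One detail worth noting in Example 12: drop_last=True discards the final incomplete batch, so with a sequential sampler the last len(eval_dataset) % eval_batch_size examples are never scored. That is a deliberate trade-off when downstream code requires fixed-size batches, but it can skew metrics on small evaluation sets.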
Example 13: encode_candidate
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def encode_candidate(
    reranker,
    candidate_pool,
    encode_batch_size,
    silent,
    logger,
):
    reranker.model.eval()
    device = reranker.device
    # for cand_pool in candidate_pool:
    #     logger.info("Encoding candidate pool %s" % src)
    sampler = SequentialSampler(candidate_pool)
    data_loader = DataLoader(
        candidate_pool, sampler=sampler, batch_size=encode_batch_size
    )
    if silent:
        iter_ = data_loader
    else:
        iter_ = tqdm(data_loader)
    cand_encode_list = None
    for step, batch in enumerate(iter_):
        cands = batch.to(device)
        cand_encode = reranker.encode_candidate(cands)
        # Accumulate the per-batch encodings into a single tensor
        if cand_encode_list is None:
            cand_encode_list = cand_encode
        else:
            cand_encode_list = torch.cat((cand_encode_list, cand_encode))
    return cand_encode_list
Example 14: _process_biencoder_dataloader
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def _process_biencoder_dataloader(samples, tokenizer, biencoder_params):
    _, tensor_data = process_mention_data(
        samples,
        tokenizer,
        biencoder_params["max_context_length"],
        biencoder_params["max_cand_length"],
        silent=True,
        logger=None,
        debug=biencoder_params["debug"],
    )
    sampler = SequentialSampler(tensor_data)
    dataloader = DataLoader(
        tensor_data, sampler=sampler, batch_size=biencoder_params["eval_batch_size"]
    )
    return dataloader
Example 15: _process_crossencoder_dataloader
# Required imports: from torch.utils import data [as alias]
# Or: from torch.utils.data import SequentialSampler [as alias]
def _process_crossencoder_dataloader(context_input, label_input, crossencoder_params):
    tensor_data = TensorDataset(context_input, label_input)
    sampler = SequentialSampler(tensor_data)
    dataloader = DataLoader(
        tensor_data, sampler=sampler, batch_size=crossencoder_params["eval_batch_size"]
    )
    return dataloader