当前位置: 首页>>代码示例>>Python>>正文


Python data.SequentialSampler方法代码示例

本文整理汇总了Python中torch.utils.data.SequentialSampler方法的典型用法代码示例。如果您正苦于以下问题:Python data.SequentialSampler方法的具体用法?Python data.SequentialSampler怎么用?Python data.SequentialSampler使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在torch.utils.data的用法示例。


在下文中一共展示了data.SequentialSampler方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _pre_process

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def _pre_process(self, input):
    """Convert raw input texts into a batched DataLoader for prediction.

    Records the prediction start time, tokenizes the texts into BERT
    features, and stores a sequential DataLoader on the instance as
    ``self.test_dataloader``.  Returns the ``InputExample`` list built
    from *input*.
    """
    # Timestamp used later to report how long prediction took.
    self.start_time = time.time()

    # Wrap each raw text in an InputExample, then tokenize to features.
    examples = [
        InputExample(guid=idx, text_a=text, labels=[])
        for idx, text in enumerate(input)
    ]
    features = convert_examples_to_features(examples, self.max_seq_length, self.tokenizer)

    ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    segments = torch.tensor([f.segment_ids for f in features], dtype=torch.long)

    # Sequential sampling keeps predictions aligned with the input order.
    dataset = TensorDataset(ids, mask, segments)
    self.test_dataloader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=self.eval_batch_size,
    )

    return examples
开发者ID:IBM,项目名称:MAX-Toxic-Comment-Classifier,代码行数:20,代码来源:model.py

示例2: prepare_data_loader

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def prepare_data_loader(self, dataset, batch_size, rand_flag=True):
    """Build a DataLoader over *dataset*.

    *rand_flag* selects random sampling (training) versus sequential
    sampling (evaluation).  ``self.custom_collate_fn``, when set, is
    forwarded to the DataLoader.
    """
    sampler_cls = RandomSampler if rand_flag else SequentialSampler
    loader_kwargs = {
        "batch_size": batch_size,
        "sampler": sampler_cls(dataset),
    }
    # Only pass collate_fn when a custom one is configured.
    if self.custom_collate_fn is not None:
        loader_kwargs["collate_fn"] = self.custom_collate_fn
    return DataLoader(dataset, **loader_kwargs)
开发者ID:dolphin-zs,项目名称:Doc2EDAG,代码行数:20,代码来源:base_task.py

示例3: auto_add_sampler

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def auto_add_sampler(self, dataloader: DataLoader, train: bool) -> DataLoader:
    """Swap in a distributed sampler when the training backend needs one.

    Non-DataLoader inputs and iterable datasets are returned untouched.
    With ``replace_sampler_ddp`` enabled, a user-configured (non-default)
    sampler is treated as a misconfiguration and raises.
    """
    is_loader = isinstance(dataloader, DataLoader)
    is_iterable = _has_iterable_dataset(dataloader)

    # Leave anything that is not a map-style DataLoader alone.
    if not is_loader or is_iterable:
        return dataloader

    # Distributed backends need a sampler that shards the data per worker.
    requires_dist = self.use_ddp or self.use_ddp2 or self.use_horovod or self.use_tpu
    if self.replace_sampler_ddp and requires_dist:
        default_samplers = (SequentialSampler, RandomSampler)
        if not isinstance(dataloader.sampler, default_samplers):
            raise MisconfigurationException(
                'You seem to have configured a sampler in your DataLoader. This will be replaced '
                ' by `DistributedSampler` since `replace_sampler_ddp` is True and you are using'
                ' distributed training. Either remove the sampler from your DataLoader or set'
                ' `replace_sampler_ddp`=False if you want to use your custom sampler.')

        # Default sampler detected: safe to substitute the distributed one.
        dist_sampler = self._get_distributed_sampler(dataloader)
        dataloader = self.replace_sampler(dataloader, dist_sampler)

    return dataloader
开发者ID:PyTorchLightning,项目名称:pytorch-lightning,代码行数:26,代码来源:data_loading.py

示例4: read_eval_data

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def read_eval_data(args, tokenizer, logger):
    """Load the ABSA evaluation split and wrap it in a DataLoader.

    Returns ``(examples, features, dataloader)``; the dataloader yields
    (input_ids, input_mask, segment_ids, example_index) batches.
    """
    path = os.path.join(args.data_dir, args.predict_file)
    examples = convert_absa_data(dataset=read_absa_data(path),
                                 verbose_logging=args.verbose_logging)
    features = convert_examples_to_features(examples, tokenizer, args.max_seq_length,
                                            args.verbose_logging, logger)

    logger.info("Num orig examples = %d", len(examples))
    logger.info("Num split features = %d", len(features))
    logger.info("Batch size = %d", args.predict_batch_size)

    input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    # Index tensor lets callers map a batch row back to its source feature.
    example_index = torch.arange(input_ids.size(0), dtype=torch.long)
    data = TensorDataset(input_ids, input_mask, segment_ids, example_index)

    # local_rank == -1 means single-process evaluation; otherwise shard.
    if args.local_rank == -1:
        sampler = SequentialSampler(data)
    else:
        sampler = DistributedSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=args.predict_batch_size)
    return examples, features, dataloader
开发者ID:huminghao16,项目名称:SpanABSA,代码行数:24,代码来源:run_joint_span.py

示例5: get_dl_from_texts

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def get_dl_from_texts(self, texts):
    """Create a sequential DataLoader for ad-hoc inference on raw texts.

    Each text becomes an unlabeled InputExample; the uncached "test"
    dataset built from them is iterated sequentially so predictions
    line up with the input order.
    """
    # NOTE: the original also built a parallel `input_data` list of
    # {"id", "text"} dicts that was never used — dropped as dead code.
    examples = [
        InputExample(index, text, label=None)
        for index, text in enumerate(texts)
    ]

    dataset = self.get_dataset_from_examples(
        examples, "test", is_test=True, no_cache=True
    )

    sampler = SequentialSampler(dataset)
    return DataLoader(
        dataset, sampler=sampler, batch_size=self.batch_size_per_gpu
    )
开发者ID:kaushaltrivedi,项目名称:fast-bert,代码行数:19,代码来源:data_cls.py

示例6: _predict_features

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def _predict_features(self, features, tokens):
    """Run NER label prediction over pre-built input features.

    Returns, per token line, the predicted label strings aligned with
    the tokens (position 0 — presumably the [CLS] slot — is skipped;
    trailing positions beyond the token count are dropped).
    """
    input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    dataset = TensorDataset(input_ids, input_mask)
    loader = DataLoader(dataset,
                        sampler=SequentialSampler(dataset),
                        batch_size=self._batch_size)

    self._model.eval()
    predict_ids = []
    for batch in loader:
        ids, mask = (t.to(self._device) for t in batch)
        logits = self._model(ids, mask).detach().cpu().numpy()
        # Argmax over the label dimension gives one label id per position.
        predict_ids.extend(np.argmax(logits, -1).tolist())

    return [
        [self._label_list[label_id] for label_id in predict_line[1: 1 + len(token_line)]]
        for token_line, predict_line in zip(tokens, predict_ids)
    ]
开发者ID:ericput,项目名称:bert-ner,代码行数:20,代码来源:njuner.py

示例7: test

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def test(model, tokenizer, test_data, args):
    """Restore the saved checkpoint and report loss on the test split."""
    logger.info("Test starts!")
    model_load(args.model_dir, model)
    model = model.to(device)

    dataset = QueryDataset(test_data)
    loader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=args.bsz,
        num_workers=args.num_workers,
        # Bind tokenizer/sampling options into the per-batch collate call.
        collate_fn=lambda batch: collate_fn(batch, tokenizer, args.sample, args.max_seq_len),
    )

    test_loss, test_str = evaluate(model, loader)
    logger.info(f"| test  | {test_str}")
开发者ID:clovaai,项目名称:subword-qac,代码行数:14,代码来源:train.py

示例8: get_data_loader

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def get_data_loader(dataset, batch_size, evaluation=False,
                    custom_dataset=False, num_worker=6, local_rank=-1):
    """Build a DataLoader with a sampler matching the run mode.

    Evaluation always iterates sequentially.  For training, custom
    datasets supply their own ordering (sampler=None); otherwise a
    DistributedSampler shards the data when local_rank is set, and a
    RandomSampler shuffles it in the single-process case.
    """
    if evaluation:
        sampler = SequentialSampler(dataset)
    elif custom_dataset:
        sampler = None
    elif local_rank == -1:
        sampler = RandomSampler(dataset)
    else:
        # Use DistributedSampler to partition the dataset across workers.
        sampler = DistributedSampler(dataset)
    print(f'get_data_loader: training:{not evaluation}; sampler:{sampler}')
    return DataLoader(dataset, sampler=sampler, batch_size=batch_size, num_workers=num_worker)
开发者ID:NLPInBLCU,项目名称:BiaffineDependencyParsing,代码行数:15,代码来源:bertology_loader.py

示例9: predict

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def predict(
    self, text_tuples: Union[List[Tuple[str, str, str]], TripletTextDataset]
) -> List[Tuple[str, float]]:
    """Classify (a, b, c) text triplets.

    Returns one ``(label, probability)`` pair per input, where the
    label is "B" when class index 0 wins the argmax and "C" otherwise.
    Accepts either a ready-made TripletTextDataset or a list of
    3-tuples of raw strings.
    """
    if isinstance(text_tuples, Dataset):
        dataset = text_tuples
    else:
        # Transpose the list of triplets into three parallel columns.
        a_col, b_col, c_col = (list(col) for col in zip(*text_tuples))
        dataset = TripletTextDataset(a_col, b_col, c_col, None)

    loader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=8,
        collate_fn=get_collator(
            self.max_length, self.device, self.tokenizer, self.model_class
        ),
    )

    final_results: List[Tuple[str, float]] = []
    with torch.no_grad():
        for batch in loader:
            probs = self.model(*batch, mode="prob").cpu().numpy()
            winners = np.argmax(probs, axis=1)
            for row, winner in enumerate(winners):
                label = "B" if winner == 0 else "C"
                final_results.append((str(label), float(probs[row][winner])))

    return final_results
开发者ID:padeoe,项目名称:cail2019,代码行数:32,代码来源:model.py

示例10: test

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def test(args):  # Load a trained model that you have fine-tuned (we assume evaluate on cpu)    
    """Evaluate a fine-tuned aspect-sentiment-classification model.

    Loads the test examples from args.data_dir, runs the model saved at
    args.output_dir/model.pt over them on GPU, and dumps raw logits plus
    gold label ids to predictions.json for downstream scoring.
    """
    processor = data_utils.AscProcessor()
    label_list = processor.get_labels()
    tokenizer = BertTokenizer.from_pretrained(modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    eval_examples = processor.get_test_examples(args.data_dir)
    eval_features = data_utils.convert_examples_to_features(eval_examples, label_list, args.max_seq_length, tokenizer, "asc")

    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_examples))
    logger.info("  Batch size = %d", args.eval_batch_size)
    # Pack features into parallel tensors for TensorDataset.
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask, all_label_ids)
    # Run prediction for full data
    # Sequential order keeps predictions aligned with the examples.
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

    model = torch.load(os.path.join(args.output_dir, "model.pt") )
    model.cuda()
    model.eval()
    
    full_logits=[]
    full_label_ids=[]
    for step, batch in enumerate(eval_dataloader):
        batch = tuple(t.cuda() for t in batch)
        input_ids, segment_ids, input_mask, label_ids = batch
        
        # No gradients needed during evaluation.
        with torch.no_grad():
            logits = model(input_ids, segment_ids, input_mask)

        # Move results to CPU so they serialize as plain Python lists.
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.cpu().numpy()

        full_logits.extend(logits.tolist() )
        full_label_ids.extend(label_ids.tolist() )

    # Persist raw predictions for offline metric computation.
    output_eval_json = os.path.join(args.output_dir, "predictions.json") 
    with open(output_eval_json, "w") as fw:
        json.dump({"logits": full_logits, "label_ids": full_label_ids}, fw) 
开发者ID:howardhsu,项目名称:BERT-for-RRC-ABSA,代码行数:43,代码来源:run_asc.py

示例11: test

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def test(self, args, test_file):
    """Evaluate the saved model on *test_file* and emit predictions."""
    # Resolve the task-specific processor class by name, e.g. "ASCProcessor".
    processor = getattr(data_util, args.task.upper() + "Processor")(args)
    label_list = processor.get_labels()

    # Tokenizer matching the fine-tuned model type (config class unused here).
    _, tokenizer_class = MODEL_CLASSES[args.model_type]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path, do_lower_case = args.do_lower_case)

    eval_examples = processor.get_test_examples(test_file)
    eval_features = self._convert_examples_to_features(args, eval_examples, tokenizer, args.max_seq_length, label_list, args.model_type)

    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_examples))
    logger.info("  Batch size = %d", args.eval_batch_size)

    eval_dataset = data_util.build_dataset(eval_features)

    # Sequential order keeps predictions aligned with the examples.
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=SequentialSampler(eval_dataset),
                                 batch_size=args.eval_batch_size)

    model = torch.load(os.path.join(args.output_dir, "model.pt") )
    model.to(args.device)
    model.eval()

    self._predict(args, model, eval_examples, eval_dataloader, label_list)
开发者ID:howardhsu,项目名称:BERT-for-RRC-ABSA,代码行数:28,代码来源:trainer.py

示例12: evaluate

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def evaluate(self, args, eval_dataset, eval_masker, model, prefix=""):
        """Run evaluation over *eval_dataset* and write results to disk.

        Builds a sequential (or distributed) dataloader, delegates the
        metric computation to self._eval, logs every metric, and writes
        them to eval_results.txt under args.output_dir/prefix.  Returns
        the metrics dict produced by self._eval.
        """
        # Loop to handle MNLI double evaluation (matched, mis-matched)
        eval_output_dir = args.output_dir

        # Only the main process (rank -1 or 0) creates the output directory.
        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        # Scale the per-GPU batch size by the number of available GPUs.
        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
        # drop_last keeps every batch the same size across workers.
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, drop_last=True)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        model.eval()

        result = self._eval(args, eval_dataloader, eval_masker, model)
        
        output_eval_file = os.path.join(eval_output_dir, prefix, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

        return result 
开发者ID:howardhsu,项目名称:BERT-for-RRC-ABSA,代码行数:30,代码来源:trainer.py

示例13: encode_candidate

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def encode_candidate(
    reranker,
    candidate_pool,
    encode_batch_size,
    silent,
    logger,
):
    """Encode every candidate in *candidate_pool* with the reranker.

    Iterates the pool sequentially in batches of *encode_batch_size*,
    encodes each batch via reranker.encode_candidate, and returns a
    single tensor with all encodings concatenated (None for an empty
    pool, matching the previous behavior).  When *silent* is falsy the
    iteration is wrapped in a tqdm progress bar.  *logger* is currently
    unused but kept for interface compatibility.
    """
    reranker.model.eval()
    device = reranker.device
    #for cand_pool in candidate_pool:
    #logger.info("Encoding candidate pool %s" % src)
    sampler = SequentialSampler(candidate_pool)
    data_loader = DataLoader(
        candidate_pool, sampler=sampler, batch_size=encode_batch_size
    )
    if silent:
        iter_ = data_loader
    else:
        iter_ = tqdm(data_loader)

    # Collect per-batch encodings and concatenate once at the end.
    # The original torch.cat inside the loop re-copied the accumulated
    # tensor on every iteration, making encoding quadratic in pool size.
    encoded_batches = []
    for batch in iter_:
        cands = batch.to(device)
        encoded_batches.append(reranker.encode_candidate(cands))

    if not encoded_batches:
        return None
    return torch.cat(encoded_batches)
开发者ID:facebookresearch,项目名称:BLINK,代码行数:33,代码来源:eval_biencoder.py

示例14: _process_biencoder_dataloader

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def _process_biencoder_dataloader(samples, tokenizer, biencoder_params):
    """Tokenize mention samples and wrap them in a sequential DataLoader."""
    _, tensor_data = process_mention_data(
        samples,
        tokenizer,
        biencoder_params["max_context_length"],
        biencoder_params["max_cand_length"],
        silent=True,
        logger=None,
        debug=biencoder_params["debug"],
    )
    # Sequential order keeps predictions aligned with the input samples.
    return DataLoader(
        tensor_data,
        sampler=SequentialSampler(tensor_data),
        batch_size=biencoder_params["eval_batch_size"],
    )
开发者ID:facebookresearch,项目名称:BLINK,代码行数:17,代码来源:main_dense.py

示例15: _process_crossencoder_dataloader

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import SequentialSampler [as 别名]
def _process_crossencoder_dataloader(context_input, label_input, crossencoder_params):
    tensor_data = TensorDataset(context_input, label_input)
    sampler = SequentialSampler(tensor_data)
    dataloader = DataLoader(
        tensor_data, sampler=sampler, batch_size=crossencoder_params["eval_batch_size"]
    )
    return dataloader 
开发者ID:facebookresearch,项目名称:BLINK,代码行数:9,代码来源:main_dense.py


注:本文中的torch.utils.data.SequentialSampler方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。