当前位置: 首页>>代码示例>>Python>>正文


Python data.TensorDataset类代码示例

本文整理汇总了Python中torch.utils.data.TensorDataset类的典型用法代码示例。如果您正苦于以下问题:Python data.TensorDataset类的具体用法?Python data.TensorDataset怎么用?Python data.TensorDataset使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块torch.utils.data的用法示例。


在下文中一共展示了data.TensorDataset类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: feature_to_dataset

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def feature_to_dataset(features):
    """Pack a sequence of feature objects into a ``TensorDataset``.

    Tensor order in the returned dataset:
        input_ids, input_mask, segment_ids, start_pos, end_pos, dep_ids,
        and pos_ids last (only when the features carry a ``pos_ids``
        attribute).

    Args:
        features: indexable sequence of objects exposing the attributes
            listed above. It is traversed once per attribute, so it must not
            be a one-shot iterator. An empty sequence yields an empty
            dataset holding the six mandatory tensors.

    Returns:
        TensorDataset whose tensors are all created with ``dtype=torch.long``.
    """
    field_names = ['input_ids', 'input_mask', 'segment_ids',
                   'start_pos', 'end_pos', 'dep_ids']
    # pos_ids is optional; its presence is decided from the first feature.
    # The length guard keeps an empty input from raising IndexError.
    if len(features) > 0 and hasattr(features[0], 'pos_ids'):
        field_names.append('pos_ids')
    tensors = [
        torch.tensor([getattr(feature, name) for feature in features],
                     dtype=torch.long)
        for name in field_names
    ]
    return TensorDataset(*tensors)
开发者ID:NLPInBLCU,项目名称:BiaffineDependencyParsing,代码行数:24,代码来源:bertology_loader.py

示例2: _unpack_batch

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def _unpack_batch(self, batch: TensorDataset) -> Dict:
    """Split a batch into the encoder inputs and the auxiliary fields.

    Abstract hook: this implementation only raises, so subclasses have to
    provide the real logic. The intended outputs are the encoder inputs,
    the word mask, the sentence lengths, the dep ids and any other inputs.

    Example of the dataset layout a batch is drawn from::

        dataset = TensorDataset(all_input_ids, all_input_mask,
                    all_segment_ids, all_start_pos,
                    all_end_pos, all_dep_ids,
                    all_pos_ids)

    Args:
        batch: a single input batch, of type TensorDataset (or a
            torchtext dataset); its fields can be read by index.

    Returns:
        A dict in which [1] is the inputs (itself a dict), [2] is the word
        mask, [3] is the sentence lengths as a Python list and [4] is the
        dep ids; depending on the situation it may carry further inputs.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError('must implement in sub class')
开发者ID:NLPInBLCU,项目名称:BiaffineDependencyParsing,代码行数:19,代码来源:base_trainer.py

示例3: with_test_data

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def with_test_data(self, x, batch_size=1, num_workers=1, steps=None):
    """Attach test data to this trial and return the trial for chaining.

    The tensor is wrapped in a ``TensorDataset``, served through a
    ``DataLoader`` and handed to :meth:`with_test_generator`.

    Example: ::

        # Simple trial that runs for 10 test iterations on some random data
        >>> from torchbearer import Trial
        >>> data = torch.rand(10, 1)
        >>> trial = Trial(None).with_test_data(data).for_test_steps(10).run(1)

    Args:
        x (torch.Tensor): The test x data to wrap
        batch_size (int): Batch size given to the data loader
        num_workers (int): ``num_workers`` value given to the data loader
        steps (int): Forwarded unchanged to :meth:`with_test_generator`

    Returns:
        Trial: self
    """
    test_loader = DataLoader(
        TensorDataset(x),
        batch_size=batch_size,
        num_workers=num_workers,
    )
    self.with_test_generator(test_loader, steps=steps)
    return self
开发者ID:pytorchbearer,项目名称:torchbearer,代码行数:26,代码来源:trial.py

示例4: test_callbacks

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def test_callbacks(self):
    """Smoke-test a trial that has a broad set of callbacks attached.

    Builds random train/validation/test tensor datasets, runs the trial
    with ``run(2)``, calls ``predict`` and both ``evaluate`` variants, and
    finally removes the files ``test.pt`` and ``test_csv.pt``.
    """
    import os
    from torch.utils.data import TensorDataset

    def random_split():
        # 10 samples: inputs of shape (1, 3), targets of shape (1,)
        return TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))

    traingen = random_split()
    valgen = random_split()
    testgen = random_split()

    model = torch.nn.Linear(3, 1)
    optim = torch.optim.SGD(model.parameters(), lr=0.01)
    callbacks = [
        c.EarlyStopping(),
        c.GradientClipping(10, model.parameters()),
        c.Best('test.pt'),
        c.MostRecent('test.pt'),
        c.ReduceLROnPlateau(),
        c.CosineAnnealingLR(0.1, 0.01),
        c.ExponentialLR(1),
        c.Interval('test.pt'),
        c.CSVLogger('test_csv.pt'),
        c.L1WeightDecay(),
        c.L2WeightDecay(),
        c.TerminateOnNaN(monitor='fail_metric'),
    ]

    trial = torchbearer.Trial(
        model, optim, torch.nn.MSELoss(), metrics=['loss'], callbacks=callbacks)
    trial = trial.with_generators(traingen, valgen, testgen)
    trial.run(2)
    trial.predict()
    trial.evaluate(data_key=torchbearer.TEST_DATA)
    trial.evaluate()

    # Clean up the files named in the callbacks above.
    for leftover in ('test.pt', 'test_csv.pt'):
        os.remove(leftover)
开发者ID:pytorchbearer,项目名称:torchbearer,代码行数:26,代码来源:test_end_to_end.py

示例5: load_data

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def load_data(hdf5_file, ndata, batch_size, only_input=True, return_stats=False):
    """Read the first ``ndata`` samples of an HDF5 file into a shuffling DataLoader.

    Args:
        hdf5_file: path of the HDF5 file. The ``'input'`` dataset is always
            read; ``'output'`` is read when targets or statistics are needed.
        ndata: number of leading samples to read from each dataset.
        batch_size: batch size of the returned loader.
        only_input: when True the loader yields inputs only; otherwise it
            yields ``(input, output)`` pairs.
        return_stats: when True, ``stats['y_variation']`` holds the sum of
            squared deviations of the outputs from their mean over axis 0,
            reduced over axes (0, 2, 3).
            NOTE(review): that reduction assumes 4-D outputs - confirm.

    Returns:
        ``(data_loader, stats)``. The loader shuffles and drops the last
        incomplete batch; ``stats`` is an empty dict unless ``return_stats``.
    """
    # Outputs are required as targets and/or for the statistics. Before this
    # guard, only_input=True together with return_stats=True used y_data
    # without ever assigning it.
    need_output = return_stats or not only_input

    with h5py.File(hdf5_file, 'r') as f:
        x_data = f['input'][:ndata]
        print(f'x_data: {x_data.shape}')
        if need_output:
            y_data = f['output'][:ndata]
            print(f'y_data: {y_data.shape}')

    stats = {}
    if return_stats:
        centered = y_data - y_data.mean(0, keepdims=True)
        stats['y_variation'] = (centered ** 2).sum(axis=(0, 2, 3))

    tensors = [torch.FloatTensor(x_data)]
    if not only_input:
        tensors.append(torch.FloatTensor(y_data))
    data_loader = DataLoader(TensorDataset(*tensors),
        batch_size=batch_size, shuffle=True, drop_last=True)
    print(f'Loaded dataset: {hdf5_file}')
    return data_loader, stats
开发者ID:cics-nd,项目名称:pde-surrogate,代码行数:22,代码来源:load.py

示例6: train

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def train(self, num_epochs=3, batch_size=32):
    """
    Trains the LR model.

    :param num_epochs: (int) number of epochs.
    :param batch_size: (int) batch size handed to the DataLoader.
    :return: list of float losses, one entry per processed batch across
        all epochs (empty when the training selection is empty).
    """
    batch_losses = []
    # Original author's note: we train only on cells that do not have their
    # initial value as NULL (the rows selected by self._train_idx).
    X_train = self._X.index_select(0, self._train_idx)
    Y_train = self._Y.index_select(0, self._train_idx)
    torch_ds = TensorDataset(X_train, Y_train)

    # Main training loop.
    for epoch_idx in range(1, num_epochs + 1):
        logging.debug("Logistic: epoch %d", epoch_idx)
        # Remember where this epoch's losses start so that the average
        # below covers this epoch only.
        epoch_start = len(batch_losses)
        for batch_X, batch_Y in tqdm(DataLoader(torch_ds, batch_size=batch_size)):
            batch_pred = self.forward(batch_X)
            batch_loss = self._loss(batch_pred, batch_Y.reshape(-1, 1))
            batch_losses.append(float(batch_loss))
            self.zero_grad()
            batch_loss.backward()
            self._optimizer.step()
        epoch_losses = batch_losses[epoch_start:]
        if epoch_losses:
            logging.debug('Logistic: average batch loss: %f',
                          sum(epoch_losses) / len(epoch_losses))
        else:
            # An empty training selection yields no batches; the previous
            # code divided by a zero batch count here.
            logging.debug('Logistic: no training batches in epoch %d', epoch_idx)
    return batch_losses
开发者ID:HoloClean,项目名称:holoclean,代码行数:27,代码来源:logistic.py

示例7: create_dataset

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def create_dataset(self, features, is_sorted=False):
    """Turn feature objects into a ``TensorDataset`` of long tensors.

    Args:
        features: iterable of objects exposing ``input_ids``,
            ``input_mask``, ``segment_ids`` and ``label_id`` (plus
            ``input_len`` when ``is_sorted`` is True).
        is_sorted: when True, order the features by ``input_len``,
            longest first, before building the tensors.

    Returns:
        TensorDataset of (input_ids, input_mask, segment_ids, label_ids).
    """
    if is_sorted:
        logger.info("sorted data by th length of input")
        features = sorted(
            features, key=lambda feature: feature.input_len, reverse=True)

    def as_long_tensor(attribute):
        # One row per feature, taken from the named attribute.
        rows = [getattr(feature, attribute) for feature in features]
        return torch.tensor(rows, dtype=torch.long)

    return TensorDataset(
        as_long_tensor('input_ids'),
        as_long_tensor('input_mask'),
        as_long_tensor('segment_ids'),
        as_long_tensor('label_id'),
    )
开发者ID:hscspring,项目名称:Multi-Label-Text-Classification-for-Chinese,代码行数:19,代码来源:bert.py

示例8: get_tensor_dataset

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def get_tensor_dataset(numpy_array):
    """
    Wrap a 2-D numpy array of indices as next-token training pairs.

    The array is converted to a long tensor; the inputs are every column
    but the last and the targets are every column but the first, so the
    target at position i is the input at position i + 1.

    Args:
        numpy_array: array of indices to convert

    Returns: a TensorDataset of (inputs, targets)
    """
    indices = torch.from_numpy(numpy_array).to(dtype=torch.long)
    inputs, targets = indices[:, :-1], indices[:, 1:]
    return TensorDataset(inputs, targets)
开发者ID:BenevolentAI,项目名称:guacamol_baselines,代码行数:20,代码来源:rnn_utils.py

示例9: generate_images

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def generate_images(self, z_batch, intervention=None):
    '''
    Run the model over a batch of latent vectors and return uint8 images.

    The latents get two trailing singleton axes, are fed to ``self.model``
    in chunks of 10 while ``self.modellock`` is held and gradients are
    disabled, and each output is mapped with ``(out + 1) / 2 * 255``,
    clamped to [0, 255] and cast to bytes.

    Returns a uint8 tensor on ``self.device`` shaped
    ``(len(z_batch), 3) + self.model.output_shape[2:]``.
    '''
    chunk_size = 10
    with torch.no_grad(), self.modellock:
        self.apply_intervention(intervention)
        # Pinned memory is requested only for a CPU batch headed to CUDA.
        use_pinned = (self.device.type == 'cuda'
                      and z_batch.device.type == 'cpu')
        loader = DataLoader(
            TensorDataset(z_batch[:, :, None, None]),
            batch_size=chunk_size,
            pin_memory=use_pinned)
        image_shape = (len(z_batch), 3) + self.model.output_shape[2:]
        images = torch.zeros(
            *image_shape, dtype=torch.uint8, device=self.device)
        for chunk_index, (latents,) in enumerate(loader):
            latents = latents.to(self.device)
            generated = self.model(latents)
            first = chunk_index * chunk_size
            last = first + len(latents)
            images[first:last] = (
                ((generated + 1) / 2) * 255).clamp(0, 255).byte()
        return images
开发者ID:CSAILVision,项目名称:gandissect,代码行数:23,代码来源:serverstate.py

示例10: feature_maps

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def feature_maps(self, z_batch, intervention=None, layers=None,
        quantiles=True):
    """Collect the model's retained per-layer feature maps for a latent batch.

    The latents get two trailing singleton axes and are fed to
    ``self.model`` in chunks of 10 while ``self.modellock`` is held and
    gradients are disabled. After each forward pass, every entry of
    ``self.model.retained_features()`` is iterated along its first axis and
    each item is appended to that layer's list.

    Args:
        z_batch: batch of latent vectors, indexed as ``z_batch[:, :, None, None]``.
        intervention: forwarded to ``self.apply_intervention``.
        layers: accepted for interface compatibility; not used by this body.
        quantiles: when True each feature map is passed through
            ``self.quantiles[layer].normalize`` before being stored.

    Returns:
        ``defaultdict(list)`` mapping layer name to the list of per-sample
        feature maps, in the order the samples were processed.
    """
    feature_map = defaultdict(list)
    with torch.no_grad(), self.modellock:
        batch_size = 10
        self.apply_intervention(intervention)
        test_loader = DataLoader(
            TensorDataset(z_batch[:,:,None,None]),
            batch_size=batch_size,
            pin_memory=('cuda' == self.device.type
                and z_batch.device.type == 'cpu'))
        for [batch_z] in test_loader:
            batch_z = batch_z.to(self.device)
            # Run model but disregard output; only the retained
            # features read below are of interest.
            self.model(batch_z)
            for layer, feature in self.model.retained_features().items():
                for single_featuremap in feature:
                    if quantiles:
                        feature_map[layer].append(self.quantiles[layer]
                                .normalize(single_featuremap))
                    else:
                        feature_map[layer].append(single_featuremap)
    return feature_map
开发者ID:CSAILVision,项目名称:gandissect,代码行数:27,代码来源:serverstate.py

示例11: get_folds

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def get_folds(self, folds):
    """Return a new dataset holding only the samples of the given folds.

    The index arrays ``self.folds[f]`` of the requested folds are
    concatenated and flattened. When the first base class of this object's
    class is named ``TensorDataset``, both tensors are sliced with
    ``index_select`` and a ``TensorDataset`` is returned; otherwise ``X`` is
    gathered element by element and an ``AmazonDataset`` is returned.
    """
    selected = np.hstack([self.folds[fold] for fold in folds]).reshape(-1)
    parent_name = self.__class__.__bases__[0].__name__

    if parent_name != 'TensorDataset':
        # X is gathered with plain Python indexing; only Y is selected
        # through a tensor index.
        X = [self.X[i] for i in selected]
        index_tensor = torch.from_numpy(selected).to(opt.device)
        Y = torch.index_select(self.Y, 0, index_tensor)
        return AmazonDataset(X, Y, self.max_seq_len)

    index_tensor = torch.from_numpy(selected).to(opt.device)
    X = torch.index_select(self.tensors[0], 0, index_tensor)
    Y = torch.index_select(self.tensors[1], 0, index_tensor)
    return TensorDataset(X, Y)
开发者ID:ccsasuke,项目名称:man,代码行数:18,代码来源:folded_dataset.py

示例12: get_msda_amazon_datasets

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def get_msda_amazon_datasets(data_file, domain, kfold, feature_num):
    """Load the labeled and unlabeled sets of one domain from a pickle file.

    Args:
        data_file: path of a pickle file that maps a domain name to a dict
            with ``'labeled'`` and ``'unlabeled'`` entries, each an
            ``(x, y)`` pair where ``x`` supports ``.toarray()``.
        domain: key of the domain to load.
        kfold: forwarded to ``FoldedDataset`` for the labeled set.
        feature_num: when > 0, only the first ``feature_num`` feature
            columns are kept.

    Returns:
        ``(labeled_set, unlabeled_set)``: a ``FoldedDataset`` and a
        ``TensorDataset``, with float features and long labels moved to
        ``opt.device``.
    """
    print(f'Loading mSDA Preprocessed Multi-Domain Amazon data for {domain} Domain')
    # SECURITY: unpickling executes arbitrary code; only load trusted files.
    # The with-block closes the handle, which the bare open() call leaked.
    with open(data_file, 'rb') as data_fp:
        dataset = pickle.load(data_fp)[domain]

    def to_device_tensors(x, y):
        # Optionally truncate the feature columns, densify, move to device.
        if feature_num > 0:
            x = x[:, : feature_num]
        x = torch.from_numpy(x.toarray()).float().to(opt.device)
        y = torch.from_numpy(y).long().to(opt.device)
        return x, y

    lx, ly = dataset['labeled']
    lx, ly = to_device_tensors(lx, ly)
    print(f'{domain} Domain has {len(ly)} labeled instances.')
    labeled_set = FoldedDataset(TensorDataset, kfold, lx, ly)

    ux, uy = dataset['unlabeled']
    ux, uy = to_device_tensors(ux, uy)
    print(f'{domain} Domain has {len(uy)} unlabeled instances.')
    unlabeled_set = TensorDataset(ux, uy)

    return labeled_set, unlabeled_set
开发者ID:ccsasuke,项目名称:man,代码行数:27,代码来源:msda_preprocessed_amazon_dataset.py

示例13: read_data

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def read_data(max_seq_len):
    """Read the tab-separated translation pairs and build vocabularies and a dataset.

    Each line of ``./data/translation/fr-en-small.txt`` is split on a tab
    into an input and an output sentence, and each sentence on single
    spaces into tokens. A pair is skipped when either side has more than
    ``max_seq_len - 1`` tokens.

    Returns:
        ``(in_vocab, out_vocab, dataset)`` where ``dataset`` is a
        ``TensorDataset`` of the input and output data from ``build_data``.
    """
    # "in" and "out" are short for input and output.
    in_tokens, out_tokens = [], []
    in_seqs, out_seqs = [], []
    with io.open('./data/translation/fr-en-small.txt') as f:
        lines = f.readlines()

    token_limit = max_seq_len - 1
    for line in lines:
        in_seq, out_seq = line.rstrip().split('\t')
        in_seq_tokens = in_seq.split(' ')
        out_seq_tokens = out_seq.split(' ')
        # Keep the sample only if it still fits in max_seq_len once EOS
        # has been added.
        if len(in_seq_tokens) <= token_limit and len(out_seq_tokens) <= token_limit:
            process_one_seq(in_seq_tokens, in_tokens, in_seqs, max_seq_len)
            process_one_seq(out_seq_tokens, out_tokens, out_seqs, max_seq_len)

    in_vocab, in_data = build_data(in_tokens, in_seqs)
    out_vocab, out_data = build_data(out_tokens, out_seqs)
    return in_vocab, out_vocab, Data.TensorDataset(in_data, out_data)
开发者ID:wdxtub,项目名称:deep-learning-note,代码行数:17,代码来源:53_machine_translation.py

示例14: __init__

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def __init__(self, config):
    """
    Build the train and validation loaders selected by ``config.data_mode``.

    Only the "random" mode is implemented: it draws one batch of random
    images with all-ones labels and uses that same data for both training
    and validation.

    :param config: configuration object; ``data_mode`` is always read, and
        ``batch_size``, ``input_channels`` and ``img_size`` are read in
        "random" mode.
    :raises NotImplementedError: for the "imgs" and "numpy" modes.
    :raises Exception: for any other unrecognised mode.
    """
    self.config = config
    mode = config.data_mode

    if mode in ("imgs", "numpy"):
        raise NotImplementedError("This mode is not implemented YET")
    if mode != "random":
        raise Exception("Please specify in the json a specified mode in data_mode")

    batch_size = config.batch_size
    train_data = torch.randn(
        batch_size, config.input_channels, config.img_size, config.img_size)
    train_labels = torch.ones(batch_size).long()
    # Validation reuses the training tensors.
    valid_data, valid_labels = train_data, train_labels

    self.len_train_data = train_data.size()[0]
    self.len_valid_data = valid_data.size()[0]

    # Ceiling division: number of batches needed to cover the data.
    self.train_iterations = (self.len_train_data + batch_size - 1) // batch_size
    self.valid_iterations = (self.len_valid_data + batch_size - 1) // batch_size

    self.train_loader = DataLoader(
        TensorDataset(train_data, train_labels),
        batch_size=batch_size, shuffle=True)
    self.valid_loader = DataLoader(
        TensorDataset(valid_data, valid_labels),
        batch_size=batch_size, shuffle=False)
开发者ID:moemen95,项目名称:Pytorch-Project-Template,代码行数:32,代码来源:example.py

示例15: convert_ner_features_to_dataset

# 需要导入模块: from torch.utils import data [as 别名]
# 或者: from torch.utils.data import TensorDataset [as 别名]
def convert_ner_features_to_dataset(ner_features):
    """Stack NER feature objects into a ``TensorDataset``.

    Tensor order: input_ids, input_masks, segment_ids, label_ids, seq_len.
    All tensors are ``torch.long`` except the mask, which is ``torch.uint8``.
    """
    def stack(attribute, dtype=torch.long):
        # One row per feature, taken from the named attribute.
        rows = [getattr(feature, attribute) for feature in ner_features]
        return torch.tensor(rows, dtype=dtype)

    return TensorDataset(
        stack('input_ids'),
        # Original author's note: very important to use the mask type of
        # uint8 to support advanced indexing.
        stack('input_masks', dtype=torch.uint8),
        stack('segment_ids'),
        stack('label_ids'),
        stack('seq_len'),
    )
开发者ID:dolphin-zs,项目名称:Doc2EDAG,代码行数:12,代码来源:ner_task.py


注:本文中的torch.utils.data.TensorDataset类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。