This article collects typical code examples of Python's torch.utils.data.TensorDataset class. If you have been wondering what data.TensorDataset is for, how exactly to use it, or what real-world usage looks like, the curated examples below should help. You can also explore the torch.utils.data module for related usage examples.
The following 15 code examples of data.TensorDataset are sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code samples.
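Before diving into the examples, here is a minimal sketch of the basic API (tensor shapes are illustrative): TensorDataset wraps one or more tensors that share the same first dimension, and indexing it returns a tuple with one slice per wrapped tensor.

import torch
from torch.utils.data import TensorDataset, DataLoader

x = torch.randn(100, 10)           # 100 samples, 10 features each
y = torch.randint(0, 2, (100,))    # 100 binary labels
dataset = TensorDataset(x, y)      # every tensor must share size(0)
sample_x, sample_y = dataset[0]    # indexing yields one slice per tensor
loader = DataLoader(dataset, batch_size=16, shuffle=True)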
Example 1: feature_to_dataset
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def feature_to_dataset(features):
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    all_start_pos = torch.tensor([f.start_pos for f in features], dtype=torch.long)
    all_end_pos = torch.tensor([f.end_pos for f in features], dtype=torch.long)
    all_dep_ids = torch.tensor([f.dep_ids for f in features], dtype=torch.long)
    tensors = [all_input_ids, all_input_mask, all_segment_ids, all_start_pos, all_end_pos, all_dep_ids]
    if hasattr(features[0], 'pos_ids'):
        all_pos_ids = torch.tensor([f.pos_ids for f in features], dtype=torch.long)
        tensors.append(all_pos_ids)
    dataset = TensorDataset(*tensors)
    # Input tensors, in order:
    #   all_input_ids,
    #   all_input_mask,
    #   all_segment_ids,
    #   all_start_pos,
    #   all_end_pos,
    #   all_dep_ids,
    #   all_pos_ids  (if present)
    return dataset
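For reference, a minimal sketch of consuming the resulting dataset; batches unpack in the tensor order listed above (the DataLoader settings are illustrative assumptions):

from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=32, shuffle=True)
for batch in loader:
    input_ids, input_mask, segment_ids, start_pos, end_pos, dep_ids = batch[:6]
    # batch[6] holds pos_ids when the features carry them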
Example 2: _unpack_batch
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def _unpack_batch(self, batch: TensorDataset) -> Dict:
    """
    Unpack a batch into the encoder inputs, the word mask, the sentence lengths,
    the dep ids, and any other input information.
    e.g.:
        dataset = TensorDataset(all_input_ids, all_input_mask,
                                all_segment_ids, all_start_pos,
                                all_end_pos, all_dep_ids,
                                all_pos_ids)
    Args:
        batch: a single input batch, a TensorDataset (or a torchtext dataset)
            whose elements can be retrieved by index
    Returns:
        A dict: [1] the inputs, itself a dict; [2] the word mask; [3] the sentence
        lengths, a Python list; [4] the dep ids; and possibly other inputs,
        depending on the model
    """
    raise NotImplementedError('must implement in sub class')
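As an illustration only, a hypothetical subclass could unpack the seven-tensor layout from the docstring roughly as follows; the key names and the mask/length derivation are assumptions, not part of the original code:

def _unpack_batch(self, batch):
    # assumed tensor order, matching the docstring example
    input_ids, input_mask, segment_ids, start_pos, end_pos, dep_ids, pos_ids = batch
    inputs = {'input_ids': input_ids,
              'attention_mask': input_mask,
              'token_type_ids': segment_ids}
    word_mask = input_mask.bool()
    sent_lengths = input_mask.sum(dim=1).tolist()
    return {'inputs': inputs, 'word_mask': word_mask,
            'sent_lengths': sent_lengths, 'dep_ids': dep_ids,
            'pos_ids': pos_ids, 'start_pos': start_pos, 'end_pos': end_pos}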
Example 3: with_test_data
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def with_test_data(self, x, batch_size=1, num_workers=1, steps=None):
"""Use this trial with the given test data. Returns self so that methods can be chained for convenience.
Example: ::
# Simple trial that runs for 10 test iterations on some random data
>>> from torchbearer import Trial
>>> data = torch.rand(10, 1)
>>> trial = Trial(None).with_test_data(data).for_test_steps(10).run(1)
Args:
x (torch.Tensor): The test x data to use during calls to :meth:`.predict`
batch_size (int): The size of each batch to sample from the data
num_workers (int): Number of worker threads to use in the data loader
steps (int): The number of steps per epoch to take when using this data
Returns:
Trial: self
"""
dataset = TensorDataset(x)
dataloader = DataLoader(dataset, batch_size, num_workers=num_workers)
self.with_test_generator(dataloader, steps=steps)
return self
Example 4: test_callbacks
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def test_callbacks(self):
from torch.utils.data import TensorDataset
traingen = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))
valgen = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))
testgen = TensorDataset(torch.rand(10, 1, 3), torch.rand(10, 1))
model = torch.nn.Linear(3, 1)
optim = torch.optim.SGD(model.parameters(), lr=0.01)
cbs = []
cbs.extend([c.EarlyStopping(), c.GradientClipping(10, model.parameters()), c.Best('test.pt'),
c.MostRecent('test.pt'), c.ReduceLROnPlateau(), c.CosineAnnealingLR(0.1, 0.01),
c.ExponentialLR(1), c.Interval('test.pt'), c.CSVLogger('test_csv.pt'),
c.L1WeightDecay(), c.L2WeightDecay(), c.TerminateOnNaN(monitor='fail_metric')])
trial = torchbearer.Trial(model, optim, torch.nn.MSELoss(), metrics=['loss'], callbacks=cbs)
trial = trial.with_generators(traingen, valgen, testgen)
trial.run(2)
trial.predict()
trial.evaluate(data_key=torchbearer.TEST_DATA)
trial.evaluate()
import os
os.remove('test.pt')
os.remove('test_csv.pt')
Example 5: load_data
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def load_data(hdf5_file, ndata, batch_size, only_input=True, return_stats=False):
    with h5py.File(hdf5_file, 'r') as f:
        x_data = f['input'][:ndata]
        print(f'x_data: {x_data.shape}')
        if not only_input:
            y_data = f['output'][:ndata]
            print(f'y_data: {y_data.shape}')
    stats = {}
    if return_stats:
        # total variation of the outputs around their mean (requires only_input=False)
        y_variation = ((y_data - y_data.mean(0, keepdims=True)) ** 2).sum(
            axis=(0, 2, 3))
        stats['y_variation'] = y_variation
    data_tuple = (torch.FloatTensor(x_data), ) if only_input else (
        torch.FloatTensor(x_data), torch.FloatTensor(y_data))
    data_loader = DataLoader(TensorDataset(*data_tuple),
                             batch_size=batch_size, shuffle=True, drop_last=True)
    print(f'Loaded dataset: {hdf5_file}')
    return data_loader, stats
Example 6: train
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def train(self, num_epochs=3, batch_size=32):
"""
Trains the LR model.
:param num_epochs: (int) number of epochs.
"""
batch_losses = []
# We train only on cells that do not have their initial value as NULL.
X_train, Y_train = self._X.index_select(0, self._train_idx), self._Y.index_select(0, self._train_idx)
torch_ds = TensorDataset(X_train, Y_train)
# Main training loop.
for epoch_idx in range(1, num_epochs+1):
logging.debug("Logistic: epoch %d", epoch_idx)
batch_cnt = 0
for batch_X, batch_Y in tqdm(DataLoader(torch_ds, batch_size=batch_size)):
batch_pred = self.forward(batch_X)
batch_loss = self._loss(batch_pred, batch_Y.reshape(-1,1))
batch_losses.append(float(batch_loss))
self.zero_grad()
batch_loss.backward()
self._optimizer.step()
batch_cnt += 1
logging.debug('Logistic: average batch loss: %f', sum(batch_losses[-1 * batch_cnt:]) / batch_cnt)
return batch_losses
Example 7: create_dataset
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def create_dataset(self, features, is_sorted=False):
# Convert to Tensors and build dataset
if is_sorted:
logger.info("sorted data by th length of input")
features = sorted(
features, key=lambda x: x.input_len, reverse=True)
all_input_ids = torch.tensor(
[f.input_ids for f in features], dtype=torch.long)
all_input_mask = torch.tensor(
[f.input_mask for f in features], dtype=torch.long)
all_segment_ids = torch.tensor(
[f.segment_ids for f in features], dtype=torch.long)
all_label_ids = torch.tensor(
[f.label_id for f in features], dtype=torch.long)
dataset = TensorDataset(
all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
return dataset
Example 8: get_tensor_dataset
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def get_tensor_dataset(numpy_array):
"""
Takes a numpy array of indices, converts it into a Torch tensor,
splits it into inputs and targets (shifted by one position),
and wraps them in a TensorDataset
Args:
numpy_array: to be converted
Returns: a TensorDataset
"""
tensor = torch.from_numpy(numpy_array).long()
inp = tensor[:, :-1]
target = tensor[:, 1:]
return TensorDataset(inp, target)
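A quick illustrative check of the one-step shift (the array values are made up):

import numpy as np

seqs = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
ds = get_tensor_dataset(seqs)
inp, target = ds[0]
print(inp, target)  # tensor([1, 2, 3]) tensor([2, 3, 4]): next-token pairs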
Example 9: generate_images
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def generate_images(self, z_batch, intervention=None):
'''
Makes some images.
'''
with torch.no_grad(), self.modellock:
batch_size = 10
self.apply_intervention(intervention)
test_loader = DataLoader(TensorDataset(z_batch[:,:,None,None]),
batch_size=batch_size,
pin_memory=('cuda' == self.device.type
and z_batch.device.type == 'cpu'))
result_img = torch.zeros(
*((len(z_batch), 3) + self.model.output_shape[2:]),
dtype=torch.uint8, device=self.device)
for batch_num, [batch_z,] in enumerate(test_loader):
batch_z = batch_z.to(self.device)
out = self.model(batch_z)
result_img[batch_num*batch_size:
batch_num*batch_size+len(batch_z)] = (
(((out + 1) / 2) * 255).clamp(0, 255).byte())
return result_img
Example 10: feature_maps
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def feature_maps(self, z_batch, intervention=None, layers=None,
quantiles=True):
feature_map = defaultdict(list)
with torch.no_grad(), self.modellock:
batch_size = 10
self.apply_intervention(intervention)
test_loader = DataLoader(
TensorDataset(z_batch[:,:,None,None]),
batch_size=batch_size,
pin_memory=('cuda' == self.device.type
and z_batch.device.type == 'cpu'))
processed = 0
for batch_num, [batch_z] in enumerate(test_loader):
batch_z = batch_z.to(self.device)
# Run model but disregard output
self.model(batch_z)
processing = batch_z.shape[0]
for layer, feature in self.model.retained_features().items():
for single_featuremap in feature:
if quantiles:
feature_map[layer].append(self.quantiles[layer]
.normalize(single_featuremap))
else:
feature_map[layer].append(single_featuremap)
return feature_map
Example 11: get_folds
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def get_folds(self, folds):
indices = np.hstack([self.folds[f] for f in folds]).reshape(-1)
if self.__class__.__bases__[0].__name__ == 'TensorDataset':
indices = torch.from_numpy(indices).to(opt.device)
# if opt.use_cuda:
# indices = indices.cuda()
X = torch.index_select(self.tensors[0], 0, indices)
Y = torch.index_select(self.tensors[1], 0, indices)
return TensorDataset(X, Y)
else:
X = [self.X[i] for i in indices]
indices = torch.from_numpy(indices).to(opt.device)
# if opt.use_cuda:
# indices = indices.cuda()
Y = torch.index_select(self.Y, 0, indices)
return AmazonDataset(X, Y, self.max_seq_len)
Example 12: get_msda_amazon_datasets
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def get_msda_amazon_datasets(data_file, domain, kfold, feature_num):
print(f'Loading mSDA Preprocessed Multi-Domain Amazon data for {domain} Domain')
dataset = pickle.load(open(data_file, 'rb'))[domain]
lx, ly = dataset['labeled']
if feature_num > 0:
lx = lx[:, : feature_num]
lx = torch.from_numpy(lx.toarray()).float().to(opt.device)
ly = torch.from_numpy(ly).long().to(opt.device)
print(f'{domain} Domain has {len(ly)} labeled instances.')
# if opt.use_cuda:
# lx, ly = lx.cuda(), ly.cuda()
labeled_set = FoldedDataset(TensorDataset, kfold, lx, ly)
ux, uy = dataset['unlabeled']
if feature_num > 0:
ux = ux[:, : feature_num]
ux = torch.from_numpy(ux.toarray()).float().to(opt.device)
uy = torch.from_numpy(uy).long().to(opt.device)
print(f'{domain} Domain has {len(uy)} unlabeled instances.')
# if opt.use_cuda:
# ux, uy = ux.cuda(), uy.cuda()
unlabeled_set = TensorDataset(ux, uy)
return labeled_set, unlabeled_set
Example 13: read_data
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def read_data(max_seq_len):
# 'in' and 'out' are short for input and output
in_tokens, out_tokens, in_seqs, out_seqs = [], [], [], []
with io.open('./data/translation/fr-en-small.txt') as f:
lines = f.readlines()
for line in lines:
in_seq, out_seq = line.rstrip().split('\t')
in_seq_tokens, out_seq_tokens = in_seq.split(' '), out_seq.split(' ')
if max(len(in_seq_tokens), len(out_seq_tokens)) > max_seq_len - 1:
continue  # skip this sample if it would exceed max_seq_len after appending EOS
process_one_seq(in_seq_tokens, in_tokens, in_seqs, max_seq_len)
process_one_seq(out_seq_tokens, out_tokens, out_seqs, max_seq_len)
in_vocab, in_data = build_data(in_tokens, in_seqs)
out_vocab, out_data = build_data(out_tokens, out_seqs)
return in_vocab, out_vocab, Data.TensorDataset(in_data, out_data)
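A hypothetical usage sketch, assuming Data is the torch.utils.data alias from the import note above and that the data file exists at the hard-coded path:

in_vocab, out_vocab, dataset = read_data(max_seq_len=7)
data_iter = Data.DataLoader(dataset, batch_size=2, shuffle=True)
for in_batch, out_batch in data_iter:
    print(in_batch.shape, out_batch.shape)  # each (2, 7): padded token-id sequences
    break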
Example 14: __init__
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def __init__(self, config):
"""
:param config:
"""
self.config = config
if config.data_mode == "imgs":
raise NotImplementedError("This mode is not implemented YET")
elif config.data_mode == "numpy":
raise NotImplementedError("This mode is not implemented YET")
elif config.data_mode == "random":
train_data = torch.randn(self.config.batch_size, self.config.input_channels, self.config.img_size, self.config.img_size)
train_labels = torch.ones(self.config.batch_size).long()
valid_data = train_data
valid_labels = train_labels
self.len_train_data = train_data.size()[0]
self.len_valid_data = valid_data.size()[0]
self.train_iterations = (self.len_train_data + self.config.batch_size - 1) // self.config.batch_size
self.valid_iterations = (self.len_valid_data + self.config.batch_size - 1) // self.config.batch_size
train = TensorDataset(train_data, train_labels)
valid = TensorDataset(valid_data, valid_labels)
self.train_loader = DataLoader(train, batch_size=config.batch_size, shuffle=True)
self.valid_loader = DataLoader(valid, batch_size=config.batch_size, shuffle=False)
else:
raise Exception("Please specify a valid data_mode in the JSON config")
Example 15: convert_ner_features_to_dataset
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import TensorDataset [as alias]
def convert_ner_features_to_dataset(ner_features):
all_input_ids = torch.tensor([f.input_ids for f in ner_features], dtype=torch.long)
# very important to use the mask type of uint8 to support advanced indexing
all_input_masks = torch.tensor([f.input_masks for f in ner_features], dtype=torch.uint8)
all_segment_ids = torch.tensor([f.segment_ids for f in ner_features], dtype=torch.long)
all_label_ids = torch.tensor([f.label_ids for f in ner_features], dtype=torch.long)
all_seq_len = torch.tensor([f.seq_len for f in ner_features], dtype=torch.long)
ner_tensor_dataset = TensorDataset(all_input_ids, all_input_masks, all_segment_ids, all_label_ids, all_seq_len)
return ner_tensor_dataset
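To see why the mask dtype matters, a small illustrative snippet (shapes are made up): a byte/bool mask used as an index selects only the unmasked positions, which a long tensor cannot do. Recent PyTorch versions prefer bool masks, so the sketch converts explicitly:

import torch

logits = torch.randn(2, 5, 7)  # (batch, seq_len, num_labels)
masks = torch.tensor([[1, 1, 1, 0, 0],
                      [1, 1, 0, 0, 0]], dtype=torch.uint8)
active = logits[masks.bool()]  # gathers the 5 unmasked token positions
print(active.shape)            # torch.Size([5, 7])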