本文整理匯總了Python中torchtext.data方法的典型用法代碼示例。如果您正苦於以下問題:Python torchtext.data方法的具體用法?Python torchtext.data怎麽用?Python torchtext.data使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類torchtext
的用法示例。
在下文中一共展示了torchtext.data方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: get_fields
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def get_fields(data_type, n_src_features, n_tgt_features):
    """
    Build the dict of fields for the given source modality.

    Args:
        data_type: type of the source input.
            Options are [text|img|video|audio].
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.
    Returns:
        A dictionary whose keys are strings and whose values are the
        corresponding Field objects.
    Raises:
        ValueError: if ``data_type`` is not one of the supported options.
    """
    if data_type == 'text':
        return TextDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'img':
        return ImageDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'video':
        return VideoDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'audio':
        return AudioDataset.get_fields(n_src_features, n_tgt_features)
    else:
        # Previously an unknown data_type fell off the end and returned
        # None, which surfaced later as an opaque AttributeError in the
        # caller; fail fast with a clear message instead.
        raise ValueError("Data type not implemented: %s" % data_type)
示例2: create_batches
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def create_batches(self):
    """Build the batch plan for one pass over the data.

    Training uses the pooling trick: gather ~100 batches worth of
    examples, sort within that bucket so similar lengths batch
    together, then shuffle the resulting batches.  Evaluation keeps a
    deterministic order.
    """
    if not self.train:
        # Evaluation: fixed order, each batch sorted for tight packing.
        self.batches = [
            sorted(minibatch, key=self.sort_key)
            for minibatch in torchtext.data.batch(
                self.data(), self.batch_size, self.batch_size_fn)
        ]
        return

    def _shuffled_pool(examples, shuffler):
        # Bucket a large window of examples, sort the bucket, re-batch,
        # then emit those batches in shuffled order.
        for bucket in torchtext.data.batch(examples, self.batch_size * 100):
            rebatched = torchtext.data.batch(
                sorted(bucket, key=self.sort_key),
                self.batch_size, self.batch_size_fn)
            yield from shuffler(list(rebatched))

    self.batches = _shuffled_pool(self.data(), self.random_shuffler)
示例3: _merge_field_vocabs
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def _merge_field_vocabs(knl_field, src_field, tgt_field, vocab_size, min_freq):
    """Give the knowledge, source and target fields one shared vocabulary.

    The three fields' frequency counters are merged and a single Vocab is
    built from the combined counts, then assigned back to every field.
    """
    # in the long run, shouldn't it be possible to do this by calling
    # build_vocab with both the src and tgt data?
    special_tokens = [tgt_field.unk_token, tgt_field.pad_token,
                      tgt_field.init_token, tgt_field.eos_token]
    combined_freqs = Counter()
    for field in (knl_field, src_field, tgt_field):
        combined_freqs.update(field.vocab.freqs)
    shared_vocab = Vocab(
        combined_freqs, specials=special_tokens,
        max_size=vocab_size, min_freq=min_freq)
    for field in (knl_field, src_field, tgt_field):
        field.vocab = shared_vocab
    assert len(src_field.vocab) == len(tgt_field.vocab) == len(knl_field.vocab)
示例4: load_dataloaders
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def load_dataloaders(args):
    """Create the EN->FR training iterator plus its two vocab fields.

    Tokenizes the raw data on first use (when ./data/df.csv is absent),
    builds vocabularies from the training set, and returns
    (train_iter, FR_field, EN_field, number_of_examples).
    """
    logger.info("Preparing dataloaders...")
    fr_field = torchtext.data.Field(tokenize=dum_tokenizer, lower=True,
                                    init_token="<sos>", eos_token="<eos>",
                                    batch_first=True)
    en_field = torchtext.data.Field(tokenize=dum_tokenizer, lower=True,
                                    batch_first=True)
    train_path = os.path.join("./data/", "df.csv")
    if not os.path.isfile(train_path):
        # First run: produce the tokenized CSV before loading it.
        tokenize_data(args)
    train = torchtext.data.TabularDataset(
        train_path, format="csv", fields=[("EN", en_field), ("FR", fr_field)])
    fr_field.build_vocab(train)
    en_field.build_vocab(train)
    # NOTE(review): sort_key indexes examples with x["EN"]; torchtext
    # Examples normally use attribute access (x.EN) -- confirm the
    # dataset in use actually supports item access.
    train_iter = BucketIterator(
        train, batch_size=args.batch_size, repeat=False,
        sort_key=lambda x: (len(x["EN"]), len(x["FR"])),
        shuffle=True, train=True)
    train_length = len(train)
    logger.info("Loaded dataloaders.")
    return train_iter, fr_field, en_field, train_length
示例5: get_fields
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def get_fields(data_type, n_src_features, n_tgt_features):
    """
    Build the dict of fields for the given source modality.

    Args:
        data_type: type of the source input. Options are [text|img|audio].
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.
    Returns:
        A dictionary whose keys are strings and whose values are the
        corresponding Field objects.
    Raises:
        ValueError: if ``data_type`` is not one of the supported options.
    """
    if data_type == 'text':
        return TextDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'img':
        return ImageDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'audio':
        return AudioDataset.get_fields(n_src_features, n_tgt_features)
    else:
        # Previously an unknown data_type fell off the end and returned
        # None, surfacing later as an opaque AttributeError; fail fast.
        raise ValueError("Data type not implemented: %s" % data_type)
示例6: _old_style_vocab
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def _old_style_vocab(vocab):
"""Detect old-style vocabs (``List[Tuple[str, torchtext.data.Vocab]]``).
Args:
vocab: some object loaded from a *.vocab.pt file
Returns:
Whether ``vocab`` is a list of pairs where the second object
is a :class:`torchtext.vocab.Vocab` object.
This exists because previously only the vocab objects from the fields
were saved directly, not the fields themselves, and the fields needed to
be reconstructed at training and translation time.
"""
return isinstance(vocab, list) and \
any(isinstance(v[1], Vocab) for v in vocab)
示例7: _merge_field_vocabs
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def _merge_field_vocabs(src_field, tgt_field, vocab_size, min_freq,
                        vocab_size_multiple):
    """Give the source and target fields one shared vocabulary.

    Frequency counters of both fields are merged into a single Vocab,
    optionally padded so its size is a multiple of
    ``vocab_size_multiple`` (useful for hardware-friendly embedding
    sizes), then assigned back to both fields.
    """
    # in the long run, shouldn't it be possible to do this by calling
    # build_vocab with both the src and tgt data?
    special_tokens = [tgt_field.unk_token, tgt_field.pad_token,
                      tgt_field.init_token, tgt_field.eos_token]
    combined_freqs = Counter()
    combined_freqs.update(src_field.vocab.freqs)
    combined_freqs.update(tgt_field.vocab.freqs)
    shared_vocab = Vocab(
        combined_freqs, specials=special_tokens,
        max_size=vocab_size, min_freq=min_freq)
    if vocab_size_multiple > 1:
        _pad_vocab_to_multiple(shared_vocab, vocab_size_multiple)
    src_field.vocab = shared_vocab
    tgt_field.vocab = shared_vocab
    assert len(src_field.vocab) == len(tgt_field.vocab)
示例8: create_batches
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def create_batches(self):
    """Build the batch plan for one pass over the data.

    Training pools ~100 batches of examples at a time, sorts within the
    pool so similar lengths land together, re-batches with
    ``batch_iter`` and shuffles the result; evaluation is deterministic.
    """
    if not self.train:
        # Evaluation: fixed order, each batch sorted for tight packing.
        self.batches = []
        for minibatch in batch_iter(
                self.data(),
                self.batch_size,
                batch_size_fn=self.batch_size_fn,
                batch_size_multiple=self.batch_size_multiple):
            self.batches.append(sorted(minibatch, key=self.sort_key))
        return

    def _shuffled_pool(examples, shuffler):
        # Bucket a large window, sort the bucket, re-batch, then emit
        # the batches in shuffled order.
        for bucket in torchtext.data.batch(examples, self.batch_size * 100):
            rebatched = batch_iter(
                sorted(bucket, key=self.sort_key),
                self.batch_size,
                batch_size_fn=self.batch_size_fn,
                batch_size_multiple=self.batch_size_multiple)
            yield from shuffler(list(rebatched))

    self.batches = _shuffled_pool(self.data(), self.random_shuffler)
示例9: get_fields
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def get_fields(data_type, n_src_features, n_tgt_features):
    """
    Build the dict of fields for the given source modality.

    Args:
        data_type: type of the source input.
            Options are [text|img|audio|gcn].
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.
    Returns:
        A dictionary whose keys are strings and whose values are the
        corresponding Field objects.
    Raises:
        ValueError: if ``data_type`` is not one of the supported options.
    """
    if data_type == 'text':
        return TextDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'img':
        return ImageDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'audio':
        return AudioDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'gcn':
        return GCNDataset.get_fields(n_src_features, n_tgt_features)
    else:
        # Previously an unknown data_type fell off the end and returned
        # None, surfacing later as an opaque AttributeError; fail fast.
        raise ValueError("Data type not implemented: %s" % data_type)
示例10: get_morph
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def get_morph(batch):
    """Extract morphological feature indices and a validity mask from a batch.

    Reshapes ``batch.morph`` into ``[batch_size x seq_len x 6]`` (six
    morphological features per token) and builds a float mask that is 1
    wherever the index differs from 1 and 0 where it equals 1
    (presumably 1 is the padding index -- confirm against the vocab).
    Returns ``(morph_index, morph_mask)``.
    """
    # The opt.gpuid command-line value is not visible from here, so
    # infer device placement from the batch tensors themselves.
    on_gpu = batch.src[0].is_cuda
    seq_len = batch.src[0].data.size()[0]
    n_batch = batch.src[0].data.size()[1]
    # [seq_len x 6 x batch] -> [batch x seq_len x 6]
    morph_index = batch.morph.view((seq_len, 6, n_batch))
    morph_index = morph_index.permute(2, 0, 1).contiguous()
    morph_mask = torch.lt(torch.eq(morph_index, 1), 1).float()
    if on_gpu:
        morph_index = morph_index.cuda()
        morph_mask = morph_mask.cuda()
    return morph_index, morph_mask
示例11: make_features
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def make_features(batch, side):
    """
    Stack the main data tensor with its optional feature tensors.

    Args:
        batch (Variable): a batch of source or target data.
        side (str): for source or for target.
    Returns:
        A sequence of src/tgt tensors with optional feature tensors
        of size (len x batch).
    """
    assert side in ['src', 'tgt']
    if isinstance(batch.__dict__[side], tuple):
        data = batch.__dict__[side][0]
    else:
        data = batch.__dict__[side]
    feat_start = side + "_feat_"
    # BUG FIX: the original sorted the feature *tensors* themselves
    # (`sorted(batch.__dict__[k] for ...)`); comparing multi-element
    # tensors with `<` is ambiguous and raises as soon as there are two
    # features. Sort the key names instead so features stack in a
    # deterministic order (matching the other make_features variant).
    keys = sorted(k for k in batch.__dict__ if feat_start in k)
    features = [batch.__dict__[k] for k in keys]
    levels = [data] + features
    return torch.cat([level.unsqueeze(2) for level in levels], 2)
示例12: collapse_copy_scores
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def collapse_copy_scores(self, scores, batch, tgt_vocab):
    """Given scores from an expanded dictionary corresponding to a
    batch, sum together copies, preferring the dictionary word when a
    token is ambiguous (present in both vocabularies).
    Mutates and returns ``scores``.
    """
    offset = len(tgt_vocab)
    for b in range(batch.batch_size):
        example_vocab = self.src_vocabs[batch.indices.data[b]]
        # Position 0 is skipped (reserved token); for every other
        # copyable token that also exists in the target vocab, fold its
        # copy score into the target slot and zero out the copy slot.
        for copy_pos in range(1, len(example_vocab)):
            token = example_vocab.itos[copy_pos]
            tgt_pos = tgt_vocab.stoi[token]
            if tgt_pos != 0:
                scores[:, b, tgt_pos] += scores[:, b, offset + copy_pos]
                scores[:, b, offset + copy_pos].fill_(1e-20)
    return scores
示例13: predict
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def predict(test_mode, dataset_iter):
    """Score every batch in *dataset_iter* with the global model and log
    the resulting MAP and MRR metrics.

    Collects question ids, model scores and gold labels across the whole
    iterator, then evaluates them in one shot with ``get_map_mrr``.
    """
    model.eval()
    dataset_iter.init_epoch()
    all_qids = []
    all_scores = []
    all_labels = []
    for dev_batch in dataset_iter:
        batch_qids = np.transpose(dev_batch.id.cpu().data.numpy())
        batch_labels = np.transpose(dev_batch.label.cpu().data.numpy())
        conv_out = model.convModel(dev_batch)
        batch_scores = model.linearLayer(conv_out).cpu().data.numpy().reshape(-1)
        all_qids.extend(batch_qids.tolist())
        all_scores.extend(batch_scores.tolist())
        all_labels.extend(batch_labels.tolist())
    dev_map, dev_mrr = get_map_mrr(all_qids, all_scores, all_labels)
    logger.info("{} {}".format(dev_map, dev_mrr))
# Run the model on the dev set
示例14: create_batches
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def create_batches(self):
    """Build the batch plan for one pass over the data.

    Three modes: raw-example training (one example per batch, no
    pooling), pooled/shuffled training via the module-level ``_pool``,
    and deterministic evaluation batching.
    """
    if not self.train:
        # Evaluation: fixed order, each batch sorted for tight packing.
        self.batches = []
        for minibatch in batch_iter(
                self.data(),
                self.batch_size,
                batch_size_fn=self.batch_size_fn,
                batch_size_multiple=self.batch_size_multiple):
            self.batches.append(sorted(minibatch, key=self.sort_key))
    elif self.yield_raw_example:
        # Raw-example mode: emit examples one at a time, unpooled.
        self.batches = batch_iter(
            self.data(),
            1,
            batch_size_fn=None,
            batch_size_multiple=1)
    else:
        self.batches = _pool(
            self.data(),
            self.batch_size,
            self.batch_size_fn,
            self.batch_size_multiple,
            self.sort_key,
            self.random_shuffler,
            self.pool_factor)
示例15: make_features
# 需要導入模塊: import torchtext [as 別名]
# 或者: from torchtext import data [as 別名]
def make_features(batch, side, data_type='text'):
    """
    Stack the main data tensor with its optional feature tensors.

    Args:
        batch (Variable): a batch of source or target data.
        side (str): for source or for target.
        data_type (str): type of the source input. Options are [text|img].
    Returns:
        A sequence of src/tgt tensors with optional feature tensors
        of size (len x batch).
    """
    assert side in ['src', 'tgt']
    raw = batch.__dict__[side]
    # Some fields store (data, lengths) tuples; keep only the data.
    data = raw[0] if isinstance(raw, tuple) else raw
    prefix = side + "_feat_"
    # Sort feature names so the feature dimension order is deterministic.
    feat_names = sorted([k for k in batch.__dict__ if prefix in k])
    levels = [data] + [batch.__dict__[name] for name in feat_names]
    if data_type != 'text':
        # Non-text modalities carry no stacked features.
        return levels[0]
    return torch.cat([lvl.unsqueeze(2) for lvl in levels], 2)