Python torchtext.data方法代码示例

本文整理汇总了Python中torchtext.data方法的典型用法代码示例。如果您正苦于以下问题：Python torchtext.data方法的具体用法？Python torchtext.data怎么用？Python torchtext.data使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类torchtext的用法示例。

在下文中一共展示了torchtext.data方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_fields

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def get_fields(data_type, n_src_features, n_tgt_features):
    """
    Args:
        data_type: type of the source input. Options are [text|img|audio].
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.

    Returns:
        A dictionary whose keys are strings and whose values are the
        corresponding Field objects.
    """
    if data_type == 'text':
        return TextDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'img':
        return ImageDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'video':
        return VideoDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'audio':
        return AudioDataset.get_fields(n_src_features, n_tgt_features)

开发者ID:xiadingZ，项目名称:video-caption-openNMT.pytorch，代码行数:23，代码来源:IO.py

示例2: create_batches

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def create_batches(self):
        """ Create batches """
        if self.train:
            def _pool(data, random_shuffler):
                for p in torchtext.data.batch(data, self.batch_size * 100):
                    p_batch = torchtext.data.batch(
                        sorted(p, key=self.sort_key),
                        self.batch_size, self.batch_size_fn)
                    for b in random_shuffler(list(p_batch)):
                        yield b

            self.batches = _pool(self.data(), self.random_shuffler)
        else:
            self.batches = []
            for b in torchtext.data.batch(self.data(), self.batch_size,
                                          self.batch_size_fn):
                self.batches.append(sorted(b, key=self.sort_key))

开发者ID:lizekang，项目名称:ITDD，代码行数:19，代码来源:inputter.py

示例3: _merge_field_vocabs

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def _merge_field_vocabs(knl_field, src_field, tgt_field, vocab_size, min_freq):
    # in the long run, shouldn't it be possible to do this by calling
    # build_vocab with both the src and tgt data?
    specials = [tgt_field.unk_token, tgt_field.pad_token,
                tgt_field.init_token, tgt_field.eos_token]
    merged = sum(
        [knl_field.vocab.freqs, src_field.vocab.freqs, tgt_field.vocab.freqs], Counter()
    )
    merged_vocab = Vocab(
        merged, specials=specials,
        max_size=vocab_size, min_freq=min_freq
    )
    knl_field.vocab = merged_vocab
    src_field.vocab = merged_vocab
    tgt_field.vocab = merged_vocab
    assert len(src_field.vocab) == len(tgt_field.vocab) == len(knl_field.vocab)

开发者ID:lizekang，项目名称:ITDD，代码行数:18，代码来源:inputter.py

示例4: load_dataloaders

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def load_dataloaders(args):
    logger.info("Preparing dataloaders...")
    FR = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, init_token="<sos>", eos_token="<eos>",\
                              batch_first=True)
    EN = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, batch_first=True)
    
    train_path = os.path.join("./data/", "df.csv")
    if not os.path.isfile(train_path):
        tokenize_data(args)
    train = torchtext.data.TabularDataset(train_path, format="csv", \
                                             fields=[("EN", EN), ("FR", FR)])
    FR.build_vocab(train)
    EN.build_vocab(train)
    train_iter = BucketIterator(train, batch_size=args.batch_size, repeat=False, sort_key=lambda x: (len(x["EN"]), len(x["FR"])),\
                                shuffle=True, train=True)
    train_length = len(train)
    logger.info("Loaded dataloaders.")
    return train_iter, FR, EN, train_length

开发者ID:plkmo，项目名称:NLP_Toolkit，代码行数:20，代码来源:preprocessing_funcs.py

示例5: get_fields

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def get_fields(data_type, n_src_features, n_tgt_features):
    """
    Args:
        data_type: type of the source input. Options are [text|img|audio].
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.

    Returns:
        A dictionary whose keys are strings and whose values are the
        corresponding Field objects.
    """
    if data_type == 'text':
        return TextDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'img':
        return ImageDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'audio':
        return AudioDataset.get_fields(n_src_features, n_tgt_features)

开发者ID:abaheti95，项目名称:DC-NeuralConversation，代码行数:21，代码来源:IO.py

示例6: _old_style_vocab

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def _old_style_vocab(vocab):
    """Detect old-style vocabs (``List[Tuple[str, torchtext.data.Vocab]]``).

    Args:
        vocab: some object loaded from a *.vocab.pt file

    Returns:
        Whether ``vocab`` is a list of pairs where the second object
        is a :class:`torchtext.vocab.Vocab` object.

    This exists because previously only the vocab objects from the fields
    were saved directly, not the fields themselves, and the fields needed to
    be reconstructed at training and translation time.
    """

    return isinstance(vocab, list) and \
        any(isinstance(v[1], Vocab) for v in vocab)

开发者ID:harvardnlp，项目名称:encoder-agnostic-adaptation，代码行数:19，代码来源:inputter.py

示例7: _merge_field_vocabs

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def _merge_field_vocabs(src_field, tgt_field, vocab_size, min_freq,
                        vocab_size_multiple):
    # in the long run, shouldn't it be possible to do this by calling
    # build_vocab with both the src and tgt data?
    specials = [tgt_field.unk_token, tgt_field.pad_token,
                tgt_field.init_token, tgt_field.eos_token]
    merged = sum(
        [src_field.vocab.freqs, tgt_field.vocab.freqs], Counter()
    )
    merged_vocab = Vocab(
        merged, specials=specials,
        max_size=vocab_size, min_freq=min_freq
    )
    if vocab_size_multiple > 1:
        _pad_vocab_to_multiple(merged_vocab, vocab_size_multiple)
    src_field.vocab = merged_vocab
    tgt_field.vocab = merged_vocab
    assert len(src_field.vocab) == len(tgt_field.vocab)

开发者ID:harvardnlp，项目名称:encoder-agnostic-adaptation，代码行数:20，代码来源:inputter.py

示例8: create_batches

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def create_batches(self):
        if self.train:
            def _pool(data, random_shuffler):
                for p in torchtext.data.batch(data, self.batch_size * 100):
                    p_batch = batch_iter(
                        sorted(p, key=self.sort_key),
                        self.batch_size,
                        batch_size_fn=self.batch_size_fn,
                        batch_size_multiple=self.batch_size_multiple)
                    for b in random_shuffler(list(p_batch)):
                        yield b

            self.batches = _pool(self.data(), self.random_shuffler)
        else:
            self.batches = []
            for b in batch_iter(
                    self.data(),
                    self.batch_size,
                    batch_size_fn=self.batch_size_fn,
                    batch_size_multiple=self.batch_size_multiple):
                self.batches.append(sorted(b, key=self.sort_key))

开发者ID:harvardnlp，项目名称:encoder-agnostic-adaptation，代码行数:23，代码来源:inputter.py

示例9: get_fields

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def get_fields(data_type, n_src_features, n_tgt_features):
    """
    Args:
        data_type: type of the source input. Options are [text|img|audio].
        n_src_features: the number of source features to
            create `torchtext.data.Field` for.
        n_tgt_features: the number of target features to
            create `torchtext.data.Field` for.

    Returns:
        A dictionary whose keys are strings and whose values are the
        corresponding Field objects.
    """
    if data_type == 'text':
        return TextDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'img':
        return ImageDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'audio':
        return AudioDataset.get_fields(n_src_features, n_tgt_features)
    elif data_type == 'gcn':
        return GCNDataset.get_fields(n_src_features, n_tgt_features)

开发者ID:diegma，项目名称:graph-2-text，代码行数:23，代码来源:IO.py

示例10: get_morph

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def get_morph(batch):

    #Not very nice but we do not have access to value comming from opt.gpuid command line parameter here.
    use_cuda = batch.src[0].is_cuda

    # morph_index = batch.morph.data.transpose(0, 1)  # [ seqLen x batch_size ] ==> [ batch_size x seqLen ]

    # morph_voc = batch.dataset.fields['morph'].vocab.stoi

    morph_index = batch.morph.view((batch.src[0].data.size()[0], 6, batch.src[0].data.size()[1]))
    morph_index = morph_index.permute(2, 0, 1).contiguous()



    # morph_index = torch.LongTensor(morph_index)
    morph_mask = torch.lt(torch.eq(morph_index, 1), 1).float()
    # morph_index = autograd.Variable(morph_index)
    # morph_mask = autograd.Variable(torch.FloatTensor(morph_mask), requires_grad=False)
    if use_cuda:
        morph_index = morph_index.cuda()
        morph_mask = morph_mask.cuda()

    return morph_index, morph_mask

开发者ID:diegma，项目名称:graph-2-text，代码行数:25，代码来源:IO.py

示例11: make_features

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def make_features(batch, side):
    """
    Args:
        batch (Variable): a batch of source or target data.
        side (str): for source or for target.
    Returns:
        A sequence of src/tgt tensors with optional feature tensors
        of size (len x batch).
    """
    assert side in ['src', 'tgt']
    if isinstance(batch.__dict__[side], tuple):
        data = batch.__dict__[side][0]
    else:
        data = batch.__dict__[side]
    feat_start = side + "_feat_"
    features = sorted(batch.__dict__[k]
                      for k in batch.__dict__ if feat_start in k)
    levels = [data] + features
    return torch.cat([level.unsqueeze(2) for level in levels], 2)

开发者ID:antspy，项目名称:quantized_distillation，代码行数:21，代码来源:IO.py

示例12: collapse_copy_scores

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def collapse_copy_scores(self, scores, batch, tgt_vocab):
        """Given scores from an expanded dictionary
        corresponeding to a batch, sums together copies,
        with a dictionary word when it is ambigious.
        """
        offset = len(tgt_vocab)
        for b in range(batch.batch_size):
            index = batch.indices.data[b]
            src_vocab = self.src_vocabs[index]
            for i in range(1, len(src_vocab)):
                sw = src_vocab.itos[i]
                ti = tgt_vocab.stoi[sw]
                if ti != 0:
                    scores[:, b, ti] += scores[:, b, offset + i]
                    scores[:, b, offset + i].fill_(1e-20)
        return scores

开发者ID:antspy，项目名称:quantized_distillation，代码行数:18，代码来源:IO.py

示例13: predict

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def predict(test_mode, dataset_iter):
    model.eval()
    dataset_iter.init_epoch()
    qids = []
    predictions = []
    labels = []
    for dev_batch_idx, dev_batch in enumerate(dataset_iter):
        qid_array = np.transpose(dev_batch.id.cpu().data.numpy())
        true_label_array = np.transpose(dev_batch.label.cpu().data.numpy())
        output = model.convModel(dev_batch)
        scores = model.linearLayer(output)
        score_array = scores.cpu().data.numpy().reshape(-1)
        qids.extend(qid_array.tolist())
        predictions.extend(score_array.tolist())
        labels.extend(true_label_array.tolist())

    dev_map, dev_mrr = get_map_mrr(qids, predictions, labels)

    logger.info("{} {}".format(dev_map, dev_mrr))


# Run the model on the dev set

开发者ID:castorini，项目名称:castor，代码行数:24，代码来源:main.py

示例14: create_batches

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def create_batches(self):
        if self.train:
            if self.yield_raw_example:
                self.batches = batch_iter(
                    self.data(),
                    1,
                    batch_size_fn=None,
                    batch_size_multiple=1)
            else:
                self.batches = _pool(
                    self.data(),
                    self.batch_size,
                    self.batch_size_fn,
                    self.batch_size_multiple,
                    self.sort_key,
                    self.random_shuffler,
                    self.pool_factor)
        else:
            self.batches = []
            for b in batch_iter(
                    self.data(),
                    self.batch_size,
                    batch_size_fn=self.batch_size_fn,
                    batch_size_multiple=self.batch_size_multiple):
                self.batches.append(sorted(b, key=self.sort_key))

开发者ID:OpenNMT，项目名称:OpenNMT-py，代码行数:27，代码来源:inputter.py

示例15: make_features

# 需要导入模块: import torchtext [as 别名]
# 或者: from torchtext import data [as 别名]
def make_features(batch, side, data_type='text'):
    """
    Args:
        batch (Variable): a batch of source or target data.
        side (str): for source or for target.
        data_type (str): type of the source input. Options are [text|img].
    Returns:
        A sequence of src/tgt tensors with optional feature tensors
        of size (len x batch).
    """
    assert side in ['src', 'tgt']
    if isinstance(batch.__dict__[side], tuple):
        data = batch.__dict__[side][0]
    else:
        data = batch.__dict__[side]

    feat_start = side + "_feat_"
    keys = sorted([k for k in batch.__dict__ if feat_start in k])
    features = [batch.__dict__[k] for k in keys]
    levels = [data] + features

    if data_type == 'text':
        return torch.cat([level.unsqueeze(2) for level in levels], 2)
    else:
        return levels[0]

开发者ID:moonlightlane，项目名称:QG-Net，代码行数:27，代码来源:IO.py

注：本文中的torchtext.data方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。