This article collects typical usage examples of the Python method torchtext.data.Field. If you have been wondering what data.Field does, how to use it, or where to find concrete examples, the curated code samples below may help. You can also explore further usage examples in the torchtext.data module.
Below are 15 code examples of data.Field, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: test_process
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_process(self):
    raw_field = data.RawField()
    field = data.Field(sequential=True, use_vocab=False, batch_first=True)
    # Test tensor-like batch data, which both RawField and Field accept.
    batch = [[1, 2, 3], [2, 3, 4]]
    batch_tensor = torch.LongTensor(batch)
    raw_field_processed = raw_field.process(batch)
    field_processed = field.process(batch, device=-1, train=False)
    assert raw_field_processed == batch
    assert field_processed.data.equal(batch_tensor)
    # Test non-tensor data, which only RawField accepts.
    any_obj = [object() for _ in range(5)]
    raw_field_processed = raw_field.process(any_obj)
    assert any_obj == raw_field_processed
    with pytest.raises(TypeError):
        field.process(any_obj)
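Under the legacy torchtext API, Field.process is essentially pad followed by numericalize. A minimal, hedged sketch of that equivalence; the pad_token=0 choice is ours, so the padding stays numeric when use_vocab=False:

import torch
from torchtext import data

field = data.Field(sequential=True, use_vocab=False,
                   batch_first=True, pad_token=0)
batch = [[1, 2], [3, 4, 5]]
padded = field.pad(batch)            # -> [[1, 2, 0], [3, 4, 5]]
tensor = field.numericalize(padded)  # -> LongTensor of shape (2, 3)
assert torch.equal(tensor, field.process(batch))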
Example 2: test_preprocess
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_preprocess(self):
    # Default case.
    field = data.Field()
    assert field.preprocess("Test string.") == ["Test", "string."]
    # Test that lowercasing is properly applied.
    field_lower = data.Field(lower=True)
    assert field_lower.preprocess("Test string.") == ["test", "string."]
    # Test that custom preprocessing pipelines are properly applied.
    preprocess_pipeline = data.Pipeline(lambda x: x + "!")
    field_preprocessing = data.Field(preprocessing=preprocess_pipeline,
                                     lower=True)
    assert field_preprocessing.preprocess("Test string.") == ["test!", "string.!"]
    # Test that non-sequential data is properly handled.
    field_not_sequential = data.Field(sequential=False, lower=True,
                                      preprocessing=preprocess_pipeline)
    assert field_not_sequential.preprocess("Test string.") == "test string.!"
    # Non-regression test: ensure we do not try to decode unicode strings to unicode.
    field_not_sequential = data.Field(sequential=False, lower=True,
                                      preprocessing=preprocess_pipeline)
    assert field_not_sequential.preprocess("ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T") == "ᑌᑎiᑕoᗪᕮ_tᕮ᙭t!"
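The preprocessing Pipeline runs after tokenization and lowercasing, once per token (or on the whole string when sequential=False). A hedged sketch of chaining two Pipelines with add_after, assuming the legacy torchtext Pipeline API:

from torchtext import data

strip_punct = data.Pipeline(lambda tok: tok.rstrip(".,!?"))
add_bang = data.Pipeline(lambda tok: tok + "!")
pipeline = strip_punct.add_after(add_bang)  # strip punctuation, then append "!"

field = data.Field(lower=True, preprocessing=pipeline)
assert field.preprocess("Test string.") == ["test!", "string!"]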
Example 3: test_numericalize_include_lengths
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_numericalize_include_lengths(self):
    self.write_test_ppid_dataset(data_format="tsv")
    question_field = data.Field(sequential=True, include_lengths=True)
    tsv_fields = [("id", None), ("q1", question_field),
                  ("q2", question_field), ("label", None)]
    tsv_dataset = data.TabularDataset(
        path=self.test_ppid_dataset_path, format="tsv",
        fields=tsv_fields)
    question_field.build_vocab(tsv_dataset)
    test_example_data = [["When", "do", "you", "use", "シ",
                          "instead", "of", "し?"],
                         ["What", "is", "2+2", "<pad>", "<pad>",
                          "<pad>", "<pad>", "<pad>"],
                         ["Here", "is", "a", "sentence", "with",
                          "some", "oovs", "<pad>"]]
    test_example_lengths = [8, 3, 7]
    # Test with include_lengths.
    include_lengths_numericalized = question_field.numericalize(
        (test_example_data, test_example_lengths), device=-1)
    verify_numericalized_example(question_field,
                                 test_example_data,
                                 include_lengths_numericalized,
                                 test_example_lengths)
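With include_lengths=True, numericalize (and process) return a (padded_tensor, lengths) pair, which is exactly what torch.nn.utils.rnn.pack_padded_sequence consumes. A hedged, self-contained sketch, assuming legacy torchtext plus torch >= 1.1 for enforce_sorted:

import torch
from torch.nn.utils.rnn import pack_padded_sequence
from torchtext import data

field = data.Field(sequential=True, include_lengths=True)
examples = [["hello", "world"], ["hi"]]
field.build_vocab(examples)          # build_vocab also accepts raw token lists
padded, lengths = field.process(examples)
# padded: (max_len, batch) LongTensor; lengths: true lengths before padding
packed = pack_padded_sequence(padded, lengths, enforce_sorted=False)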
Example 4: test_numericalize_batch_first
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_numericalize_batch_first(self):
    self.write_test_ppid_dataset(data_format="tsv")
    question_field = data.Field(sequential=True, batch_first=True)
    tsv_fields = [("id", None), ("q1", question_field),
                  ("q2", question_field), ("label", None)]
    tsv_dataset = data.TabularDataset(
        path=self.test_ppid_dataset_path, format="tsv",
        fields=tsv_fields)
    question_field.build_vocab(tsv_dataset)
    test_example_data = [["When", "do", "you", "use", "シ",
                          "instead", "of", "し?"],
                         ["What", "is", "2+2", "<pad>", "<pad>",
                          "<pad>", "<pad>", "<pad>"],
                         ["Here", "is", "a", "sentence", "with",
                          "some", "oovs", "<pad>"]]
    # Test with batch_first.
    batch_first_numericalized = question_field.numericalize(
        test_example_data, device=-1)
    verify_numericalized_example(question_field,
                                 test_example_data,
                                 batch_first_numericalized,
                                 batch_first=True)
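What batch_first changes is only the output layout: (batch, seq_len) instead of the default (seq_len, batch). A quick hedged sketch under the legacy API:

from torchtext import data

tokens = [["a", "b", "c"], ["d", "e"]]

seq_major = data.Field(sequential=True)
seq_major.build_vocab(tokens)
print(seq_major.process(tokens).shape)    # torch.Size([3, 2]) -- (seq_len, batch)

batch_major = data.Field(sequential=True, batch_first=True)
batch_major.build_vocab(tokens)
print(batch_major.process(tokens).shape)  # torch.Size([2, 3]) -- (batch, seq_len)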
Example 5: test_errors
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_errors(self):
    # Test that passing a non-tuple (of data and lengths) to numericalize
    # with Field.include_lengths = True raises an error.
    with self.assertRaises(ValueError):
        self.write_test_ppid_dataset(data_format="tsv")
        question_field = data.Field(sequential=True, include_lengths=True)
        tsv_fields = [("id", None), ("q1", question_field),
                      ("q2", question_field), ("label", None)]
        tsv_dataset = data.TabularDataset(
            path=self.test_ppid_dataset_path, format="tsv",
            fields=tsv_fields)
        question_field.build_vocab(tsv_dataset)
        test_example_data = [["When", "do", "you", "use", "シ",
                              "instead", "of", "し?"],
                             ["What", "is", "2+2", "<pad>", "<pad>",
                              "<pad>", "<pad>", "<pad>"],
                             ["Here", "is", "a", "sentence", "with",
                              "some", "oovs", "<pad>"]]
        question_field.numericalize(
            test_example_data, device=-1)
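For contrast, the call that would succeed is the one from Example 3: the same data, wrapped in the (data, lengths) tuple that include_lengths=True requires:

test_example_lengths = [8, 3, 7]
question_field.numericalize(
    (test_example_data, test_example_lengths), device=-1)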
Example 6: __init__
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def __init__(self, emb_dim=50, mbsize=32):
    self.TEXT = data.Field(init_token='<start>', eos_token='<eos>',
                           lower=True, tokenize='spacy', fix_length=16)
    self.LABEL = data.Field(sequential=False, unk_token=None)
    # Only keep sentences with length <= 15 whose label is not 'neutral'.
    f = lambda ex: len(ex.text) <= 15 and ex.label != 'neutral'
    train, val, test = datasets.SST.splits(
        self.TEXT, self.LABEL, fine_grained=False, train_subtrees=False,
        filter_pred=f
    )
    self.TEXT.build_vocab(train, vectors=GloVe('6B', dim=emb_dim))
    self.LABEL.build_vocab(train)
    self.n_vocab = len(self.TEXT.vocab.itos)
    self.emb_dim = emb_dim
    self.train_iter, self.val_iter, _ = data.BucketIterator.splits(
        (train, val, test), batch_size=mbsize, device=-1,
        shuffle=True, repeat=True
    )
    self.train_iter = iter(self.train_iter)
    self.val_iter = iter(self.val_iter)
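Only __init__ is shown, so the class name in this usage sketch (SSTLoader) and the attribute access pattern are illustrative assumptions:

# Hypothetical usage; the class name is not part of the excerpt.
loader = SSTLoader(emb_dim=50, mbsize=32)
batch = next(loader.train_iter)  # works because train_iter was wrapped in iter()
text, label = batch.text, batch.label
print(text.size())               # (fix_length=16, mbsize) in the default seq-major layout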
Example 7: get_sst
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def get_sst():
    # Note: Field.lower only checks truthiness, so this non-empty string
    # still enables lowercasing despite its name.
    inputs = data.Field(lower='preserve-case')
    answers = data.Field(sequential=False, unk_token=None)
    # Build the vocabulary with subtrees so the inputs are right.
    train_s, dev_s, test_s = datasets.SST.splits(
        inputs, answers, fine_grained=False, train_subtrees=True,
        filter_pred=lambda ex: ex.label != 'neutral')
    inputs.build_vocab(train_s, dev_s, test_s)
    answers.build_vocab(train_s)
    # Rebuild without subtrees to get full-length sentences.
    train, dev, test = datasets.SST.splits(
        inputs, answers, fine_grained=False, train_subtrees=False,
        filter_pred=lambda ex: ex.label != 'neutral')
    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test), batch_size=1, device=0)
    return inputs, answers, train_iter, dev_iter
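A hedged usage sketch; the batch attributes text and label follow from SST's field names:

inputs, answers, train_iter, dev_iter = get_sst()
batch = next(iter(train_iter))
print(batch.text.shape)    # (seq_len, 1), since batch_size=1
print(batch.label.item())  # class index into answers.vocab.itos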
Example 8: evaluate_predictions
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def evaluate_predictions(snapshot_file):
    print('loading', snapshot_file)
    try:  # load onto GPU
        model = torch.load(snapshot_file)
    except Exception:  # fall back to loading onto CPU
        model = torch.load(snapshot_file, map_location=lambda storage, loc: storage)
    inputs = data.Field()
    answers = data.Field(sequential=False, unk_token=None)
    train, dev, test = datasets.SST.splits(
        inputs, answers, fine_grained=False, train_subtrees=False,
        filter_pred=lambda ex: ex.label != 'neutral')
    inputs.build_vocab(train)
    answers.build_vocab(train)
    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test), batch_size=1, device=0)
    train_iter.init_epoch()
    for batch_idx, batch in enumerate(train_iter):
        print('batch_idx', batch_idx)
        out = model(batch)
        target = batch.label
        break
    return batch, out, target
Example 9: from_list
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def from_list(src_list, tgt_list=None, share_fields_from=None, **kwargs):
    # SOS, EOS, src_field_name, and tgt_field_name are module-level
    # constants defined elsewhere in the source file.
    if tgt_list is None:
        corpus = zip(src_list)
    else:
        corpus = zip(src_list, tgt_list)
    if share_fields_from is not None:
        src_field = share_fields_from.fields[src_field_name]
        if tgt_list is None:
            tgt_field = None
        else:
            tgt_field = share_fields_from.fields[tgt_field_name]
    else:
        # Tokenize by character.
        src_field = Field(batch_first=True, include_lengths=True, tokenize=list,
                          init_token=SOS, eos_token=EOS, unk_token=None)
        if tgt_list is None:
            tgt_field = None
        else:
            tgt_field = Field(batch_first=True, tokenize=list,
                              init_token=SOS, eos_token=EOS, unk_token=None)
    return Seq2SeqDataset(corpus, src_field, tgt_field, **kwargs)
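A hedged usage sketch, assuming from_list is exposed as a static constructor on Seq2SeqDataset (only the function body is shown above):

# Hypothetical usage; tokenize=list turns each string into a character sequence.
train_ds = Seq2SeqDataset.from_list(["hello"], ["bonjour"])
dev_ds = Seq2SeqDataset.from_list(["hallo"], ["salut"],
                                  share_fields_from=train_ds)  # reuse the training fields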
Example 10: load_mr
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def load_mr(text_field, label_field, batch_size):
    print('loading data')
    train_data, dev_data, test_data = MR.splits(text_field, label_field)
    text_field.build_vocab(train_data, dev_data, test_data)
    label_field.build_vocab(train_data, dev_data, test_data)
    print('building batches')
    train_iter, dev_iter, test_iter = data.Iterator.splits(
        (train_data, dev_data, test_data),
        batch_sizes=(batch_size, len(dev_data), len(test_data)),
        repeat=False, device=-1)
    return train_iter, dev_iter, test_iter

# Example usage:
# text_field = data.Field(lower=True)
# label_field = data.Field(sequential=False)
# train_iter, dev_iter, test_iter = load_mr(text_field, label_field, batch_size=50)
Example 11: test_batch_iter
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_batch_iter(self):
    self.write_test_numerical_features_dataset()
    FLOAT = data.Field(use_vocab=False, sequential=False,
                       dtype=torch.float)
    INT = data.Field(use_vocab=False, sequential=False, is_target=True)
    TEXT = data.Field(sequential=False)
    dst = data.TabularDataset(path=self.test_numerical_features_dataset_path,
                              format="tsv", skip_header=False,
                              fields=[("float", FLOAT),
                                      ("int", INT),
                                      ("text", TEXT)])
    TEXT.build_vocab(dst)
    itr = data.Iterator(dst, batch_size=2, device=-1, shuffle=False)
    fld_order = [k for k, v in dst.fields.items() if
                 v is not None and not v.is_target]
    batch = next(iter(itr))
    (x1, x2), y = batch  # unpacks into (inputs, target) based on is_target
    x = (x1, x2)[fld_order.index("float")]
    self.assertEqual(y.data[0], 1)
    self.assertEqual(y.data[1], 12)
    self.assertAlmostEqual(x.data[0], 0.1, places=4)
    self.assertAlmostEqual(x.data[1], 0.5, places=4)
Example 12: test_input_with_newlines_in_text
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_input_with_newlines_in_text(self):
    # Smoke test ensuring that TabularDataset handles files with newlines
    # inside quoted fields.
    example_with_newlines = [("\"hello \n world\"", "1"),
                             ("\"there is a \n newline\"", "0"),
                             ("\"there is no newline\"", "1")]
    fields = [("text", data.Field(lower=True)),
              ("label", data.Field(sequential=False))]
    for delim in [",", "\t"]:
        with open(self.test_newline_dataset_path, "wt") as f:
            for line in example_with_newlines:
                f.write("{}\n".format(delim.join(line)))
        format_ = "csv" if delim == "," else "tsv"
        dataset = data.TabularDataset(
            path=self.test_newline_dataset_path, format=format_, fields=fields)
        # If the newline is not parsed correctly, this raises an error.
        for example in dataset:
            self.assertTrue(hasattr(example, "text"))
            self.assertTrue(hasattr(example, "label"))
Example 13: test_process
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_process(self):
    raw_field = data.RawField()
    field = data.Field(sequential=True, use_vocab=False, batch_first=True)
    # Test tensor-like batch data, which both RawField and Field accept.
    batch = [[1, 2, 3], [2, 3, 4]]
    batch_tensor = torch.LongTensor(batch)
    raw_field_processed = raw_field.process(batch)
    field_processed = field.process(batch)  # unlike Example 1: no device/train arguments (newer torchtext)
    assert raw_field_processed == batch
    assert field_processed.data.equal(batch_tensor)
    # Test non-tensor data, which only RawField accepts.
    any_obj = [object() for _ in range(5)]
    raw_field_processed = raw_field.process(any_obj)
    assert any_obj == raw_field_processed
    with pytest.raises(TypeError):
        field.process(any_obj)
Example 14: test_numericalize_basic
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_numericalize_basic(self):
    self.write_test_ppid_dataset(data_format="tsv")
    question_field = data.Field(sequential=True)
    tsv_fields = [("id", None), ("q1", question_field),
                  ("q2", question_field), ("label", None)]
    tsv_dataset = data.TabularDataset(
        path=self.test_ppid_dataset_path, format="tsv",
        fields=tsv_fields)
    question_field.build_vocab(tsv_dataset)
    test_example_data = [["When", "do", "you", "use", "シ",
                          "instead", "of", "し?"],
                         ["What", "is", "2+2", "<pad>", "<pad>",
                          "<pad>", "<pad>", "<pad>"],
                         ["Here", "is", "a", "sentence", "with",
                          "some", "oovs", "<pad>"]]
    # Test the default behavior.
    default_numericalized = question_field.numericalize(test_example_data)
    verify_numericalized_example(question_field, test_example_data,
                                 default_numericalized)
Example 15: test_numericalize_include_lengths
# Required import: from torchtext import data [as alias]
# Alternatively: from torchtext.data import Field [as alias]
def test_numericalize_include_lengths(self):
    self.write_test_ppid_dataset(data_format="tsv")
    question_field = data.Field(sequential=True, include_lengths=True)
    tsv_fields = [("id", None), ("q1", question_field),
                  ("q2", question_field), ("label", None)]
    tsv_dataset = data.TabularDataset(
        path=self.test_ppid_dataset_path, format="tsv",
        fields=tsv_fields)
    question_field.build_vocab(tsv_dataset)
    test_example_data = [["When", "do", "you", "use", "シ",
                          "instead", "of", "し?"],
                         ["What", "is", "2+2", "<pad>", "<pad>",
                          "<pad>", "<pad>", "<pad>"],
                         ["Here", "is", "a", "sentence", "with",
                          "some", "oovs", "<pad>"]]
    test_example_lengths = [8, 3, 7]
    # Test with include_lengths; unlike Example 3, no device argument (newer torchtext).
    include_lengths_numericalized = question_field.numericalize(
        (test_example_data, test_example_lengths))
    verify_numericalized_example(question_field,
                                 test_example_data,
                                 include_lengths_numericalized,
                                 test_example_lengths)