

Python data.Pipeline Method Code Examples

This article compiles typical usage examples of the Python torchtext.data.Pipeline method. If you are unsure what data.Pipeline does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from torchtext.data, the module in which this method is defined.


The following presents 13 code examples of the data.Pipeline method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
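
Before diving into the examples, here is a minimal sketch of how data.Pipeline behaves: a Pipeline wraps a single-argument callable, recurses element-wise into lists of strings, and can be composed with add_before / add_after. This is only a sketch; it assumes a legacy torchtext release (roughly < 0.9, where torchtext.data.Pipeline still exists), and the lower and exclaim names are illustrative, not part of any of the projects below.

# A minimal sketch of the legacy Pipeline API (assumes torchtext < 0.9,
# where torchtext.data.Pipeline is still available).
from torchtext import data

# A Pipeline wraps a single-argument callable and applies it to strings,
# recursing element-wise into lists of strings.
lower = data.Pipeline(str.lower)
assert lower("Hello World") == "hello world"
assert lower(["Foo", "BAR"]) == ["foo", "bar"]

# Pipelines compose: add_before runs the given pipeline (or callable) first,
# add_after runs it on the result.
exclaim = data.Pipeline(lambda s: s + "!")
exclaim.add_before(lower)  # lowercase first, then append "!"
assert exclaim("TEST") == "test!"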

Example 1: test_composition

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def test_composition(self):
        id_pipeline = data.Pipeline()
        pipeline = data.Pipeline(TestPipeline.repeat_n)
        pipeline.add_before(id_pipeline)
        pipeline.add_after(id_pipeline)
        pipeline.add_before(six.text_type.lower)
        pipeline.add_after(six.text_type.capitalize)

        other_pipeline = data.Pipeline(six.text_type.swapcase)
        other_pipeline.add_before(pipeline)

        # Assert pipeline gives proper results after composition
        # (test that we aren't modifying pipes member)
        assert pipeline("teST") == "Testtesttest"
        assert pipeline(["ElE1", "eLe2"]) == ["Ele1ele1ele1", "Ele2ele2ele2"]

        # Assert pipeline that we added to gives proper results
        assert other_pipeline("teST") == "tESTTESTTEST"
        assert other_pipeline(["ElE1", "eLe2"]) == ["eLE1ELE1ELE1", "eLE2ELE2ELE2"] 
Developer: salesforce, Project: decaNLP, Lines of code: 21, Source file: test_pipeline.py

Example 2: test_preprocess

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def test_preprocess(self):
        # Default case.
        field = data.Field()
        assert field.preprocess("Test string.") == ["Test", "string."]

        # Test that lowercase is properly applied.
        field_lower = data.Field(lower=True)
        assert field_lower.preprocess("Test string.") == ["test", "string."]

        # Test that custom preprocessing pipelines are properly applied.
        preprocess_pipeline = data.Pipeline(lambda x: x + "!")
        field_preprocessing = data.Field(preprocessing=preprocess_pipeline,
                                         lower=True)
        assert field_preprocessing.preprocess("Test string.") == ["test!", "string.!"]

        # Test that non-sequential data is properly handled.
        field_not_sequential = data.Field(sequential=False, lower=True,
                                          preprocessing=preprocess_pipeline)
        assert field_not_sequential.preprocess("Test string.") == "test string.!"

        # Non-regression test that we do not try to decode unicode strings to unicode
        field_not_sequential = data.Field(sequential=False, lower=True,
                                          preprocessing=preprocess_pipeline)
        assert field_not_sequential.preprocess("ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T") == "ᑌᑎiᑕoᗪᕮ_tᕮ᙭t!" 
Developer: salesforce, Project: decaNLP, Lines of code: 26, Source file: test_field.py

Example 3: __init__

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def __init__(self, text_field, label_field, path=None, examples=None, **kwargs):
        """Create an MR dataset instance given a path and fields.
        Arguments:
            text_field: The field that will be used for text data.
            label_field: The field that will be used for label data.
            path: Path to the data file.
            examples: The examples containing all the data.
            Remaining keyword arguments: Passed to the constructor of
                data.Dataset.
        """
        # text_field.preprocessing = data.Pipeline(clean_str)
        fields = [('text', text_field), ('label', label_field)]
        if examples is None:
            path = self.dirname if path is None else path
            examples = []
            with codecs.open(os.path.join(path, 'rt-polarity.neg'),'r','utf8') as f:
                examples += [
                    data.Example.fromlist([line, 'negative'], fields) for line in f]
            with codecs.open(os.path.join(path, 'rt-polarity.pos'),'r','utf8') as f:
                examples += [
                    data.Example.fromlist([line, 'positive'], fields) for line in f]
        super(MR, self).__init__(examples, fields, **kwargs) 
Developer: malllabiisc, Project: DiPS, Lines of code: 24, Source file: classification_datasets.py

Example 4: test_composition

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def test_composition(self):
        id_pipeline = data.Pipeline()
        pipeline = data.Pipeline(TestPipeline.repeat_n)
        pipeline.add_before(id_pipeline)
        pipeline.add_after(id_pipeline)
        pipeline.add_before(str.lower)
        pipeline.add_after(str.capitalize)

        other_pipeline = data.Pipeline(str.swapcase)
        other_pipeline.add_before(pipeline)

        # Assert pipeline gives proper results after composition
        # (test that we aren't modifying pipes member)
        assert pipeline("teST") == "Testtesttest"
        assert pipeline(["ElE1", "eLe2"]) == ["Ele1ele1ele1", "Ele2ele2ele2"]

        # Assert pipeline that we added to gives proper results
        assert other_pipeline("teST") == "tESTTESTTEST"
        assert other_pipeline(["ElE1", "eLe2"]) == ["eLE1ELE1ELE1", "eLE2ELE2ELE2"] 
Developer: pytorch, Project: text, Lines of code: 21, Source file: test_pipeline.py

Example 5: test_pipeline

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def test_pipeline(self):
        id_pipeline = data.Pipeline()
        assert id_pipeline("Test STring") == "Test STring"
        assert id_pipeline("ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T") == "ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T"
        assert id_pipeline(["1241", "Some String"]) == ["1241", "Some String"]

        pipeline = data.Pipeline(six.text_type.lower)
        assert pipeline("Test STring") == "test string"
        assert pipeline("ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T") == "ᑌᑎiᑕoᗪᕮ_tᕮ᙭t"
        assert pipeline(["1241", "Some String"]) == ["1241", "some string"]

        args_pipeline = data.Pipeline(TestPipeline.repeat_n)
        assert args_pipeline("test", 5) == "testtesttesttesttest"
        assert args_pipeline(["ele1", "ele2"], 2) == ["ele1ele1", "ele2ele2"] 
Developer: salesforce, Project: decaNLP, Lines of code: 16, Source file: test_pipeline.py

Example 6: test_exceptions

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def test_exceptions(self):
        with self.assertRaises(ValueError):
            data.Pipeline("Not Callable") 
Developer: salesforce, Project: decaNLP, Lines of code: 5, Source file: test_pipeline.py

Example 7: test_pipeline

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def test_pipeline(self):
        id_pipeline = data.Pipeline()
        assert id_pipeline("Test STring") == "Test STring"
        assert id_pipeline("ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T") == "ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T"
        assert id_pipeline(["1241", "Some String"]) == ["1241", "Some String"]

        pipeline = data.Pipeline(str.lower)
        assert pipeline("Test STring") == "test string"
        assert pipeline("ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T") == "ᑌᑎiᑕoᗪᕮ_tᕮ᙭t"
        assert pipeline(["1241", "Some String"]) == ["1241", "some string"]

        args_pipeline = data.Pipeline(TestPipeline.repeat_n)
        assert args_pipeline("test", 5) == "testtesttesttesttest"
        assert args_pipeline(["ele1", "ele2"], 2) == ["ele1ele1", "ele2ele2"] 
Developer: pytorch, Project: text, Lines of code: 16, Source file: test_pipeline.py

Example 8: __init__

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def __init__(self, args):
        if not args.cuda:
            args.gpu = -1
        if torch.cuda.is_available() and args.cuda:
            print("Note: You are using GPU for training")
            torch.cuda.set_device(args.gpu)
            torch.cuda.manual_seed(args.seed)
        if torch.cuda.is_available() and not args.cuda:
            print("Warning: You have Cuda but do not use it. You are using CPU for training")

        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
        random.seed(args.seed)

        self.QID = data.Field(sequential=False)
        self.QUESTION = data.Field(batch_first=True)
        self.ANSWER = data.Field(batch_first=True)
        self.LABEL = data.Field(sequential=False)
        self.EXTERNAL = data.Field(sequential=True, tensor_type=torch.FloatTensor, batch_first=True, use_vocab=False,
                              postprocessing=data.Pipeline(lambda arr, _, train: [float(y) for y in arr]))

        if 'TrecQA' in args.dataset:
            train, dev, test = TrecDataset.splits(self.QID, self.QUESTION, self.ANSWER, self.EXTERNAL, self.LABEL)
        elif 'WikiQA' in args.dataset:
            train, dev, test = WikiDataset.splits(self.QID, self.QUESTION, self.ANSWER, self.EXTERNAL, self.LABEL)
        else:
            print("Unsupported dataset")
            exit()

        self.QID.build_vocab(train, dev, test)
        self.QUESTION.build_vocab(train, dev, test)
        self.ANSWER.build_vocab(train, dev, test)
        self.LABEL.build_vocab(train, dev, test)

        if args.cuda:
            self.model = torch.load(args.model, map_location=lambda storage, location: storage.cuda(args.gpu))
        else:
            self.model = torch.load(args.model, map_location=lambda storage, location: storage)

        self.gpu = args.gpu 
Developer: castorini, Project: castor, Lines of code: 42, Source file: bridge.py

Example 9: get_E2E_loaders

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def get_E2E_loaders(path, valid=0.1, batch_size=32):
    utterance = data.Field(tokenize=tokenizer, lower=True)
    label     = data.Field(sequential=False, postprocessing=Pipeline(convert_token=convert_token))
    id        = data.Field(use_vocab=False,sequential=False)
    fields = [('id', id),
              ('turn1', utterance),
              ('turn2', utterance),
              ('turn3', utterance),
              ('label', label)]

    train = data.TabularDataset('{}/train.txt'.format(path),
                                format='tsv',
                                fields=fields,
                                skip_header=True)
    valid = data.TabularDataset('{}/valid.txt'.format(path),
                                format='tsv',
                                fields=fields,
                                skip_header=True)

    test = data.TabularDataset('{}/test.txt'.format(path),
                                format='tsv',
                                fields=fields,
                                skip_header=True)
    vectors = vocab.Vectors(name='emojiplusglove.txt', cache='/media/backup/nlp-cic/DialogueRNN/')
    utterance.build_vocab(train, valid, test, vectors=vectors)
    #utterance.build_vocab(train, valid, test, vectors='glove.840B.300d')
    label.build_vocab(train)
    train_iter = BucketIterator(train,
                                  train=True,
                                  batch_size=batch_size,
                                  sort_key=lambda x: len(x.turn3),
                                  device=torch.device(0))
    valid_iter = BucketIterator(valid,
                                  batch_size=batch_size,
                                  sort_key=lambda x: len(x.turn3),
                                  device=torch.device(0))
    test_iter = BucketIterator(test,
                                  batch_size=batch_size,
                                  sort_key=lambda x: len(x.turn3),
                                  device=torch.device(0))
    return train_iter, valid_iter, test_iter,\
            utterance.vocab.vectors if not args.cuda else utterance.vocab.vectors.cuda(),\
            label.vocab.itos 
Developer: declare-lab, Project: conv-emotion, Lines of code: 45, Source file: train_E2E.py

Example 10: __init__

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def __init__(self, text_field, label_field, path=None, text_cnt=1000, examples=None, **kwargs):
        """Create an MR dataset instance given a path and fields.

        Arguments:
            text_field: The field that will be used for text data.
            label_field: The field that will be used for label data.
            path: Path to the data file.
            examples: The examples containing all the data.
            Remaining keyword arguments: Passed to the constructor of
                data.Dataset.
        """

        def clean_str(string):
            """
            Tokenization/string cleaning for all datasets except for SST.
            Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
            """
            string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
            string = re.sub(r"\'s", " \'s", string)
            string = re.sub(r"\'ve", " \'ve", string)
            string = re.sub(r"n\'t", " n\'t", string)
            string = re.sub(r"\'re", " \'re", string)
            string = re.sub(r"\'d", " \'d", string)
            string = re.sub(r"\'ll", " \'ll", string)
            string = re.sub(r",", " , ", string)
            string = re.sub(r"!", " ! ", string)
            string = re.sub(r"\(", " \( ", string)
            string = re.sub(r"\)", " \) ", string)
            string = re.sub(r"\?", " \? ", string)
            string = re.sub(r"\s{2,}", " ", string)
            return string.strip().lower()

        text_field.preprocessing = data.Pipeline(clean_str)
        fields = [('text', text_field), ('label', label_field)]

        categories = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']
        if examples is None:
            path = self.dirname if path is None else path
            examples = []
            for sub_path in categories:

                sub_path_one = os.path.join(path, sub_path)
                sub_paths_two = os.listdir(sub_path_one)
                cnt = 0
                for sub_path_two in sub_paths_two:
                    lines = ""
                    with open(os.path.join(sub_path_one, sub_path_two), encoding="utf8", errors='ignore') as f:
                        lines = f.read()
                    examples += [data.Example.fromlist([lines, sub_path], fields)]
                    cnt += 1

        super(NEWS_20, self).__init__(examples, fields, **kwargs) 
Developer: xiaobaoonline, Project: pytorch-in-action, Lines of code: 55, Source file: mydatasets.py

Example 11: get_input_processor_words

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def get_input_processor_words(vocab_word, vocab_char=None, convert_digits=True):
    """
    Returns a function that converts text into a processed batch. Required during
    inference.
    Parameters:
        vocab_word: Instance of torchtext.Vocab for input word vocabulary
        vocab_char[optional]: Instance of torchtext.Vocab for input per-word 
                              character vocabulary
        convert_digits: If True will convert numbers to single 0's
    """
    inputs_word = data.Field(init_token="<bos>", eos_token="<eos>", batch_first=True, lower=True,
                                preprocessing=data.Pipeline(
                                    lambda w: '0' if convert_digits and w.isdigit() else w ))
    # Set the vocab object manually without building from training dataset
    inputs_word.vocab = vocab_word

    if vocab_char is not None:
        inputs_char_nesting = data.Field(tokenize=list, init_token="<bos>", eos_token="<eos>", 
                                        batch_first=True)

        inputs_char = data.NestedField(inputs_char_nesting, 
                                        init_token="<bos>", eos_token="<eos>")
        # Set the vocab object manually without building from training dataset
        inputs_char.vocab = inputs_char_nesting.vocab = vocab_char
        
        fields = [(('inputs_word', 'inputs_char'), (inputs_word, inputs_char))]
    else:
        fields = [('inputs_word', inputs_word)]


    def input_processor_fn(inputs):
        if not isinstance(inputs, list):
            inputs = [inputs]

        examples = []
        for line in inputs:
            examples.append(data.Example.fromlist([line], fields))
        
        dataset = data.Dataset(examples, fields)
        # Entire input in one batch
        return data.Batch(data=dataset, 
                          dataset=dataset,
                          device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))

    return input_processor_fn 
Developer: kolloldas, Project: torchnlp, Lines of code: 47, Source file: inputs.py

Example 12: __init__

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def __init__(self, text_field, label_field, path=None, examples=None, **kwargs):
        """Create an MR dataset instance given a path and fields.

        Arguments:
            text_field: The field that will be used for text data.
            label_field: The field that will be used for label data.
            path: Path to the data file.
            examples: The examples containing all the data.
            Remaining keyword arguments: Passed to the constructor of
                data.Dataset.
        """
        def clean_str(string):
            """
            Tokenization/string cleaning for all datasets except for SST.
            Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
            """
            string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
            string = re.sub(r"\'s", " \'s", string)
            string = re.sub(r"\'ve", " \'ve", string)
            string = re.sub(r"n\'t", " n\'t", string)
            string = re.sub(r"\'re", " \'re", string)
            string = re.sub(r"\'d", " \'d", string)
            string = re.sub(r"\'ll", " \'ll", string)
            string = re.sub(r",", " , ", string)
            string = re.sub(r"!", " ! ", string)
            string = re.sub(r"\(", " \( ", string)
            string = re.sub(r"\)", " \) ", string)
            string = re.sub(r"\?", " \? ", string)
            string = re.sub(r"\s{2,}", " ", string)
            return string.strip()

        text_field.preprocessing = data.Pipeline(clean_str)
        fields = [('text', text_field), ('label', label_field)]

        if examples is None:
            path = self.dirname if path is None else path
            examples = []
            with open(os.path.join(path, 'rt-polarity.neg'), errors='ignore') as f:
                examples += [
                    data.Example.fromlist([line, 'negative'], fields) for line in f]
            with open(os.path.join(path, 'rt-polarity.pos'), errors='ignore') as f:
                examples += [
                    data.Example.fromlist([line, 'positive'], fields) for line in f]
        super(MR, self).__init__(examples, fields, **kwargs) 
Developer: Shawn1993, Project: cnn-text-classification-pytorch, Lines of code: 46, Source file: mydatasets.py

Example 13: __init__

# Required module: from torchtext import data [as alias]
# Or: from torchtext.data import Pipeline [as alias]
def __init__(self, text_field, label_field, path=None, examples=None, **kwargs):
        """Create an MR dataset instance given a path and fields.

        Arguments:
            text_field: The field that will be used for text data.
            label_field: The field that will be used for label data.
            path: Path to the data file.
            examples: The examples containing all the data.
            Remaining keyword arguments: Passed to the constructor of
                data.Dataset.
        """
        def clean_str(string):
            """
            Tokenization/string cleaning for all datasets except for SST.
            Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
            """
            string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
            string = re.sub(r"\'s", " \'s", string)
            string = re.sub(r"\'ve", " \'ve", string)
            string = re.sub(r"n\'t", " n\'t", string)
            string = re.sub(r"\'re", " \'re", string)
            string = re.sub(r"\'d", " \'d", string)
            string = re.sub(r"\'ll", " \'ll", string)
            string = re.sub(r",", " , ", string)
            string = re.sub(r"!", " ! ", string)
            string = re.sub(r"\(", " \( ", string)
            string = re.sub(r"\)", " \) ", string)
            string = re.sub(r"\?", " \? ", string)
            string = re.sub(r"\s{2,}", " ", string)
            return string.strip()

        text_field.preprocessing = data.Pipeline(clean_str)
        fields = [('text', text_field), ('label', label_field)]

        if examples is None:
            path = self.dirname if path is None else path
            examples = []
            with codecs.open(os.path.join(path, 'rt-polarity.neg'), encoding='utf-8', errors='ignore') as f:
                examples += [
                    data.Example.fromlist([line, 'negative'], fields) for line in f]
            with codecs.open(os.path.join(path, 'rt-polarity.pos'), encoding='utf-8', errors='ignore') as f:
                examples += [
                    data.Example.fromlist([line, 'positive'], fields) for line in f]
        super(MR, self).__init__(examples, fields, **kwargs) 
Developer: srviest, Project: char-cnn-text-classification-pytorch, Lines of code: 46, Source file: mydatasets.py


Note: The torchtext.data.Pipeline method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.