当前位置: 首页>>代码示例>>Python>>正文


Python vocab.Vectors方法代码示例

本文整理汇总了Python中torchtext.vocab.Vectors方法的典型用法代码示例。如果您正苦于以下问题:Python vocab.Vectors方法的具体用法?Python vocab.Vectors怎么用?Python vocab.Vectors使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在torchtext.vocab的用法示例。


在下文中一共展示了vocab.Vectors方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def __init__(self, vocabulary_size, word_embedding_dim, hidden_dim, num_layers, dropout, vector_path=None, non_static=False):
        super(BiLSTMWordEmbeddingLookup, self).__init__()

        self.vocabulary_size = vocabulary_size
        self.num_layers = num_layers
        self.word_embedding_dim = word_embedding_dim
        self.hidden_dim = hidden_dim

        self.output_dim = hidden_dim

        self.word_embeddings = nn.Embedding(self.vocabulary_size, self.word_embedding_dim).to(DEVICE)
        if vector_path:
            logger.info('logging word vectors from {}'.format(vector_path))
            word_vectors = Vectors(vector_path).vectors
            self.word_embeddings = self.word_embeddings.from_pretrained(word_vectors, freeze=not non_static).to(DEVICE)

        self.lstm = nn.LSTM(self.word_embedding_dim, self.hidden_dim // 2, bidirectional=True, num_layers=num_layers, dropout=dropout).to(DEVICE)

        self.hidden = self.init_hidden() 
开发者ID:smilelight,项目名称:lightNLP,代码行数:21,代码来源:word_embedding.py

示例2: __init__

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def __init__(self, args):
        super(TextCNN, self).__init__(args)

        self.class_num = args.class_num
        self.chanel_num = 1
        self.filter_num = args.filter_num
        self.filter_sizes = args.filter_sizes

        self.vocabulary_size = args.vocabulary_size
        self.embedding_dimension = args.embedding_dim
        self.embedding = nn.Embedding(self.vocabulary_size, self.embedding_dimension).to(DEVICE)
        if args.static:
            logger.info('logging word vectors from {}'.format(args.vector_path))
            vectors = Vectors(args.vector_path).vectors
            self.embedding = self.embedding.from_pretrained(vectors, freeze=not args.non_static).to(DEVICE)
        if args.multichannel:
            self.embedding2 = nn.Embedding(self.vocabulary_size, self.embedding_dimension).from_pretrained(args.vectors).to(DEVICE)
            self.chanel_num += 1
        else:
            self.embedding2 = None
        self.convs = nn.ModuleList(
            [nn.Conv2d(self.chanel_num, self.filter_num, (size, self.embedding_dimension)) for size in self.filter_sizes]).to(DEVICE)
        self.dropout = nn.Dropout(args.dropout).to(DEVICE)
        self.fc = nn.Linear(len(self.filter_sizes) * self.filter_num, self.class_num).to(DEVICE) 
开发者ID:smilelight,项目名称:lightNLP,代码行数:26,代码来源:model.py

示例3: test_get_vector_data

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def test_get_vector_data(self):
        vectors_cache_dir = '.cache'
        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)

        pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
        filename = 'fasttext_sample.vec'
        file = os.path.join(pathdir, filename)
        url_base = urljoin('file:', pathname2url(file))
        vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)
        self.assertIsInstance(vecs, Vectors)

        vec_data = MatchingField._get_vector_data(vecs, vectors_cache_dir)
        self.assertEqual(len(vec_data), 1)
        self.assertEqual(vec_data[0].vectors.size(), torch.Size([100, 300]))
        self.assertEqual(vec_data[0].dim, 300)

        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir) 
开发者ID:anhaidgroup,项目名称:deepmatcher,代码行数:21,代码来源:test_field.py

示例4: test_extend_vocab_1

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def test_extend_vocab_1(self):
        vectors_cache_dir = '.cache'
        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)

        mf = MatchingField()
        lf = MatchingField(id=True, sequential=False)
        fields = [('id', lf), ('left_a', mf), ('right_a', mf), ('label', lf)]
        col_naming = {'id': 'id', 'label': 'label', 'left': 'left_', 'right': 'right_'}

        pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
        filename = 'fasttext_sample.vec'
        file = os.path.join(pathdir, filename)
        url_base = urljoin('file:', pathname2url(file))
        vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)

        data_path = os.path.join(test_dir_path, 'test_datasets', 'sample_table_small.csv')
        md = MatchingDataset(fields, col_naming, path=data_path)

        mf.build_vocab()
        mf.vocab.vectors = torch.Tensor(len(mf.vocab.itos), 300)
        mf.extend_vocab(md, vectors=vecs)
        self.assertEqual(len(mf.vocab.itos), 6)
        self.assertEqual(mf.vocab.vectors.size(), torch.Size([6, 300])) 
开发者ID:anhaidgroup,项目名称:deepmatcher,代码行数:26,代码来源:test_field.py

示例5: test_extend_vectors_1

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def test_extend_vectors_1(self):
        vectors_cache_dir = '.cache'
        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)

        pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
        filename = 'fasttext_sample.vec'
        file = os.path.join(pathdir, filename)
        url_base = urljoin('file:', pathname2url(file))
        vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)
        self.assertIsInstance(vecs, Vectors)

        vec_data = MatchingField._get_vector_data(vecs, vectors_cache_dir)
        v = MatchingVocab(Counter())
        v.vectors = torch.Tensor(1, vec_data[0].dim)
        v.unk_init = torch.Tensor.zero_
        tokens = {'hello', 'world'}
        v.extend_vectors(tokens, vec_data)
        self.assertEqual(len(v.itos), 4)
        self.assertEqual(v.vectors.size(), torch.Size([4, 300]))
        self.assertEqual(list(v.vectors[2][0:10]), [0.0] * 10)
        self.assertEqual(list(v.vectors[3][0:10]), [0.0] * 10)

        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir) 
开发者ID:anhaidgroup,项目名称:deepmatcher,代码行数:27,代码来源:test_field.py

示例6: iters

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def iters(cls, path, vectors_name, vectors_cache, topic, batch_size=64, shuffle=True, device=0,
              vectors=None, unk_init=torch.Tensor.zero_):
        """
        :param path: directory containing train, test, dev files
        :param vectors_name: name of word vectors file
        :param vectors_cache: path to directory containing word vectors file
        :param topic: topic from which articles should be fetched
        :param batch_size: batch size
        :param device: GPU device
        :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
        :param unk_init: function used to generate vector for OOV words
        :return:
        """
        if vectors is None:
            vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)

        train_path = os.path.join('TREC', 'robust45_aug_train_%s.tsv' % topic)
        dev_path = os.path.join('TREC', 'robust45_dev_%s.tsv' % topic)
        test_path = os.path.join('TREC', 'core17_10k_%s.tsv' % topic)
        train, val, test = cls.splits(path, train=train_path, validation=dev_path, test=test_path)
        cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
        return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                     sort_within_batch=True, device=device) 
开发者ID:castorini,项目名称:hedwig,代码行数:25,代码来源:robust45.py

示例7: iters

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None,
              unk_init=torch.Tensor.zero_):
        """
        :param path: directory containing train, test, dev files
        :param vectors_name: name of word vectors file
        :param vectors_cache: path to directory containing word vectors file
        :param batch_size: batch size
        :param device: GPU device
        :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
        :param unk_init: function used to generate vector for OOV words
        :return:
        """
        if vectors is None:
            vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)

        train, val, test = cls.splits(path)
        cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
        return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                     sort_within_batch=True, device=device) 
开发者ID:castorini,项目名称:hedwig,代码行数:21,代码来源:aapd.py

示例8: iters

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def iters(cls, path, vectors_name, vectors_cache, topic, batch_size=64, shuffle=True, device=0,
              vectors=None, unk_init=torch.Tensor.zero_):
        """
        :param path: directory containing train, test, dev files
        :param vectors_name: name of word vectors file
        :param vectors_cache: path to directory containing word vectors file
        :param topic: topic from which articles should be fetched
        :param batch_size: batch size
        :param device: GPU device
        :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
        :param unk_init: function used to generate vector for OOV words
        :return:
        """
        if vectors is None:
            vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)

        train_path = os.path.join('TREC', 'robust04_train_%s.tsv' % topic)
        dev_path = os.path.join('TREC', 'robust04_dev_%s.tsv' % topic)
        test_path = os.path.join('TREC', 'core17_10k_%s.tsv' % topic)
        train, val, test = cls.splits(path, train=train_path, validation=dev_path, test=test_path)
        cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
        return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                     sort_within_batch=True, device=device) 
开发者ID:castorini,项目名称:hedwig,代码行数:25,代码来源:robust04.py

示例9: iters

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None, unk_init=torch.Tensor.zero_):
        """
        :param path: directory containing train, test, dev files
        :param vectors_name: name of word vectors file
        :param vectors_cache: path to word vectors file
        :param batch_size: batch size
        :param device: GPU device
        :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
        :param unk_init: function used to generate vector for OOV words
        :return:
        """
        if vectors is None:
            vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)

        train, val, test = cls.splits(path)

        cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)

        return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                     sort_within_batch=True, device=device) 
开发者ID:castorini,项目名称:castor,代码行数:22,代码来源:sts2014.py

示例10: iters

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None,
              unk_init=torch.Tensor.zero_):
        """
        :param path: directory containing train, test, dev files
        :param vectors_name: name of word vectors file
        :param vectors_dir: directory containing word vectors file
        :param batch_size: batch size
        :param device: GPU device
        :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
        :param pt_file: load cached embedding file from disk if it is true
        :param unk_init: function used to generate vector for OOV words
        :return:
        """

        if vectors is None:
            vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)

        train, validation, test = cls.splits(path)

        cls.LABEL_FIELD.build_vocab(train, validation, test)
        cls.TEXT_FIELD.build_vocab(train, validation, test, vectors=vectors)
        return BucketIterator.splits((train, validation, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                     sort_within_batch=True, device=device) 
开发者ID:castorini,项目名称:castor,代码行数:25,代码来源:quora.py

示例11: iters

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None, unk_init=torch.Tensor.zero_):
        """
        :param path: directory containing train, test, dev files
        :param vectors_name: name of word vectors file
        :param vectors_cache: path to word vectors file
        :param batch_size: batch size
        :param device: GPU device
        :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
        :param unk_init: function used to generate vector for OOV words
        :return:
        """
        if vectors is None:
            vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)

        train, test = cls.splits(path)

        cls.TEXT_FIELD.build_vocab(train, test, vectors=vectors)

        return BucketIterator.splits((train, test), batch_size=batch_size, repeat=False, shuffle=shuffle, device=device) 
开发者ID:castorini,项目名称:castor,代码行数:21,代码来源:msrvid.py

示例12: iters

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None,
              unk_init=torch.Tensor.zero_):
        """
        :param path: directory containing train, test, dev files
        :param vectors_name: name of word vectors file
        :param vectors_cache: path to directory containing word vectors file
        :param batch_size: batch size
        :param device: GPU device
        :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
        :param unk_init: function used to generate vector for OOV words
        :return:
        """
        if vectors is None:
            vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)

        train, val, test = cls.splits(path)

        cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)

        return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                     sort_within_batch=True, device=device) 
开发者ID:castorini,项目名称:castor,代码行数:23,代码来源:sst.py

示例13: __init__

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def __init__(self, args):
        self.RAW = data.RawField()
        self.RAW.is_target = False
        tokenize = lambda x: list(x)
        self.TEXT = data.Field(batch_first=True, tokenize=tokenize)
        self.LABEL = data.Field(sequential=False, unk_token=None)
        self.train, self.dev, self.test = data.TabularDataset.splits(
            path='/data/nfsdata/nlp/datasets/sentence_pair/bq_corpus_torch10',
            train='BQ_train.json',
            validation='BQ_dev.json',
            test='BQ_test.json',
            format='json',
            fields={"gold_label": ("label", self.LABEL),
                    "sentence1": ("q1", self.TEXT),
                    "sentence2": ("q2", self.TEXT),
                    "ID": ("id", self.RAW)})

        self.TEXT.build_vocab(self.train, self.dev, self.test, vectors=Vectors("BQ300", args.data))
        self.LABEL.build_vocab(self.train)

        sort_key = lambda x: data.interleave_keys(len(x.q1), len(x.q2))
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.train_iter = data.BucketIterator(self.train, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
        self.dev_iter = data.BucketIterator(self.dev, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
        self.test_iter = data.BucketIterator(self.test, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True) 
开发者ID:ShannonAI,项目名称:glyce,代码行数:27,代码来源:semantic_similar_data.py

示例14: test_vocab_download_custom_vectors

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def test_vocab_download_custom_vectors(self):
        c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
        # Build a vocab and get vectors twice to test caching.
        for i in range(2):
            v = vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                            vectors=Vectors('wiki.simple.vec',
                                            url=FastText.url_base.format('simple')))

            self.assertEqual(v.itos, ['<unk>', '<pad>', '<bos>',
                                      'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T', 'hello', 'world'])
            vectors = v.vectors.numpy()

            # The first 5 entries in each vector.
            expected_fasttext_simple_en = {
                'hello': [0.39567, 0.21454, -0.035389, -0.24299, -0.095645],
                'world': [0.10444, -0.10858, 0.27212, 0.13299, -0.33165],
            }

            for word in expected_fasttext_simple_en:
                assert_allclose(vectors[v.stoi[word], :5],
                                expected_fasttext_simple_en[word])

            assert_allclose(vectors[v.stoi['<unk>']], np.zeros(300))
        # Delete the vectors after we're done to save disk space on CI
        if os.environ.get("TRAVIS") == "true":
            vec_file = os.path.join(self.project_root, ".vector_cache", "wiki.simple.vec")
            conditional_remove(vec_file) 
开发者ID:salesforce,项目名称:decaNLP,代码行数:29,代码来源:test_vocab.py

示例15: test_errors

# 需要导入模块: from torchtext import vocab [as 别名]
# 或者: from torchtext.vocab import Vectors [as 别名]
def test_errors(self):
        c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
        with self.assertRaises(ValueError):
            # Test proper error raised when using unknown string alias
            vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                        vectors=["fasttext.english.300d"])
            vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                        vectors="fasttext.english.300d")
        with self.assertRaises(ValueError):
            # Test proper error is raised when vectors argument is
            # non-string or non-Vectors
            vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                        vectors={"word": [1, 2, 3]}) 
开发者ID:salesforce,项目名称:decaNLP,代码行数:15,代码来源:test_vocab.py


注:本文中的torchtext.vocab.Vectors方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。