This article collects typical usage examples of the Python class torchtext.vocab.Vectors. If you have been wondering what vocab.Vectors does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore other members of the containing module, torchtext.vocab.
Shown below are 15 code examples of vocab.Vectors, drawn from open-source projects and sorted by popularity by default.
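Before diving into the examples, here is a minimal sketch of the Vectors API itself. The file name and cache directory are placeholders; if the file is not already present under cache, torchtext tries to download it from url:

from torchtext.vocab import Vectors

# Placeholder name/cache; pass url=... if the file must be downloaded.
vecs = Vectors(name='wiki.simple.vec', cache='.vector_cache')

print(vecs.dim)            # embedding dimensionality, e.g. 300
print(vecs.vectors.shape)  # (num_words_in_file, dim) FloatTensor
print(vecs.stoi['hello'])  # row index of a token in vecs.vectors
print(vecs['hello'][:5])   # indexing by token returns its vector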
Example 1: __init__
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def __init__(self, vocabulary_size, word_embedding_dim, hidden_dim, num_layers, dropout, vector_path=None, non_static=False):
    super(BiLSTMWordEmbeddingLookup, self).__init__()
    self.vocabulary_size = vocabulary_size
    self.num_layers = num_layers
    self.word_embedding_dim = word_embedding_dim
    self.hidden_dim = hidden_dim
    self.output_dim = hidden_dim
    self.word_embeddings = nn.Embedding(self.vocabulary_size, self.word_embedding_dim).to(DEVICE)
    if vector_path:
        logger.info('loading word vectors from {}'.format(vector_path))
        word_vectors = Vectors(vector_path).vectors
        # from_pretrained is a classmethod; call it on nn.Embedding directly.
        self.word_embeddings = nn.Embedding.from_pretrained(word_vectors, freeze=not non_static).to(DEVICE)
    self.lstm = nn.LSTM(self.word_embedding_dim, self.hidden_dim // 2, bidirectional=True, num_layers=num_layers, dropout=dropout).to(DEVICE)
    self.hidden = self.init_hidden()
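Note that Vectors(vector_path).vectors is ordered by the vector file's own vocabulary (Vectors.stoi), so passing that matrix straight to from_pretrained assumes the model's token ids follow the file order. A hedged alignment sketch; field.vocab is an assumed torchtext vocabulary, not part of the example above:

import torch
from torchtext.vocab import Vectors

vecs = Vectors(vector_path)
# One row per token in the model's own vocabulary; tokens missing from
# the file get unk_init-initialized vectors via Vectors.__getitem__.
weights = torch.stack([vecs[token] for token in field.vocab.itos])
embeddings = torch.nn.Embedding.from_pretrained(weights, freeze=False)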
Example 2: __init__
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def __init__(self, args):
    super(TextCNN, self).__init__(args)
    self.class_num = args.class_num
    self.channel_num = 1
    self.filter_num = args.filter_num
    self.filter_sizes = args.filter_sizes
    self.vocabulary_size = args.vocabulary_size
    self.embedding_dimension = args.embedding_dim
    self.embedding = nn.Embedding(self.vocabulary_size, self.embedding_dimension).to(DEVICE)
    if args.static:
        logger.info('loading word vectors from {}'.format(args.vector_path))
        vectors = Vectors(args.vector_path).vectors
        self.embedding = nn.Embedding.from_pretrained(vectors, freeze=not args.non_static).to(DEVICE)
    if args.multichannel:
        # from_pretrained is a classmethod, so no intermediate Embedding is needed.
        self.embedding2 = nn.Embedding.from_pretrained(args.vectors).to(DEVICE)
        self.channel_num += 1
    else:
        self.embedding2 = None
    self.convs = nn.ModuleList(
        [nn.Conv2d(self.channel_num, self.filter_num, (size, self.embedding_dimension)) for size in self.filter_sizes]).to(DEVICE)
    self.dropout = nn.Dropout(args.dropout).to(DEVICE)
    self.fc = nn.Linear(len(self.filter_sizes) * self.filter_num, self.class_num).to(DEVICE)
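The forward pass is omitted from this snippet. Below is a hedged sketch of the standard Kim-style TextCNN forward that this layer layout implies; it is not the repository's actual code:

import torch
import torch.nn.functional as F

def forward(self, x):
    if self.embedding2 is not None:
        # Two channels: static and non-static embeddings stacked on dim 1.
        x = torch.stack([self.embedding(x), self.embedding2(x)], dim=1)
    else:
        x = self.embedding(x).unsqueeze(1)                   # (N, 1, L, D)
    x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]  # each (N, filter_num, L')
    x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]   # each (N, filter_num)
    x = torch.cat(x, dim=1)              # (N, filter_num * len(filter_sizes))
    return self.fc(self.dropout(x))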
Example 3: test_get_vector_data
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def test_get_vector_data(self):
    vectors_cache_dir = '.cache'
    if os.path.exists(vectors_cache_dir):
        shutil.rmtree(vectors_cache_dir)
    pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
    filename = 'fasttext_sample.vec'
    file = os.path.join(pathdir, filename)
    url_base = urljoin('file:', pathname2url(file))
    vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)
    self.assertIsInstance(vecs, Vectors)
    vec_data = MatchingField._get_vector_data(vecs, vectors_cache_dir)
    self.assertEqual(len(vec_data), 1)
    self.assertEqual(vec_data[0].vectors.size(), torch.Size([100, 300]))
    self.assertEqual(vec_data[0].dim, 300)
    if os.path.exists(vectors_cache_dir):
        shutil.rmtree(vectors_cache_dir)
Example 4: test_extend_vocab_1
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def test_extend_vocab_1(self):
    vectors_cache_dir = '.cache'
    if os.path.exists(vectors_cache_dir):
        shutil.rmtree(vectors_cache_dir)
    mf = MatchingField()
    lf = MatchingField(id=True, sequential=False)
    fields = [('id', lf), ('left_a', mf), ('right_a', mf), ('label', lf)]
    col_naming = {'id': 'id', 'label': 'label', 'left': 'left_', 'right': 'right_'}
    pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
    filename = 'fasttext_sample.vec'
    file = os.path.join(pathdir, filename)
    url_base = urljoin('file:', pathname2url(file))
    vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)
    data_path = os.path.join(test_dir_path, 'test_datasets', 'sample_table_small.csv')
    md = MatchingDataset(fields, col_naming, path=data_path)
    mf.build_vocab()
    mf.vocab.vectors = torch.Tensor(len(mf.vocab.itos), 300)
    mf.extend_vocab(md, vectors=vecs)
    self.assertEqual(len(mf.vocab.itos), 6)
    self.assertEqual(mf.vocab.vectors.size(), torch.Size([6, 300]))
Example 5: test_extend_vectors_1
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def test_extend_vectors_1(self):
    vectors_cache_dir = '.cache'
    if os.path.exists(vectors_cache_dir):
        shutil.rmtree(vectors_cache_dir)
    pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
    filename = 'fasttext_sample.vec'
    file = os.path.join(pathdir, filename)
    url_base = urljoin('file:', pathname2url(file))
    vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)
    self.assertIsInstance(vecs, Vectors)
    vec_data = MatchingField._get_vector_data(vecs, vectors_cache_dir)
    v = MatchingVocab(Counter())
    v.vectors = torch.Tensor(1, vec_data[0].dim)
    v.unk_init = torch.Tensor.zero_
    tokens = {'hello', 'world'}
    v.extend_vectors(tokens, vec_data)
    self.assertEqual(len(v.itos), 4)
    self.assertEqual(v.vectors.size(), torch.Size([4, 300]))
    self.assertEqual(list(v.vectors[2][0:10]), [0.0] * 10)
    self.assertEqual(list(v.vectors[3][0:10]), [0.0] * 10)
    if os.path.exists(vectors_cache_dir):
        shutil.rmtree(vectors_cache_dir)
Example 6: iters
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
@classmethod
def iters(cls, path, vectors_name, vectors_cache, topic, batch_size=64, shuffle=True, device=0,
          vectors=None, unk_init=torch.Tensor.zero_):
    """
    :param path: directory containing train, test, dev files
    :param vectors_name: name of word vectors file
    :param vectors_cache: path to directory containing word vectors file
    :param topic: topic from which articles should be fetched
    :param batch_size: batch size
    :param device: GPU device
    :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
    :param unk_init: function used to generate vector for OOV words
    :return: BucketIterators for the train, validation, and test splits
    """
    if vectors is None:
        vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)
    train_path = os.path.join('TREC', 'robust45_aug_train_%s.tsv' % topic)
    dev_path = os.path.join('TREC', 'robust45_dev_%s.tsv' % topic)
    test_path = os.path.join('TREC', 'core17_10k_%s.tsv' % topic)
    train, val, test = cls.splits(path, train=train_path, validation=dev_path, test=test_path)
    cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
    return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                 sort_within_batch=True, device=device)
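A hedged usage sketch for this kind of iters classmethod; the dataset class name (Robust45 here), paths, vector file name, and batch attribute names are all placeholder assumptions:

train_iter, dev_iter, test_iter = Robust45.iters(
    path='data/robust45', vectors_name='wiki.en.vec',
    vectors_cache='.vector_cache', topic='307', batch_size=32, device=0)
for batch in train_iter:
    text, labels = batch.text, batch.label  # field names are assumptions
    ...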
Example 7: iters
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
@classmethod
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None,
          unk_init=torch.Tensor.zero_):
    """
    :param path: directory containing train, test, dev files
    :param vectors_name: name of word vectors file
    :param vectors_cache: path to directory containing word vectors file
    :param batch_size: batch size
    :param device: GPU device
    :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
    :param unk_init: function used to generate vector for OOV words
    :return: BucketIterators for the train, validation, and test splits
    """
    if vectors is None:
        vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)
    train, val, test = cls.splits(path)
    cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
    return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                 sort_within_batch=True, device=device)
Example 8: iters
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
@classmethod
def iters(cls, path, vectors_name, vectors_cache, topic, batch_size=64, shuffle=True, device=0,
          vectors=None, unk_init=torch.Tensor.zero_):
    """
    :param path: directory containing train, test, dev files
    :param vectors_name: name of word vectors file
    :param vectors_cache: path to directory containing word vectors file
    :param topic: topic from which articles should be fetched
    :param batch_size: batch size
    :param device: GPU device
    :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
    :param unk_init: function used to generate vector for OOV words
    :return: BucketIterators for the train, validation, and test splits
    """
    if vectors is None:
        vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)
    train_path = os.path.join('TREC', 'robust04_train_%s.tsv' % topic)
    dev_path = os.path.join('TREC', 'robust04_dev_%s.tsv' % topic)
    test_path = os.path.join('TREC', 'core17_10k_%s.tsv' % topic)
    train, val, test = cls.splits(path, train=train_path, validation=dev_path, test=test_path)
    cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
    return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                 sort_within_batch=True, device=device)
Example 9: iters
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
@classmethod
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None, unk_init=torch.Tensor.zero_):
    """
    :param path: directory containing train, test, dev files
    :param vectors_name: name of word vectors file
    :param vectors_cache: path to directory containing word vectors file
    :param batch_size: batch size
    :param device: GPU device
    :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
    :param unk_init: function used to generate vector for OOV words
    :return: BucketIterators for the train, validation, and test splits
    """
    if vectors is None:
        vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)
    train, val, test = cls.splits(path)
    cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
    return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                 sort_within_batch=True, device=device)
Example 10: iters
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
@classmethod
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None,
          unk_init=torch.Tensor.zero_):
    """
    :param path: directory containing train, test, dev files
    :param vectors_name: name of word vectors file
    :param vectors_cache: path to directory containing word vectors file
    :param batch_size: batch size
    :param device: GPU device
    :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
    :param unk_init: function used to generate vector for OOV words
    :return: BucketIterators for the train, validation, and test splits
    """
    if vectors is None:
        vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)
    train, validation, test = cls.splits(path)
    cls.LABEL_FIELD.build_vocab(train, validation, test)
    cls.TEXT_FIELD.build_vocab(train, validation, test, vectors=vectors)
    return BucketIterator.splits((train, validation, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                 sort_within_batch=True, device=device)
Example 11: iters
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
@classmethod
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None, unk_init=torch.Tensor.zero_):
    """
    :param path: directory containing train, test, dev files
    :param vectors_name: name of word vectors file
    :param vectors_cache: path to directory containing word vectors file
    :param batch_size: batch size
    :param device: GPU device
    :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
    :param unk_init: function used to generate vector for OOV words
    :return: BucketIterators for the train and test splits
    """
    if vectors is None:
        vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)
    train, test = cls.splits(path)
    cls.TEXT_FIELD.build_vocab(train, test, vectors=vectors)
    return BucketIterator.splits((train, test), batch_size=batch_size, repeat=False, shuffle=shuffle, device=device)
Example 12: iters
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
@classmethod
def iters(cls, path, vectors_name, vectors_cache, batch_size=64, shuffle=True, device=0, vectors=None,
          unk_init=torch.Tensor.zero_):
    """
    :param path: directory containing train, test, dev files
    :param vectors_name: name of word vectors file
    :param vectors_cache: path to directory containing word vectors file
    :param batch_size: batch size
    :param device: GPU device
    :param vectors: custom vectors - either predefined torchtext vectors or your own custom Vector classes
    :param unk_init: function used to generate vector for OOV words
    :return: BucketIterators for the train, validation, and test splits
    """
    if vectors is None:
        vectors = Vectors(name=vectors_name, cache=vectors_cache, unk_init=unk_init)
    train, val, test = cls.splits(path)
    cls.TEXT_FIELD.build_vocab(train, val, test, vectors=vectors)
    return BucketIterator.splits((train, val, test), batch_size=batch_size, repeat=False, shuffle=shuffle,
                                 sort_within_batch=True, device=device)
Example 13: __init__
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def __init__(self, args):
    self.RAW = data.RawField()
    self.RAW.is_target = False
    tokenize = lambda x: list(x)
    self.TEXT = data.Field(batch_first=True, tokenize=tokenize)
    self.LABEL = data.Field(sequential=False, unk_token=None)
    self.train, self.dev, self.test = data.TabularDataset.splits(
        path='/data/nfsdata/nlp/datasets/sentence_pair/bq_corpus_torch10',
        train='BQ_train.json',
        validation='BQ_dev.json',
        test='BQ_test.json',
        format='json',
        fields={"gold_label": ("label", self.LABEL),
                "sentence1": ("q1", self.TEXT),
                "sentence2": ("q2", self.TEXT),
                "ID": ("id", self.RAW)})
    self.TEXT.build_vocab(self.train, self.dev, self.test, vectors=Vectors("BQ300", args.data))
    self.LABEL.build_vocab(self.train)
    sort_key = lambda x: data.interleave_keys(len(x.q1), len(x.q2))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.train_iter = data.BucketIterator(self.train, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
    self.dev_iter = data.BucketIterator(self.dev, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
    self.test_iter = data.BucketIterator(self.test, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
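A hedged sketch of consuming the iterators built above. The batch attributes q1, q2, label, and id follow the field tuples in the snippet; the class name BQ and the model call are placeholder assumptions:

loader = BQ(args)
for batch in loader.train_iter:
    q1, q2 = batch.q1, batch.q2      # (batch, seq_len) LongTensors
    labels, ids = batch.label, batch.id
    logits = model(q1, q2)           # `model` is a placeholder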
Example 14: test_vocab_download_custom_vectors
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def test_vocab_download_custom_vectors(self):
    c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
    # Build a vocab and get vectors twice to test caching.
    for i in range(2):
        v = vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                        vectors=Vectors('wiki.simple.vec',
                                        url=FastText.url_base.format('simple')))
        self.assertEqual(v.itos, ['<unk>', '<pad>', '<bos>',
                                  'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T', 'hello', 'world'])
        vectors = v.vectors.numpy()
        # The first 5 entries in each vector.
        expected_fasttext_simple_en = {
            'hello': [0.39567, 0.21454, -0.035389, -0.24299, -0.095645],
            'world': [0.10444, -0.10858, 0.27212, 0.13299, -0.33165],
        }
        for word in expected_fasttext_simple_en:
            assert_allclose(vectors[v.stoi[word], :5],
                            expected_fasttext_simple_en[word])
        assert_allclose(vectors[v.stoi['<unk>']], np.zeros(300))
    # Delete the vectors after we're done to save disk space on CI.
    if os.environ.get("TRAVIS") == "true":
        vec_file = os.path.join(self.project_root, ".vector_cache", "wiki.simple.vec")
        conditional_remove(vec_file)
Example 15: test_errors
# Required module: from torchtext import vocab
# Or: from torchtext.vocab import Vectors
def test_errors(self):
    c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
    # Each bad call gets its own assertRaises block; putting two calls in
    # one block would leave the second unreachable after the first raises.
    with self.assertRaises(ValueError):
        # Unknown string alias inside a list.
        vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                    vectors=["fasttext.english.300d"])
    with self.assertRaises(ValueError):
        # Unknown string alias.
        vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                    vectors="fasttext.english.300d")
    with self.assertRaises(ValueError):
        # Vectors argument that is neither a string nor a Vectors object.
        vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
                    vectors={"word": [1, 2, 3]})