This article collects typical usage examples of the Python method utils.load_vocab. If you are wondering what utils.load_vocab does, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples from the containing module, utils.
Below are 4 code examples of utils.load_vocab, drawn from different open-source projects and sorted by popularity by default.
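Because each example comes from a different project, load_vocab itself is defined slightly differently in each: examples 1 and 2 index into the vocabulary, so they expect a list of tokens; example 4 looks tokens up, so it expects a token-to-id dict; example 3's variant additionally caps the vocabulary size with a second argument. As a reference point only, here is a minimal sketch of the first two shapes, assuming a plain-text vocabulary file with one token per line (the file format is an assumption, not taken from any of these projects):

def load_vocab_as_list(path):
    # One token per line; the line number is the token id.
    with open(path, encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f]

def load_vocab_as_dict(path):
    # Token -> integer id, in file order.
    with open(path, encoding='utf-8') as f:
        return {line.rstrip('\n'): i for i, line in enumerate(f)}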
Example 1: main

# Required module: import utils [as alias]
# Or: from utils import load_vocab [as alias]
import numpy as np
import tensorflow as tf
import utils
# las_model_fn (the model function) and input_fn (the data pipeline)
# are project-local and assumed to be in scope here.
def main(args):
    vocab_list = np.array(utils.load_vocab(args.vocab))
    vocab_size = len(vocab_list)

    config = tf.estimator.RunConfig(model_dir=args.model_dir)
    hparams = utils.create_hparams(
        args, vocab_size, utils.SOS_ID, utils.EOS_ID)
    hparams.decoder.set_hparam('beam_width', args.beam_width)

    model = tf.estimator.Estimator(
        model_fn=las_model_fn,
        config=config,
        params=hparams)

    predictions = model.predict(
        input_fn=lambda: input_fn(
            args.data, args.vocab, num_channels=args.num_channels,
            batch_size=args.batch_size, num_epochs=1),
        predict_keys='sample_ids')

    if args.beam_width > 0:
        # Beam search: keep only the top beam (column 0).
        predictions = [vocab_list[y['sample_ids'][:, 0]].tolist() + [utils.EOS]
                       for y in predictions]
    else:
        predictions = [vocab_list[y['sample_ids']].tolist() + [utils.EOS]
                       for y in predictions]

    # Truncate each hypothesis at its first end-of-sentence token; the
    # appended utils.EOS guarantees index() finds one.
    predictions = [' '.join(y[:y.index(utils.EOS)]) for y in predictions]

    with open(args.save, 'w') as f:
        f.write('\n'.join(predictions))
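This example and the next both rely on an input_fn that builds the input pipeline; its real definition is not shown on this page. A minimal sketch of the signature they expect, assuming TFRecord files holding a flat float feature and an integer label sequence (the storage format, parsing logic, and unused vocab argument are all assumptions):

def input_fn(dataset_path, vocab_path, num_channels=39,
             batch_size=8, num_epochs=1):
    # vocab_path is accepted only for signature compatibility here;
    # labels are assumed to be stored already encoded as ids.
    def parse(record):
        features = tf.io.parse_single_example(record, {
            'inputs': tf.io.VarLenFeature(tf.float32),
            'labels': tf.io.VarLenFeature(tf.int64),
        })
        inputs = tf.reshape(tf.sparse.to_dense(features['inputs']),
                            [-1, num_channels])
        labels = tf.cast(tf.sparse.to_dense(features['labels']), tf.int32)
        return inputs, labels

    return (tf.data.TFRecordDataset(dataset_path)
            .map(parse)
            .repeat(num_epochs)
            .padded_batch(batch_size,
                          padded_shapes=([None, num_channels], [None])))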
Example 2: main

# Required module: import utils [as alias]
# Or: from utils import load_vocab [as alias]
import tensorflow as tf
import utils
# las_model_fn and input_fn are project-local and assumed to be in scope.
def main(args):
    vocab_list = utils.load_vocab(args.vocab)
    vocab_size = len(vocab_list)

    config = tf.estimator.RunConfig(model_dir=args.model_dir)
    hparams = utils.create_hparams(
        args, vocab_size, utils.SOS_ID, utils.EOS_ID)

    model = tf.estimator.Estimator(
        model_fn=las_model_fn,
        config=config,
        params=hparams)

    if args.valid:
        # Train with periodic evaluation on the validation set.
        train_spec = tf.estimator.TrainSpec(
            input_fn=lambda: input_fn(
                args.train, args.vocab, num_channels=args.num_channels,
                batch_size=args.batch_size, num_epochs=args.num_epochs))
        eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: input_fn(
                args.valid or args.train, args.vocab,
                num_channels=args.num_channels, batch_size=args.batch_size),
            start_delay_secs=60,
            throttle_secs=args.eval_secs)
        tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
    else:
        # No validation set: train without evaluation.
        model.train(
            input_fn=lambda: input_fn(
                args.train, args.vocab, num_channels=args.num_channels,
                batch_size=args.batch_size, num_epochs=args.num_epochs))
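Both main functions take an argparse.Namespace. For completeness, a minimal launcher for the training entry point above, with flag names read off the attribute accesses in the code (the defaults, help strings, and required flags are assumptions):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', help='training data')
    parser.add_argument('--valid', default=None, help='optional validation data')
    parser.add_argument('--vocab', required=True, help='vocabulary file')
    parser.add_argument('--model_dir', required=True, help='checkpoint directory')
    parser.add_argument('--num_channels', type=int, default=39)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--num_epochs', type=int, default=1)
    parser.add_argument('--eval_secs', type=int, default=600)
    main(parser.parse_args())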
Example 3: create_index

# Required module: import utils [as alias]
# Or: from utils import load_vocab [as alias]
import os
import utils
# PyLucene imports (this example indexes documents with Lucene):
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import FieldType
from org.apache.lucene.index import IndexOptions, IndexWriter, IndexWriterConfig
from org.apache.lucene.store import MMapDirectory
# `prm` (the project's parameter module) is assumed to be in scope.
def create_index(self, index_folder, docs_path, add_terms=False):
    print('Loading Vocab...')
    if not self.vocab:
        self.vocab = utils.load_vocab(prm.vocab_path, prm.n_words)

    os.mkdir(index_folder)

    # Field types: t1 is stored and indexed by doc, t2 is indexed with
    # term frequencies but not stored, t3 is stored but not indexed.
    self.t1 = FieldType()
    self.t1.setStored(True)
    self.t1.setIndexOptions(IndexOptions.DOCS)

    self.t2 = FieldType()
    self.t2.setStored(False)
    self.t2.setIndexOptions(IndexOptions.DOCS_AND_FREQS)

    self.t3 = FieldType()
    self.t3.setStored(True)
    self.t3.setIndexOptions(IndexOptions.NONE)

    fsDir = MMapDirectory(Paths.get(index_folder))
    writerConfig = IndexWriterConfig(StandardAnalyzer())
    self.writer = IndexWriter(fsDir, writerConfig)
    print('%d docs in index' % self.writer.numDocs())
    print('Indexing documents...')

    doc_id = 0
    import corpus_hdf5
    corpus = corpus_hdf5.CorpusHDF5(docs_path)
    for txt in corpus.get_text_iter():
        title = corpus.get_article_title(doc_id)
        self.add_doc(doc_id, title, txt, add_terms)
        if doc_id % 1000 == 0:
            print('indexing doc', doc_id)
        doc_id += 1

    print('Indexed %d docs.' % self.writer.numDocs())
    self.writer.close()
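The per-document work is delegated to self.add_doc, which is not shown on this page. A minimal sketch of what such a method could look like with PyLucene, reusing the t1/t2/t3 field types defined above (the field names and the add_terms behavior are assumptions, not the project's actual code):

from org.apache.lucene.document import Document, Field

def add_doc(self, doc_id, title, txt, add_terms):
    doc = Document()
    doc.add(Field('id', str(doc_id), self.t1))    # stored and indexed
    doc.add(Field('title', title, self.t3))       # stored only
    doc.add(Field('text', txt, self.t2))          # indexed, not stored
    if add_terms:
        # Hypothetical: also index only the in-vocabulary words.
        filtered = ' '.join(w for w in txt.split() if w in self.vocab)
        doc.add(Field('text_vocab', filtered, self.t2))
    self.writer.addDocument(doc)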
Example 4: train

# Required module: import utils [as alias]
# Or: from utils import load_vocab [as alias]
import torch
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from utils import load_vocab
# Config, read_corpus, BERT_LSTM_CRF, load_model, save_model and dev
# are project-local and assumed to be in scope here.
def train(**kwargs):
    config = Config()
    config.update(**kwargs)
    print('Current settings:\n', config)
    if config.use_cuda:
        torch.cuda.set_device(config.gpu)

    print('loading corpus')
    vocab = load_vocab(config.vocab)
    label_dic = load_vocab(config.label_file)
    tagset_size = len(label_dic)

    train_data = read_corpus(config.train_file, max_length=config.max_length,
                             label_dic=label_dic, vocab=vocab)
    dev_data = read_corpus(config.dev_file, max_length=config.max_length,
                           label_dic=label_dic, vocab=vocab)

    train_ids = torch.LongTensor([temp.input_id for temp in train_data])
    train_masks = torch.LongTensor([temp.input_mask for temp in train_data])
    train_tags = torch.LongTensor([temp.label_id for temp in train_data])
    train_dataset = TensorDataset(train_ids, train_masks, train_tags)
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=config.batch_size)

    dev_ids = torch.LongTensor([temp.input_id for temp in dev_data])
    dev_masks = torch.LongTensor([temp.input_mask for temp in dev_data])
    dev_tags = torch.LongTensor([temp.label_id for temp in dev_data])
    dev_dataset = TensorDataset(dev_ids, dev_masks, dev_tags)
    dev_loader = DataLoader(dev_dataset, shuffle=True, batch_size=config.batch_size)

    model = BERT_LSTM_CRF(config.bert_path, tagset_size, config.bert_embedding,
                          config.rnn_hidden, config.rnn_layer,
                          dropout_ratio=config.dropout_ratio,
                          dropout1=config.dropout1, use_cuda=config.use_cuda)
    if config.load_model:
        assert config.load_path is not None
        model = load_model(model, name=config.load_path)
    if config.use_cuda:
        model.cuda()
    model.train()

    optimizer = getattr(optim, config.optim)
    optimizer = optimizer(model.parameters(), lr=config.lr,
                          weight_decay=config.weight_decay)

    eval_loss = 10000
    for epoch in range(config.base_epoch):
        step = 0
        for i, batch in enumerate(train_loader):
            step += 1
            model.zero_grad()
            inputs, masks, tags = batch
            # Variable is a no-op on modern PyTorch; kept from the original.
            inputs, masks, tags = Variable(inputs), Variable(masks), Variable(tags)
            if config.use_cuda:
                inputs, masks, tags = inputs.cuda(), masks.cuda(), tags.cuda()
            feats = model(inputs, masks)
            loss = model.loss(feats, masks, tags)
            loss.backward()
            optimizer.step()
            if step % 50 == 0:
                print('step: {} | epoch: {} | loss: {}'.format(step, epoch, loss.item()))
        # Evaluate once per epoch and checkpoint on improvement.
        loss_temp = dev(model, dev_loader, epoch, config)
        if loss_temp < eval_loss:
            eval_loss = loss_temp  # track the best dev loss so far
            save_model(model, epoch)
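The dev helper called once per epoch is not shown on this page; the training loop only requires it to return a scalar dev-set loss. A minimal sketch consistent with how the model is used above (the body is an assumption, not the project's actual code):

def dev(model, dev_loader, epoch, config):
    model.eval()
    total_loss, batches = 0.0, 0
    with torch.no_grad():
        for inputs, masks, tags in dev_loader:
            if config.use_cuda:
                inputs, masks, tags = inputs.cuda(), masks.cuda(), tags.cuda()
            feats = model(inputs, masks)
            total_loss += model.loss(feats, masks, tags).item()
            batches += 1
    model.train()
    avg_loss = total_loss / max(batches, 1)
    print('epoch: {} | dev loss: {}'.format(epoch, avg_loss))
    return avg_loss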