This article collects typical usage examples of the Python method util.vocabulary.Vocabulary.new. If you are wondering what Vocabulary.new does, how to call it, and what real-world usage looks like, the curated code samples below should help. You can also explore the containing class, util.vocabulary.Vocabulary, for further context.
Ten code examples of Vocabulary.new are shown below, sorted by popularity by default.
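All ten examples share the same calling convention: Vocabulary.new takes an iterable of word lists (a list of token lists, or a generator such as gens.word_list(path)) plus a maximum vocabulary size, and some callers additionally pass add_special_tokens=False when building closed label sets. The util.vocabulary module itself is not reproduced on this page, so the following is only a minimal sketch of a class with a compatible interface; the frequency-based truncation, the choice of <unk>/<s>/</s> specials, the stoi helper, and the one-word-per-line save format are assumptions for illustration, not the library's actual implementation.

from collections import Counter

class Vocabulary:
    """Sketch of a word-to-ID mapping compatible with the calls below (assumed, simplified)."""

    @staticmethod
    def new(word_lists, size, add_special_tokens=True):
        # Count token frequencies over every word list (works for lists and generators).
        counts = Counter(word for words in word_lists for word in words)
        specials = ['<unk>', '<s>', '</s>'] if add_special_tokens else []
        vocab = Vocabulary()
        # Reserve IDs for the specials, then keep the most frequent words up to `size`.
        vocab.id_to_word = specials + [w for w, _ in counts.most_common(max(size - len(specials), 0))]
        vocab.word_to_id = {w: i for i, w in enumerate(vocab.id_to_word)}
        return vocab

    def stoi(self, word):
        # Unknown words fall back to <unk> (ID 0 when specials are present).
        return self.word_to_id.get(word, 0)

    def save(self, path):
        with open(path, 'w') as fp:
            for word in self.id_to_word:
                fp.write(word + '\n')

Under these assumptions, src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab) keeps the args.vocab most frequent source tokens, while phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False) assigns an ID to every label without reserving specials.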
Example 1: train
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]
    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]
    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(word_lists, args.vocab)
    phrase_set = set()
    semi_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semi_set |= set(extract_semi_labels(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
    semi_vocab = Vocabulary.new([list(semi_set)], len(semi_set), add_special_tokens=False)
    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]
    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.queue, args.stack,
        len(phrase_set), len(semi_set),
    )
    if USE_GPU:
        parser.to_gpu()
    opt = optimizers.AdaGrad(lr=0.005)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(5))
    for epoch in range(args.epoch):
        n = 0
        for samples in batch(zip(word_lists, op_lists), args.minibatch):
            parser.zerograds()
            # Accumulate the loss over the whole minibatch, then backprop once.
            loss = my_zeros((), np.float32)
            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0)
                n += 1
            loss.backward()
            opt.update()
        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semi_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)
    trace('finished.')
Example 2: train_model
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train_model(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)
    trace('making model ...')
    model = EncoderDecoderModel.new(src_vocab, trg_vocab, args.embed, args.hidden)
    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        # Rebuild the corpus generators each epoch and batch sorted-parallel sentence pairs.
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        model.init_optimizer()
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)
            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace(' src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace(' trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace(' hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
            trained += K
        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))
    trace('finished.')
Example 3: train_model
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train_model(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace('making model ...')
    model = self.new(src_vocab, trg_vocab, self.embed, self.hidden, self.parameter_dict)
    random_number = random.randint(0, self.minibatch)
    for i_epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (i_epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        model.init_optimizer()
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)
            if trained == 0:
                # Print one sample from the first minibatch of each epoch.
                self.print_out(random_number, i_epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K
        trace('saving model ...')
        model.save("ChainerMachineTranslation" + '.%03d' % (i_epoch + 1))
    trace('finished.')
Developer: tksugimoto | Project: Chainer_Machine_Translation_ipython_notebook | Lines: 34 | Source file: EncoderDecoderModel.py
Example 4: train
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train(self):
    """
    Train the attention dialogue model.
    If a word2vec model is available, its weights can be copied into the
    embedding and decoder layers. The optimizer is AdaGrad with gradient clipping.
    """
    trace("making vocabularies ...")
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace("making model ...")
    self.attention_dialogue = AttentionDialogue(self.vocab, self.embed, self.hidden, self.XP)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, self.attention_dialogue.emb)
        self.copy_model(self.word2vec, self.attention_dialogue.dec, dec_flag=True)
    for epoch in range(self.epoch):
        trace("epoch %d/%d: " % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.attention_dialogue)
        opt.add_hook(optimizer.GradientClipping(5))
        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = self.forward_implement(
                src_batch, trg_batch, src_vocab, trg_vocab, self.attention_dialogue, True, 0
            )
            loss.backward()
            opt.update()
            self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K
    trace("saving model ...")
    prefix = self.model
    model_path = APP_ROOT + "/model/" + prefix
    src_vocab.save(model_path + ".srcvocab")
    trg_vocab.save(model_path + ".trgvocab")
    self.attention_dialogue.save_spec(model_path + ".spec")
    serializers.save_hdf5(model_path + ".weights", self.attention_dialogue)
    trace("finished.")
Example 5: train
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train(self):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
    else:
        encdec = self.encdec
    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            if trained == 0:
                self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K
    trace('saving model ...')
    prefix = self.model
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    encdec.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', encdec)
    trace('finished.')
Example 6: train
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)
    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()
    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(attmt)
        opt.add_hook(optimizer.GradientClipping(5))
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
            loss.backward()
            opt.update()
            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace(' src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace(' trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace(' hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
            trained += K
        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)
    trace('finished.')
Example 7: train_model
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train_model(args):
    train_begin = time.time()
    trace('making vocabularies ...')
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)
    trace('begin training ...')
    model = TransSegmentationModel.new(vocab, args.context, args.hidden, args.labels, args.eta)
    for epoch in range(args.epoch):
        epoch_beg = time.time()
        trace('START epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        total_loss = 0
        model.init_optimizer()
        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue
                text = ' '.join(word_list)
                letters = ''.join(word_list)
                labels, accum_loss_f = model.train(text)
                total_loss += accum_loss_f
                trained += 1
                hyp = make_hyp(letters, labels)
                # Per-sentence debug output (disabled):
                # trace('accum_loss : %lf' % accum_loss_f)
                # trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
                # trace('trained %d: ' % trained)
                # trace(text)
                # trace(hyp)
                # if trained % 100 == 0:
                #     trace(' %8d' % trained)
        trace('FINISHED epoch %d/%d: ' % (epoch + 1, args.epoch))
        trace('total_loss : %lf' % total_loss)
        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))
        epoch_time = time.time() - epoch_beg
        trace('elapsed_time/1epoch : %lf' % epoch_time)
    trace('finished.')
    elapsed_time = time.time() - train_begin
    trace('train_time : %lf' % elapsed_time)
    trace('')
Example 8: train_model
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train_model(args):
    trace('making vocabularies ...')
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)
    trace('start training ...')
    model = RNNSegmentationModel.new(vocab, args.embed, args.hidden)
    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        model.init_optimizer()
        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue
                text = ' '.join(word_list)
                letters = ''.join(word_list)
                scores = model.train(text)
                trained += 1
                hyp = make_hyp(letters, scores)
                trace(trained)
                trace(text)
                trace(hyp)
                trace(' '.join('%+.1f' % x for x in scores))
                if trained % 100 == 0:
                    trace(' %8d' % trained)
        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))
    trace('finished.')
Example 9: train_model
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train_model(args):
    trace("making vocabularies ...")
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)
    trace("start training ...")
    model = SegmentationModel.new(vocab, args.context, args.hidden)
    for epoch in range(args.epoch):
        trace("epoch %d/%d: " % (epoch + 1, args.epoch))
        trained = 0
        model.init_optimizer()
        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue
                text = " ".join(word_list)
                letters = "".join(word_list)
                scores = model.train(text)
                trained += 1
                hyp = make_hyp(letters, scores)
                trace(trained)
                trace(text)
                trace(hyp)
                trace(" ".join("%+.1f" % x for x in scores))
                if trained % 100 == 0:
                    trace(" %8d" % trained)
        trace("saving model ...")
        model.save(args.model + ".%03d" % (epoch + 1))
    trace("finished.")
Example 10: train
# Required import: from util.vocabulary import Vocabulary [as alias]
# Or: from util.vocabulary.Vocabulary import new [as alias]
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]
    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]
    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]
    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)], len(semiterminal_set), add_special_tokens=False)
    trace('converting data ...')
    word_lists = [to_vram_words(convert_word_list(x, word_vocab)) for x in word_lists]
    op_lists = [to_vram_ops(convert_op_list(x, phrase_vocab, semiterminal_vocab)) for x in op_lists]
    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.char_embed, args.queue,
        args.stack, args.srstate, len(phrase_set), len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))
    batch_set = list(zip(word_lists, op_lists))
    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)
        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            # Accumulate the loss over the whole minibatch, then backprop once.
            loss = XP.fzeros(())
            embed_cache = {}
            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0, embed_cache)
                n += 1
            loss.backward()
            opt.update()
        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)
        # Decay the learning rate after each epoch.
        opt.lr *= 0.92
    trace('finished.')