本文整理汇总了Python中util.vocabulary.Vocabulary类的典型用法代码示例。如果您正苦于以下问题:Python Vocabulary类的具体用法?Python Vocabulary怎么用?Python Vocabulary使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Vocabulary类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
def train(args):
    """Train the parser on the corpus at ``args.source``.

    Every line of the corpus is passed to ``make_tree``.  Word,
    phrase-label and semiterminal-label vocabularies are built from the
    resulting trees, the word lists and gold operations are converted
    with them, and the parser is trained for ``args.epoch`` epochs with
    AdaGrad (lr 0.005) plus gradient clipping.  After each epoch the
    vocabularies, the parser spec and the weights are saved under
    ``args.model`` followed by an epoch-number suffix.

    NOTE(review): the listing this came from had lost its indentation;
    the nesting (in particular the per-epoch save) is reconstructed --
    confirm against the upstream file.
    """
    trace('loading corpus ...')
    with open(args.source) as corpus:
        trees = [make_tree(line) for line in corpus]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(tree) for tree in trees]

    trace('extracting gold operations ...')
    op_lists = [make_operations(tree) for tree in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(word_lists, args.vocab)
    phrase_set, semi_set = set(), set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semi_set |= set(extract_semi_labels(tree))
    # Label vocabularies hold exactly the observed labels, no special tokens.
    phrase_vocab = Vocabulary.new(
        [list(phrase_set)], len(phrase_set), add_special_tokens=False)
    semi_vocab = Vocabulary.new(
        [list(semi_set)], len(semi_set), add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(words, word_vocab) for words in word_lists]
    op_lists = [
        convert_op_list(ops, phrase_vocab, semi_vocab) for ops in op_lists
    ]

    trace('start training ...')
    parser = Parser(
        args.vocab, args.embed, args.queue, args.stack,
        len(phrase_set), len(semi_set),
    )
    if USE_GPU:
        parser.to_gpu()
    adagrad = optimizers.AdaGrad(lr=0.005)
    adagrad.setup(parser)
    adagrad.add_hook(optimizer.GradientClipping(5))

    for epoch in range(args.epoch):
        epoch_no = epoch + 1
        seen = 0

        for samples in batch(zip(word_lists, op_lists), args.minibatch):
            parser.zerograds()
            # The loss is accumulated over the minibatch, then
            # back-propagated once.
            loss = my_zeros((), np.float32)
            for word_list, op_list in zip(*samples):
                seen += 1
                trace('epoch %3d, sample %6d:' % (epoch_no, seen))
                loss += parser.forward(word_list, op_list, 0)
            loss.backward()
            adagrad.update()

        trace('saving model ...')
        suffix = '.%03.d' % epoch_no
        prefix = args.model + suffix
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semi_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

    trace('finished.')
示例2: load
def load(filename):
    """Create an ``AttentionalTranslationModel`` from the file ``filename``.

    The file is read through ``ModelFile`` in a fixed order: source
    vocabulary, target vocabulary, embedding size, hidden size, and then
    one embed/linear parameter block per model layer.  The layer reads
    happen between ``wrapper.begin_model_access`` and
    ``wrapper.end_model_access``.

    Returns the populated model instance.
    """
    self = AttentionalTranslationModel()
    with ModelFile(filename) as fp:
        self.__src_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__trg_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__n_embed = int(fp.read())
        self.__n_hidden = int(fp.read())
        self.__make_model()

        net = self.__model
        # (reader, attribute) pairs; the sequence is the on-disk order and
        # must not be rearranged.
        layout = (
            (fp.read_embed, 'w_xi'),
            (fp.read_linear, 'w_ia'),
            (fp.read_linear, 'w_aa'),
            (fp.read_linear, 'w_ib'),
            (fp.read_linear, 'w_bb'),
            (fp.read_linear, 'w_aw'),
            (fp.read_linear, 'w_bw'),
            (fp.read_linear, 'w_pw'),
            (fp.read_linear, 'w_we'),
            (fp.read_linear, 'w_ap'),
            (fp.read_linear, 'w_bp'),
            (fp.read_embed, 'w_yp'),
            (fp.read_linear, 'w_pp'),
            (fp.read_linear, 'w_cp'),
            (fp.read_linear, 'w_dp'),
            (fp.read_linear, 'w_py'),
        )
        wrapper.begin_model_access(net)
        for read_into, attr in layout:
            read_into(getattr(net, attr))
        wrapper.end_model_access(net)
    return self
示例3: test
def test(self):
    """Translate ``self.source`` and write the result to ``self.target``.

    Loads the vocabularies, the ``EncoderDecoder`` spec and its weights
    from files prefixed with ``self.model``, then decodes the source in
    minibatches.  Each hypothesis is cut at its first ``'</s>'``; the
    source/hypothesis pair is echoed to stdout and the space-joined
    hypothesis is written as one line of the target file.

    NOTE(review): nesting reconstructed from a listing without
    indentation -- confirm against the upstream file.
    """
    trace('loading model ...')
    model_prefix = self.model
    src_vocab = Vocabulary.load(model_prefix + '.srcvocab')
    trg_vocab = Vocabulary.load(model_prefix + '.trgvocab')
    encdec = EncoderDecoder.load_spec(model_prefix + '.spec')
    serializers.load_hdf5(model_prefix + '.weights', encdec)

    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as out:
        batches = gens.batch(gens.word_list(self.source), self.minibatch)
        for src_batch in batches:
            src_batch = fill_batch(src_batch)
            batch_size = len(src_batch)
            trace('sample %8d - %8d ...' % (generated + 1, generated + batch_size))
            hyp_batch = self.forward(
                src_batch, None, src_vocab, trg_vocab, encdec,
                False, self.generation_limit,
            )

            for idx, hyp in enumerate(hyp_batch):
                # Append a sentinel so index() always finds an end marker,
                # then keep only the tokens before the first one.
                hyp.append('</s>')
                end = hyp.index('</s>')
                hyp = hyp[:end]
                print("src : " + "".join(src_batch[idx]).replace("</s>", ""))
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=out)

            generated += batch_size

    trace('finished.')
示例4: train_model
def train_model(self):
    """Train a translation model on ``self.source`` / ``self.target``.

    Builds source and target vocabularies of size ``self.vocab``, creates
    the model through ``self.new`` and runs ``self.epoch`` epochs.  In
    each epoch the parallel corpus is re-read, sorted/batched through
    ``gens.sorted_parallel`` and ``gens.batch``, the optimizer is
    re-initialised, and every minibatch is passed to ``model.train``.
    The first minibatch of each epoch is reported through
    ``self.print_out``; the model is saved after each epoch.

    NOTE(review): nesting reconstructed from a listing without
    indentation -- confirm against the upstream file.
    """
    trace('making vocaburaries ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    model = self.new(src_vocab, trg_vocab, self.embed, self.hidden, self.parameter_dict)

    # random.randint() includes BOTH endpoints, so the upper bound must be
    # minibatch - 1 to stay within a batch of `minibatch` samples (this is
    # what the sibling train() methods pass to print_out).  Clamp at 0 so a
    # degenerate minibatch size does not make randint() raise.
    # Presumably print_out uses this as an index into the batch -- verify.
    random_number = random.randint(0, max(self.minibatch - 1, 0))

    for i_epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (i_epoch + 1, self.epoch))
        trained = 0
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)

            # Only the first minibatch of the epoch is printed.
            if trained == 0:
                self.print_out(random_number, i_epoch, trained, src_batch, trg_batch, hyp_batch)

            trained += K

        trace('saving model ...')
        # NOTE(review): the suffix uses self.epoch (the total), not i_epoch,
        # so every epoch overwrites the same file.  Left unchanged because
        # code that loads the model may rely on this fixed name -- confirm.
        model.save("ChainerMachineTranslation" + '.%03d' % (self.epoch + 1))

    trace('finished.')
开发者ID:tksugimoto,项目名称:Chainer_Machine_Translation_ipython_notebook,代码行数:32,代码来源:EncoderDecoderModel.py
示例5: test
def test(args):
    """Parse every line of ``args.source`` and print the resulting trees.

    Loads the word, phrase and semiterminal vocabularies, the parser spec
    and the weights from files prefixed with ``args.model`` (moving the
    parser to the GPU first when ``args.use_gpu`` is set).  Each input
    line is whitespace-split, converted with the word vocabulary, parsed,
    relabelled, passed through ``combine_xbar`` and printed wrapped in
    ``( ... )``.

    NOTE(review): nesting reconstructed from a listing without
    indentation -- confirm against the upstream file.
    """
    trace('loading model ...')
    model_prefix = args.model
    word_vocab = Vocabulary.load(model_prefix + '.words')
    phrase_vocab = Vocabulary.load(model_prefix + '.phrases')
    semiterminal_vocab = Vocabulary.load(model_prefix + '.semiterminals')
    parser = Parser.load_spec(model_prefix + '.spec')
    if args.use_gpu:
        parser.to_gpu()
    serializers.load_hdf5(model_prefix + '.weights', parser)

    # One cache dict is shared by every forward() call below.
    embed_cache = {}
    parser.reset()

    trace('generating parse trees ...')
    with open(args.source) as sentences:
        for line in sentences:
            word_ids = convert_word_list(line.split(), word_vocab)
            word_list = to_vram_words(word_ids)
            raw_tree = parser.forward(
                word_list, None, args.unary_limit, embed_cache)
            labelled = restore_labels(raw_tree, phrase_vocab, semiterminal_vocab)
            tree = combine_xbar(labelled)
            print('( ' + tree_to_string(tree) + ' )')

    trace('finished.')
示例6: train_model
def train_model(args):
    """Train an ``EncoderDecoderModel`` and save it after every epoch.

    Vocabularies of size ``args.vocab`` are built from ``args.source`` and
    ``args.target``.  Each epoch re-reads the corpus, batches it through
    ``gens.sorted_parallel`` / ``gens.batch``, re-initialises the
    optimizer and trains on each minibatch, tracing the source, target
    and hypothesis of every sample with ``'</s>'`` shown as ``'*'``.
    The model is saved to ``args.model`` plus a three-digit epoch suffix.

    NOTE(review): nesting reconstructed from a listing without
    indentation -- confirm against the upstream file.
    """

    def masked(words):
        # Render a token list with the end-of-sentence marker shown as '*'.
        return ' '.join(['*' if w == '</s>' else w for w in words])

    trace('making vocaburaries ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    model = EncoderDecoderModel.new(src_vocab, trg_vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        epoch_no = epoch + 1
        trace('epoch %d/%d: ' % (epoch_no, args.epoch))
        trained = 0
        src_words = gens.word_list(args.source)
        trg_words = gens.word_list(args.target)
        pairs = gens.sorted_parallel(src_words, trg_words, 100 * args.minibatch)
        batches = gens.batch(pairs, args.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in batches:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            batch_size = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)

            for k in range(batch_size):
                trace('epoch %3d/%3d, sample %8d' % (epoch_no, args.epoch, trained + k + 1))
                trace(' src = ' + masked(src_batch[k]))
                trace(' trg = ' + masked(trg_batch[k]))
                trace(' hyp = ' + masked(hyp_batch[k]))

            trained += batch_size

        trace('saving model ...')
        model.save(args.model + '.%03d' % epoch_no)

    trace('finished.')
示例7: test
def test(args):
    """Translate ``args.source`` with a saved ``AttentionMT`` model.

    Loads vocabularies, spec and weights from files prefixed with
    ``args.model`` (moving the model to the GPU first when
    ``args.use_gpu`` is set), decodes the source in minibatches and
    writes each hypothesis, cut at its first ``'</s>'`` and joined with
    spaces, as one line of ``args.target``.

    NOTE(review): nesting reconstructed from a listing without
    indentation -- confirm against the upstream file.
    """
    trace('loading model ...')
    model_prefix = args.model
    src_vocab = Vocabulary.load(model_prefix + '.srcvocab')
    trg_vocab = Vocabulary.load(model_prefix + '.trgvocab')
    attmt = AttentionMT.load_spec(model_prefix + '.spec')
    if args.use_gpu:
        attmt.to_gpu()
    serializers.load_hdf5(model_prefix + '.weights', attmt)

    trace('generating translation ...')
    generated = 0
    with open(args.target, 'w') as out:
        batches = gens.batch(gens.word_list(args.source), args.minibatch)
        for src_batch in batches:
            src_batch = fill_batch(src_batch)
            batch_size = len(src_batch)
            trace('sample %8d - %8d ...' % (generated + 1, generated + batch_size))
            hyp_batch = forward(
                src_batch, None, src_vocab, trg_vocab, attmt,
                False, args.generation_limit,
            )

            for hyp in hyp_batch:
                # The appended sentinel guarantees index() finds an end
                # marker; everything from the first marker on is dropped.
                hyp.append('</s>')
                end = hyp.index('</s>')
                print(' '.join(hyp[:end]), file=out)

            generated += batch_size

    trace('finished.')
示例8: __predict_sentence
def __predict_sentence(self, src_batch):
    """Return the hypotheses decoded for ``src_batch``.

    Builds an ``EncoderDecoderModelForwardSlack`` from ``self.parameter``,
    loads the vocabularies and the ``EncoderDecoder`` spec from files
    prefixed with ``self.model_name``, loads the weights from
    ``dialogue.model + '.weights'`` and runs a forward pass in
    non-training mode limited to ``self.generation_limit``.

    :param src_batch: batch of source sentences to decode
    :return: the hypothesis batch produced by ``dialogue.forward``
    """
    dialogue = EncoderDecoderModelForwardSlack(self.parameter)
    name = self.model_name
    src_vocab = Vocabulary.load(name + '.srcvocab')
    trg_vocab = Vocabulary.load(name + '.trgvocab')
    model = EncoderDecoder.load_spec(name + '.spec')
    # NOTE(review): the weights path comes from dialogue.model rather than
    # self.model_name like the other files -- confirm they agree.
    weights_path = dialogue.model + '.weights'
    serializers.load_hdf5(weights_path, model)
    return dialogue.forward(
        src_batch, None, src_vocab, trg_vocab, model,
        False, self.generation_limit,
    )
示例9: __init__
def __init__(self, args):
    """Load the vocabularies and the encoder-decoder named by ``args.model``.

    Stores ``args`` on the instance, loads ``.srcvocab`` / ``.trgvocab`` /
    ``.spec`` / ``.weights`` files sharing the ``args.model`` prefix, and
    moves the network to the GPU before the weights are loaded when
    ``args.use_gpu`` is set.
    """
    trace('loading model ...')
    self.args = args
    model_prefix = args.model
    self.src_vocab = Vocabulary.load(model_prefix + '.srcvocab')
    self.trg_vocab = Vocabulary.load(model_prefix + '.trgvocab')
    self.encdec = EncoderDecoder.load_spec(model_prefix + '.spec')
    if args.use_gpu:
        self.encdec.to_gpu()
    serializers.load_hdf5(model_prefix + '.weights', self.encdec)
    trace('generating translation ...')
示例10: __predict_sentence
def __predict_sentence(self, src_batch):
    """Return the hypotheses decoded for ``src_batch``.

    Builds an ``EncoderDecoderModelAttention`` from ``self.parameter``,
    loads vocabularies, the ``AttentionDialogue`` spec and the weights
    from files prefixed with ``self.model_name``, and runs
    ``forward_implement`` in non-training mode limited to
    ``self.generation_limit``.

    :param src_batch: batch of source sentences to decode
    :return: the hypothesis batch produced by ``forward_implement``
    """
    dialogue = EncoderDecoderModelAttention(self.parameter)
    name = self.model_name
    src_vocab = Vocabulary.load(name + '.srcvocab')
    trg_vocab = Vocabulary.load(name + '.trgvocab')
    model = AttentionDialogue.load_spec(name + '.spec', self.XP)
    serializers.load_hdf5(name + '.weights', model)
    return dialogue.forward_implement(
        src_batch, None, src_vocab, trg_vocab, model,
        False, self.generation_limit,
    )
示例11: train
def train(self):
    """Train the attention dialogue model and save it under ``APP_ROOT``.

    Builds source/target vocabularies, creates ``AttentionDialogue`` and,
    when ``self.word2vecFlag`` is set, copies the word2vec weights into
    its ``emb`` and ``dec`` links.  Every epoch re-reads the corpus,
    creates a fresh AdaGrad optimizer (lr 0.01) with gradient clipping,
    trains on each minibatch and reports it through ``self.print_out``.
    Vocabularies, spec and weights are saved to
    ``APP_ROOT + "/model/" + self.model``.

    NOTE(review): nesting reconstructed from a listing without
    indentation; the save step is assumed to run once per epoch --
    confirm against the upstream file.
    """
    trace("making vocabularies ...")
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace("making model ...")
    self.attention_dialogue = AttentionDialogue(self.vocab, self.embed, self.hidden, self.XP)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, self.attention_dialogue.emb)
        self.copy_model(self.word2vec, self.attention_dialogue.dec, dec_flag=True)

    for epoch in range(self.epoch):
        trace("epoch %d/%d: " % (epoch + 1, self.epoch))
        trained = 0
        src_words = gens.word_list(self.source)
        trg_words = gens.word_list(self.target)
        pairs = gens.sorted_parallel(src_words, trg_words, 100 * self.minibatch)
        batches = gens.batch(pairs, self.minibatch)

        # The optimizer is rebuilt at the start of every epoch.
        adagrad = optimizers.AdaGrad(lr=0.01)
        adagrad.setup(self.attention_dialogue)
        adagrad.add_hook(optimizer.GradientClipping(5))
        random_number = random.randint(0, self.minibatch - 1)

        for src_batch, trg_batch in batches:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            batch_size = len(src_batch)
            hyp_batch, loss = self.forward_implement(
                src_batch, trg_batch, src_vocab, trg_vocab,
                self.attention_dialogue, True, 0,
            )
            loss.backward()
            adagrad.update()
            self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += batch_size

        trace("saving model ...")
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab.save(model_path + ".srcvocab")
        trg_vocab.save(model_path + ".trgvocab")
        self.attention_dialogue.save_spec(model_path + ".spec")
        serializers.save_hdf5(model_path + ".weights", self.attention_dialogue)

    trace("finished.")
示例12: train
def train(self):
    """Train the encoder-decoder and save it under the ``self.model`` prefix.

    Builds source/target vocabularies and a new ``EncoderDecoder``.  When
    ``self.word2vecFlag`` is set the word2vec weights are copied into its
    ``enc`` and ``dec`` links; otherwise the existing ``self.encdec`` is
    trained instead.  Every epoch re-reads the corpus, creates a fresh
    AdaGrad optimizer (lr 0.01) with gradient clipping and trains on each
    minibatch, reporting only the first minibatch through
    ``self.print_out``.  Vocabularies, spec and weights are then saved.

    NOTE(review): nesting reconstructed from a listing without
    indentation; the save step is assumed to run once per epoch --
    confirm against the upstream file.
    """
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
    else:
        encdec = self.encdec

    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        src_words = gens.word_list(self.source)
        trg_words = gens.word_list(self.target)
        pairs = gens.sorted_parallel(src_words, trg_words, 100 * self.minibatch)
        batches = gens.batch(pairs, self.minibatch)

        # The optimizer is rebuilt at the start of every epoch.
        adagrad = optimizers.AdaGrad(lr=0.01)
        adagrad.setup(encdec)
        adagrad.add_hook(optimizer.GradientClipping(5))
        random_number = random.randint(0, self.minibatch - 1)

        for src_batch, trg_batch in batches:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            batch_size = len(src_batch)
            hyp_batch, loss = self.forward(
                src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            adagrad.update()

            # Only the first minibatch of the epoch is printed.
            if trained == 0:
                self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)

            trained += batch_size

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')
示例13: load
def load(self, filename):
    """Populate this model from the file ``filename`` and return ``self``.

    The file is read through ``ModelFile`` in a fixed order: source
    vocabulary, target vocabulary, embedding size, hidden size, and then
    one embed/linear parameter block per model layer.  The layer reads
    happen between ``wrapper.begin_model_access`` and
    ``wrapper.end_model_access``.
    """
    with ModelFile(filename) as fp:
        self.src_vocab = Vocabulary.load(fp.get_file_pointer())
        self.trg_vocab = Vocabulary.load(fp.get_file_pointer())
        self.n_embed = int(fp.read())
        self.n_hidden = int(fp.read())
        self.make_model()

        net = self.model
        # (reader, attribute) pairs; the sequence is the on-disk order and
        # must not be rearranged.
        layout = (
            (fp.read_embed, 'weight_xi'),
            (fp.read_linear, 'weight_ip'),
            (fp.read_linear, 'weight_pp'),
            (fp.read_linear, 'weight_pq'),
            (fp.read_linear, 'weight_qj'),
            (fp.read_linear, 'weight_jy'),
            (fp.read_embed, 'weight_yq'),
            (fp.read_linear, 'weight_qq'),
        )
        wrapper.begin_model_access(net)
        for read_into, attr in layout:
            read_into(getattr(net, attr))
        wrapper.end_model_access(net)
    return self
开发者ID:tksugimoto,项目名称:Chainer_Machine_Translation_ipython_notebook,代码行数:18,代码来源:EncoderDecoderModel.py
示例14: train
def train(args):
    """Train an ``AttentionMT`` model and save a snapshot after every epoch.

    Vocabularies of size ``args.vocab`` are built from ``args.source`` and
    ``args.target``; the model is moved to the GPU when ``args.use_gpu``
    is set.  Every epoch re-reads the corpus, creates a fresh AdaGrad
    optimizer (lr 0.01) with gradient clipping, trains on each minibatch
    and traces the source, target and hypothesis of every sample with
    ``'</s>'`` shown as ``'*'``.  Vocabularies, spec and weights are saved
    under ``args.model`` followed by an epoch-number suffix.

    NOTE(review): nesting reconstructed from a listing without
    indentation -- confirm against the upstream file.
    """

    def masked(words):
        # Render a token list with the end-of-sentence marker shown as '*'.
        return ' '.join(['*' if w == '</s>' else w for w in words])

    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()

    for epoch in range(args.epoch):
        epoch_no = epoch + 1
        trace('epoch %d/%d: ' % (epoch_no, args.epoch))
        trained = 0
        src_words = gens.word_list(args.source)
        trg_words = gens.word_list(args.target)
        pairs = gens.sorted_parallel(src_words, trg_words, 100 * args.minibatch)
        batches = gens.batch(pairs, args.minibatch)

        # The optimizer is rebuilt at the start of every epoch.
        adagrad = optimizers.AdaGrad(lr=0.01)
        adagrad.setup(attmt)
        adagrad.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in batches:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            batch_size = len(src_batch)
            hyp_batch, loss = forward(
                src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
            loss.backward()
            adagrad.update()

            for k in range(batch_size):
                trace('epoch %3d/%3d, sample %8d' % (epoch_no, args.epoch, trained + k + 1))
                trace(' src = ' + masked(src_batch[k]))
                trace(' trg = ' + masked(trg_batch[k]))
                trace(' hyp = ' + masked(hyp_batch[k]))

            trained += batch_size

        trace('saving model ...')
        suffix = '.%03.d' % epoch_no
        prefix = args.model + suffix
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)

    trace('finished.')
示例15: load
def load(filename):
    """Create an ``EncoderDecoderModel`` from the file ``filename``.

    The file is read through ``ModelFile`` in a fixed order: source
    vocabulary, target vocabulary, embedding size, hidden size, and then
    one embed/linear parameter block per model layer.  The layer reads
    happen between ``wrapper.begin_model_access`` and
    ``wrapper.end_model_access``.

    Returns the populated model instance.
    """
    self = EncoderDecoderModel()
    with ModelFile(filename) as fp:
        self.__src_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__trg_vocab = Vocabulary.load(fp.get_file_pointer())
        self.__n_embed = int(fp.read())
        self.__n_hidden = int(fp.read())
        self.__make_model()

        net = self.__model
        # (reader, attribute) pairs; the sequence is the on-disk order and
        # must not be rearranged.
        layout = (
            (fp.read_embed, 'w_xi'),
            (fp.read_linear, 'w_ip'),
            (fp.read_linear, 'w_pp'),
            (fp.read_linear, 'w_pq'),
            (fp.read_linear, 'w_qj'),
            (fp.read_linear, 'w_jy'),
            (fp.read_embed, 'w_yq'),
            (fp.read_linear, 'w_qq'),
        )
        wrapper.begin_model_access(net)
        for read_into, attr in layout:
            read_into(getattr(net, attr))
        wrapper.end_model_access(net)
    return self