This page collects typical usage examples of the Python method data_load.load_vocab. If you have been wondering what exactly data_load.load_vocab does and how to use it, the curated code examples below should help. You can also explore other usage examples from the data_load module.
Six code examples of data_load.load_vocab are shown below, sorted by popularity by default. You can upvote any example you like or find useful; your votes help the system recommend better Python code samples.
Example 1: __init__
# Required import: import data_load [as alias]
# Or: from data_load import load_vocab [as alias]
def __init__(self, hp):
    self.hp = hp
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model, zero_pad=True)
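The load_vocab implementation itself does not appear on this page. Below is a minimal sketch consistent with Example 1's call site, assuming a plain-text vocabulary file with one token per line; the file layout and the padding convention are assumptions, not shown in the source.

def load_vocab(vocab_fpath):
    # One token per line; take the first whitespace-separated field.
    # By convention, line 0 is often a padding token.
    vocab = [line.split()[0]
             for line in open(vocab_fpath, 'r', encoding='utf-8').read().splitlines()
             if line.strip()]
    token2idx = {token: idx for idx, token in enumerate(vocab)}
    idx2token = {idx: token for idx, token in enumerate(vocab)}
    return token2idx, idx2token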
Example 2: create_train_data
# Required import: import data_load [as alias]
# Or: from data_load import load_vocab [as alias]
def create_train_data():
    from data_load import load_vocab
    roma2idx, idx2roma, surf2idx, idx2surf = load_vocab()

    romaji_sents, surface_sents = [], []
    for line in codecs.open('preprocessed/ja.tsv', 'r', 'utf-8'):
        try:
            idx, romaji_sent, surface_sent = line.strip().split("\t")
        except ValueError:  # skip malformed lines
            continue

        if len(romaji_sent) < hp.max_len:
            # "S" is appended as a terminal symbol; symbols missing from the
            # vocabulary fall back to index 1. Each index array is serialized
            # to raw bytes with tostring() (a deprecated alias of tobytes()).
            romaji_sents.append(np.array([roma2idx.get(roma, 1) for roma in romaji_sent + "S"], np.int32).tostring())
            surface_sents.append(np.array([surf2idx.get(surf, 1) for surf in surface_sent + "S"], np.int32).tostring())

    pickle.dump((romaji_sents, surface_sents), open('preprocessed/train.pkl', 'wb'), protocol=2)
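Because create_train_data() serializes each index array to raw bytes, a consumer has to reverse that step when reading train.pkl back. Below is a minimal sketch of the decoding side, assuming the same int32 dtype; np.frombuffer is the non-deprecated counterpart of np.fromstring. The function name load_train_data is hypothetical.

import pickle
import numpy as np

def load_train_data(path='preprocessed/train.pkl'):
    # Restore the two parallel lists of byte strings written by create_train_data().
    with open(path, 'rb') as f:
        romaji_sents, surface_sents = pickle.load(f)
    # Each element holds the raw bytes of an int32 index array; decode it back.
    romaji_arrays = [np.frombuffer(b, dtype=np.int32) for b in romaji_sents]
    surface_arrays = [np.frombuffer(b, dtype=np.int32) for b in surface_sents]
    return romaji_arrays, surface_arrays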
Example 3: create_qa_context
# Required import: import data_load [as alias]
# Or: from data_load import load_vocab [as alias]
def create_qa_context(model_path: str, word_to_ix_path: str,
                      embed_dim: int, hidden_dim: int, device) -> QAContext:
    word_dict = load_vocab(word_to_ix_path)
    vocab_size = len(word_dict)
    model = TraForEncoder(vocab_size, embed_dim, hidden_dim)
    if not torch.cuda.is_available():
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
    else:
        model.load_state_dict(torch.load(model_path))
    return QAContext(model, word_dict, device)
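A brief usage sketch for Example 3. QAContext and TraForEncoder are this project's own classes; the checkpoint path, vocabulary path, and dimensions below are hypothetical placeholders, not values from the source.

import torch

# Hypothetical paths and sizes, for illustration only.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ctx = create_qa_context('checkpoints/model.pt', 'checkpoints/word_to_ix.pkl',
                        embed_dim=128, hidden_dim=256, device=device)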
Example 4: eval
# Required import: import data_load [as alias]
# Or: from data_load import load_vocab [as alias]
def eval(logdir):
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.test1.data_path, hp.test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)
    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close()
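Here load_vocab() takes no arguments and returns phoneme mappings, which suggests the vocabulary is built from the module-level phns list rather than loaded from a file. A minimal sketch of that variant, inferred from the call site and not shown in the source:

def load_vocab():
    # Map each phoneme in the global `phns` list to its index, and back.
    phn2idx = {phn: idx for idx, phn in enumerate(phns)}
    idx2phn = {idx: phn for idx, phn in enumerate(phns)}
    return phn2idx, idx2phn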
Example 5: __init__
# Required import: import data_load [as alias]
# Or: from data_load import load_vocab [as alias]
def __init__(self):
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    self.L = tf.placeholder(tf.int32, shape=(None, None))
    self.mels = tf.placeholder(tf.float32, shape=(None, None, n_mels))
    self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None,))

    # network 1
    with tf.variable_scope("Text2Mel"):
        # Get S or decoder inputs. (B, T//r, n_mels)
        self.S = tf.concat((tf.zeros_like(self.mels[:, :1, :]), self.mels[:, :-1, :]), 1)

        # Networks
        with tf.variable_scope("TextEnc"):
            self.K, self.V = TextEnc(self.L)  # (N, Tx, e)

        with tf.variable_scope("AudioEnc"):
            self.Q = AudioEnc(self.S)

        with tf.variable_scope("Attention"):
            # R: (B, T/r, 2d)
            # alignments: (B, N, T/r)
            # max_attentions: (B,)
            self.R, self.alignments, self.max_attentions = Attention(self.Q, self.K, self.V,
                                                                     mononotic_attention=True,
                                                                     prev_max_attentions=self.prev_max_attentions)
        with tf.variable_scope("AudioDec"):
            self.Y_logits, self.Y = AudioDec(self.R)  # (B, T/r, n_mels)

    # network 2
    # During inference, the predicted melspectrogram values are fed.
    with tf.variable_scope("SSRN"):
        self.Z_logits, self.Z = SSRN(self.Y)

    with tf.variable_scope("gs"):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
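Examples 5 and 6 also call load_vocab() with no arguments but get character mappings back, so the vocabulary is presumably a fixed string of characters defined in the hyperparameters. A minimal sketch, assuming hp.vocab is such a string with padding at index 0 (an assumption, not shown in the source):

def load_vocab():
    # hp.vocab is assumed to be a string of allowed characters,
    # e.g. "PE abcdefghijklmnopqrstuvwxyz'.?", with index 0 reserved for padding.
    char2idx = {char: idx for idx, char in enumerate(hp.vocab)}
    idx2char = {idx: char for idx, char in enumerate(hp.vocab)}
    return char2idx, idx2char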
Example 6: eval
# Required import: import data_load [as alias]
# Or: from data_load import load_vocab [as alias]
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode="test")  # texts
    char2idx, idx2char = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists(hp.savedir): os.mkdir(hp.savedir)
            with open("{}/{}".format(hp.savedir, mname), 'w') as fout:
                results = []
                baseline_results = []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size: (step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size: (step + 1) * hp.batch_size]

                    # predict characters
                    preds = sess.run(g.preds, {g.x: x})

                    for xx, yy, pp in zip(x, y, preds):  # sentence-wise
                        expected = ''
                        got = ''
                        for xxx, yyy, ppp in zip(xx, yy, pp):  # character-wise
                            if xxx == 0:
                                break
                            else:
                                got += idx2char.get(xxx, "*")
                                expected += idx2char.get(xxx, "*")
                            if ppp == 1: got += " "
                            if yyy == 1: expected += " "

                            # prediction results
                            if ppp == yyy:
                                results.append(1)
                            else:
                                results.append(0)

                            # baseline results
                            if yyy == 0:  # no space
                                baseline_results.append(1)
                            else:
                                baseline_results.append(0)

                        fout.write("▌Expected: " + expected + "\n")
                        fout.write("▌Got: " + got + "\n\n")
                fout.write(
                    "Final Accuracy = %d/%d=%.4f\n" % (sum(results), len(results), float(sum(results)) / len(results)))
                fout.write(
                    "Baseline Accuracy = %d/%d=%.4f" % (sum(baseline_results), len(baseline_results), float(sum(baseline_results)) / len(baseline_results)))