This article collects typical usage examples of the pad_sequences function from tflearn.data_utils in Python. If you have been wondering what pad_sequences does, how to call it, or what real-world uses look like, the curated code samples here should help.
Fifteen code examples of the pad_sequences function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
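Before the project-level examples, a minimal sketch of what pad_sequences itself does may help. The snippet below is illustrative only: the toy sequences and maxlen=5 are assumptions, not taken from any example on this page.

from tflearn.data_utils import pad_sequences

# Three integer-encoded sequences of unequal length (toy values).
sequences = [[3, 7, 2], [5], [9, 9, 9, 9, 9, 9]]

# Pad every sequence to length 5 with the fill value 0.,
# truncating any sequence that is longer than maxlen.
padded = pad_sequences(sequences, maxlen=5, value=0.)

print(padded.shape)  # (3, 5): a NumPy array with one fixed-width row per input
print(padded)

Most of the examples below follow the same pattern: variable-length lists of token ids are padded to a fixed width so they can be fed to a layer declared as tflearn.input_data([None, maxlen]).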
Example 1: do_rnn
def do_rnn(trainX, testX, trainY, testY):
global n_words
# Data preprocessing
# Sequence padding
print "GET n_words embedding %d" % n_words
trainX = pad_sequences(trainX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
testX = pad_sequences(testX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Network building
net = tflearn.input_data([None, MAX_DOCUMENT_LENGTH])
net = tflearn.embedding(net, input_dim=n_words, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=3)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
batch_size=32,run_id="maidou")
Example 2: do_rnn
def do_rnn(trainX, testX, trainY, testY):
max_document_length=64
y_test=testY
trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Network building
net = tflearn.input_data([None, max_document_length])
net = tflearn.embedding(net, input_dim=10240000, output_dim=64)
net = tflearn.lstm(net, 64, dropout=0.1)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=0,tensorboard_dir="dga_log")
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
batch_size=10,run_id="dga",n_epoch=1)
y_predict_list = model.predict(testX)
#print y_predict_list
y_predict = []
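# i[0] is the predicted probability of class 0; threshold it at 0.5 to recover a hard label.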
for i in y_predict_list:
print i[0]
if i[0] > 0.5:
y_predict.append(0)
else:
y_predict.append(1)
print(classification_report(y_test, y_predict))
print metrics.confusion_matrix(y_test, y_predict)
Example 3: pad_sentences_qr
def pad_sentences_qr(query, response, q_max_len, r_max_len, index):
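# Note: the fill value here is `index` rather than 0., presumably a dedicated padding id in the vocabulary.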
train_query = pad_sequences(query, maxlen=q_max_len, value=index)
train_response = pad_sequences(response, maxlen=r_max_len, value=index)
train_query = np.array(train_query)
train_response = np.array(train_response)
train_query_response = np.append(train_query, train_response, axis=1)
return train_query, train_query_response, train_response, q_max_len, r_max_len, index+1
Example 4: do_cnn
def do_cnn(trainX, trainY,testX, testY):
global n_words
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
testX = pad_sequences(testX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Building convolutional network
network = input_data(shape=[None, MAX_DOCUMENT_LENGTH], name='input')
network = tflearn.embedding(network, input_dim=n_words+1, output_dim=128)
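# Three parallel 1-D convolutions with kernel sizes 3, 4 and 5 act as n-gram feature detectors; their outputs are concatenated below.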
branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
network = merge([branch1, branch2, branch3], mode='concat', axis=1)
network = tf.expand_dims(network, 2)
network = global_max_pool(network)
network = dropout(network, 0.5)
network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.001,
loss='categorical_crossentropy', name='target')
# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch = 20, shuffle=True, validation_set=(testX, testY), show_metric=True, batch_size=32)
Example 5: do_rnn
def do_rnn(x,y):
global max_document_length
print "RNN"
trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.4, random_state=0)
y_test=testY
trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Network building
net = tflearn.input_data([None, max_document_length])
net = tflearn.embedding(net, input_dim=10240000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=0.1, show_metric=True,
batch_size=10,run_id="webshell",n_epoch=5)
y_predict_list=model.predict(testX)
y_predict=[]
for i in y_predict_list:
if i[0] > 0.5:
y_predict.append(0)
else:
y_predict.append(1)
do_metrics(y_test, y_predict)
Example 6: pad_SentencesQR
def pad_SentencesQR(query, response):
q_max_len, r_max_len, query_word, response_word, index = fenci(query, response)
print("query max length:{}, response max length:{}".format(q_max_len, r_max_len))
train_query = pad_sequences(query_word, maxlen=q_max_len, value=index)
train_response = pad_sequences(response_word, maxlen=r_max_len, value=index)
# print train_query[0]
# print train_response[0]
train_query = np.array(train_query)
train_response = np.array(train_response)
train_query_response = np.append(train_query, train_response, axis=1)
return train_query, train_query_response, train_response, q_max_len, r_max_len, index
Example 7: do_cnn
def do_cnn(x,y):
global max_document_length
print "CNN and tf"
trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.4, random_state=0)
y_test=testY
trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Building convolutional network
network = input_data(shape=[None,max_document_length], name='input')
network = tflearn.embedding(network, input_dim=1000000, output_dim=128)
branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
network = merge([branch1, branch2, branch3], mode='concat', axis=1)
network = tf.expand_dims(network, 2)
network = global_max_pool(network)
network = dropout(network, 0.8)
network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.001,
loss='categorical_crossentropy', name='target')
model = tflearn.DNN(network, tensorboard_verbose=0)
#if not os.path.exists(pkl_file):
# Training
model.fit(trainX, trainY,
n_epoch=5, shuffle=True, validation_set=0.1,
show_metric=True, batch_size=100,run_id="webshell")
# model.save(pkl_file)
#else:
# model.load(pkl_file)
y_predict_list=model.predict(testX)
#y_predict = list(model.predict(testX,as_iterable=True))
y_predict=[]
for i in y_predict_list:
print i[0]
if i[0] > 0.5:
y_predict.append(0)
else:
y_predict.append(1)
print 'y_predict_list:'
print y_predict_list
print 'y_predict:'
print y_predict
#print y_test
do_metrics(y_test, y_predict)
Example 8: do_rnn
def do_rnn(trainX, testX, trainY, testY):
global max_sequences_len
global max_sys_call
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=max_sequences_len, value=0.)
testX = pad_sequences(testX, maxlen=max_sequences_len, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY_old=testY
testY = to_categorical(testY, nb_classes=2)
# Network building
print "GET max_sequences_len embedding %d" % max_sequences_len
print "GET max_sys_call embedding %d" % max_sys_call
net = tflearn.input_data([None, max_sequences_len])
net = tflearn.embedding(net, input_dim=max_sys_call+1, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.3)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.1,
loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=3)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
batch_size=32,run_id="maidou")
y_predict_list = model.predict(testX)
#print y_predict_list
y_predict = []
for i in y_predict_list:
#print i[0]
if i[0] > 0.5:
y_predict.append(0)
else:
y_predict.append(1)
#y_predict=to_categorical(y_predict, nb_classes=2)
print(classification_report(testY_old, y_predict))
print metrics.confusion_matrix(testY_old, y_predict)
Example 9: bi_lstm
def bi_lstm(trainX, trainY,testX, testY):
trainX = pad_sequences(trainX, maxlen=200, value=0.)
testX = pad_sequences(testX, maxlen=200, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Network building
net = tflearn.input_data(shape=[None, 200])
net = tflearn.embedding(net, input_dim=20000, output_dim=128)
net = tflearn.bidirectional_rnn(net, BasicLSTMCell(128), BasicLSTMCell(128))
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, clip_gradients=0., tensorboard_verbose=2)
model.fit(trainX, trainY, validation_set=0.1, show_metric=True, batch_size=64,run_id="rnn-bilstm")
Example 10: lstm
def lstm(trainX, trainY,testX, testY):
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Network building
net = tflearn.input_data([None, 100])
net = tflearn.embedding(net, input_dim=10000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
batch_size=32,run_id="rnn-lstm")
Example 11: main
def main(_):
# 1.load data with vocabulary of words and labels
vocabulary_word2index, vocabulary_index2word = create_voabulary(word2vec_model_path=FLAGS.word2vec_model_path,name_scope="transformer_classification") # simple='simple'
vocab_size = len(vocabulary_word2index)
print("transformer_classification.vocab_size:", vocab_size)
vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label(name_scope="transformer_classification")
questionid_question_lists=load_final_test_data(FLAGS.predict_source_file)
print("list of total questions:",len(questionid_question_lists))
test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists)
print("list of total questions2:",len(test))
testX=[]
question_id_list=[]
for tuple in test:
question_id,question_string_list=tuple
question_id_list.append(question_id)
testX.append(question_string_list)
# 2.Data preprocessing: Sequence padding
print("start padding....")
testX2 = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.) # padding to max length
print("list of total questions3:", len(testX2))
print("end padding...")
# 3.create session.
config=tf.ConfigProto()
config.gpu_options.allow_growth=True
with tf.Session(config=config) as sess:
# 4.Instantiate Model
model=Transformer(FLAGS.num_classes, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.sequence_length,
vocab_size, FLAGS.embed_size,FLAGS.d_model,FLAGS.d_k,FLAGS.d_v,FLAGS.h,FLAGS.num_layer,FLAGS.is_training,l2_lambda=FLAGS.l2_lambda)
saver=tf.train.Saver()
if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
print("Restoring Variables from Checkpoint")
saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
else:
print("Can't find the checkpoint.going to stop")
return
# 5.feed data, to get logits
number_of_training_data=len(testX2);print("number_of_training_data:",number_of_training_data)
index=0
predict_target_file_f = codecs.open(FLAGS.predict_target_file, 'a', 'utf8')
for start, end in zip(range(0, number_of_training_data, FLAGS.batch_size),range(FLAGS.batch_size, number_of_training_data+1, FLAGS.batch_size)):
logits=sess.run(model.logits,feed_dict={model.input_x:testX2[start:end],model.dropout_keep_prob:1}) #logits:[batch_size,self.num_classes]
question_id_sublist=question_id_list[start:end]
get_label_using_logits_batch(question_id_sublist, logits, vocabulary_index2word_label, predict_target_file_f)
# 6. get label using logits
#predicted_labels=get_label_using_logits(logits[0],vocabulary_index2word_label)
#print(index," ;predicted_labels:",predicted_labels)
# 7. write question id and labels to file system.
#write_question_id_with_labels(question_id_list[index],predicted_labels,predict_target_file_f)
index=index+1
predict_target_file_f.close()
Example 12: do_rnn_wordbag
def do_rnn_wordbag(trainX, testX, trainY, testY):
global max_document_length
print "RNN and wordbag"
trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
# Network building
net = tflearn.input_data([None, max_document_length])
net = tflearn.embedding(net, input_dim=10240000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
loss='categorical_crossentropy')
# Training
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
batch_size=10,run_id="review",n_epoch=5)
Example 13: create_datasets
def create_datasets(file_path, vocab_size=30000, val_fraction=0.0):
# IMDB Dataset loading
train, test, _ = imdb.load_data(
path=file_path,
n_words=vocab_size,
valid_portion=val_fraction,
sort_by_len=False)
trainX, trainY = train
testX, testY = test
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=FLAGS.max_len, value=0.)
testX = pad_sequences(testX, maxlen=FLAGS.max_len, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)
train_dataset = DataSet(trainX, trainY)
return train_dataset
Example 14: main
def main(_):
# 1.load data with vocabulary of words and labels
vocabulary_word2index, vocabulary_index2word = create_voabulary()
vocab_size = len(vocabulary_word2index)
print("vocab_size:",vocab_size)
#iii=0
#iii/0
vocabulary_word2index_label,vocabulary_index2word_label = create_voabulary_label()
questionid_question_lists=load_final_test_data(FLAGS.predict_source_file) #TODO
test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists) #TODO
testX=[]
question_id_list=[]
for tuple in test:
question_id,question_string_list=tuple
question_id_list.append(question_id)
testX.append(question_string_list)
# 2.Data preprocessing: Sequence padding
print("start padding....")
testX2 = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.) # padding to max length
print("end padding...")
# 3.create session.
config=tf.ConfigProto()
config.gpu_options.allow_growth=True
with tf.Session(config=config) as sess:
# 4.Instantiate Model
fast_text=fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,FLAGS.num_sampled,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.is_training)
saver=tf.train.Saver()
if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
print("Restoring Variables from Checkpoint")
saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
else:
print("Can't find the checkpoint.going to stop")
return
# 5.feed data, to get logits
number_of_training_data=len(testX2);print("number_of_training_data:",number_of_training_data)
batch_size=1
index=0
predict_target_file_f = codecs.open(FLAGS.predict_target_file, 'a', 'utf8')
for start, end in zip(range(0, number_of_training_data, batch_size),range(batch_size, number_of_training_data+1, batch_size)):
logits=sess.run(fast_text.logits,feed_dict={fast_text.sentence:testX2[start:end]}) #'shape of logits:', ( 1, 1999)
# 6. get label using logits
predicted_labels=get_label_using_logits(logits[0],vocabulary_index2word_label)
# 7. write question id and labels to file system.
write_question_id_with_labels(question_id_list[index],predicted_labels,predict_target_file_f)
index=index+1
predict_target_file_f.close()
Example 15: main
def main(_):
# 1.load data with vocabulary of words and labels
vocabulary_word2index, vocabulary_index2word = create_voabulary(word2vec_model_path=FLAGS.word2vec_model_path,name_scope="dynamic_memory_network")
vocab_size = len(vocabulary_word2index)
vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label(name_scope="dynamic_memory_network")
questionid_question_lists=load_final_test_data(FLAGS.predict_source_file)
test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists)
testX=[]
question_id_list=[]
for tuple in test:
question_id,question_string_list=tuple
question_id_list.append(question_id)
testX.append(question_string_list)
# 2.Data preprocessing: Sequence padding
print("start padding....")
testX2 = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.) # padding to max length
print("end padding...")
# 3.create session.
config=tf.ConfigProto()
config.gpu_options.allow_growth=True
with tf.Session(config=config) as sess:
# 4.Instantiate Model
model = DynamicMemoryNetwork(FLAGS.num_classes, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.sequence_length,
FLAGS.story_length,vocab_size, FLAGS.embed_size, FLAGS.hidden_size, FLAGS.is_training,num_pass=FLAGS.num_pass,
use_gated_gru=FLAGS.use_gated_gru,decode_with_sequences=FLAGS.decode_with_sequences,multi_label_flag=FLAGS.multi_label_flag,l2_lambda=FLAGS.l2_lambda)
saver=tf.train.Saver()
if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
print("Restoring Variables from Checkpoint of EntityNet.")
saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
else:
print("Can't find the checkpoint.going to stop")
return
# 5.feed data, to get logits
number_of_training_data=len(testX2);print("number_of_training_data:",number_of_training_data)
index=0
predict_target_file_f = codecs.open(FLAGS.predict_target_file, 'a', 'utf8')
for start, end in zip(range(0, number_of_training_data, FLAGS.batch_size),range(FLAGS.batch_size, number_of_training_data+1, FLAGS.batch_size)):
logits=sess.run(model.logits,feed_dict={model.query:testX2[start:end],model.story: np.expand_dims(testX2[start:end],axis=1),
model.dropout_keep_prob:1.0}) #'shape of logits:', ( 1, 1999)
# 6. get label using logits
#predicted_labels=get_label_using_logits(logits[0],vocabulary_index2word_label)
# 7. write question id and labels to file system.
#write_question_id_with_labels(question_id_list[index],predicted_labels,predict_target_file_f)
question_id_sublist=question_id_list[start:end]
get_label_using_logits_batch(question_id_sublist, logits, vocabulary_index2word_label, predict_target_file_f)
index=index+1
predict_target_file_f.close()