This article collects typical usage examples of keras.preprocessing.sequence in Python. If you are wondering what preprocessing.sequence does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from its parent package, keras.preprocessing.
Ten code examples of preprocessing.sequence are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
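For reference, the module's own canonical entry point is sequence.pad_sequences; a minimal sketch of its default behavior (pre-padding and pre-truncating, per the Keras 2 API):

from keras.preprocessing import sequence

# Pad/truncate a list of integer sequences to a common length.
# By default Keras pads and truncates at the front of each sequence.
sequence.pad_sequences([[1, 2, 3], [4, 5]], maxlen=4)
# -> array([[0, 1, 2, 3],
#           [0, 0, 4, 5]], dtype=int32)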
Example 1: learn_single_repr
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def learn_single_repr(self, q1_embed, q1_len, q1_max, rnn_type,
                      reuse=None, pool=False, name="", mask=None):
    """ This is the single-sequence encoder function.
    rnn_type controls which type of encoder is used.
    Supports neural bag-of-words (NBOW) and CNN encoders.
    """
    if 'NBOW' in rnn_type:
        # sum the word embeddings over the time axis
        q1_output = tf.reduce_sum(q1_embed, 1)
        if pool:
            return q1_embed, q1_output
    elif 'CNN' in rnn_type:
        q1_output = build_raw_cnn(q1_embed, self.args.rnn_size,
                                  filter_sizes=3,
                                  initializer=self.initializer,
                                  dropout=self.rnn_dropout,
                                  reuse=reuse, name=name)
        if pool:
            # max-pool the CNN features over the time axis
            q1_output = tf.reduce_max(q1_output, 1)
            return q1_output, q1_output
    else:
        # no encoding: pass the embeddings through unchanged
        q1_output = q1_embed
    return q1_output
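For intuition, the NBOW branch simply sums the embeddings over the time axis; a minimal standalone sketch (the tensor shapes here are made up for illustration):

import tensorflow as tf

# A toy batch: 2 sequences, 3 tokens each, 4-dimensional embeddings.
q1_embed = tf.ones([2, 3, 4])
# The NBOW branch above reduces over axis 1 (the time axis),
# yielding one vector per sequence, shape [2, 4].
nbow = tf.reduce_sum(q1_embed, 1)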
Example 2: encode_outputs
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def encode_outputs(self, sents):
    """
    Given a dataframe split into sentences, encode outputs for RNN classification.
    Should return a list of sample sequences of length maxlen.
    """
    output_encodings = []
    sents = self.get_fixed_size(sents)
    # Encode outputs
    for sent in sents:
        output_encodings.append(
            list(np_utils.to_categorical(
                list(self.transform_labels(sent.label.values)),
                num_classes=self.num_of_classes())))
    # Pad / truncate to maximum length
    return np.ndarray(shape=(len(sents),
                             self.sent_maxlen,
                             self.num_of_classes()),
                      buffer=np.array(pad_sequences(output_encodings,
                                                    lambda: np.zeros(self.num_of_classes()),
                                                    maxlen=self.sent_maxlen)))
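np_utils.to_categorical does the per-sentence one-hot encoding above; a small standalone example with three assumed classes:

from keras.utils import np_utils

# One-hot encode integer labels 0 and 2 over 3 classes:
np_utils.to_categorical([0, 2], num_classes=3)
# -> array([[1., 0., 0.],
#           [0., 0., 1.]], dtype=float32)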
Example 3: pad_sequences
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def pad_sequences(sequences, pad_func, maxlen=None):
    """
    Similar to keras.preprocessing.sequence.pad_sequences, but using Sample as a
    higher-level abstraction.
    pad_func is a callable that produces a single padding element.
    """
    ret = []
    # Determine the maxlen
    max_value = max(map(len, sequences))
    if maxlen is None:
        maxlen = max_value
    # Pad / truncate (done this way to deal with np.array)
    for sequence in sequences:
        cur_seq = list(sequence[:maxlen])
        cur_seq.extend([pad_func()] * (maxlen - len(sequence)))
        ret.append(cur_seq)
    return ret
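A minimal usage sketch of this helper with plain integers instead of Sample objects (the values are made up):

# Post-pad with zeros produced by pad_func; truncate anything longer than maxlen:
pad_sequences([[1, 2, 3, 4], [5]], pad_func=lambda: 0, maxlen=3)
# -> [[1, 2, 3], [5, 0, 0]]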
Example 4: get_ELMo_lmdb_vector
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def get_ELMo_lmdb_vector(self, token_list, max_size_sentence):
    """
    Try to get the ELMo embeddings for a sequence cached in LMDB.
    """
    if self.env_ELMo is None:
        # DB cache not available, so we don't cache ELMo embeddings
        return None
    try:
        ELMo_vector = np.zeros((len(token_list), max_size_sentence-2, ELMo_embed_size), dtype='float32')
        with self.env_ELMo.begin() as txn:
            for i in range(0, len(token_list)):
                # get a hash for the token list
                the_hash = list_digest(token_list[i])
                vector = txn.get(the_hash.encode(encoding='UTF-8'))
                if vector:
                    # adapt expected shape/padding
                    local_embeddings = _deserialize_pickle(vector)
                    if local_embeddings.shape[0] > max_size_sentence-2:
                        # squeeze the extra padding space
                        ELMo_vector[i] = local_embeddings[:max_size_sentence-2,]
                    elif local_embeddings.shape[0] == max_size_sentence-2:
                        # exact fit, use as is
                        ELMo_vector[i] = local_embeddings
                    else:
                        # fill the missing space with padding
                        filler = np.zeros((max_size_sentence-(local_embeddings.shape[0]+2), ELMo_embed_size), dtype='float32')
                        ELMo_vector[i] = np.concatenate((local_embeddings, filler))
                    vector = None
                else:
                    return None
    except lmdb.Error:
        # for unclear reasons, the environment must be closed and reopened to avoid
        # "mdb_txn_begin: MDB_BAD_RSLOT: Invalid reuse of reader locktable slot"
        # when opening a new transaction
        self.env_ELMo.close()
        self.env_ELMo = lmdb.open(self.embedding_ELMo_cache, readonly=True, max_readers=2048, max_spare_txns=2, lock=False)
        return self.get_ELMo_lmdb_vector(token_list, max_size_sentence)
    return ELMo_vector
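list_digest is not defined on this page; a plausible minimal stand-in (an assumption, not the project's actual implementation) would hash the tokens into a stable LMDB key:

import hashlib

def list_digest(strings):
    # Hypothetical stand-in: fold each token into a SHA-1 digest so that
    # the same token list always maps to the same LMDB key.
    h = hashlib.sha1()
    for s in strings:
        h.update(s.encode('utf-8'))
    return h.hexdigest()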
Example 5: cache_ELMo_lmdb_vector
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def cache_ELMo_lmdb_vector(self, token_list, ELMo_vector):
    """
    Cache in LMDB the ELMo embeddings for a given sequence.
    """
    if self.env_ELMo is None:
        # DB cache not available, so we don't cache ELMo embeddings
        return None
    txn = self.env_ELMo.begin(write=True)
    for i in range(0, len(token_list)):
        # get a hash for the token list
        the_hash = list_digest(token_list[i])
        txn.put(the_hash.encode(encoding='UTF-8'), _serialize_pickle(ELMo_vector[i]))
    txn.commit()
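_serialize_pickle and _deserialize_pickle are likewise not shown here; minimal plausible counterparts (assumptions, not the project's actual code) would just wrap pickle:

import pickle

def _serialize_pickle(obj):
    # Hypothetical counterpart: turn the numpy array into bytes for txn.put().
    return pickle.dumps(obj)

def _deserialize_pickle(data):
    # Hypothetical counterpart: restore the numpy array from txn.get() bytes.
    return pickle.loads(data)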
Example 6: cache_BERT_lmdb_vector
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def cache_BERT_lmdb_vector(self, sentence, BERT_vector):
    """
    Cache in LMDB the BERT embeddings for a given sequence.
    """
    if self.env_BERT is None:
        # DB cache not available, so we don't cache BERT embeddings
        return None
    txn = self.env_BERT.begin(write=True)
    # get a hash for the sentence
    the_hash = list_digest(sentence)
    txn.put(the_hash.encode(encoding='UTF-8'), _serialize_pickle(BERT_vector))
    txn.commit()
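Note that the getters in these examples reopen their environments read-only, while the two cache_* methods need a writable environment; a minimal sketch of opening one (the path and map_size are assumptions, not values from this page):

import lmdb

# A writable LMDB environment so that begin(write=True) above can succeed.
env_BERT = lmdb.open('data/embedding-bert.lmdb', map_size=1024 * 1024 * 1024)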
Example 7: encode_inputs
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def encode_inputs(self, sents):
    """
    Given a dataframe split into sentences, encode inputs for RNN classification.
    Should return a dictionary of sequences of samples of length maxlen.
    """
    word_inputs = []
    pred_inputs = []
    pos_inputs = []
    sents = self.get_fixed_size(sents)
    for sent in sents:
        # pandas assigns NaN to the very infrequent empty string (see wiki train)
        sent_words = [word
                      if not (isinstance(word, float) and math.isnan(word)) else " "
                      for word in sent.word.values]
        pos_tags_encodings = [NLTK_POS_TAGS.index(tag)
                              for (_, tag)
                              in nltk.pos_tag(sent_words)]
        word_encodings = [self.emb.get_word_index(w) for w in sent_words]
        pred_word_encodings = [self.emb.get_word_index(w) for w in sent_words]
        word_inputs.append([Sample(w) for w in word_encodings])
        pred_inputs.append([Sample(w) for w in pred_word_encodings])
        pos_inputs.append([Sample(pos) for pos in pos_tags_encodings])
    # Pad / truncate to the desired maximum length
    ret = defaultdict(list)
    for name, sequence in zip(["word_inputs", "predicate_inputs", "postags_inputs"],
                              [word_inputs, pred_inputs, pos_inputs]):
        for samples in pad_sequences(sequence,
                                     pad_func=lambda: Pad_sample(),
                                     maxlen=self.sent_maxlen):
            ret[name].append([sample.encode() for sample in samples])
    return {k: np.array(v) for k, v in ret.items()}
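Sample and Pad_sample are project-specific wrappers not shown on this page; a minimal version consistent with how they are used here (an assumption) could be:

class Sample(object):
    # Hypothetical minimal wrapper: stores one encoded value and
    # returns it from encode(), matching the usage above.
    def __init__(self, value):
        self.value = value

    def encode(self):
        return self.value


class Pad_sample(Sample):
    # Padding element: encodes to 0.
    def __init__(self):
        super(Pad_sample, self).__init__(0)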
Example 8: encode_outputs
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def encode_outputs(self, sents):
    """
    Given a dataframe split into sentences, encode outputs for RNN classification.
    Should return a list of sample sequences of length maxlen.
    """
    output_encodings = []
    sents = self.get_fixed_size(sents)
    # Encode outputs
    for sent in sents:
        output_encodings.append(
            list(np_utils.to_categorical(
                list(self.transform_labels(sent.label.values)),
                num_classes=self.num_of_classes())))
    # Pad / truncate to maximum length
    return np.ndarray(shape=(len(sents),
                             self.sent_maxlen,
                             self.num_of_classes()),
                      buffer=np.array(pad_sequences(output_encodings,
                                                    lambda: np.zeros(self.num_of_classes()),
                                                    maxlen=self.sent_maxlen)))
# Functional Keras -- all of the following are currying functions expecting models as input
# https://keras.io/getting-started/functional-api-guide/
Example 9: run_discriminator
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def run_discriminator(q, a):
    sa = (a != 0).sum()
    # *************************************************************************
    # running discriminator:
    # *************************************************************************
    model_discrim = init_model()
    # First pass: count the total number of teacher-forcing rows to allocate.
    count = 0
    for i, sent in enumerate(a):
        l = np.where(sent == 3)  # the position of the EOS symbol
        limit = l[0][0]
        count += limit + 1
    Q = np.zeros((count, maxlen_input))
    A = np.zeros((count, maxlen_input))
    Y = np.zeros((count, dictionary_size))
    # Loop over the training examples:
    count = 0
    for i, sent in enumerate(a):
        ans_partial = np.zeros((1, maxlen_input))
        # Loop over the positions of the current target output (the current output sequence):
        l = np.where(sent == 3)  # the position of the EOS symbol
        limit = l[0][0]
        for k in range(1, limit + 1):
            # Mapping the target output (the next output word) for one-hot coding:
            y = np.zeros((1, dictionary_size))
            y[0, int(sent[k])] = 1
            # preparing the partial answer to input:
            ans_partial[0, -k:] = sent[0:k]
            # training the model for one epoch using teacher forcing:
            Q[count, :] = q[i:i+1]
            A[count, :] = ans_partial
            Y[count, :] = y
            count += 1
    p = model_discrim.predict([Q, A, Y])
    p = p[-sa:-1]
    P = np.sum(np.log(p)) / sa
    return P
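To see what the inner loop builds, here is a toy trace of the teacher-forcing expansion for a single answer (sizes and token values are made up; token 3 is EOS, as above):

import numpy as np

maxlen_input = 5
sent = np.array([1, 7, 9, 3, 0])       # EOS (token 3) sits at index 3
limit = np.where(sent == 3)[0][0]      # -> 3, so this answer yields 3 rows
for k in range(1, limit + 1):
    ans_partial = np.zeros((1, maxlen_input))
    ans_partial[0, -k:] = sent[0:k]    # right-aligned partial answer
    print(ans_partial[0], '-> next word:', int(sent[k]))
# [0. 0. 0. 0. 1.] -> next word: 7
# [0. 0. 0. 1. 7.] -> next word: 9
# [0. 0. 1. 7. 9.] -> next word: 3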
Example 10: get_BERT_lmdb_vector
# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def get_BERT_lmdb_vector(self, sentence):
    """
    Try to get the BERT extracted embeddings for a sequence cached in LMDB.
    """
    if self.env_BERT is None:
        # DB cache not available, so we don't cache BERT embeddings
        return None
    try:
        BERT_vector = np.zeros((BERT_sentence_size, BERT_embed_size), dtype='float32')
        with self.env_BERT.begin() as txn:
            # get a hash for the sentence
            the_hash = list_digest(sentence)
            vector = txn.get(the_hash.encode(encoding='UTF-8'))
            if vector:
                # the cached vector is already stored at the expected shape/padding,
                # so no resizing (as done in the ELMo getter above) is applied here
                BERT_vector = _deserialize_pickle(vector)
                vector = None
            else:
                return None
    except lmdb.Error:
        # for unclear reasons, the environment must be closed and reopened to avoid
        # "mdb_txn_begin: MDB_BAD_RSLOT: Invalid reuse of reader locktable slot"
        # when opening a new transaction
        self.env_BERT.close()
        self.env_BERT = lmdb.open(self.embedding_BERT_cache, readonly=True, max_readers=2048, max_spare_txns=2, lock=False)
        return self.get_BERT_lmdb_vector(sentence)
    return BERT_vector
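A round-trip sketch of the two BERT cache methods (the object name, sentence, and vector shape are all assumptions for illustration):

import numpy as np

# Hypothetical usage, assuming `embeddings` exposes the methods above with a
# writable env_BERT, and that the vector shape matches BERT_sentence_size x
# BERT_embed_size as in this example:
sentence = ["BERT", "embeddings", "are", "cached"]
vec = np.random.rand(512, 768).astype('float32')    # assumed shape
embeddings.cache_BERT_lmdb_vector(sentence, vec)
cached = embeddings.get_BERT_lmdb_vector(sentence)  # -> vec, or None on a miss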