This article collects typical usage examples of the Python method tensorflow.python.keras.preprocessing.sequence.pad_sequences. If you have been wondering what sequence.pad_sequences does, how to call it, or what real-world code that uses it looks like, the curated examples below should help. You can also explore the rest of the module the method lives in, tensorflow.python.keras.preprocessing.sequence.
The following presents 9 code examples of sequence.pad_sequences, sorted by popularity by default.
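Before diving into the examples, here is a minimal, self-contained sketch of what pad_sequences does (the input lists below are made up for illustration):

from tensorflow.python.keras.preprocessing.sequence import pad_sequences

sequences = [[1, 2, 3], [4, 5], [6]]
# Pad each sequence with zeros at the end, up to a fixed length of 4.
padded = pad_sequences(sequences, maxlen=4, padding='post', value=0)
# padded == [[1, 2, 3, 0],
#            [4, 5, 0, 0],
#            [6, 0, 0, 0]]
print(padded.shape)  # (3, 4)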
Example 1: preprocess
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def preprocess(train_data_file, word_index_file, num_words):
    """Load a NumPy .npz file and preprocess its data.

    Pads the arrays so they all have the same length, producing an integer
    tensor of shape num_reviews * max_length. An embedding layer capable of
    handling this shape is then used as the first layer of the network.

    Args:
        train_data_file: (str) Location of the .npz data file.
        word_index_file: (str) Location of the JSON file with word index information.
        num_words: (int) Number of words to keep from the IMDB dataset.

    Returns:
        A tuple of training and test data.
    """
    (train_data, train_labels), (test_data, test_labels) = _load_data(
        path=train_data_file, num_words=num_words)
    word_index = _get_word_index(word_index_file)
    # Standardize the lengths for training.
    train_data = pad_sequences(train_data, value=word_index['<PAD>'],
                               padding='post', maxlen=SENTENCE_SIZE)
    # Standardize the lengths for test.
    test_data = pad_sequences(test_data, value=word_index['<PAD>'],
                              padding='post', maxlen=SENTENCE_SIZE)
    return (train_data, train_labels), (test_data, test_labels)
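The helpers _load_data and _get_word_index are not part of the snippet. A plausible sketch, assuming they wrap Keras's IMDB loader and a JSON word index (the index shift and the special tokens follow the standard IMDB convention, where the first indices are reserved):

import json
from tensorflow.python.keras.datasets import imdb

def _load_data(path, num_words):
    # Thin wrapper around the IMDB loader; `path` points at the local .npz file.
    return imdb.load_data(path=path, num_words=num_words)

def _get_word_index(word_index_file):
    # Load the word -> index mapping and reserve the first indices for
    # special tokens, matching the convention the snippet relies on.
    with open(word_index_file) as f:
        word_index = json.load(f)
    word_index = {word: index + 3 for word, index in word_index.items()}
    word_index['<PAD>'] = 0
    word_index['<START>'] = 1
    word_index['<UNK>'] = 2
    word_index['<UNUSED>'] = 3
    return word_index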
Example 2: process_x_dataset
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def process_x_dataset(self,
                      data: List[List[str]],
                      max_len: Optional[int] = None,
                      subset: Optional[List[int]] = None) -> np.ndarray:
    from tensorflow.python.keras.preprocessing.sequence import pad_sequences
    if max_len is None:
        max_len = self.sequence_length
    if subset is not None:
        target = get_list_subset(data, subset)
    else:
        target = data
    numerized_samples = self.numerize_token_sequences(target)
    return pad_sequences(numerized_samples, max_len, padding='post', truncating='post')
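Note that this example sets both padding='post' and truncating='post', so sequences longer than max_len lose tokens from the end rather than the beginning (the default for both arguments is 'pre'):

from tensorflow.python.keras.preprocessing.sequence import pad_sequences

tokens = [[7, 8, 9, 10, 11]]
print(pad_sequences(tokens, maxlen=3, truncating='post'))  # [[7 8 9]] -- keeps the head
print(pad_sequences(tokens, maxlen=3))                     # [[ 9 10 11]] -- default 'pre' keeps the tail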
Example 3: split_and_zero_padding
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def split_and_zero_padding(df, max_seq_length):
    # Split into left/right question columns.
    X = {'left': df['question1_n'], 'right': df['question2_n']}
    # Zero-pad both sides to a fixed length.
    for dataset, side in itertools.product([X], ['left', 'right']):
        dataset[side] = pad_sequences(dataset[side], padding='pre', truncating='post', maxlen=max_seq_length)
    return X
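A hypothetical call, assuming the question1_n / question2_n columns already hold lists of token ids (the tiny DataFrame below is made up for illustration):

import pandas as pd

df = pd.DataFrame({'question1_n': [[1, 2], [3]],
                   'question2_n': [[4], [5, 6, 7]]})
X = split_and_zero_padding(df, max_seq_length=5)
print(X['left'].shape, X['right'].shape)  # (2, 5) (2, 5)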
Example 4: test_nlp_padded_valid
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def test_nlp_padded_valid(self):
    num_words = 1024
    (x_train, y_train), (x_test, y_test) = TestUtil.get_random_variable_length_dataset(max_value=num_words)
    explained_model = RandomForestClassifier(n_estimators=64, max_depth=5, random_state=1)
    counter = CountVectoriser(num_words)
    tfidf_transformer = TfidfTransformer()
    explained_model = Pipeline([('counts', counter),
                                ('tfidf', tfidf_transformer),
                                ('model', explained_model)])
    explained_model.fit(x_train, y_train)
    model_builder = RNNModelBuilder(embedding_size=num_words, with_embedding=True,
                                    num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                    batch_size=32, learning_rate=0.001, num_epochs=2, early_stopping_patience=128)
    masking_operation = WordDropMasking()
    loss = binary_crossentropy
    explainer = CXPlain(explained_model, model_builder, masking_operation, loss)
    x_train = pad_sequences(x_train, padding="post", truncating="post", dtype=int)
    x_test = pad_sequences(x_test, padding="post", truncating="post", dtype=int, maxlen=x_train.shape[1])
    explainer.fit(x_train, y_train)
    eval_score = explainer.score(x_test, y_test)
    train_score = explainer.get_last_fit_score()
    median = explainer.predict(x_test)
    self.assertTrue(median.shape == x_test.shape)
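Two details are worth noting here: with maxlen omitted, pad_sequences pads every sequence to the length of the longest one in the batch, and the test set is then explicitly padded to x_train.shape[1] so both arrays share the same time dimension:

from tensorflow.python.keras.preprocessing.sequence import pad_sequences

train = pad_sequences([[1, 2, 3, 4], [5]], padding='post')             # padded to the longest, length 4
test = pad_sequences([[6, 7]], padding='post', maxlen=train.shape[1])  # forced to the same length
print(train.shape, test.shape)  # (2, 4) (1, 4)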
Example 5: test_imdb_padded_valid
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def test_imdb_padded_valid(self):
    num_samples = 32
    num_words = 1024
    (x_train, y_train), (x_test, y_test) = TestUtil.get_imdb(word_dictionary_size=num_words,
                                                             num_subsamples=num_samples)
    explained_model = RandomForestClassifier(n_estimators=64, max_depth=5, random_state=1)
    counter = CountVectoriser(num_words)
    tfidf_transformer = TfidfTransformer()
    explained_model = Pipeline([('counts', counter),
                                ('tfidf', tfidf_transformer),
                                ('model', explained_model)])
    explained_model.fit(x_train, y_train)
    model_builder = RNNModelBuilder(embedding_size=num_words, with_embedding=True,
                                    num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                    batch_size=32, learning_rate=0.001, num_epochs=2, early_stopping_patience=128)
    masking_operation = WordDropMasking()
    loss = binary_crossentropy
    explainer = CXPlain(explained_model, model_builder, masking_operation, loss)
    x_train = pad_sequences(x_train, padding="post", truncating="post", dtype=int)
    x_test = pad_sequences(x_test, padding="post", truncating="post", dtype=int, maxlen=x_train.shape[1])
    explainer.fit(x_train, y_train)
    eval_score = explainer.score(x_test, y_test)
    train_score = explainer.get_last_fit_score()
    median = explainer.predict(x_test)
    self.assertTrue(median.shape == x_test.shape)
Example 6: test_is_variable_length_padded_false
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def test_is_variable_length_padded_false(self):
    (x, _), _ = TestUtil.get_random_variable_length_dataset(max_value=1024)
    x = pad_sequences(x, padding="post", truncating="post", dtype=int)
    return_value = Validation.is_variable_length(x)
    self.assertEqual(return_value, False)
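Validation.is_variable_length is a CXPlain utility; judging from this test, it reports whether the rows of x have differing lengths, which can no longer be the case after padding. A rough stand-in, written from scratch for illustration:

def is_variable_length(x):
    # True if not every row has the same length.
    return len({len(row) for row in x}) > 1

print(is_variable_length([[1, 2], [3]]))     # True -- ragged input
print(is_variable_length([[1, 0], [3, 0]]))  # False -- e.g. after post-padding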
Example 7: sents2sequences
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def sents2sequences(tokenizer, sentences, reverse=False, pad_length=None, padding_type='post'):
    encoded_text = tokenizer.texts_to_sequences(sentences)
    preproc_text = pad_sequences(encoded_text, padding=padding_type, maxlen=pad_length)
    if reverse:
        preproc_text = np.flip(preproc_text, axis=1)
    return preproc_text
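Typical usage, with reverse=True yielding right-to-left sequences (a trick sometimes used for encoder inputs in seq2seq models); the sentences below are made up:

from tensorflow.python.keras.preprocessing.text import Tokenizer

sentences = ['the cat sat', 'the dog barked loudly']
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
seqs = sents2sequences(tokenizer, sentences, reverse=True, pad_length=5)
print(seqs.shape)  # (2, 5)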
Example 8: test_causal_loss_padded_input
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def test_causal_loss_padded_input(self):
    models = TestUtil.get_classification_models()
    batch_size = 32
    num_samples = 1024
    num_words = 1024
    (x_train, y_train), (x_test, y_test) = \
        TestUtil.get_random_variable_length_dataset(num_samples=num_samples, max_value=num_words)
    x, y = np.concatenate([x_train, x_test], axis=0), np.concatenate([y_train, y_test], axis=0)
    self.assertEqual(x.shape[0], num_samples)
    for explained_model in models:
        counter = CountVectoriser(num_words)
        tfidf_transformer = TfidfTransformer()
        explained_model = Pipeline([('counts', counter),
                                    ('tfidf', tfidf_transformer),
                                    ('model', explained_model)])
        TestUtil.fit_proxy(explained_model, x, y)
        masking = WordDropMasking()
        x = pad_sequences(x, padding="post", truncating="post", dtype=int)
        _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(explained_model, x, y,
                                                                              batch_size=batch_size,
                                                                              downsample_factors=(1,),
                                                                              flatten=False)
        auxiliary_outputs = y_pred
        all_but_one_auxiliary_outputs = all_y_pred_imputed
        all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
            all_but_one_auxiliary_outputs
        )
        delta_errors = calculate_delta_errors(y,
                                              auxiliary_outputs,
                                              all_but_one_auxiliary_outputs,
                                              NumpyInterface.binary_crossentropy,
                                              math_ops=NumpyInterface)
        # Ensure correct delta error dimensionality.
        self.assertEqual(delta_errors.shape, (num_samples, x.shape[1]))
Example 9: run_loss
# Required import: from tensorflow.python.keras.preprocessing import sequence [as alias]
# Or: from tensorflow.python.keras.preprocessing.sequence import pad_sequences [as alias]
def run_loss(args):
    data = args["data"]
    # For each run we want a fresh random balance of the data.
    data.process()
    dense_out = len(data.labels[0])
    # Split once, for all models.
    X_train_, X_test_, Y_train, Y_test = train_test_split(
        data.text, data.labels, test_size=0.20, random_state=42
    )
    print(args)
    # Prepare the data for the LSTM model.
    # The tokenizer is currently fit on all text (unbalanced, train and test combined);
    # it would be nice to replace this with an embedding pretrained on a larger corpus.
    tokenizer = Tokenizer(num_words=int(args["max_features"]), split=" ")
    tokenizer.fit_on_texts(data.all_text)
    X_train = tokenizer.texts_to_sequences(X_train_)
    X_train = pad_sequences(X_train, maxlen=max_len)
    X_test = tokenizer.texts_to_sequences(X_test_)
    X_test = pad_sequences(X_test, maxlen=max_len)
    # Train the LSTM model.
    lstm_model = simple_lstm(
        int(args["max_features"]),
        dense_out,
        X_train.shape[1],
        int(args["embed_dim"]),
        int(args["lstm_out"]),
        args["dropout"],
    )
    if args["epochs"] == 0:
        args["epochs"] = 1
    es = EarlyStopping(monitor="val_acc", min_delta=0, patience=6, verbose=0, mode="max")
    model_hist = lstm_model.fit(
        X_train,
        Y_train,
        epochs=args["epochs"],
        batch_size=batch_size,
        verbose=1,
        validation_data=(X_test, Y_test),
        callbacks=[es],
    )
    lstm_acc = model_hist.history["val_acc"][-1]
    print("LSTM model accuracy ", lstm_acc)
    # The optimizer minimizes this objective, so to maximize accuracy we return its complement.
    return 1 - lstm_acc
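run_loss leans on several module-level names that are not shown (max_len, batch_size, simple_lstm, plus the usual imports). A plausible scaffold; the hyperparameter values and the simple_lstm architecture are assumptions, not the original code:

from sklearn.model_selection import train_test_split
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Embedding, LSTM, Dense

max_len = 200     # assumed: pad/truncate every sequence to this length
batch_size = 32   # assumed

def simple_lstm(max_features, dense_out, input_length, embed_dim, lstm_out, dropout):
    # A minimal guess at the architecture: embedding -> LSTM -> softmax.
    model = Sequential([
        Embedding(max_features, embed_dim, input_length=input_length),
        LSTM(lstm_out, dropout=dropout, recurrent_dropout=dropout),
        Dense(dense_out, activation='softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model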