本文整理汇总了 Python 中 tensorflow.keras.preprocessing.sequence.pad_sequences 方法的典型用法代码示例。如果您正苦于以下问题:Python sequence.pad_sequences 方法的具体用法?Python sequence.pad_sequences 怎么用?Python sequence.pad_sequences 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 tensorflow.keras.preprocessing.sequence 的用法示例。
在下文中一共展示了sequence.pad_sequences方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_acc_one_step
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def get_acc_one_step(model, logits, text_lens, labels_batch):
    """Return the mean per-sequence tag accuracy of Viterbi decoding for one batch.

    For every (logit, text_len, labels) triple the first ``text_len`` steps
    of ``logit`` are decoded with ``model.transition_params``; the decoded
    path is compared element-wise with ``labels[:text_len]`` and the mean of
    the matches is taken. The per-sequence means are then averaged over the
    number of sequences processed.
    """
    pad = tf.keras.preprocessing.sequence.pad_sequences
    running_total = 0
    n_sequences = 0
    for logit, text_len, labels in zip(logits, text_lens, labels_batch):
        decoded, _ = ta.text.viterbi_decode(logit[:text_len], model.transition_params)
        n_sequences += 1
        # Wrapping each sequence in a one-element list turns it into a
        # 2-D integer array that can be converted to a tensor.
        predicted = tf.convert_to_tensor(pad([decoded], padding='post'), dtype=tf.int32)
        expected = tf.convert_to_tensor(pad([labels[:text_len]], padding='post'), dtype=tf.int32)
        matches = tf.cast(tf.equal(predicted, expected), tf.float32)
        running_total = running_total + tf.reduce_mean(matches)
    return running_total / n_sequences
# 识别句子中的实体
示例2: predict_tags
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def predict_tags(text: str) -> list:
    """
    Predicts POS Tags

    The model, word index and tag index are loaded on first use through
    ``_load_metadata`` and cached in module-level globals.

    Args:
        text (str): Input text string; tokenized with ``str.split()``

    Returns:
        list: ``(word, tag)`` tuples in input order. ``zip`` stops at the
        shorter of the token list and the predicted positions, so tokens
        beyond the model window get no entry.
    """
    global _POS_TAGGER_MODEL, _WORD2IDX, _IDX2TAG
    if _POS_TAGGER_MODEL is None:
        _POS_TAGGER_MODEL, _WORD2IDX, _IDX2TAG = _load_metadata(
            POS_TAGGER_WEIGHTS_PATH, POS_WORD2IDX_PATH, POS_TAG2IDX_PATH)
    tokens = text.split()
    # Words missing from the vocabulary are mapped to the "UNK" index.
    encoded = [[_WORD2IDX[word] if word in _WORD2IDX else _WORD2IDX["UNK"] for word in tokens]]
    # pad_sequences truncates from the FRONT by default (truncating='pre'),
    # which would feed the model the last 50 tokens while the zip below pairs
    # predictions with the first tokens. Truncate at the end so that position
    # i of the prediction always corresponds to tokens[i].
    padded = pad_sequences(sequences=encoded, maxlen=50, value=_WORD2IDX['PAD'],
                           padding='post', truncating='post')
    predictions = _POS_TAGGER_MODEL.predict(padded)
    # One best tag index per time step of the single input sequence.
    pred_tags = np.argmax(predictions, axis=2).reshape(predictions.shape[1])
    word_tags = [(word, _IDX2TAG[idx]) for word, idx in zip(tokens, pred_tags)]
    return word_tags
示例3: predict_ner
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def predict_ner(text: str) -> list:
    """
    Predicts NER Tags

    The model, word index and tag index are loaded on first use through
    ``_load_metadata`` and cached in module-level globals.

    Args:
        text (str): Input text string; tokenized with ``str.split()``

    Returns:
        list: ``(word, tag)`` tuples in input order. ``zip`` stops at the
        shorter of the token list and the predicted positions, so tokens
        beyond the model window get no entry.
    """
    global _NER_MODEL, _WORD2IDX, _IDX2TAG
    if _NER_MODEL is None:
        _NER_MODEL, _WORD2IDX, _IDX2TAG = _load_metadata(
            NER_WEIGHTS_PATH, NER_WORD2IDX_PATH, NER_TAG2IDX_PATH)
    tokens = text.split()
    # Words missing from the vocabulary are mapped to the "UNK" index.
    encoded = [[_WORD2IDX[word] if word in _WORD2IDX else _WORD2IDX["UNK"] for word in tokens]]
    # pad_sequences truncates from the FRONT by default (truncating='pre'),
    # which would feed the model the last 55 tokens while the zip below pairs
    # predictions with the first tokens. Truncate at the end so that position
    # i of the prediction always corresponds to tokens[i].
    padded = pad_sequences(sequences=encoded, maxlen=55, value=_WORD2IDX['PAD'],
                           padding='post', truncating='post')
    predictions = _NER_MODEL.predict(padded)
    # One best tag index per time step of the single input sequence.
    pred_tags = np.argmax(predictions, axis=2).reshape(predictions.shape[1])
    word_tags = [(word, _IDX2TAG[idx]) for word, idx in zip(tokens, pred_tags)]
    return word_tags
示例4: _process_data
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _process_data(self, text):
    """Encode a collection of texts as a padded matrix of word indices.

    Each item of ``text`` is tokenized with ``clean_to_list``; words that are
    not in ``self.word_index`` are encoded as 0. The result is padded or
    truncated to ``self.max_length`` with ``pad_sequences`` defaults.
    """
    encoded = []
    for sample in text:
        encoded.append([self.word_index.get(token, 0) for token in clean_to_list(sample)])
    return pad_sequences(encoded, maxlen=self.max_length)
# 保存路径与加载路径相同
示例5: _preprocess_data
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _preprocess_data(self, data, max_len=100):
    """Encode one sample and return ``(padded, length)``.

    The lower-cased first element of every item in ``data`` is looked up in
    ``self.word2idx``; unknown entries are encoded as 1. ``padded`` is a
    one-row batch of ``max_len`` columns and ``length`` is the number of
    items before padding or truncation.
    """
    indices = []
    for item in data:
        indices.append(self.word2idx.get(item[0].lower(), 1))
    return pad_sequences([indices], max_len), len(indices)
# 构造模型
示例6: _process_data
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _process_data(data, word2idx, chunk_tags, max_len=None):
    """Turn tagged sentences into padded index matrices ``(x, y_chunk)``.

    ``data`` is a sequence of sentences whose items expose the word at
    position 0 and the tag at position 1. Words are lower-cased and looked up
    in ``word2idx`` (unknown words become 1); tags are replaced by their
    position in ``chunk_tags``. When ``max_len`` is None the longest sentence
    in ``data`` sets the width. Both outputs are padded at the end.
    """
    if max_len is None:
        max_len = max(map(len, data))

    token_ids = []
    for sentence in data:
        token_ids.append([word2idx.get(pair[0].lower(), 1) for pair in sentence])

    tag_ids = []
    for sentence in data:
        tag_ids.append([chunk_tags.index(pair[1]) for pair in sentence])

    x = pad_sequences(token_ids, max_len, padding='post')
    y_chunk = pad_sequences(tag_ids, max_len, padding='post')
    return x, y_chunk
示例7: vectorize_stories
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def vectorize_stories(word_idx, story_maxlen, query_maxlen, data):
    """Convert (story, query, answer) triples into index arrays.

    Every word of each story and query, and each answer, is replaced by its
    entry in ``word_idx``. Stories are padded to ``story_maxlen`` and queries
    to ``query_maxlen`` with ``pad_sequences`` defaults; answers are returned
    as a NumPy array.
    """
    story_ids = []
    query_ids = []
    answer_ids = []
    for story, query, answer in data:
        story_ids.append([word_idx[token] for token in story])
        query_ids.append([word_idx[token] for token in query])
        answer_ids.append(word_idx[answer])

    padded_stories = pad_sequences(story_ids, maxlen=story_maxlen)
    padded_queries = pad_sequences(query_ids, maxlen=query_maxlen)
    return padded_stories, padded_queries, np.array(answer_ids)
示例8: tokenize
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def tokenize(language):
    """Fit a tokenizer on ``language`` and return ``(padded_tensor, tokenizer)``.

    The tokenizer is created with an empty ``filters`` string. The resulting
    integer sequences are padded at the end to the length of the longest one.
    """
    fitted = Tokenizer(filters='')
    fitted.fit_on_texts(language)
    # Replace every text by its sequence of integer word indices.
    sequences = fitted.texts_to_sequences(language)
    # No maxlen is given, so the longest sequence determines the width.
    padded = pad_sequences(sequences=sequences, padding='post')
    return padded, fitted
示例9: transform
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def transform(self, data):
    """Encode ``data`` with ``self.tokenizer`` and pad the result to ``self.maxlen``."""
    encoded = self.tokenizer.texts_to_sequences(data)
    padded = pad_sequences(encoded, maxlen=self.maxlen)
    return padded
示例10: data_preprocessing_v2
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def data_preprocessing_v2(train, test, max_len, max_words=50000):
    """Tokenize and pad train/test texts with a vocabulary capped at ``max_words``.

    The tokenizer is fitted on ``train`` only and then applied to both sets.
    Sequences are padded and truncated at the end to ``max_len``.

    Returns:
        ``(train_padded, test_padded, max_words + 2)``. The last value is
        the size reported to callers as the vocabulary size.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(train)
    train_idx, test_idx = [tokenizer.texts_to_sequences(texts) for texts in (train, test)]

    pad_options = dict(maxlen=max_len, padding='post', truncating='post')
    train_padded = pad_sequences(train_idx, **pad_options)
    test_padded = pad_sequences(test_idx, **pad_options)
    # NOTE(review): the original comment described this as
    # "len(word_docs) + 2 (<UNK>, <PAD>)", but the value returned is based on
    # max_words — confirm which one callers expect.
    return train_padded, test_padded, max_words + 2
示例11: data_preprocessing_with_dict
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def data_preprocessing_with_dict(train, test, max_len):
    """Tokenize and pad train/test texts, also returning the tokenizer's dictionaries.

    The tokenizer uses ``'<UNK>'`` as its out-of-vocabulary token and is
    fitted on ``train`` only. Sequences are padded and truncated at the end
    to ``max_len``.

    Returns:
        ``(train_padded, test_padded, word_docs, word_index, vocab_size)``
        where ``vocab_size`` is ``len(word_docs) + 2``.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token='<UNK>')
    tokenizer.fit_on_texts(train)
    train_idx, test_idx = [tokenizer.texts_to_sequences(texts) for texts in (train, test)]

    pad_options = dict(maxlen=max_len, padding='post', truncating='post')
    train_padded = pad_sequences(train_idx, **pad_options)
    test_padded = pad_sequences(test_idx, **pad_options)

    # Two extra slots on top of the fitted words (<UNK>, <PAD>).
    vocab_size = len(tokenizer.word_docs) + 2
    return train_padded, test_padded, tokenizer.word_docs, tokenizer.word_index, vocab_size
示例12: pad
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def pad(data, len=None):
    """Pad (and truncate) ``data`` at the end with zeros to ``len`` columns.

    ``len`` is forwarded as ``maxlen``; it defaults to None.
    """
    from tensorflow.keras.preprocessing.sequence import pad_sequences

    options = {'maxlen': len, 'padding': 'post', 'truncating': 'post', 'value': 0}
    return pad_sequences(data, **options)
示例13: pad_sequences
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def pad_sequences(inp):
    """Pad ``inp`` at the end with zeros to ``gConfig['sentence_size']`` columns."""
    return sequence.pad_sequences(
        inp,
        maxlen=gConfig['sentence_size'],
        padding='post',
        value=0,
    )
示例14: predict
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def predict(sentences):
    """Classify ``sentences`` and return ``'pos'`` or ``'neg'``.

    A model is built with ``create_model``, the input is vectorized with
    ``text_to_vector`` (and printed), padded, reshaped to a single-row batch
    and passed to ``model.step``. The argmax of the first prediction selects
    the label.
    """
    labels = ['pos', 'neg']
    model = create_model()
    token_ids = text_to_vector(sentences)
    print(token_ids)
    row = pad_sequences([token_ids])[0]
    batch = tf.reshape(row, (1, len(row)))
    # The same batch is passed for both the first and second argument.
    # NOTE(review): the third argument is presumably a training flag — confirm.
    scores = model.step(batch, batch, False)
    best = np.int32(tf.math.argmax(scores[0]).numpy())
    return labels[best]
示例15: _load_data
# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _load_data(self, test_size=0.2):
    """Load the question-pair CSVs, index their words and build padded splits.

    Reads ``train.csv`` and ``test.csv`` from ``self.data_path``, replaces
    every question in both frames by a list of word indices (skipping words
    in ``self.stops``), splits the training frame into train/validation
    parts and pads both question columns to the longest raw sentence.

    Args:
        test_size: Fraction forwarded to ``train_test_split``.

    Returns:
        ``(x_train, y_train, x_val, y_val, word_index, max_length)`` where
        the ``x_*`` values are dicts with ``'left'`` and ``'right'`` arrays.
    """
    log.info('数据预处理...')
    # word -> index mapping and index -> word list; index 0 is '<unk>'
    word_index = {}
    index_word = ['<unk>']
    questions_cols = ['question1', 'question2']

    log.info('加载数据集...')
    train_data = os.path.join(self.data_path, 'train.csv')
    test_data = os.path.join(self.data_path, 'test.csv')
    train_df = pd.read_csv(train_data)
    test_df = pd.read_csv(test_data)

    # Longest sentence (split on single spaces) across both frames;
    # cells that did not split into a list are ignored.
    max_length = max(
        len(tokens)
        for frame in (train_df, test_df)
        for col in questions_cols
        for tokens in frame[col].str.split(' ')
        if isinstance(tokens, list)
    )

    # Replace each question string by its list of word indices, growing the
    # vocabulary in order of first appearance.
    for frame in (train_df, test_df):
        for row_label, row in frame.iterrows():
            for col in questions_cols:
                encoded = []
                for word in clean_to_list(row[col]):
                    if word in self.stops:
                        continue
                    if word not in word_index:
                        word_index[word] = len(index_word)
                        index_word.append(word)
                    encoded.append(word_index[word])
                frame._set_value(row_label, col, encoded)

    x = train_df[questions_cols]
    y = train_df['is_duplicate']
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=test_size)

    x_train = {'left': x_train.question1, 'right': x_train.question2}
    x_val = {'left': x_val.question1, 'right': x_val.question2}
    y_train = y_train.values
    y_val = y_val.values

    # Pad both sides of both splits to the same width.
    for split in (x_train, x_val):
        for side in ('left', 'right'):
            split[side] = pad_sequences(split[side], maxlen=max_length)

    # Sanity check: both sides and the labels must have matching sizes.
    assert x_train['left'].shape == x_train['right'].shape
    assert len(x_train['left']) == len(y_train)

    return x_train, y_train, x_val, y_val, word_index, max_length