Python sequence.pad_sequences方法代码示例

本文整理汇总了Python中tensorflow.keras.preprocessing.sequence.pad_sequences方法的典型用法代码示例。如果您正苦于以下问题：Python sequence.pad_sequences方法的具体用法？Python sequence.pad_sequences怎么用？Python sequence.pad_sequences使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tensorflow.keras.preprocessing.sequence的用法示例。

在下文中一共展示了sequence.pad_sequences方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_acc_one_step

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def get_acc_one_step(model, logits, text_lens, labels_batch):
        """Return the mean per-sentence accuracy of Viterbi-decoded tag paths.

        For every sentence the logits are cut to the sentence's length, decoded
        with ``ta.text.viterbi_decode`` using ``model.transition_params``, and
        compared element-wise with the gold labels cut to the same length.
        The per-sentence accuracies are then averaged over the batch.

        Args:
            model: Object exposing a ``transition_params`` attribute.
            logits: Iterable with one score matrix per sentence.
            text_lens: Iterable with the length of each sentence.
            labels_batch: Iterable with the gold label sequence of each sentence.

        Returns:
            The averaged accuracy, or ``0.0`` when the batch holds no
            sentences (the previous implementation raised
            ``ZeroDivisionError`` in that case).
        """
        sentence_count = 0
        accuracy = 0
        for logit, text_len, labels in zip(logits, text_lens, labels_batch):
            viterbi_path, _ = ta.text.viterbi_decode(logit[:text_len], model.transition_params)
            sentence_count += 1
            # pad_sequences is applied to a single sequence here, so it only
            # wraps the sequence into a 2-D array before the comparison.
            predicted = tf.convert_to_tensor(
                tf.keras.preprocessing.sequence.pad_sequences([viterbi_path], padding='post'),
                dtype=tf.int32)
            expected = tf.convert_to_tensor(
                tf.keras.preprocessing.sequence.pad_sequences([labels[:text_len]], padding='post'),
                dtype=tf.int32)
            correct_prediction = tf.equal(predicted, expected)
            accuracy = accuracy + tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Guard the empty batch: there is nothing to average.
        if sentence_count == 0:
            return 0.0
        return accuracy / sentence_count

    # 识别句子中的实体

开发者ID:msgi，项目名称:nlp-journey，代码行数:19，代码来源:bilstm_crf.py

示例2: predict_tags

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def predict_tags(text: str) -> list:
    """
    Predicts POS Tags

    The text is split on whitespace, encoded with the word-to-index table,
    padded/truncated to 50 ids and fed to the POS tagger model. The model and
    its lookup tables are loaded on first use and cached in module globals.

    Inputs longer than 50 tokens are truncated at the END so that the kept
    ids are the first 50 tokens, which is what the predictions are zipped
    with below. (The ``pad_sequences`` default, ``truncating='pre'``, keeps
    the LAST 50 ids and would pair every tag with the wrong word.)

    Args:
        text (str): Input text string

    Returns:
        list: ``(word, tag)`` tuples, one per token that fits in the window
    """

    global _POS_TAGGER_MODEL, _WORD2IDX, _IDX2TAG
    if _POS_TAGGER_MODEL is None:
        _POS_TAGGER_MODEL, _WORD2IDX, _IDX2TAG = _load_metadata(POS_TAGGER_WEIGHTS_PATH,
                                                                POS_WORD2IDX_PATH, POS_TAG2IDX_PATH)

    tokens = text.split()
    # Unknown words fall back to the "UNK" id.
    encoded = [[_WORD2IDX[word] if word in _WORD2IDX else _WORD2IDX["UNK"] for word in tokens]]
    padded = pad_sequences(sequences=encoded, maxlen=50, value=_WORD2IDX['PAD'],
                           padding='post', truncating='post')
    predictions = _POS_TAGGER_MODEL.predict(padded)
    # One tag id per position of the single input sequence.
    pred_tags = np.argmax(predictions, axis=2).reshape(predictions.shape[1])
    # zip stops at the shorter side, so padding positions are dropped.
    word_tags = [(word, _IDX2TAG[idx]) for word, idx in zip(tokens, pred_tags)]
    return word_tags

开发者ID:urduhack，项目名称:urduhack，代码行数:25，代码来源:predict.py

示例3: predict_ner

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def predict_ner(text: str) -> list:
    """
    Predicts NER Tags

    The text is split on whitespace, encoded with the word-to-index table,
    padded/truncated to 55 ids and fed to the NER model. The model and its
    lookup tables are loaded on first use and cached in module globals.

    Inputs longer than 55 tokens are truncated at the END so that the kept
    ids are the first 55 tokens, which is what the predictions are zipped
    with below. (The ``pad_sequences`` default, ``truncating='pre'``, keeps
    the LAST 55 ids and would pair every tag with the wrong word.)

    Args:
        text (str): Input text string

    Returns:
        list: ``(word, tag)`` tuples, one per token that fits in the window
    """

    global _NER_MODEL, _WORD2IDX, _IDX2TAG
    if _NER_MODEL is None:
        _NER_MODEL, _WORD2IDX, _IDX2TAG = _load_metadata(NER_WEIGHTS_PATH,
                                                         NER_WORD2IDX_PATH, NER_TAG2IDX_PATH)

    tokens = text.split()
    # Unknown words fall back to the "UNK" id.
    encoded = [[_WORD2IDX[word] if word in _WORD2IDX else _WORD2IDX["UNK"] for word in tokens]]
    padded = pad_sequences(sequences=encoded, maxlen=55, value=_WORD2IDX['PAD'],
                           padding='post', truncating='post')
    predictions = _NER_MODEL.predict(padded)
    # One tag id per position of the single input sequence.
    pred_tags = np.argmax(predictions, axis=2).reshape(predictions.shape[1])
    # zip stops at the shorter side, so padding positions are dropped.
    word_tags = [(word, _IDX2TAG[idx]) for word, idx in zip(tokens, pred_tags)]
    return word_tags

开发者ID:urduhack，项目名称:urduhack，代码行数:25，代码来源:predict.py

示例4: _process_data

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _process_data(self, text):
        """Encode an iterable of raw texts as one padded index matrix.

        Each item of ``text`` is passed through ``clean_to_list`` (presumably a
        tokenizer/cleaner defined elsewhere) and every resulting word is looked
        up in ``self.word_index``; words that are not in the index map to 0.
        The encoded rows are padded/truncated to ``self.max_length``.
        """
        encoded_rows = []
        for sentence in text:
            row = [self.word_index.get(token, 0) for token in clean_to_list(sentence)]
            encoded_rows.append(row)
        return pad_sequences(encoded_rows, maxlen=self.max_length)

    # 保存路径与加载路径相同

开发者ID:msgi，项目名称:nlp-journey，代码行数:9，代码来源:siamese_similarity.py

示例5: _preprocess_data

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _preprocess_data(self, data, max_len=100):
        """Encode a single sequence and pad it to ``max_len``.

        The first element of every item in ``data`` is lower-cased and looked
        up in ``self.word2idx``; unknown entries map to index 1.

        Returns:
            A tuple ``(padded, length)`` where ``padded`` is the result of
            ``pad_sequences`` on a one-row batch and ``length`` is the number
            of items in ``data`` before padding.
        """
        indices = []
        for item in data:
            indices.append(self.word2idx.get(item[0].lower(), 1))
        original_length = len(indices)
        padded = pad_sequences([indices], max_len)
        return padded, original_length

    # 构造模型

开发者ID:msgi，项目名称:nlp-journey，代码行数:9，代码来源:bilstm_crf.py

示例6: _process_data

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _process_data(data, word2idx, chunk_tags, max_len=None):
        """Turn tagged sentences into padded word-id and tag-id matrices.

        Args:
            data: Sequence of sentences; each sentence is a sequence of items
                whose element 0 is the word and element 1 is the tag.
            word2idx: Mapping from lower-cased word to index; unknown words
                map to 1.
            chunk_tags: Sequence of tag names; a tag's id is its position.
            max_len: Target length. When ``None`` the longest sentence in
                ``data`` is used.

        Returns:
            ``(x, y_chunk)``, both padded at the end to ``max_len``.
        """
        if max_len is None:
            max_len = max(map(len, data))

        # Words first, then tags, as two separate passes over the data.
        word_ids = []
        for sentence in data:
            word_ids.append([word2idx.get(item[0].lower(), 1) for item in sentence])

        tag_ids = []
        for sentence in data:
            tag_ids.append([chunk_tags.index(item[1]) for item in sentence])

        padded_words = pad_sequences(word_ids, max_len, padding='post')
        padded_tags = pad_sequences(tag_ids, max_len, padding='post')
        return padded_words, padded_tags

开发者ID:msgi，项目名称:nlp-journey，代码行数:12，代码来源:bilstm_crf.py

示例7: vectorize_stories

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def vectorize_stories(word_idx, story_maxlen, query_maxlen, data):
    """Convert (story, query, answer) triples into index arrays.

    Every word of a story and of a query is replaced by ``word_idx[word]``;
    the answer is replaced by its single index.

    Returns:
        A tuple ``(stories, queries, answers)``: the stories padded to
        ``story_maxlen``, the queries padded to ``query_maxlen`` and the
        answer indices as a NumPy array.
    """
    lookup = word_idx.__getitem__
    story_rows = []
    query_rows = []
    answer_ids = []
    for story, query, answer in data:
        story_rows.append(list(map(lookup, story)))
        query_rows.append(list(map(lookup, query)))
        answer_ids.append(lookup(answer))

    padded_stories = pad_sequences(story_rows, maxlen=story_maxlen)
    padded_queries = pad_sequences(query_rows, maxlen=query_maxlen)
    return padded_stories, padded_queries, np.array(answer_ids)

开发者ID:ray-project，项目名称:ray，代码行数:10，代码来源:pbt_memnn_example.py

示例8: tokenize

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def tokenize(language):
    """Map the texts in ``language`` to padded integer sequences.

    A fresh ``Tokenizer`` (with no character filtering) is fitted on the texts
    and then used to encode them. The encoded sequences are padded at the end
    up to the longest sequence found.

    Returns:
        ``(tensor, language_tokenizer)``: the padded sequences and the fitted
        tokenizer.
    """
    fitted = Tokenizer(filters='')
    fitted.fit_on_texts(language)
    encoded = fitted.texts_to_sequences(language)
    # No maxlen is given, so padding goes up to the longest encoded sequence.
    return pad_sequences(sequences=encoded, padding='post'), fitted

开发者ID:uzaymacar，项目名称:attention-mechanisms，代码行数:12，代码来源:machine_translation.py

示例9: transform

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def transform(self, data):
        """Encode ``data`` with ``self.tokenizer`` and pad to ``self.maxlen``."""
        return pad_sequences(self.tokenizer.texts_to_sequences(data), maxlen=self.maxlen)

开发者ID:mozilla，项目名称:bugbug，代码行数:5，代码来源:nn.py

示例10: data_preprocessing_v2

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def data_preprocessing_v2(train, test, max_len, max_words=50000):
    """Tokenize train/test texts and pad them to a fixed length.

    The tokenizer is fitted on ``train`` only and is limited to ``max_words``
    words. Both splits are padded and truncated at the end to ``max_len``.

    Returns:
        ``(train_padded, test_padded, vocab_size)`` where ``vocab_size`` is
        ``max_words + 2``.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(train)

    # Encode both splits first, then pad both, in train -> test order.
    encoded_splits = [tokenizer.texts_to_sequences(split) for split in (train, test)]
    train_padded, test_padded = [
        pad_sequences(rows, maxlen=max_len, padding='post', truncating='post')
        for rows in encoded_splits
    ]
    vocab_size = max_words + 2
    return train_padded, test_padded, vocab_size

开发者ID:TobiasLee，项目名称:Text-Classification，代码行数:11，代码来源:prepare_data.py

示例11: data_preprocessing_with_dict

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def data_preprocessing_with_dict(train, test, max_len):
    """Tokenize train/test texts, pad them, and expose the tokenizer tables.

    The tokenizer is fitted on ``train`` only and maps out-of-vocabulary words
    to the ``'<UNK>'`` token. Both splits are padded and truncated at the end
    to ``max_len``.

    Returns:
        ``(train_padded, test_padded, word_docs, word_index, vocab_size)``
        where ``vocab_size`` is ``len(word_docs) + 2``.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token='<UNK>')
    tokenizer.fit_on_texts(train)

    # Encode both splits first, then pad both, in train -> test order.
    encoded_splits = [tokenizer.texts_to_sequences(split) for split in (train, test)]
    train_padded, test_padded = [
        pad_sequences(rows, maxlen=max_len, padding='post', truncating='post')
        for rows in encoded_splits
    ]
    return (
        train_padded,
        test_padded,
        tokenizer.word_docs,
        tokenizer.word_index,
        len(tokenizer.word_docs) + 2,
    )

开发者ID:TobiasLee，项目名称:Text-Classification，代码行数:11，代码来源:prepare_data.py

示例12: pad

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def pad(data, len=None):
    """Pad/truncate ``data`` at the end with zeros.

    ``len`` is forwarded as ``maxlen``. The name shadows the builtin but is
    part of the function's keyword interface.
    """
    from tensorflow.keras.preprocessing.sequence import pad_sequences

    options = {'maxlen': len, 'padding': 'post', 'truncating': 'post', 'value': 0}
    return pad_sequences(data, **options)

开发者ID:guxd，项目名称:deep-code-search，代码行数:5，代码来源:utils.py

示例13: pad_sequences

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def pad_sequences(inp):
    """Pad ``inp`` at the end with zeros to ``gConfig['sentence_size']``."""
    return sequence.pad_sequences(
        inp,
        maxlen=gConfig['sentence_size'],
        padding='post',
        value=0,
    )

开发者ID:zhaoyingjun，项目名称:tensorflow2.0-coding，代码行数:5，代码来源:execute.py

示例14: predict

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def predict(sentences):
    """Classify ``sentences`` and return ``'pos'`` or ``'neg'``.

    A model is created via ``create_model()``, the input is vectorized with
    ``text_to_vector`` (the vector is printed), padded, reshaped to a
    one-row batch and passed to ``model.step``. The arg-max of the first
    prediction selects the label.
    """
    labels = ['pos', 'neg']
    model = create_model()

    indexes = text_to_vector(sentences)
    print(indexes)

    padded_row = pad_sequences([indexes])[0]
    batch = tf.reshape(padded_row, (1, len(padded_row)))

    predictions = model.step(batch, batch, False)
    best = tf.math.argmax(predictions[0])
    label_id = np.int32(best.numpy())
    return labels[label_id]

开发者ID:zhaoyingjun，项目名称:tensorflow2.0-coding，代码行数:13，代码来源:execute.py

示例15: _load_data

# 需要导入模块: from tensorflow.keras.preprocessing import sequence [as 别名]
# 或者: from tensorflow.keras.preprocessing.sequence import pad_sequences [as 别名]
def _load_data(self, test_size=0.2):
        """Load the question-pair CSVs and build padded train/validation inputs.

        Reads ``train.csv`` and ``test.csv`` from ``self.data_path``, builds a
        word -> index vocabulary over both files (skipping words found in
        ``self.stops``), replaces each question cell with its list of word
        indices, splits the training frame into train/validation parts and
        pads both question columns to a common length.

        Args:
            test_size: Forwarded to ``train_test_split`` as ``test_size``.

        Returns:
            ``(x_train, y_train, x_val, y_val, word_index, max_length)`` where
            ``x_train`` / ``x_val`` are dicts with ``'left'`` and ``'right'``
            padded arrays.
        """
        log.info('数据预处理...')
        # word -> index mapping and index -> word list; slot 0 holds '<unk>'
        word_index = {}
        index_word = ['<unk>']
        questions_cols = ['question1', 'question2']

        log.info('加载数据集...')
        train_path = os.path.join(self.data_path, 'train.csv')
        test_path = os.path.join(self.data_path, 'test.csv')
        train_df = pd.read_csv(train_path)
        test_df = pd.read_csv(test_path)
        frames = (train_df, test_df)

        # Longest question, measured in space-separated pieces of the raw text.
        # Entries whose split result is not a list are skipped.
        split_columns = []
        for frame in frames:
            for column in questions_cols:
                split_columns.append(frame[column].str.split(' '))
        piece_counts = []
        for split_column in split_columns:
            for pieces in split_column:
                if isinstance(pieces, list):
                    piece_counts.append(len(pieces))
        max_length = max(piece_counts)

        # Build the vocabulary and replace every question by its index list.
        for frame in frames:
            for row_label, record in frame.iterrows():
                for column in questions_cols:
                    encoded = []
                    for token in clean_to_list(record[column]):
                        if token in self.stops:
                            continue
                        if token not in word_index:
                            # New word: its index is the next free slot.
                            word_index[token] = len(index_word)
                            index_word.append(token)
                        encoded.append(word_index[token])
                    frame._set_value(row_label, column, encoded)

        features = train_df[questions_cols]
        targets = train_df['is_duplicate']
        x_train, x_val, y_train, y_val = train_test_split(features, targets, test_size=test_size)

        x_train = {'left': x_train['question1'], 'right': x_train['question2']}
        x_val = {'left': x_val['question1'], 'right': x_val['question2']}

        y_train = y_train.values
        y_val = y_val.values

        for split in (x_train, x_val):
            for side in ('left', 'right'):
                split[side] = pad_sequences(split[side], maxlen=max_length)

        # Sanity-check that both sides and the labels line up.
        assert x_train['left'].shape == x_train['right'].shape
        assert len(x_train['left']) == len(y_train)
        return x_train, y_train, x_val, y_val, word_index, max_length

开发者ID:msgi，项目名称:nlp-journey，代码行数:52，代码来源:siamese_similarity.py

注：本文中的tensorflow.keras.preprocessing.sequence.pad_sequences方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。