

Python data_utils.pad_sequences Method Code Examples

This article collects typical usage examples of the Python method tflearn.data_utils.pad_sequences from open-source projects. If you are unsure what data_utils.pad_sequences does or how to call it, the curated examples below should help. You can also explore other usage examples from the tflearn.data_utils module.


The following presents 6 code examples of data_utils.pad_sequences, ordered by popularity.
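Before the examples, here is a minimal sketch of the call itself. The token ids are made up; tflearn's pad_sequences pads and truncates at the end of each sequence by default (the Keras function of the same name defaults to the front instead):

from tflearn.data_utils import pad_sequences

# Two made-up sequences of integer token ids with different lengths.
sequences = [[3, 7, 2],
             [5, 1, 4, 9, 8, 6]]
padded = pad_sequences(sequences, maxlen=5, value=0.)
print(padded)
# With post-padding and post-truncation this should print:
# [[3 7 2 0 0]
#  [5 1 4 9 8]]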

Example 1: pad_data

# Required import: from tflearn import data_utils [as alias]
# Or: from tflearn.data_utils import pad_sequences [as alias]
# This example also needs: from tflearn.data_utils import to_categorical
def pad_data(data, pad_seq_len):
    """
    Pad each sentence of the research data to the max sentence length.
    Return the padded data and the data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of research data
    Returns:
        data_front: The padded front data
        data_behind: The padded behind data
        onehot_labels: The one-hot labels
    """
    data_front = pad_sequences(data.front_tokenindex, maxlen=pad_seq_len, value=0.)
    data_behind = pad_sequences(data.behind_tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels = to_categorical(data.labels, nb_classes=2)
    return data_front, data_behind, onehot_labels 
Developer: RandolphVI, Project: Text-Pairs-Relation-Classification, Lines: 19, Source: data_helpers.py
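A hypothetical call to Example 1's pad_data. SimpleNamespace stands in for the project's own data object, and the token ids and labels below are made up:

from types import SimpleNamespace

# Fabricated toy data with the three attributes pad_data expects.
data = SimpleNamespace(front_tokenindex=[[3, 7, 2], [5, 1]],
                       behind_tokenindex=[[4, 4], [9, 8, 6]],
                       labels=[0, 1])
data_front, data_behind, onehot_labels = pad_data(data, pad_seq_len=4)
# data_front and data_behind are (2, 4) arrays; onehot_labels is (2, 2).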

Example 2: pad_data

# Required import: from tflearn import data_utils [as alias]
# Or: from tflearn.data_utils import pad_sequences [as alias]
def pad_data(data, pad_seq_len):
    """
    Pad each sentence of the research data to the max sentence length.
    Return the padded data and the data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of the research data
    Returns:
        abstract_pad_seq: The padded abstract data
        onehot_labels_list: The one-hot labels
        onehot_labels_list_tuple: The one-hot label tuples
    """
    abstract_pad_seq = pad_sequences(data.abstract_tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels_list = data.onehot_labels
    onehot_labels_list_tuple = data.onehot_labels_tuple
    return abstract_pad_seq, onehot_labels_list, onehot_labels_list_tuple 
Developer: RandolphVI, Project: Hierarchical-Multi-Label-Text-Classification, Lines: 18, Source: data_helpers.py

Example 3: pad_data

# Required import: from tflearn import data_utils [as alias]
# Or: from tflearn.data_utils import pad_sequences [as alias]
def pad_data(data, pad_seq_len):
    """
    Pad each sentence of the research data to the max sentence length.
    Return the padded data and the data labels.

    Args:
        data: The research data
        pad_seq_len: The max sentence length of the research data
    Returns:
        pad_seq: The padded data
        onehot_labels: The one-hot labels
    """
    pad_seq = pad_sequences(data.tokenindex, maxlen=pad_seq_len, value=0.)
    onehot_labels = data.onehot_labels
    return pad_seq, onehot_labels 
Developer: RandolphVI, Project: Multi-Label-Text-Classification, Lines: 17, Source: data_helpers.py

Example 4: test_pad

# Required import: from tflearn import data_utils [as alias]
# Or: from tflearn.data_utils import pad_sequences [as alias]
def test_pad():
    trainX = 'w18476 w4454 w1674 w6 w25 w474 w1333 w1467 w863 w6 w4430 w11 w813 w4463 w863 w6 w4430 w111'
    # pad_sequences expects integer token ids, so strip the 'w' prefix here.
    trainX = [int(w[1:]) for w in trainX.split(" ")]
    # pad_sequences pads a batch of sequences, so wrap the single sequence in a list.
    trainX = pad_sequences([trainX], maxlen=100, value=0.)
    print("trainX:", trainX)
Developer: brightmart, Project: text_classification, Lines: 7, Source: data_util_zhihu.py

Example 5: load_data_multilabel

# Required import: from tflearn import data_utils [as alias]
# Or: from tflearn.data_utils import pad_sequences [as alias]
# This example also needs: import codecs, random (plus the module-level UNK_ID and transform_multilabel_as_multihot)
def load_data_multilabel(traning_data_path,vocab_word2index, vocab_label2index,sentence_len,training_portion=0.95):
    """
    convert data as indexes using word2index dicts.
    :param traning_data_path:
    :param vocab_word2index:
    :param vocab_label2index:
    :return:
    """
    file_object = codecs.open(traning_data_path, mode='r', encoding='utf-8')
    lines = file_object.readlines()
    random.shuffle(lines)
    label_size = len(vocab_label2index)
    X = []
    Y = []
    for i, line in enumerate(lines):
        raw_list = line.strip().split("__label__")
        input_list = raw_list[0].strip().split(" ")
        input_list = [x.strip().replace(" ", "") for x in input_list if x != '']
        x = [vocab_word2index.get(x, UNK_ID) for x in input_list]  # map words to ids; unknown words get UNK_ID
        label_list = raw_list[1:]
        label_list = [l.strip().replace(" ", "") for l in label_list if l != '']
        label_list = [vocab_label2index[label] for label in label_list]
        y = transform_multilabel_as_multihot(label_list, label_size)  # multi-hot label vector
        X.append(x)
        Y.append(y)
        if i < 10:
            print(i, "line:", line)  # peek at the first few raw lines

    X = pad_sequences(X, maxlen=sentence_len, value=0.)  # pad every sequence to max length
    number_examples = len(lines)
    training_number = int(training_portion * number_examples)
    train = (X[0:training_number], Y[0:training_number])
    valid_number = min(1000, number_examples - training_number)
    # take up to 1000 held-out examples immediately after the training split
    test = (X[training_number:training_number + valid_number], Y[training_number:training_number + valid_number])
    return train, test
Developer: brightmart, Project: text_classification, Lines: 36, Source: data_util.py
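transform_multilabel_as_multihot is not shown in this snippet; the sketch below is an assumption about the multi-hot encoding it performs, not the project's own code:

import numpy as np

def transform_multilabel_as_multihot(label_list, label_size):
    # Assumed behavior: a vector of label_size zeros with a 1 at every listed label id.
    result = np.zeros(label_size)
    result[label_list] = 1
    return result

# e.g. transform_multilabel_as_multihot([0, 2], 4) -> array([1., 0., 1., 0.])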

Example 6: main

# Required import: from tflearn import data_utils [as alias]
# Or: from tflearn.data_utils import pad_sequences [as alias]
# This example also needs: import os, codecs, tensorflow as tf, plus the project's own helper functions
def main(_):
    # 1.load data with vocabulary of words and labels
    vocabulary_word2index, vocabulary_index2word = create_voabulary()
    vocab_size = len(vocabulary_word2index)
    print("vocab_size:",vocab_size)
    #iii=0
    #iii/0
    vocabulary_word2index_label,vocabulary_index2word_label = create_voabulary_label()
    questionid_question_lists=load_final_test_data(FLAGS.predict_source_file) #TODO
    test= load_data_predict(vocabulary_word2index,vocabulary_word2index_label,questionid_question_lists) #TODO
    testX=[]
    question_id_list=[]
    for question_id, question_string_list in test:
        question_id_list.append(question_id)
        testX.append(question_string_list)

    # 2.Data preprocessing: Sequence padding
    print("start padding....")
    testX2 = pad_sequences(testX, maxlen=FLAGS.sentence_len, value=0.)  # padding to max length
    print("end padding...")

    # 3.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        # 4.Instantiate Model
        fast_text=fastText(FLAGS.label_size, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,FLAGS.num_sampled,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.is_training)
        saver=tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess,tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Can't find the checkpoint.going to stop")
            return
        # 5.feed data, to get logits
        number_of_training_data = len(testX2)
        print("number_of_training_data:", number_of_training_data)
        batch_size=1
        index=0
        predict_target_file_f = codecs.open(FLAGS.predict_target_file, 'a', 'utf8')
        for start, end in zip(range(0, number_of_training_data, batch_size),range(batch_size, number_of_training_data+1, batch_size)):
            logits=sess.run(fast_text.logits,feed_dict={fast_text.sentence:testX2[start:end]}) #'shape of logits:', ( 1, 1999)
            # 6. get label using logits
            predicted_labels=get_label_using_logits(logits[0],vocabulary_index2word_label)
            # 7. write question id and labels to file system.
            write_question_id_with_labels(question_id_list[index],predicted_labels,predict_target_file_f)
            index=index+1
        predict_target_file_f.close()

# get label using logits 
Developer: brightmart, Project: text_classification, Lines: 52, Source: p5_fastTextB_predict_multilabel.py
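The snippet ends where get_label_using_logits would begin; the sketch below is a plausible reading that maps the top-k logits back to label strings (the top_number parameter and its default of 5 are assumptions):

import numpy as np

def get_label_using_logits(logits, vocabulary_index2word_label, top_number=5):
    # Indices of the top_number largest logits, biggest first.
    index_list = np.argsort(logits)[-top_number:][::-1]
    # Look each index up in the index -> label dict.
    return [vocabulary_index2word_label[i] for i in index_list]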


Note: The tflearn.data_utils.pad_sequences examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by various developers; copyright remains with the original authors. Please consult each project's license before distributing or reusing the code. Do not reproduce this article without permission.