

Python preprocessing.sequence Method Code Examples

This article collects typical usage examples of the keras.preprocessing.sequence method in Python. If you are wondering how exactly to use preprocessing.sequence, how to call it, or what real usage looks like, the curated code examples below may help. You can also explore further usage examples from the containing module, keras.preprocessing.


The following presents 10 code examples of the preprocessing.sequence method, sorted by popularity by default.
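For orientation, the most commonly used entry point in this module is keras.preprocessing.sequence.pad_sequences, which pads or truncates integer sequences to a fixed length. Below is a minimal illustrative sketch; the encoded sentences and maxlen are made-up values, not taken from the examples that follow:

from keras.preprocessing import sequence

# three integer-encoded sentences of different lengths (illustrative values)
encoded = [[4, 10, 5], [7], [3, 8, 2, 9, 6, 1]]

# pad the short ones with zeros and truncate the long one to maxlen=5
padded = sequence.pad_sequences(encoded, maxlen=5, padding='post', truncating='post', value=0)
print(padded.shape)  # (3, 5)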

Example 1: learn_single_repr

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def learn_single_repr(self, q1_embed, q1_len, q1_max, rnn_type,
                        reuse=None, pool=False, name="", mask=None):
        """ This is the single sequence encoder function.
        rnn_type controls what type of encoder is used.
        Supports neural bag-of-words (NBOW) and CNN encoders
        """
        if('NBOW' in rnn_type):
            q1_output = tf.reduce_sum(q1_embed, 1)
            if(pool):
                return q1_embed, q1_output
        elif('CNN' in rnn_type):
            q1_output = build_raw_cnn(q1_embed, self.args.rnn_size,
                filter_sizes=3,
                initializer=self.initializer,
                dropout=self.rnn_dropout, reuse=reuse, name=name)
            if(pool):
                q1_output = tf.reduce_max(q1_output, 1)
                return q1_output, q1_output
        else:
            q1_output = q1_embed

        return q1_output 
Author: vanzytay, Project: KDD2018_MPCN, Lines: 24, Source: model.py

Example 2: encode_outputs

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def encode_outputs(self, sents):
        """
        Given a dataframe split into sentences, encode outputs for RNN classification.
        Should return a list of label sequences, one per sentence, each of length maxlen.
        """
        output_encodings = []
        sents = self.get_fixed_size(sents)
        # Encode outputs
        for sent in sents:
            output_encodings.append(list(np_utils.to_categorical(list(self.transform_labels(sent.label.values)),
                                                                 num_classes = self.num_of_classes())))

        # Pad / truncate to maximum length
        return np.ndarray(shape = (len(sents),
                                  self.sent_maxlen,
                                  self.num_of_classes()),
                          buffer = np.array(pad_sequences(output_encodings,
                                                          lambda : \
                                                            np.zeros(self.num_of_classes()),
                                                          maxlen = self.sent_maxlen))) 
Author: gabrielStanovsky, Project: supervised-oie, Lines: 22, Source: model.py

Example 3: pad_sequences

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def pad_sequences(sequences, pad_func, maxlen = None):
    """
    Similar to keras.preprocessing.sequence.pad_sequences, but using Sample as a higher-level
    abstraction.
    pad_func is a callable that returns a single padding element.
    """
    ret = []

    # Determine the maxlen
    max_value = max(map(len, sequences))
    if maxlen is None:
        maxlen = max_value

    # Pad / truncate (done this way to deal with np.array)
    for sequence in sequences:
        cur_seq = list(sequence[:maxlen])
        cur_seq.extend([pad_func()] * (maxlen - len(sequence)))
        ret.append(cur_seq)
    return ret 
Author: gabrielStanovsky, Project: supervised-oie, Lines: 21, Source: model.py
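A hypothetical usage sketch of the helper above (the integer lists and the lambda standing in for the project's Pad_sample factory are illustrative assumptions):

# pad_func is called once per missing slot; a plain 0 stands in for Pad_sample() here
padded = pad_sequences([[1, 2, 3], [4]],
                       pad_func=lambda: 0,
                       maxlen=4)
# padded == [[1, 2, 3, 0], [4, 0, 0, 0]]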

Example 4: get_ELMo_lmdb_vector

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def get_ELMo_lmdb_vector(self, token_list, max_size_sentence):
        """
            Try to get the ELMo embeddings for a sequence cached in LMDB
        """
        if self.env_ELMo is None:
            # db cache not available, we don't cache ELMo stuff
            return None
        try:    
            ELMo_vector = np.zeros((len(token_list), max_size_sentence-2, ELMo_embed_size), dtype='float32')
            with self.env_ELMo.begin() as txn:
                for i in range(0, len(token_list)):
                    txn = self.env_ELMo.begin()
                    # get a hash for the token_list
                    the_hash = list_digest(token_list[i])
                    vector = txn.get(the_hash.encode(encoding='UTF-8'))
                    if vector:
                        # adapt expected shape/padding
                        local_embeddings = _deserialize_pickle(vector)
                        if local_embeddings.shape[0] > max_size_sentence-2:
                            # squeeze the extra padding space
                            ELMo_vector[i] = local_embeddings[:max_size_sentence-2,]
                        elif local_embeddings.shape[0] == max_size_sentence-2:
                            # bingo~!
                            ELMo_vector[i] = local_embeddings
                        else:
                            # fill the missing space with padding
                            filler = np.zeros((max_size_sentence-(local_embeddings.shape[0]+2), ELMo_embed_size), dtype='float32')
                            ELMo_vector[i] = np.concatenate((local_embeddings, filler))
                        vector = None
                    else:
                        return None
        except lmdb.Error:
            # no idea why, but we need to close and reopen the environment to avoid
            # mdb_txn_begin: MDB_BAD_RSLOT: Invalid reuse of reader locktable slot
            # when opening new transaction !
            self.env_ELMo.close()
            self.env_ELMo = lmdb.open(self.embedding_ELMo_cache, readonly=True, max_readers=2048, max_spare_txns=2, lock=False)
            return self.get_ELMo_lmdb_vector(token_list, max_size_sentence)
        return ELMo_vector 
Author: kermitt2, Project: delft, Lines: 41, Source: Embeddings.py

Example 5: cache_ELMo_lmdb_vector

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def cache_ELMo_lmdb_vector(self, token_list, ELMo_vector):
        """
            Cache in LMDB the ELMo embeddings for a given sequence 
        """
        if self.env_ELMo is None:
            # db cache not available, we don't cache ELMo stuff
            return None
        txn = self.env_ELMo.begin(write=True)
        for i in range(0, len(token_list)):
            # get a hash for the token_list
            the_hash = list_digest(token_list[i])
            txn.put(the_hash.encode(encoding='UTF-8'), _serialize_pickle(ELMo_vector[i]))  
        txn.commit() 
Author: kermitt2, Project: delft, Lines: 15, Source: Embeddings.py
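The caching pattern shared by Examples 4 and 5 reduces to a pickle-serialized put/get in LMDB, keyed by a digest of the token list. The following is a self-contained sketch of that round trip, not the project's actual code: the cache path, the toy embedding matrix, and the use of sha256 in place of the project's list_digest helper are all assumptions for illustration.

import hashlib
import pickle

import lmdb
import numpy as np

env = lmdb.open('/tmp/embedding-cache', map_size=100 * 1024 * 1024)

tokens = ['a', 'toy', 'sentence']                          # illustrative token list
key = hashlib.sha256(' '.join(tokens).encode('utf-8')).hexdigest().encode('utf-8')
vector = np.random.rand(len(tokens), 4).astype('float32')  # stand-in embedding matrix

# write: serialize the array and store it under the digest key
with env.begin(write=True) as txn:
    txn.put(key, pickle.dumps(vector))

# read: fetch by the same key and deserialize; a miss returns None
with env.begin() as txn:
    raw = txn.get(key)
cached = pickle.loads(raw) if raw is not None else None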

Example 6: cache_BERT_lmdb_vector

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def cache_BERT_lmdb_vector(self, sentence, BERT_vector):
        """
            Cache in LMDB the BERT embeddings for a given sequence 
        """
        if self.env_BERT is None:
            # db cache not available, we don't cache BERT stuff
            return None
        txn = self.env_BERT.begin(write=True)
        #for i in range(0, len(sentence)):
        # get a hash for the token_list
        the_hash = list_digest(sentence)
        txn.put(the_hash.encode(encoding='UTF-8'), _serialize_pickle(BERT_vector))  
        txn.commit() 
Author: kermitt2, Project: delft, Lines: 15, Source: Embeddings.py

Example 7: encode_inputs

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def encode_inputs(self, sents):
        """
        Given a dataframe split into sentences, encode inputs for RNN classification.
        Should return a dictionary of input sequences, each of length maxlen.
        """
        word_inputs = []
        pred_inputs = []
        pos_inputs = []
        sents = self.get_fixed_size(sents)

        for sent in sents:
            # pandas assigns NaN for the (very infrequent) empty string (see wiki train)
            sent_words = [word
                         if not (isinstance(word, float) and math.isnan(word)) else " "
                         for word in sent.word.values]

            pos_tags_encodings = [NLTK_POS_TAGS.index(tag)
                                  for (_, tag)
                                  in nltk.pos_tag(sent_words)]
            word_encodings = [self.emb.get_word_index(w) for w in sent_words]
            pred_word_encodings = [self.emb.get_word_index(w) for w in sent_words]
            word_inputs.append([Sample(w) for w in word_encodings])
            pred_inputs.append([Sample(w) for w in pred_word_encodings])
            pos_inputs.append([Sample(pos) for pos in pos_tags_encodings])

        # Pad / truncate to desired maximum length
        ret = {"word_inputs" : [],
               "predicate_inputs": []}
        ret = defaultdict(lambda: [])

        for name, sequence in zip(["word_inputs", "predicate_inputs", "postags_inputs"],
                                  [word_inputs, pred_inputs, pos_inputs]):
            for samples in pad_sequences(sequence,
                                         pad_func = lambda : Pad_sample(),
                                         maxlen = self.sent_maxlen):
                ret[name].append([sample.encode() for sample in samples])

        return {k: np.array(v) for k, v in ret.items()}
Author: gabrielStanovsky, Project: supervised-oie, Lines: 40, Source: confidence_model.py

Example 8: encode_outputs

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def encode_outputs(self, sents):
        """
        Given a dataframe split into sentences, encode outputs for RNN classification.
        Should return a list of label sequences, one per sentence, each of length maxlen.
        """
        output_encodings = []
        sents = self.get_fixed_size(sents)
        # Encode outputs
        for sent in sents:
            output_encodings.append(list(np_utils.to_categorical(\
                                                list(self.transform_labels(sent.label.values)),
                                                            nb_classes = self.num_of_classes())))

        # Pad / truncate to maximum length
        return np.ndarray(shape = (len(sents),
                                  self.sent_maxlen,
                                  self.num_of_classes()),
                          buffer = np.array(pad_sequences(output_encodings,
                                                          lambda : \
                                                            np.zeros(self.num_of_classes()),
                                                          maxlen = self.sent_maxlen)))



    # Functional Keras -- all of the following are currying functions expecting models as input
    # https://keras.io/getting-started/functional-api-guide/ 
Author: gabrielStanovsky, Project: supervised-oie, Lines: 28, Source: confidence_model.py
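The "currying functions" remark at the end of the snippet above refers to helpers that take hyperparameters first and return a function that is then applied to a Keras tensor, in the style of the functional API. A minimal illustrative sketch of that pattern (the helper name, layer size, and input shape are made-up, not taken from the project):

from keras.layers import Dense, Input

def dense_block(units, activation='relu'):
    """Return a function that applies a Dense layer to a Keras tensor (illustrative only)."""
    def apply(tensor):
        return Dense(units, activation=activation)(tensor)
    return apply

inp = Input(shape=(16,))
out = dense_block(8)(inp)  # curried: configure the layer first, then apply it to the tensor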

Example 9: run_discriminator

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def run_discriminator(q, a):

    sa = (a != 0).sum()

    # *************************************************************************
    # running discriminator:
    # *************************************************************************

    p = 1
    m = 0
    model_discrim = init_model()
    count = 0
 
    for i, sent in enumerate(a):
        l = np.where(sent==3)  #  the position of the symbol EOS
        limit = l[0][0]
        count += limit + 1

    Q = np.zeros((count,maxlen_input))
    A = np.zeros((count,maxlen_input))
    Y = np.zeros((count,dictionary_size))

    # Loop over the training examples:
    count = 0
    for i, sent in enumerate(a):
        ans_partial = np.zeros((1,maxlen_input))
        
        # Loop over the positions of the current target output (the current output sequence):
        l = np.where(sent==3)  #  the position of the symbol EOS
        limit = l[0][0]

        for k in range(1,limit+1):
            # Mapping the target output (the next output word) for one-hot coding:
            y = np.zeros((1, dictionary_size))
            y[0, int(sent[k])] = 1

            # preparing the partial answer to input:
            ans_partial[0,-k:] = sent[0:k]

            # training the model for one epoch using teacher forcing:
            Q[count, :] = q[i:i+1] 
            A[count, :] = ans_partial 
            Y[count, :] = y
            count += 1

    p = model_discrim.predict([ Q, A, Y])
    p = p[-sa:-1]
    P = np.sum(np.log(p))/sa
    
    return P 
Author: oswaldoludwig, Project: Seq2seq-Chatbot-for-Keras, Lines: 52, Source: conversation_discriminator.py
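The inner loop above implements teacher forcing: for each target position k, the right-aligned partial answer seen so far is paired with a one-hot encoding of the next token. Below is a stripped-down sketch of that expansion for a single sentence; maxlen_input, dictionary_size, and the toy sentence are made-up values, with 3 playing the role of the EOS symbol as in the code above:

import numpy as np

maxlen_input = 6
dictionary_size = 10
sent = np.array([5, 7, 2, 3, 0, 0])   # toy answer sequence, 3 = EOS

limit = np.where(sent == 3)[0][0]      # position of the EOS symbol
for k in range(1, limit + 1):
    ans_partial = np.zeros((1, maxlen_input))
    ans_partial[0, -k:] = sent[0:k]    # right-aligned partial answer so far
    y = np.zeros((1, dictionary_size))
    y[0, int(sent[k])] = 1             # one-hot encoding of the next target token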

Example 10: get_BERT_lmdb_vector

# Required import: from keras import preprocessing [as alias]
# Or: from keras.preprocessing import sequence [as alias]
def get_BERT_lmdb_vector(self, sentence):
        """
            Try to get the BERT extracted embeddings for a sequence cached in LMDB
        """
        if self.env_BERT is None:
            # db cache not available, we don't cache BERT stuff
            return None
        try:    
            BERT_vector = np.zeros((BERT_sentence_size, BERT_embed_size), dtype='float32')
            with self.env_BERT.begin() as txn:
                txn = self.env_BERT.begin()
                # get a hash for the token_list
                the_hash = list_digest(sentence)
                vector = txn.get(the_hash.encode(encoding='UTF-8'))
                
                if vector:
                    # adapt expected shape/padding
                    BERT_vector = _deserialize_pickle(vector)
                    '''
                    if local_embeddings.shape[0] > max_size_sentence:
                        # squeeze the extra padding space
                        BERT_vector = local_embeddings[:max_size_sentence,]
                    elif local_embeddings.shape[0] == max_size_sentence:
                        # bingo~!
                        BERT_vector = local_embeddings
                    else:
                        # fill the missing space with padding
                        filler = np.zeros((max_size_sentence-(local_embeddings.shape[0]), BERT_embed_size), dtype='float32')
                        BERT_vector = np.concatenate((local_embeddings, filler))
                    '''
                    vector = None
                else:
                    return None
                
        except lmdb.Error:
            # no idea why, but we need to close and reopen the environment to avoid
            # mdb_txn_begin: MDB_BAD_RSLOT: Invalid reuse of reader locktable slot
            # when opening new transaction !
            self.env_BERT.close()
            self.env_BERT = lmdb.open(self.embedding_BERT_cache, readonly=True, max_readers=2048, max_spare_txns=2, lock=False)
            return self.get_BERT_lmdb_vector(sentence)
        return BERT_vector 
Author: kermitt2, Project: delft, Lines: 44, Source: Embeddings.py


Note: The keras.preprocessing.sequence method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective authors, and copyright remains with those authors. Please refer to each project's license before distributing or using the code; do not reproduce without permission.