当前位置: 首页>>代码示例>>Python>>正文


Python Vocab.get_wv方法代码示例

本文整理汇总了Python中utils.Vocab.get_wv方法的典型用法代码示例。如果您正苦于以下问题:Python Vocab.get_wv方法的具体用法?Python Vocab.get_wv怎么用?Python Vocab.get_wv使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在utils.Vocab的用法示例。


在下文中一共展示了Vocab.get_wv方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Model_RNN

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import get_wv [as 别名]
class Model_RNN(LanguageModel):
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../data/lyrics/train/'
    
        # X_train is a list of all examples. each examples is a 2-len list. each element is a list of words in lyrics.
        # word_counts is a dictionary that maps
        if self.config.debug:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, '../glove.6B.50d.txt', threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100)
        else:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold_down=100, threshold_up=4000, npos=10000, nneg=10000)

        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
        
        x = collections.Counter(l_train)
        for k in x.keys():
            print 'class:', k, x[k]
        print ''

        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())
        self.wv = self.vocab.get_wv('../glove.6B.50d.txt')

        with open('word_hist.csv', 'w') as f:
            for w in self.word_counts.keys():
                f.write(w+','+str(self.word_counts[w])+'\n')
            
        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       
        self.sequence_len1 = np.array(seq_len1)
        self.sequence_len2 = np.array(seq_len2)

    def add_placeholders(self):
        self.X1            = tf.placeholder(tf.int32,   shape=(None, self.config.max_steps), name='X1')
        self.X2            = tf.placeholder(tf.int32,   shape=(None, self.config.max_steps), name='X2')
        self.labels        = tf.placeholder(tf.float32, shape=(None, self.config.n_class), name='labels')
        #self.initial_state = tf.placeholder(tf.float32, shape=(None, self.config.hidden_size), name='initial_state')
        self.seq_len1      = tf.placeholder(tf.int32,   shape=(None),                        name='seq_len1') # for variable length sequences
        self.seq_len2      = tf.placeholder(tf.int32,   shape=(None),                        name='seq_len2') # for variable length sequences

    def add_embedding(self):
        #L = tf.get_variable('L', shape=(len(self.vocab), self.config.embed_size), dtype=tf.float32) 
        L = tf.Variable(tf.convert_to_tensor(self.wv, dtype=tf.float32), name='L')
        #L = tf.constant(tf.convert_to_tensor(self.wvi), dtype=tf.float32, name='L')
        inputs1 = tf.nn.embedding_lookup(L, self.X1) # self.X1 is batch_size x self.config.max_steps 
        inputs2 = tf.nn.embedding_lookup(L, self.X2) # input2 is batch_size x self.config.max_steps x self.config.embed_size
        inputs1 = tf.split(1, self.config.max_steps, inputs1) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size
        inputs1 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs1]
        inputs2 = tf.split(1, self.config.max_steps, inputs2) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size
        inputs2 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs2]
        return inputs1, inputs2

    def add_model_rnn(self, inputs1, inputs2, seq_len1, seq_len2):
        #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32)
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        rnn_outputs  = []
        rnn_outputs1 = []
        rnn_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state

        with tf.variable_scope('rnn'):
            Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32)
            Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size),  dtype=tf.float32)
            b1  = tf.get_variable('bhx', shape=(4*self.config.hidden_size,),                        dtype=tf.float32)

            for i in range(self.config.max_steps):
                if self.config.batch_size==1:
                    if i==seq_len1[0]:
                        breaka
                tmp = tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1
                
                rnn_outputs1.append(h_curr1)

            for i in range(self.config.max_steps):
                if self.config.batch_size==1:
                    if i==seq_len2[0]:
                        breaka
                h_curr2 = tf.sigmoid(tf.matmul(h_curr2,Whh) + tf.matmul(inputs2[i],Wxh) + b1)
                rnn_outputs2.append(h_curr2)

        #lstm_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in range(self.config.max_steps)]
        rnn_final_states = tf.concat(1, [rnn_outputs1[-1], rnn_outputs2[-1]])
        return rnn_final_states

    def add_model_lstm(self, inputs1, inputs2, seq_len1, seq_len2):
        #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32)
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        lstm_outputs1 = []
        lstm_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state
        cell1   = self.initial_state
        cell2   = self.initial_state

        with tf.variable_scope('lstm'):
#.........这里部分代码省略.........
开发者ID:kalpitdixit,项目名称:deep-playlist,代码行数:103,代码来源:model_rnn.py


注:本文中的utils.Vocab.get_wv方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。