当前位置: 首页>>代码示例>>Python>>正文


Python Vocab.encode方法代码示例

本文整理汇总了Python中utils.Vocab.encode方法的典型用法代码示例。如果您正苦于以下问题:Python Vocab.encode方法的具体用法?Python Vocab.encode怎么用?Python Vocab.encode使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在utils.Vocab的用法示例。


在下文中一共展示了Vocab.encode方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: WhoseLineModel

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
class WhoseLineModel(object):
    """Shared data loading and embedding setup for the quote-attribution models.

    Constructing an instance immediately loads the word vectors and the
    dataset named in ``config`` and registers the embedding matrix as a
    TensorFlow constant.
    """

    def __init__(self, config):
        """Store ``config``, load all data, then create the shared graph nodes."""
        self.config = config
        self.load_data(debug=False)
        self.add_common_model_vars()

    def load_data(self, debug=False):
        """Load word vectors, speakers and the train/dev/test examples.

        The data file at ``config.datapath`` is read twice: once to collect the
        speakers that occur in training chapters, and once to encode every
        line. Each line must hold three tab-separated fields
        (chapter, speaker, text).

        Populates ``wordvecs``, ``vocab``, ``embedding_matrix``, ``speakers``,
        ``train_data``, ``dev_data`` and ``test_data`` on ``self``.

        Args:
            debug: Accepted for interface compatibility; not used by this method.
        """
        self.wordvecs = gensim.models.Word2Vec.load_word2vec_format(self.config.wordvecpath, binary=False)
        self.vocab = Vocab()
        self.vocab.construct(self.wordvecs.index2word)
        self.embedding_matrix = np.vstack([self.wordvecs[self.vocab.index_to_word[i]] for i in range(len(self.vocab))])
        # "unk" surgery: row 0 is overwritten with the mean of all rows, cf.
        # https://groups.google.com/forum/#!searchin/globalvectors/unknown/globalvectors/9w8ZADXJclA/X6f0FgxUnMgJ
        self.embedding_matrix[0,:] = np.mean(self.embedding_matrix, axis=0)

        chapter_split = load_chapter_split(self.config.datasplitpath)

        # Pass 1: register only speakers seen in training chapters (split id 0).
        # The file is opened in a ``with`` block so the handle is always closed.
        self.speakers = Speakers()
        with open(self.config.datapath) as datafile:
            for ln in datafile:
                ch, speaker, _ = ln.split("\t")
                if chapter_split[ch] == 0:
                    self.speakers.add_speaker(speaker)
        self.speakers.prune(self.config.speaker_count-1)  # -1 for OTHER

        # Pass 2: encode every line and route it to its split
        # (split id 0 -> train, 1 -> dev, anything else -> test).
        self.train_data = []
        self.dev_data = []
        self.test_data = []
        oldch = None
        with open(self.config.datapath) as datafile:
            for ln in datafile:
                ch, speaker, line = ln.split("\t")
                encoded_line = (np.array([self.vocab.encode(word) for word in line.split()], dtype=np.int32),
                                self.speakers.encode(speaker))
                if chapter_split[ch] == 0:
                    dataset = self.train_data
                elif chapter_split[ch] == 1:
                    dataset = self.dev_data
                else:
                    dataset = self.test_data
                if self.config.batch_size == "chapter":
                    # Chapter batching: consecutive lines of the same chapter
                    # are grouped into one list inside the dataset.
                    if ch == oldch:
                        dataset[-1].append(encoded_line)
                    else:
                        dataset.append([encoded_line])
                else:
                    dataset.append(encoded_line)
                oldch = ch

    def add_common_model_vars(self):
        """Expose the embedding matrix as a constant under scope "word_vectors"."""
        with tf.variable_scope("word_vectors"):
            self.tf_embedding_matrix = tf.constant(self.embedding_matrix, name="embedding")
开发者ID:schmrlng,项目名称:RNNQuoteAttribution,代码行数:51,代码来源:RNNmodels.py

示例2: prep_data

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
def prep_data(trees, X_vocab=None, y_vocab=None):
    """Turn oracle parser states into encoded feature/decision training pairs.

    Args:
        trees: Iterable of tree objects exposing ``tokens`` and
            ``iter_oracle_states()``. Trees with fewer than two tokens are
            skipped.
        X_vocab: Feature vocabulary. When ``None``, fresh feature and decision
            vocabularies are created and grown while iterating.
        y_vocab: Decision vocabulary; only used when ``X_vocab`` is supplied.

    Returns:
        Tuple ``(X, y, X_vocab, y_vocab)`` where ``X[i]`` is the list of encoded
        features and ``y[i]`` the encoded decision of the i-th collected state.
    """
    import logging

    update_vocab = X_vocab is None
    if update_vocab:
        X_vocab, y_vocab = Vocab(), Vocab()
    X, y = [], []
    for tree in tqdm(trees):
        if len(tree.tokens) < 2:
            continue
        #TODO accumulate features without iterating over all states
        try:
            for state, decision in tree.iter_oracle_states():
                feats = state.extract_features()
                if update_vocab:
                    X_vocab.add_words(feats)
                    y_vocab.add_word(decision)
                # Encode both sides before appending either, so a failure can
                # never leave X and y with different lengths.
                encoded_feats = [X_vocab.encode(f) for f in feats]
                encoded_decision = y_vocab.encode(decision)
                X.append(encoded_feats)
                y.append(encoded_decision)
        except Exception:
            # Still best-effort (a failing tree is abandoned and the states
            # collected so far are kept), but the failure is now logged and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.getLogger(__name__).warning(
                "prep_data: abandoning tree after error", exc_info=True)
    return X, y, X_vocab, y_vocab
开发者ID:tachim,项目名称:semisupervised2,代码行数:22,代码来源:tf_nn.py

示例3: RNNLM_Model

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
class RNNLM_Model(LanguageModel):

  def load_data(self, debug=False):
    """Build the vocabulary from PTB 'train' and encode all three splits.

    Sets self.vocab, self.encoded_train, self.encoded_valid and
    self.encoded_test (int32 arrays of word indices).

    Args:
      debug: When true, every encoded split is cut to its first 1024 entries.
    """
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))

    def encode_split(split_name):
      # One vocabulary index per token of the requested split.
      indices = [self.vocab.encode(token) for token in get_ptb_dataset(split_name)]
      return np.array(indices, dtype=np.int32)

    self.encoded_train = encode_split('train')
    self.encoded_valid = encode_split('valid')
    self.encoded_test = encode_split('test')
    if not debug:
      return
    limit = 1024
    self.encoded_train = self.encoded_train[:limit]
    self.encoded_valid = self.encoded_valid[:limit]
    self.encoded_test = self.encoded_test[:limit]

  def add_placeholders(self):
    """Generate placeholder variables to represent the input tensors

    These placeholders are used as inputs by the rest of the model building
    code and will be fed data during training.  When "None" is in a
    placeholder's shape, that dimension is flexible.

    Adds the following nodes to the computational graph:

    input_placeholder: Input placeholder tensor of shape
                       (None, num_steps), type tf.int32
    labels_placeholder: Labels placeholder tensor of shape
                        (None, num_steps), type tf.float32
    dropout_placeholder: Dropout value placeholder (scalar),
                         type tf.float32

    The placeholders are stored on self as

      self.input_placeholder
      self.labels_placeholder
      self.dropout_placeholder

    (Don't change the variable names)
    """
    ### YOUR CODE HERE
    self.input_placeholder = tf.placeholder(tf.int32, shape=[None, self.config.num_steps], name='Input')
    self.labels_placeholder = tf.placeholder(tf.float32, shape=[None, self.config.num_steps], name='Target')
    # The dropout value is specified as tf.float32 above; it was previously
    # created as tf.int64, which cannot carry a fractional dropout value.
    self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout')
    ### END YOUR CODE
  
  def add_embedding(self):
    """Look up word embeddings and split them into one tensor per time step.

    Creates (or fetches) a trainable variable 'Embedding' of shape
    (len(self.vocab), embed_size) and indexes it with self.input_placeholder.

    Returns:
      inputs: List of num_steps tensors, obtained by splitting the lookup
              result along axis 1 and squeezing that axis away.
    """
    # The embedding lookup is pinned to the CPU.
    with tf.device('/cpu:0'):
      table_shape = [len(self.vocab), self.config.embed_size]
      embeddings = tf.get_variable('Embedding', table_shape, trainable=True)
      looked_up = tf.nn.embedding_lookup(embeddings, self.input_placeholder)
      per_step = tf.split(1, self.config.num_steps, looked_up)
      inputs = [tf.squeeze(step, [1]) for step in per_step]
      return inputs

  def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution
    over the vocabulary.

    Hint: Here are the dimensions of the variables you will need to
          create 
          
          U:   (hidden_size, len(vocab))
          b_2: (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be
                   a tensor of shape (batch_size, embed_size).
    Returns:
      outputs: List of length num_steps, each a tensor of shape
               (batch_size, len(vocab)
    """
    ### YOUR CODE HERE
    with tf.name_scope('Projection Layer'):
      U = tf.get_variable('U', [self.config.hidden_size, len(self.vocab)])
      b2 = tf.get_variable('b2', len(self.vocab))
#.........这里部分代码省略.........
开发者ID:jingshuangliu22,项目名称:cs224d_assignment2,代码行数:103,代码来源:q3_RNNLM.py

示例4: RNN_Model

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
class RNN_Model():

    def load_data(self):
        """Fetch the train/dev/test trees and build the vocabulary from train.

        Sets self.train_data, self.dev_data, self.test_data and self.vocab.
        """
        splits = tr.simplified_data(700, 100, 200)
        self.train_data, self.dev_data, self.test_data = splits

        # The vocabulary is built from the words of the training trees only.
        self.vocab = Vocab()
        training_words = []
        for tree in self.train_data:
            training_words.append(tree.get_words())
        self.vocab.construct(list(itertools.chain.from_iterable(training_words)))

    def inference(self, tree, predict_only_root=False):
        """Build the RNN computation graph for one tree up to the logits.

        Args:
            tree: a Tree object on which to build the computation graph for the RNN
            predict_only_root: when true, only the root node's tensor is
                projected; otherwise the tensors of all nodes whose label is
                not 2 are concatenated along axis 0 and projected together.
        Returns:
            softmax_linear: Output tensor with the computed logits.
        """
        node_tensors = self.add_model(tree.root)
        if predict_only_root:
            node_tensors = node_tensors[tree.root]
        else:
            # dict.items() works on both Python 2 and 3 (iteritems() is 2-only).
            # NOTE(review): label 2 is presumably the class excluded from
            # training -- confirm against the data loader.
            node_tensors = [tensor for node, tensor in node_tensors.items() if node.label != 2]
            node_tensors = tf.concat(0, node_tensors)
        return self.add_projections(node_tensors)

    def add_model_vars(self):
        '''
        Register every model parameter in the graph so later code can reuse it.

        Scope "Composition":
            embedding:  tensor(vocab_size, embed_size)
            W1:         tensor(2 * embed_size, embed_size)
            b1:         tensor(1, embed_size)
        Scope "Projection":
            U:          tensor(embed_size, label_size)
            bs:         tensor(1, label_size)

        The created variables are not returned; they are fetched again by name
        under the same variable scopes.
        '''
        embed_size = self.config.embed_size
        label_size = self.config.label_size
        vocab_size = len(self.vocab)

        with tf.variable_scope('Composition'):
            for var_name, var_shape in (("embedding", [vocab_size, embed_size]),
                                        ("W1", [embed_size * 2, embed_size]),
                                        ("b1", [1, embed_size])):
                tf.get_variable(var_name, var_shape)
        with tf.variable_scope('Projection'):
            for var_name, var_shape in (("U", [embed_size, label_size]),
                                        ("bs", [1, label_size])):
                tf.get_variable(var_name, var_shape)

    def add_model(self, node):
        """Recursively build the tensors for ``node`` and all of its descendants.

        A leaf's tensor is its word's row of the embedding matrix with a
        leading axis added. An inner node's tensor is the concatenation
        (axis 1) of its left and right child tensors multiplied by W1, plus b1.

        NOTE(review): no nonlinearity is applied after the affine composition
        -- confirm this is intentional.

        Args:
            node: a Node object
        Returns:
            node_tensors: Dict mapping every Node in the subtree rooted at
                ``node`` (including ``node`` itself) to its tensor.
        """
        embed_size = self.config.embed_size
        # Fetch the already-created parameters (reuse=True) instead of making new ones.
        with tf.variable_scope('Composition', reuse=True):
            embed = tf.get_variable("embedding", [len(self.vocab), embed_size])
            W1 = tf.get_variable("W1", [embed_size * 2, embed_size])
            b1 = tf.get_variable("b1", [1, embed_size])

        node_tensors = dict()
        if node.isLeaf:
            word_vector = tf.gather(embed, self.vocab.encode(node.word))
            current = tf.expand_dims(word_vector, 0)
        else:
            # Children first, left before right; their entries are merged in.
            for child in (node.left, node.right):
                node_tensors.update(self.add_model(child))
            children = [node_tensors[node.left], node_tensors[node.right]]
            current = tf.matmul(tf.concat(1, children), W1) + b1
        node_tensors[node] = current
        return node_tensors

    def add_projections(self, node_tensors):
        """Add projections to the composition vectors to compute the raw sentiment scores

        Hint: Reuse the "Projection" variable_scope here
        Args:
            node_tensors: tensor(?, embed_size)
        Returns:
#.........这里部分代码省略.........
开发者ID:kvfrans,项目名称:cs224-solutions,代码行数:103,代码来源:rnn.py

示例5: Model_RNN

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
class Model_RNN(LanguageModel):
    
    def load_data(self):
        """Load lyric pairs, one-hot their labels, build the vocab and encode.

        Sets self.word_counts, self.config.max_steps, self.labels_train,
        self.vocab, self.encoded_train_1 and self.encoded_train_2. The two
        encoded arrays have one row per example and max_steps columns; columns
        beyond a lyric's length stay at 0.
        """
        pair_fname = '../lastfm_train_mappings.txt'
        lyrics_path = '../lyrics/data/lyrics/train/'

        # Each entry of X_train is indexed with [0] and [1] below, each being
        # an iterable of words.
        loaded = get_data(pair_fname, lyrics_path, threshold=100, n_class=self.config.n_class)
        X_train, l_train, self.word_counts, self.config.max_steps = loaded
        n_examples = len(X_train)

        # One-hot labels: row i gets a 1 in column l_train[i].
        self.labels_train = np.zeros((n_examples, self.config.n_class))
        self.labels_train[range(n_examples), l_train] = 1

        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())

        self.encoded_train_1 = np.zeros((n_examples, self.config.max_steps))
        self.encoded_train_2 = np.zeros((n_examples, self.config.max_steps))
        for row, pair in enumerate(X_train):
            first_lyrics = pair[0]
            self.encoded_train_1[row, :len(first_lyrics)] = [self.vocab.encode(w) for w in first_lyrics]
            second_lyrics = pair[1]
            self.encoded_train_2[row, :len(second_lyrics)] = [self.vocab.encode(w) for w in second_lyrics]


    def add_placeholders(self):
        """Create the placeholders for both lyric inputs, labels and lengths.

        X1 / X2 are int32 of shape (None, max_steps), labels is float32 of
        shape (None, n_class), and seq_len1 / seq_len2 are int32 placeholders
        with unspecified shape (used for variable-length sequences).
        """
        steps_shape = (None, self.config.max_steps)
        self.X1 = tf.placeholder(tf.int32, shape=steps_shape, name='X1')
        self.X2 = tf.placeholder(tf.int32, shape=steps_shape, name='X2')
        self.labels = tf.placeholder(tf.float32, shape=(None, self.config.n_class), name='labels')
        self.seq_len1 = tf.placeholder(tf.int32, shape=None, name='seq_len1')
        self.seq_len2 = tf.placeholder(tf.int32, shape=None, name='seq_len2')

    def add_embedding(self):
        """Embed both lyric inputs and split them into per-step tensors.

        Creates variable 'L' of shape (len(self.word_counts), embed_size),
        looks up self.X1 and self.X2 in it, and splits each result along the
        time axis (axis 1).

        Returns:
            (inputs1, inputs2): two lists of max_steps tensors each, with the
            time axis removed.
        """
        L = tf.get_variable('L', shape=(len(self.word_counts.keys()), self.config.embed_size), dtype=tf.float32)
        inputs1 = tf.nn.embedding_lookup(L, self.X1)
        inputs2 = tf.nn.embedding_lookup(L, self.X2)
        # Squeeze only the time axis. A bare tf.squeeze(x) removes every
        # size-1 axis, so it would also drop the batch axis when the batch
        # dimension is 1 (and the embedding axis when embed_size is 1).
        inputs1 = [tf.squeeze(x, [1]) for x in tf.split(1, self.config.max_steps, inputs1)]
        inputs2 = [tf.squeeze(x, [1]) for x in tf.split(1, self.config.max_steps, inputs2)]
        return inputs1, inputs2

    def add_model(self, inputs1, inputs2, seq_len1, seq_len2):
        """Unroll one sigmoid RNN over both input sequences with shared weights.

        Both sequences start from the same all-zero state of shape
        (batch_size, hidden_size) and use the same Whh, Wxh and bias variables
        from scope 'rnn'.

        Args:
            inputs1: per-step inputs of the first sequence, indexed 0..max_steps-1.
            inputs2: per-step inputs of the second sequence, indexed 0..max_steps-1.
            seq_len1: accepted for interface compatibility; not used here.
            seq_len2: accepted for interface compatibility; not used here.

        Returns:
            List of max_steps tensors; element i is the axis-1 concatenation of
            the two hidden states after step i.
        """
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        rnn_outputs1 = []
        rnn_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state
        with tf.variable_scope('rnn'):
            Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32)
            Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size),  dtype=tf.float32)
            b1  = tf.get_variable('bhx', shape=(self.config.hidden_size,),                        dtype=tf.float32)
            for i in range(self.config.max_steps):
                # Same recurrence for both sequences: h <- sigmoid(h.Whh + x.Wxh + b)
                h_curr2 = tf.sigmoid(tf.matmul(h_curr2,Whh) + tf.matmul(inputs2[i],Wxh) + b1)
                h_curr1 = tf.sigmoid(tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1)
                rnn_outputs1.append(h_curr1)
                rnn_outputs2.append(h_curr2)

        rnn_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in range(self.config.max_steps)]
        return rnn_states

    def add_projection(self, rnn_states):
        """Project the last RNN state onto the class scores.

        Args:
            rnn_states: sequence of state tensors; only the final element is used.
        Returns:
            rnn_states[-1] multiplied by 'Whc' (2*hidden_size, n_class) plus
            the bias 'bhc' (n_class,).
        """
        weight_shape = (2*self.config.hidden_size, self.config.n_class)
        Whc = tf.get_variable('Whc', shape=weight_shape)
        bhc = tf.get_variable('bhc', shape=(self.config.n_class,))
        final_state = rnn_states[-1]
        return tf.matmul(final_state, Whc) + bhc

    def add_loss_op(self, y):
        """Return the summed softmax cross-entropy between ``y`` and self.labels."""
        per_example = tf.nn.softmax_cross_entropy_with_logits(y, self.labels)
        return tf.reduce_sum(per_example)
      
    def add_training_op(self, loss):
        """Return a plain gradient-descent step minimizing ``loss`` at rate config.lr."""
        # An Adam optimizer was tried here previously; plain SGD is what is in use.
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr)
        return optimizer.minimize(loss)

    def __init__(self, config):
        self.config = config
        self.load_data()
        self.add_placeholders()

        print 'adsf __init__'
        print self.X1.get_shape
        self.inputs1, self.inputs2 = self.add_embedding()
        self.rnn_states            = self.add_model(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2)
        self.projections           = self.add_projection(self.rnn_states)
        self.loss                  = self.add_loss_op(self.projections)
#.........这里部分代码省略.........
开发者ID:anushabala,项目名称:deep-playlist,代码行数:103,代码来源:model_rnn.py

示例6: RNN_Model

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]

#.........这里部分代码省略.........
        if self.training_op is None:
        # YOUR CODE HERE
            optimizer = tf.train.AdamOptimizer(self.config.lr)#tf.train.GradientDescentOptimizer(self.config.lr)
            #optimizer = tf.train.AdamOptimizer(self.config.lr)
            self.training_op = optimizer.minimize(loss)
        # END YOUR CODE
        return self.training_op

    def predictions(self, y):
        """Return the (cached) argmax-over-axis-1 op for the scores ``y``.

        The op is built on the first call and stored in self.prediction; later
        calls return the stored op and ignore ``y``.

        Args:
            y: tensor(?, label_size)
        Returns:
            The op held in self.prediction.
        """
        if self.prediction is not None:
            return self.prediction
        self.prediction = tf.argmax(y, dimension=1)
        return self.prediction

    def build_feed_dict(self, in_node):
        """Flatten the tree under ``in_node`` into the model's feed dict.

        Nodes are collected with tr.leftTraverse and numbered in collection
        order. Child indices are -1 for leaves, and the word index is -1 for
        nodes whose ``word`` is falsy.

        Returns:
            Dict keyed by the model's placeholders (is_a_leaf, left_child,
            right_child, word_index, labelholder), each mapped to a list with
            one entry per node.
        """
        nodes_list = []
        tr.leftTraverse(in_node, lambda node, args: args.append(node), nodes_list)
        node_to_index = OrderedDict((node, position) for position, node in enumerate(nodes_list))

        leaf_flags = []
        left_ids = []
        right_ids = []
        word_ids = []
        labels = []
        for node in nodes_list:
            leaf_flags.append(node.isLeaf)
            left_ids.append(node_to_index[node.left] if not node.isLeaf else -1)
            right_ids.append(node_to_index[node.right] if not node.isLeaf else -1)
            word_ids.append(self.vocab.encode(node.word) if node.word else -1)
            labels.append(node.label)

        return {
            self.is_a_leaf: leaf_flags,
            self.left_child: left_ids,
            self.right_child: right_ids,
            self.word_index: word_ids,
            self.labelholder: labels,
        }

    def predict(self, trees, weights_path, get_loss = False):
        """Restore saved weights and predict the root label of every tree.

        Args:
            trees: iterable of trees; each tree's ``root`` is fed to the model.
            weights_path: checkpoint path handed to the Saver's restore().
            get_loss: when true, the root loss is evaluated as well.
        Returns:
            (results, losses): one root prediction per tree, and one loss per
            tree when ``get_loss`` is true (otherwise an empty list).
        """
        results, losses = [], []

        # Evaluation is based on the root node only.
        logits = self.root_logits_op()
        root_loss = self.loss_op(logits=logits, labels=self.labelholder[-1:])
        root_prediction_op = self.root_prediction_op()
        with tf.Session() as sess:
            tf.train.Saver().restore(sess, weights_path)
            for tree in trees:
                feed = self.build_feed_dict(tree.root)
                if not get_loss:
                    root_prediction = sess.run(root_prediction_op, feed_dict=feed)
                else:
                    root_prediction, loss = sess.run([root_prediction_op, root_loss], feed_dict=feed)
                    losses.append(loss)
                results.append(root_prediction)
        return results, losses

    #need to rework this: (OP creation needs to be made independent of using OPs)
    def run_epoch(self, new_model = False, verbose=True, epoch=0):
        loss_history = []
开发者ID:kingtaurus,项目名称:cs224d,代码行数:70,代码来源:rnn_tensorarray.py

示例7: Model_RNN

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
class Model_RNN(LanguageModel):
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../data/lyrics/train/'
    
        # X_train is a list of all examples. each examples is a 2-len list. each element is a list of words in lyrics.
        # word_counts is a dictionary that maps
        if self.config.debug:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, '../glove.6B.50d.txt', threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100)
        else:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold_down=100, threshold_up=4000, npos=10000, nneg=10000)

        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
        
        x = collections.Counter(l_train)
        for k in x.keys():
            print 'class:', k, x[k]
        print ''

        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())
        self.wv = self.vocab.get_wv('../glove.6B.50d.txt')

        with open('word_hist.csv', 'w') as f:
            for w in self.word_counts.keys():
                f.write(w+','+str(self.word_counts[w])+'\n')
            
        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       
        self.sequence_len1 = np.array(seq_len1)
        self.sequence_len2 = np.array(seq_len2)

    def add_placeholders(self):
        """Create the placeholders for both lyric inputs, labels and lengths.

        X1 / X2 are int32 of shape (None, max_steps), labels is float32 of
        shape (None, n_class), and seq_len1 / seq_len2 are int32 placeholders
        with unspecified shape (used for variable-length sequences).
        """
        token_shape = (None, self.config.max_steps)
        label_shape = (None, self.config.n_class)
        self.X1 = tf.placeholder(tf.int32, shape=token_shape, name='X1')
        self.X2 = tf.placeholder(tf.int32, shape=token_shape, name='X2')
        self.labels = tf.placeholder(tf.float32, shape=label_shape, name='labels')
        self.seq_len1 = tf.placeholder(tf.int32, shape=None, name='seq_len1')
        self.seq_len2 = tf.placeholder(tf.int32, shape=None, name='seq_len2')

    def add_embedding(self):
        """Embed both inputs with the loaded word vectors, one tensor per step.

        The embedding variable 'L' is initialised from self.wv (converted to
        float32). Each lookup result is split along axis 1 into max_steps
        pieces and that axis is squeezed away.

        Returns:
            (inputs1, inputs2): two lists of max_steps tensors each.
        """
        def unstack_steps(embedded):
            # Split along the time axis, then drop that (now size-1) axis.
            pieces = tf.split(1, self.config.max_steps, embedded)
            return [tf.squeeze(piece, squeeze_dims=[1]) for piece in pieces]

        L = tf.Variable(tf.convert_to_tensor(self.wv, dtype=tf.float32), name='L')
        embedded1 = tf.nn.embedding_lookup(L, self.X1)
        embedded2 = tf.nn.embedding_lookup(L, self.X2)
        inputs1 = unstack_steps(embedded1)
        inputs2 = unstack_steps(embedded2)
        return inputs1, inputs2

    def add_model_rnn(self, inputs1, inputs2, seq_len1, seq_len2):
        """Run one shared sigmoid RNN over each sequence and join the final states.

        Both sequences start from an all-zero state of shape
        (batch_size, hidden_size) and share Whh, Wxh and the bias from scope
        'rnn'.

        Fixes relative to the previous version:
          * the first sequence's hidden state is now actually updated (the
            pre-activation was computed and then discarded, so every output
            was the initial state);
          * the bias has shape (hidden_size,), matching the
            (batch_size, hidden_size) pre-activation it is added to
            (it was 4*hidden_size, which cannot broadcast);
          * the early-exit statement is a real ``break`` (was the undefined
            name ``breaka``).
        NOTE(review): the bias-shape change makes checkpoints saved with the
        old shape incompatible.

        Args:
            inputs1: per-step inputs of the first sequence.
            inputs2: per-step inputs of the second sequence.
            seq_len1: lengths for the first sequence; only element 0 is
                consulted, and only when config.batch_size == 1.
            seq_len2: same, for the second sequence.

        Returns:
            Axis-1 concatenation of the last hidden state of each sequence.
        """
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state

        with tf.variable_scope('rnn'):
            Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32)
            Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size),  dtype=tf.float32)
            b1  = tf.get_variable('bhx', shape=(self.config.hidden_size,),                        dtype=tf.float32)

            for i in range(self.config.max_steps):
                # NOTE(review): this early exit can only fire if seq_len1[0]
                # compares equal to a Python int at graph-build time -- confirm
                # what callers pass here.
                if self.config.batch_size==1 and i==seq_len1[0]:
                    break
                h_curr1 = tf.sigmoid(tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1)

            for i in range(self.config.max_steps):
                if self.config.batch_size==1 and i==seq_len2[0]:
                    break
                h_curr2 = tf.sigmoid(tf.matmul(h_curr2,Whh) + tf.matmul(inputs2[i],Wxh) + b1)

        # h_curr* hold the last computed state of each sequence (or the
        # initial state if a loop ran zero steps).
        rnn_final_states = tf.concat(1, [h_curr1, h_curr2])
        return rnn_final_states

    def add_model_lstm(self, inputs1, inputs2, seq_len1, seq_len2):
        #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32)
        self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32)
        lstm_outputs1 = []
        lstm_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state
        cell1   = self.initial_state
        cell2   = self.initial_state

        with tf.variable_scope('lstm'):
#.........这里部分代码省略.........
开发者ID:kalpitdixit,项目名称:deep-playlist,代码行数:103,代码来源:model_rnn.py

示例8: RNNLM_Model

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
class RNNLM_Model(LanguageModel):

  def load_data(self, debug=False):
    """Build the vocabulary from PTB 'train' and encode the train/valid splits.

    Sets self.vocab, self.encoded_train and self.encoded_valid (int32 arrays
    of word indices). Encoding of the 'test' split is currently disabled, so
    self.encoded_test is not created here.

    Args:
      debug: When true, the encoded splits are cut to their first 1024 entries.
    """
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    #self.encoded_test = np.array(
        #[self.vocab.encode(word) for word in get_ptb_dataset('test')],
        #dtype=np.int32)
    if debug:
      num_debug = 1024
      self.encoded_train = self.encoded_train[:num_debug]  # truncated training data
      self.encoded_valid = self.encoded_valid[:num_debug]
      # encoded_test is not built above, so slicing it unconditionally raised
      # AttributeError in debug mode; only truncate it if something set it.
      if hasattr(self, 'encoded_test'):
        self.encoded_test = self.encoded_test[:num_debug]

  def add_placeholders(self):
    """Create the input (int32), label (float32) and dropout (float32) placeholders.

    Input and labels have shape (None, num_steps); the dropout placeholder has
    no declared shape.
    """
    batch_by_steps = (None, self.config.num_steps)
    self.input_placeholder = tf.placeholder(tf.int32, batch_by_steps)
    self.labels_placeholder = tf.placeholder(tf.float32, batch_by_steps)
    self.dropout_placeholder = tf.placeholder(tf.float32)

  
  def add_embedding(self):
    """Turn word indices into word vectors, one tensor per time step.

    Returns:
      A list with num_steps tensors: the embedding lookup of
      self.input_placeholder split along axis 1, with that axis squeezed away.
    """
    with tf.device('/cpu:0'):
      table_shape = (len(self.vocab), self.config.embed_size)
      L = tf.get_variable("Embedding", table_shape)
      looked_up = tf.nn.embedding_lookup(L, self.input_placeholder)
      per_step = tf.split(1, self.config.num_steps, looked_up)
      # The result is a plain Python list of tensors.
      return [tf.squeeze(step, [1]) for step in per_step]

  def add_projection(self, rnn_outputs):
    """Map each hidden-state tensor to scores over the vocabulary.

    Args:
      rnn_outputs: iterable of hidden-state tensors.
    Returns:
      A list holding x.U + b_2 for every x in rnn_outputs (no softmax is
      applied here).
    """
    vocab_size = len(self.vocab)
    with tf.variable_scope("projection"):
      U = tf.get_variable("U", shape=(self.config.hidden_size, vocab_size))
      b_2 = tf.get_variable("b_2", shape=(vocab_size,))
    outputs = []
    for hidden in rnn_outputs:
      outputs.append(tf.matmul(hidden, U) + b_2)
    return outputs

  def add_loss_op(self, output):
    """Build the sequence loss of ``output`` against the flattened labels.

    Every one of the batch_size * num_steps positions gets weight 1.
    """
    flat_labels = tf.reshape(self.labels_placeholder, [-1])
    position_weights = tf.ones([self.config.batch_size * self.config.num_steps])
    return sequence_loss([output], [flat_labels], [position_weights])

  def add_training_op(self, loss):
    """Return an Adam optimisation step that minimises ``loss`` at rate config.lr."""
    return tf.train.AdamOptimizer(self.config.lr).minimize(loss)
  
  def __init__(self, config):
    """Load the data and build the whole graph, keeping each stage on self."""
    self.config = config
    self.load_data(debug=False)
    self.add_placeholders()

    # Graph stages in dependency order: embeddings -> RNN -> vocabulary scores.
    self.inputs = self.add_embedding()
    self.rnn_outputs = self.add_model(self.inputs)
    self.outputs = self.add_projection(self.rnn_outputs)
    self.predictions = [tf.nn.softmax(tf.cast(scores, 'float64')) for scores in self.outputs]

    # Join the per-step scores along axis 1 and flatten them to
    # (-1, len(vocab)) rows for the loss.
    joined_scores = tf.concat(1, self.outputs)
    flat_scores = tf.reshape(joined_scores, [-1, len(self.vocab)])
    self.calculate_loss = self.add_loss_op(flat_scores)
    self.train_step = self.add_training_op(self.calculate_loss)


  def add_model(self, inputs):
    """Unroll a sigmoid RNN with dropout over the per-step inputs.

    Dropout (driven by self.dropout_placeholder) is applied to every step's
    input and to every emitted output, but the state carried to the next step
    is the un-dropped one. Sets self.initial_state (zeros of shape
    (batch_size, hidden_size)) and self.final_state.

    Args:
      inputs: iterable of per-step input tensors.
    Returns:
      List with one (dropped-out) hidden-state tensor per input step.
    """
    cfg = self.config
    hidden_size, embed_size, batch_size = cfg.hidden_size, cfg.embed_size, cfg.batch_size
    with tf.variable_scope("RNN"):
      H = tf.get_variable("H", shape=(hidden_size, hidden_size))
      I = tf.get_variable("I", shape=(embed_size, hidden_size))
      b_1 = tf.get_variable("b_1", shape=(hidden_size,))
    self.initial_state = tf.zeros([batch_size, hidden_size])

    state = self.initial_state
    rnn_outputs = []
    for step_input in inputs:
      dropped_input = tf.nn.dropout(step_input, self.dropout_placeholder)
      state = tf.sigmoid(tf.matmul(state, H) + tf.matmul(dropped_input, I) + b_1)
      rnn_outputs.append(tf.nn.dropout(state, self.dropout_placeholder))
    self.final_state = state
    return rnn_outputs


  def run_epoch(self, session, data, train_op=None, verbose=10):
    config = self.config
    dp = config.dropout
    if not train_op:
#.........这里部分代码省略.........
开发者ID:zbxzc35,项目名称:RNN-2,代码行数:103,代码来源:RNN.py

示例9: RNNLM_Model

# 需要导入模块: from utils import Vocab [as 别名]
# 或者: from utils.Vocab import encode [as 别名]
class RNNLM_Model(LanguageModel):

  def load_data(self, debug=False):
    """Build the vocabulary from PTB 'train' and encode train/valid/test.

    Each split is stored as an int32 array of word indices on
    self.encoded_train, self.encoded_valid and self.encoded_test.

    Args:
      debug: When true, every encoded split is cut to its first 1024 entries.
    """
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))

    encoded = {}
    for split in ('train', 'valid', 'test'):
      word_ids = [self.vocab.encode(word) for word in get_ptb_dataset(split)]
      encoded[split] = np.array(word_ids, dtype=np.int32)
    self.encoded_train = encoded['train']
    self.encoded_valid = encoded['valid']
    self.encoded_test = encoded['test']

    if debug:
      num_debug = 1024
      self.encoded_train = self.encoded_train[:num_debug]
      self.encoded_valid = self.encoded_valid[:num_debug]
      self.encoded_test = self.encoded_test[:num_debug]

  def add_placeholders(self):
    """Create the placeholders that feed data into the model.

    A "None" in a placeholder's shape means that dimension is flexible.

    Stored on self (don't change the variable names):

      self.input_placeholder:   tf.int32, shape (None, num_steps)
      self.labels_placeholder:  tf.int32, shape (None, num_steps)
      self.dropout_placeholder: tf.float32, no declared shape
    """
    steps = self.config.num_steps
    self.input_placeholder = tf.placeholder(tf.int32, shape=(None, steps))
    self.labels_placeholder = tf.placeholder(tf.int32, shape=(None, steps))
    self.dropout_placeholder = tf.placeholder(tf.float32)
  
  def add_embedding(self):
    """Embed the input word indices and return one tensor per time step.

    The embedding variable "L" has shape (len(self.vocab), embed_size) and is
    initialised uniformly in [-1, 1). The input placeholder is transposed to
    time-major order before the lookup, so the result is split along axis 0.

    Returns:
      inputs: List of num_steps tensors, each with the leading (time) axis
              squeezed away.
    """
    # The embedding lookup is pinned to the CPU.
    with tf.device('/cpu:0'):
      initial_values = tf.random_uniform(
          [len(self.vocab), self.config.embed_size], -1.0, 1.0)
      L = tf.Variable(initial_values, name="L")
      # (batch_size, num_steps) -> (num_steps, batch_size)
      time_major_ids = tf.transpose(self.input_placeholder, perm=[1,0])
      embed = tf.nn.embedding_lookup(L, time_major_ids)
      step_slices = tf.split(0, self.config.num_steps, embed)
      inputs = [tf.squeeze(step_slice, [0]) for step_slice in step_slices]
      return inputs

  def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution
    over the vocabulary.

    Hint: Here are the dimensions of the variables you will need to
          create 
          
          U:   (hidden_size, len(vocab))
          b_2: (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be
                   a tensor of shape (batch_size, hidden_size(LIBIN edited)).
    Returns:
      outputs: List of length num_steps, each a tensor of shape
               (batch_size, len(vocab))
    """
    ### YOUR CODE HERE
    with tf.variable_scope("projection", initializer = xavier_weight_init(), reuse=None):
#.........这里部分代码省略.........
开发者ID:lbbc1117,项目名称:CS224d-2016,代码行数:103,代码来源:q3_RNNLM.py


注:本文中的utils.Vocab.encode方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。