

Python tensorflow.string_split Method Code Examples

This article collects typical usage examples of the tensorflow.string_split method in Python. If you are unsure what tensorflow.string_split does or how to use it in practice, the curated code examples below may help. You can also explore further usage examples from the tensorflow module in which this method lives.


The following presents 15 code examples of tensorflow.string_split, sorted by popularity by default.
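
Before the examples, here is a minimal sketch of tf.string_split in TensorFlow 1.x: it returns a tf.SparseTensor whose values are the tokens and whose indices and dense_shape describe the ragged row layout. The input strings and session usage below are illustrative only.

import tensorflow as tf

sentences = tf.constant(["hello world", "hello"])
tokens = tf.string_split(sentences, delimiter=" ")  # a SparseTensor

with tf.Session() as sess:
    indices, values, dense_shape = sess.run(
        [tokens.indices, tokens.values, tokens.dense_shape])
    # values      -> [b'hello', b'world', b'hello']
    # indices     -> [[0, 0], [0, 1], [1, 0]]
    # dense_shape -> [2, 2]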

Example 1: load_data

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def load_data(input_file, input_vocab):
  """Returns an iterator over the input file.

  Args:
    input_file: The input text file.
    input_vocab: The input vocabulary.

  Returns:
    A dataset batch iterator.
  """
  dataset = tf.data.TextLineDataset(input_file)
  dataset = dataset.map(lambda x: tf.string_split([x]).values)
  dataset = dataset.map(input_vocab.lookup)
  dataset = dataset.map(lambda x: {
      "ids": x,
      "length": tf.shape(x)[0]})
  dataset = dataset.padded_batch(64, {
      "ids": [None],
      "length": []})
  return dataset.make_initializable_iterator() 
Developer: akzaidi, Project: fine-lm, Lines of code: 22, Source file: inference.py
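
A hedged usage sketch (TensorFlow 1.x) of how the iterator returned by load_data might be consumed; the file names and the assumption that input_vocab is a tf.contrib.lookup index table are hypothetical.

input_vocab = tf.contrib.lookup.index_table_from_file("vocab.txt", default_value=0)
iterator = load_data("test.txt", input_vocab)
next_batch = iterator.get_next()

with tf.Session() as sess:
    sess.run(tf.tables_initializer())  # initialize the vocabulary lookup table
    sess.run(iterator.initializer)     # initialize the dataset iterator
    batch = sess.run(next_batch)
    # batch["ids"]: [batch_size, max_len] word ids, batch["length"]: [batch_size]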

Example 2: multiple_content_lookup

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def multiple_content_lookup(content, vocab_table, ids, name=None):
    """

    :param content:
    :param vocab_table:
    :param ids:
    :param name:
    :return: 2-D [batch_size, max_length_in_batch] content id matrix,
             1-D [batch_size] content len vector
    """
    with tf.name_scope(name, 'multiple_content_lookup', [content, vocab_table, ids]):
        content_list = tf.nn.embedding_lookup(content, ids)

        extracted_sparse_content = tf.string_split(content_list, delimiter=' ')

        sparse_content = tf.SparseTensor(indices=extracted_sparse_content.indices,
                                         values=vocab_table.lookup(extracted_sparse_content.values),
                                         dense_shape=extracted_sparse_content.dense_shape)

        extracted_content_ids = tf.sparse_tensor_to_dense(sparse_content,
                                                          default_value=0, name='dense_content')
        extracted_content_len = tf.reduce_sum(tf.cast(tf.not_equal(extracted_content_ids, 0), tf.int32), axis=-1)

        return extracted_content_ids, extracted_content_len 
Developer: bxshi, Project: ConMask, Lines of code: 26, Source file: content.py
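
A hedged usage sketch of multiple_content_lookup (TensorFlow 1.x); the content strings, vocabulary, and ids below are made-up illustration data.

content = tf.constant(["the quick brown fox", "lazy dog"])
vocab_table = tf.contrib.lookup.index_table_from_tensor(
    mapping=["<PAD>", "the", "quick", "brown", "fox", "lazy", "dog"],
    default_value=0)
ids = tf.constant([1, 0])

content_ids, content_len = multiple_content_lookup(content, vocab_table, ids)

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run([content_ids, content_len]))
    # content_ids -> [[5, 6, 0, 0], [1, 2, 3, 4]], content_len -> [2, 4]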

Example 3: entity_content_embedding_lookup

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def entity_content_embedding_lookup(entities, content, content_len, vocab_table, word_embedding, str_pad, name=None):
    """ Lookup entity word embeddings given a flatten 1-D entity id list and content lookup table

    :param entities: Must be a 1-D entity vector
    :param content:
    :param content_len:
    :param vocab_table:
    :param word_embedding:
    :param str_pad:
    :param name:
    :return:
    """
    with tf.device('/cpu:0'):
        with tf.name_scope(name, 'entity_content_lookup',
                           [entities, content, content_len, vocab_table, word_embedding]):
            ent_content = tf.string_split(tf.nn.embedding_lookup(content, entities, name='ent_content'), delimiter=' ')
            content_len = tf.nn.embedding_lookup(content_len, entities, name='ent_content_len')
            ent_content_dense = tf.sparse_tensor_to_dense(ent_content,
                                                          default_value=str_pad,
                                                          name='ent_content_dense')
            ent_embedding = tf.nn.embedding_lookup(word_embedding,
                                                   vocab_table.lookup(ent_content_dense,
                                                                      name='ent_content_ids'))

            return ent_embedding, content_len 
Developer: bxshi, Project: ConMask, Lines of code: 27, Source file: content.py

Example 4: get_true_tails

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def get_true_tails(ent_rel_str, targets_lookup_table, targets, name=None):
    """
    Given an 'ent\trel' pair string, return the list of its true target entity strings.
    :param ent_rel_str:
    :param targets_lookup_table:
    :param targets:
    :param name:
    :return:
    """
    with tf.name_scope(name, 'get_true_tails', [ent_rel_str, targets_lookup_table, targets]):
        target_entities_lookup_id = targets_lookup_table.lookup(ent_rel_str)
        # Check for -1 here (key not found); if so, shift it to -2 so the out-of-range id surfaces as an error later
        target_entities_lookup_id = tf.where(tf.equal(target_entities_lookup_id, -1),
                                             target_entities_lookup_id - 1,
                                             target_entities_lookup_id)
        # sparseTensor
        str_targets = tf.string_split(tf.nn.embedding_lookup(targets, target_entities_lookup_id), delimiter=' ')
        return str_targets.values 
Developer: bxshi, Project: ConMask, Lines of code: 19, Source file: corruption.py

Example 5: read_word_freq

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def read_word_freq(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    lines = tf.string_split([value], "\n")

    with tf.Session() as sess:
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        sess.run([lines])
        lines_eval = lines.eval()
        result = []
        for line in lines_eval.values:
            s = line.split()
            result.append((s[0], int(s[1])))
        coord.request_stop()
        coord.join(threads)
    return result 
Developer: koala-ai, Project: tensorflow_nlp, Lines of code: 21, Source file: word2vec.py
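
A hedged usage sketch of read_word_freq; the file name and its contents are assumptions (one "word count" pair per line).

# word_freq.txt (hypothetical contents):
#   hello 12
#   world 7
freq = read_word_freq("word_freq.txt")
print(freq[:2])  # e.g. [(b'hello', 12), (b'world', 7)]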

Example 6: testStringSplitWithDelimiter

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def testStringSplitWithDelimiter(self):
    strings = ["hello|world", "hello world"]

    with self.test_session() as sess:
      self.assertRaises(
          ValueError, tf.string_split, strings, delimiter="delimiter")

      self.assertRaises(
          ValueError, tf.string_split, strings, delimiter=["|", ""])

      self.assertRaises(ValueError, tf.string_split, strings, delimiter=["a"])

      tokens = tf.string_split(strings, delimiter="|")
      indices, values, shape = sess.run(tokens)
      self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
      self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
      self.assertAllEqual(shape, [2, 2]) 
Developer: tobegit3hub, Project: deep_image_model, Lines of code: 19, Source file: string_split_op_test.py

Example 7: testStringSplitWithDelimiterTensor

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def testStringSplitWithDelimiterTensor(self):
    strings = ["hello|world", "hello world"]

    with self.test_session() as sess:
      delimiter = tf.placeholder(tf.string)

      tokens = tf.string_split(strings, delimiter=delimiter)

      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(tokens, feed_dict={delimiter: ["a", "b"]})
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(tokens, feed_dict={delimiter: ["a"]})
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(tokens, feed_dict={delimiter: "abc"})
      indices, values, shape = sess.run(tokens, feed_dict={delimiter: "|"})

      self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
      self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
      self.assertAllEqual(shape, [2, 2]) 
Developer: tobegit3hub, Project: deep_image_model, Lines of code: 21, Source file: string_split_op_test.py

Example 8: get_label

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def get_label(self, text, null_character=u'\u2591'):
    """ Returns the ids of the corresponding text,

        Args:
          text: a tensor with shape [batch_size, lexicon_size]
                         and type string
          null_character: a unicode character used to replace '<null>'
          character. the default value is a light shade block '░'.
    """
    batch_size = text.shape[0].value
    lexicon_size = text.shape[1].value
    text = tf.reshape(text, [-1])
    sp_text = tf.string_split(text, delimiter='')
    sp_text = tf.sparse_reset_shape(sp_text, [batch_size*lexicon_size,
                                              self.max_sequence_length])
    sp_text = tf.sparse_tensor_to_dense(sp_text, default_value=null_character)
    ids = self.invert_table.lookup(sp_text)
    ids = tf.reshape(ids, [batch_size, lexicon_size, self.max_sequence_length])
    return tf.to_int32(ids) 
Developer: FangShancheng, Project: conv-ensemble-str, Lines of code: 21, Source file: utils.py

Example 9: create_trg_dataset

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def create_trg_dataset(input_dataset,
                       input_data_type,
                       word_vocab_index,
                       word_max_length,
                       word_pad,
                       word_sos,
                       word_eos,
                       word_placeholder_enable,
                       num_parallel):
    """create dataset for input target data"""
    dataset = input_dataset
    
    if input_data_type == "span":
        dataset = dataset.map(lambda span: tf.string_split([span], delimiter='|').values, num_parallel_calls=num_parallel)
        dataset = dataset.map(lambda span: tf.string_to_number(span, out_type=tf.int32), num_parallel_calls=num_parallel)
        dataset = dataset.map(lambda span: tf.expand_dims(span, axis=-1), num_parallel_calls=num_parallel)
    elif input_data_type == "text":
        dataset = dataset.map(lambda sent: generate_word_feat(sent,
            word_vocab_index, word_max_length, word_pad, word_sos, word_eos,
            word_placeholder_enable), num_parallel_calls=num_parallel)
    
    return dataset 
Developer: stevezheng23, Project: reading_comprehension_tf, Lines of code: 24, Source file: data_util.py

Example 10: generate_word_feat

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def generate_word_feat(sentence,
                       word_vocab_index,
                       word_max_length,
                       word_pad,
                       word_sos,
                       word_eos,
                       word_placeholder_enable):
    """generate word feature for sentence"""
    words = tf.string_split([sentence], delimiter=' ').values
    if word_placeholder_enable == True:
        words = tf.concat([[word_sos], words[:word_max_length], [word_eos],
            tf.constant(word_pad, shape=[word_max_length])], axis=0)
        word_max_length = word_max_length + 2
    else:
        words = tf.concat([words[:word_max_length],
            tf.constant(word_pad, shape=[word_max_length])], axis=0)
    words = tf.reshape(words[:word_max_length], shape=[word_max_length])
    words = tf.cast(word_vocab_index.lookup(words), dtype=tf.int32)
    words = tf.expand_dims(words, axis=-1)
    
    return words 
Developer: stevezheng23, Project: reading_comprehension_tf, Lines of code: 23, Source file: data_util.py
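
A hedged usage sketch of generate_word_feat (TensorFlow 1.x); the vocabulary and sentence are illustration data only.

word_vocab_index = tf.contrib.lookup.index_table_from_tensor(
    mapping=["<pad>", "<s>", "</s>", "hello", "world"], default_value=0)

word_ids = generate_word_feat(tf.constant("hello world"), word_vocab_index,
                              word_max_length=5, word_pad="<pad>",
                              word_sos="<s>", word_eos="</s>",
                              word_placeholder_enable=True)

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(word_ids))  # shape [7, 1]: <s> hello world </s> plus <pad> ids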

Example 11: decode

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def decode(self, data, items):
    decoded_items = {}

    # Split tokens
    tokens = tf.string_split([data], delimiter=self.delimiter).values

    # Optionally prepend a special token
    if self.prepend_token is not None:
      tokens = tf.concat([[self.prepend_token], tokens], 0)

    # Optionally append a special token
    if self.append_token is not None:
      tokens = tf.concat([tokens, [self.append_token]], 0)

    decoded_items[self.length_feature_name] = tf.size(tokens)
    decoded_items[self.tokens_feature_name] = tokens
    return [decoded_items[_] for _ in items] 
Developer: akanimax, Project: natural-language-summary-generation-from-structured-data, Lines of code: 19, Source file: split_tokens_decoder.py

Example 12: parse_raw_text

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def parse_raw_text(sentence):
  """Splits text tensor by word to sparse sequence of tokens.

  Args:
    sentence: `tf.string`, with text record to split.

  Returns:
    Dictionary mapping feature name to tensors with the following entries
    `constants.TOKENS` mapping to a `SparseTensor` and
    `constants.SEQUENCE_LENGTH` mapping to a one-dimensional integer `Tensor`.

  """

  tokens = tf.regex_replace(sentence, _CHAR_TO_FILTER_OUT, ' ',
                            replace_global=True)
  sparse_sequence = tf.string_split(tokens)
  features = {
      constants.TOKENS: sparse_sequence,
      constants.SEQUENCE_LENGTH: get_sparse_tensor_size(sparse_sequence)
  }
  return features 
Developer: GoogleCloudPlatform, Project: professional-services, Lines of code: 23, Source file: input_fn.py
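
parse_raw_text calls a helper get_sparse_tensor_size that is not shown in this snippet; a minimal, hypothetical sketch of such a helper (the project's real implementation may differ) is:

def get_sparse_tensor_size(sparse_tensor):
    # Count the tokens stored in the SparseTensor produced by tf.string_split.
    return tf.size(sparse_tensor.values)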

Example 13: generator

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def generator(ln):
    splits = tf.string_split([ln], delimiter=',')
    label = splits.values[0]
    # Parse the dense (integer) features
    features = {}
    for i in range(1, 14):
        features['I'+str(i)] = tf.string_to_number(splits.values[i], tf.int64)

    return features, label 
Developer: wdxtub, Project: deep-learning-note, Lines of code: 11, Source file: 2_adanet_avazu.py
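
A hedged sketch of wiring generator into a tf.data pipeline (TensorFlow 1.x); the CSV file name and its column layout (label first, then 13 integer features) are assumptions.

dataset = tf.data.TextLineDataset("train.csv")
dataset = dataset.map(generator, num_parallel_calls=4)
dataset = dataset.batch(32)
features, label = dataset.make_one_shot_iterator().get_next()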

Example 14: __init__

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def __init__(self, config, batch_size, one_hot=False):
        self.lookup = None
        reader = tf.TextLineReader()
        filename_queue = tf.train.string_input_producer(["chargan.txt"])
        key, x = reader.read(filename_queue)
        vocabulary = self.get_vocabulary()

        table = tf.contrib.lookup.string_to_index_table_from_tensor(
            mapping = vocabulary, default_value = 0)

        x = tf.string_join([x, tf.constant(" " * 64)]) 
        x = tf.substr(x, [0], [64])
        x = tf.string_split(x,delimiter='')
        x = tf.sparse_tensor_to_dense(x, default_value=' ')
        x = tf.reshape(x, [64])
        x = table.lookup(x)
        self.one_hot = one_hot
        if one_hot:
            x = tf.one_hot(x, len(vocabulary))
            x = tf.cast(x, dtype=tf.float32)
            x = tf.reshape(x, [1, int(x.get_shape()[0]), int(x.get_shape()[1]), 1])
        else:
            x = tf.cast(x, dtype=tf.float32)
            x -= len(vocabulary)/2.0
            x /= len(vocabulary)/2.0
            x = tf.reshape(x, [1,1, 64, 1])

        num_preprocess_threads = 8

        x = tf.train.shuffle_batch(
          [x],
          batch_size=batch_size,
          num_threads=num_preprocess_threads,
          capacity= 5000,
          min_after_dequeue=500,
          enqueue_many=True)

        self.x = x
        self.table = table 
Developer: HyperGAN, Project: HyperGAN, Lines of code: 41, Source file: common.py

Example 15: make_preprocessing_fn

# Required module: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def make_preprocessing_fn(frequency_threshold):
  """Creates a preprocessing function for reddit.

  Args:
    frequency_threshold: The frequency_threshold used when generating
      vocabularies for categorical and text features.

  Returns:
    A preprocessing function.
  """

  def preprocessing_fn(inputs):
    """User defined preprocessing function for reddit columns.

    Args:
      inputs: dictionary of input `tensorflow_transform.Column`.
    Returns:
      A dictionary of `tensorflow_transform.Column` representing the transformed
          columns.
    """
    # TODO(b/35001605) Make this "passthrough" more DRY.
    result = {'score': inputs['score'], 'toplevel': inputs['toplevel']}

    result['subreddit_id'] = tft.string_to_int(
        inputs['subreddit'], frequency_threshold=frequency_threshold)

    for name in ('author', 'comment_body', 'comment_parent_body'):
      words = tf.string_split(inputs[name])
      # TODO(b/33467613) Translate these to bag-of-words style sparse features.
      result[name + '_bow'] = tft.string_to_int(
          words, frequency_threshold=frequency_threshold)

    return result

  return preprocessing_fn 
Developer: GoogleCloudPlatform, Project: cloudml-samples, Lines of code: 37, Source file: reddit.py


Note: The tensorflow.string_split examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Please consult the corresponding project's license before redistributing or reusing the code, and do not reproduce this article without permission.