This article collects typical usage examples of the Python method tensorflow.string_split. If you are wondering how exactly tensorflow.string_split is used, or are looking for working examples of it, the curated code samples below may help. You can also explore further usage examples of the tensorflow module in which this method lives.
The following presents 15 code examples of tensorflow.string_split, ordered by popularity by default. Upvoting the examples you like or find useful helps the system recommend better Python code samples.
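Before the examples, here is a minimal sketch of what tf.string_split itself returns (assumes TensorFlow 1.x; the input strings are illustrative): it produces a SparseTensor whose values are the tokens and whose indices record (row, token position) pairs.

import tensorflow as tf

strings = tf.constant(["hello world", "a b c"])
tokens = tf.string_split(strings, delimiter=" ")  # SparseTensor

with tf.Session() as sess:
    sp = sess.run(tokens)
    print(sp.values)       # [b'hello' b'world' b'a' b'b' b'c']
    print(sp.indices)      # [[0 0] [0 1] [1 0] [1 1] [1 2]]
    print(sp.dense_shape)  # [2 3]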
Example 1: load_data
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def load_data(input_file, input_vocab):
    """Returns an iterator over the input file.

    Args:
      input_file: The input text file.
      input_vocab: The input vocabulary.

    Returns:
      A dataset batch iterator.
    """
    dataset = tf.data.TextLineDataset(input_file)
    dataset = dataset.map(lambda x: tf.string_split([x]).values)
    dataset = dataset.map(input_vocab.lookup)
    dataset = dataset.map(lambda x: {
        "ids": x,
        "length": tf.shape(x)[0]})
    dataset = dataset.padded_batch(64, {
        "ids": [None],
        "length": []})
    return dataset.make_initializable_iterator()
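A hedged usage sketch of load_data (assumes TensorFlow 1.x; the vocabulary helper and file names below are illustrative assumptions, not part of the original example):

input_vocab = tf.contrib.lookup.index_table_from_file("vocab.txt", num_oov_buckets=1)  # hypothetical vocab file
iterator = load_data("input.txt", input_vocab)                                         # hypothetical input file
next_batch = iterator.get_next()
with tf.Session() as sess:
    sess.run([tf.tables_initializer(), iterator.initializer])
    batch = sess.run(next_batch)  # {"ids": padded int64 matrix, "length": int32 vector}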
Example 2: multiple_content_lookup
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def multiple_content_lookup(content, vocab_table, ids, name=None):
    """
    :param content: 1-D string tensor, one space-separated content string per entity
    :param vocab_table: lookup table mapping words to integer ids
    :param ids: 1-D entity id tensor used to gather rows from content
    :param name: optional name scope
    :return: 2-D [batch_size, max_length_in_batch] content id matrix,
             1-D [batch_size] content len vector
    """
    with tf.name_scope(name, 'multiple_content_lookup', [content, vocab_table, ids]):
        content_list = tf.nn.embedding_lookup(content, ids)
        extracted_sparse_content = tf.string_split(content_list, delimiter=' ')
        sparse_content = tf.SparseTensor(indices=extracted_sparse_content.indices,
                                         values=vocab_table.lookup(extracted_sparse_content.values),
                                         dense_shape=extracted_sparse_content.dense_shape)
        extracted_content_ids = tf.sparse_tensor_to_dense(sparse_content,
                                                          default_value=0,
                                                          name='dense_content')
        extracted_content_len = tf.reduce_sum(
            tf.cast(tf.not_equal(extracted_content_ids, 0), tf.int32), axis=-1)
        return extracted_content_ids, extracted_content_len
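A hedged usage sketch of multiple_content_lookup (TensorFlow 1.x; the content strings, vocabulary, and entity ids are illustrative):

content = tf.constant(["the quick fox", "lazy dog"])   # one space-separated string per entity
vocab_table = tf.contrib.lookup.index_table_from_tensor(
    tf.constant(["<pad>", "the", "quick", "fox", "lazy", "dog"]), default_value=0)
ids = tf.constant([1, 0])                               # entity ids to gather
content_ids, content_len = multiple_content_lookup(content, vocab_table, ids)
with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run([content_ids, content_len]))  # [[4 5 0] [1 2 3]], [2 3]

Note that id 0 doubles as padding here, so any word that maps to 0 is not counted by the not_equal-based length computation.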
Example 3: entity_content_embedding_lookup
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def entity_content_embedding_lookup(entities, content, content_len, vocab_table,
                                    word_embedding, str_pad, name=None):
    """Look up entity word embeddings given a flat 1-D entity id list and a content lookup table.

    :param entities: Must be a 1-D entity id vector
    :param content: 1-D string tensor, one space-separated content string per entity
    :param content_len: 1-D int tensor with the content length of each entity
    :param vocab_table: lookup table mapping words to integer ids
    :param word_embedding: 2-D word embedding matrix
    :param str_pad: padding string used when densifying the split content
    :param name: optional name scope
    :return: entity content word embeddings and the gathered content lengths
    """
    with tf.device('/cpu:0'):
        with tf.name_scope(name, 'entity_content_lookup',
                           [entities, content, content_len, vocab_table, word_embedding]):
            ent_content = tf.string_split(
                tf.nn.embedding_lookup(content, entities, name='ent_content'), delimiter=' ')
            content_len = tf.nn.embedding_lookup(content_len, entities, name='ent_content_len')
            ent_content_dense = tf.sparse_tensor_to_dense(ent_content,
                                                          default_value=str_pad,
                                                          name='ent_content_dense')
            ent_embedding = tf.nn.embedding_lookup(word_embedding,
                                                   vocab_table.lookup(ent_content_dense,
                                                                      name='ent_content_ids'))
            return ent_embedding, content_len
Example 4: get_true_tails
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def get_true_tails(ent_rel_str, targets_lookup_table, targets, name=None):
    """Given a tab-separated "ent<TAB>rel" pair, return the list of string targets.

    :param ent_rel_str: string tensor of tab-separated entity-relation keys
    :param targets_lookup_table: lookup table mapping each key to a row index in targets
    :param targets: 1-D string tensor, one space-separated target list per row
    :param name: optional name scope
    :return: 1-D string tensor of target entities
    """
    with tf.name_scope(name, 'get_true_tails', [ent_rel_str, targets_lookup_table, targets]):
        target_entities_lookup_id = targets_lookup_table.lookup(ent_rel_str)
        # If the lookup missed (returned -1), shift the id to -2 so the later
        # embedding_lookup fails loudly with an out-of-range index.
        target_entities_lookup_id = tf.where(tf.equal(target_entities_lookup_id, -1),
                                             target_entities_lookup_id - 1,
                                             target_entities_lookup_id)
        # str_targets is a SparseTensor; only its flat values are returned.
        str_targets = tf.string_split(
            tf.nn.embedding_lookup(targets, target_entities_lookup_id), delimiter=' ')
        return str_targets.values
Example 5: read_word_freq
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def read_word_freq(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    lines = tf.string_split([value], "\n")

    with tf.Session() as sess:
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        sess.run([lines])
        lines_eval = lines.eval()
        result = []
        for line in lines_eval.values:
            s = line.split()
            result.append((s[0], int(s[1])))
        coord.request_stop()
        coord.join(threads)

    return result
Example 6: testStringSplitWithDelimiter
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def testStringSplitWithDelimiter(self):
    strings = ["hello|world", "hello world"]

    with self.test_session() as sess:
        self.assertRaises(
            ValueError, tf.string_split, strings, delimiter="delimiter")
        self.assertRaises(
            ValueError, tf.string_split, strings, delimiter=["|", ""])
        self.assertRaises(ValueError, tf.string_split, strings, delimiter=["a"])

        tokens = tf.string_split(strings, delimiter="|")
        indices, values, shape = sess.run(tokens)
        self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
        self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
        self.assertAllEqual(shape, [2, 2])
Example 7: testStringSplitWithDelimiterTensor
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def testStringSplitWithDelimiterTensor(self):
    strings = ["hello|world", "hello world"]

    with self.test_session() as sess:
        delimiter = tf.placeholder(tf.string)
        tokens = tf.string_split(strings, delimiter=delimiter)

        with self.assertRaises(tf.errors.InvalidArgumentError):
            sess.run(tokens, feed_dict={delimiter: ["a", "b"]})
        with self.assertRaises(tf.errors.InvalidArgumentError):
            sess.run(tokens, feed_dict={delimiter: ["a"]})
        with self.assertRaises(tf.errors.InvalidArgumentError):
            sess.run(tokens, feed_dict={delimiter: "abc"})

        indices, values, shape = sess.run(tokens, feed_dict={delimiter: "|"})
        self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
        self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
        self.assertAllEqual(shape, [2, 2])
Example 8: get_label
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def get_label(self, text, null_character=u'\u2591'):
    """Returns the ids of the corresponding text.

    Args:
      text: a tensor with shape [batch_size, lexicon_size] and type string
      null_character: a unicode character used to replace the '<null>'
        character. The default value is a light shade block '░'.
    """
    batch_size = text.shape[0].value
    lexicon_size = text.shape[1].value
    text = tf.reshape(text, [-1])
    sp_text = tf.string_split(text, delimiter='')
    sp_text = tf.sparse_reset_shape(sp_text, [batch_size * lexicon_size,
                                              self.max_sequence_length])
    sp_text = tf.sparse_tensor_to_dense(sp_text, default_value=null_character)
    ids = self.invert_table.lookup(sp_text)
    ids = tf.reshape(ids, [batch_size, lexicon_size, self.max_sequence_length])
    return tf.to_int32(ids)
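As used above, passing delimiter='' makes string_split break each string into individual characters (single bytes in TensorFlow 1.x). A minimal illustration with ASCII input:

chars = tf.string_split(["ab", "cde"], delimiter='')
# chars.values      -> [b'a', b'b', b'c', b'd', b'e']
# chars.dense_shape -> [2, 3]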
Example 9: create_trg_dataset
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def create_trg_dataset(input_dataset,
                       input_data_type,
                       word_vocab_index,
                       word_max_length,
                       word_pad,
                       word_sos,
                       word_eos,
                       word_placeholder_enable,
                       num_parallel):
    """create dataset for input target data"""
    dataset = input_dataset

    if input_data_type == "span":
        dataset = dataset.map(lambda span: tf.string_split([span], delimiter='|').values,
                              num_parallel_calls=num_parallel)
        dataset = dataset.map(lambda span: tf.string_to_number(span, out_type=tf.int32),
                              num_parallel_calls=num_parallel)
        dataset = dataset.map(lambda span: tf.expand_dims(span, axis=-1),
                              num_parallel_calls=num_parallel)
    elif input_data_type == "text":
        dataset = dataset.map(lambda sent: generate_word_feat(sent,
                                  word_vocab_index, word_max_length, word_pad,
                                  word_sos, word_eos, word_placeholder_enable),
                              num_parallel_calls=num_parallel)

    return dataset
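For the "span" branch, a hedged illustration of what a single record goes through (TensorFlow 1.x; the literal string is made up):

span = tf.constant("3|7")
values = tf.string_split([span], delimiter='|').values    # [b'3', b'7']
numbers = tf.string_to_number(values, out_type=tf.int32)  # [3, 7]
expanded = tf.expand_dims(numbers, axis=-1)               # shape [2, 1]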
Example 10: generate_word_feat
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def generate_word_feat(sentence,
                       word_vocab_index,
                       word_max_length,
                       word_pad,
                       word_sos,
                       word_eos,
                       word_placeholder_enable):
    """Generate word features for a sentence."""
    words = tf.string_split([sentence], delimiter=' ').values

    if word_placeholder_enable:
        words = tf.concat([[word_sos], words[:word_max_length], [word_eos],
                           tf.constant(word_pad, shape=[word_max_length])], axis=0)
        word_max_length = word_max_length + 2
    else:
        words = tf.concat([words[:word_max_length],
                           tf.constant(word_pad, shape=[word_max_length])], axis=0)

    words = tf.reshape(words[:word_max_length], shape=[word_max_length])
    words = tf.cast(word_vocab_index.lookup(words), dtype=tf.int32)
    words = tf.expand_dims(words, axis=-1)
    return words
Example 11: decode
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def decode(self, data, items):
    decoded_items = {}

    # Split tokens
    tokens = tf.string_split([data], delimiter=self.delimiter).values

    # Optionally prepend a special token
    if self.prepend_token is not None:
        tokens = tf.concat([[self.prepend_token], tokens], 0)

    # Optionally append a special token
    if self.append_token is not None:
        tokens = tf.concat([tokens, [self.append_token]], 0)

    decoded_items[self.length_feature_name] = tf.size(tokens)
    decoded_items[self.tokens_feature_name] = tokens
    return [decoded_items[_] for _ in items]
Author: akanimax | Project: natural-language-summary-generation-from-structured-data | Lines: 19 | Source file: split_tokens_decoder.py
Example 12: parse_raw_text
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def parse_raw_text(sentence):
    """Splits text tensor by word to sparse sequence of tokens.

    Args:
      sentence: `tf.string`, with text record to split.

    Returns:
      Dictionary mapping feature name to tensors with the following entries
      `constants.TOKENS` mapping to a `SparseTensor` and
      `constants.SEQUENCE_LENGTH` mapping to a one-dimensional integer `Tensor`.
    """
    tokens = tf.regex_replace(sentence, _CHAR_TO_FILTER_OUT, ' ',
                              replace_global=True)
    sparse_sequence = tf.string_split(tokens)
    features = {
        constants.TOKENS: sparse_sequence,
        constants.SEQUENCE_LENGTH: get_sparse_tensor_size(sparse_sequence)
    }
    return features
Example 13: generator
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def generator(ln):
    splits = tf.string_split([ln], delimiter=',')
    label = splits.values[0]

    # Parse the dense integer features I1..I13.
    features = {}
    for i in range(1, 14):
        features['I' + str(i)] = tf.string_to_number(splits.values[i], tf.int64)

    return features, label
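A hedged usage sketch wiring generator into a tf.data pipeline (TensorFlow 1.x; the CSV file name is illustrative and assumes a Criteo-style layout of a label followed by 13 integer features per line):

dataset = tf.data.TextLineDataset("train.csv")
dataset = dataset.map(generator)   # -> ({"I1": ..., ..., "I13": ...}, label)
features, label = dataset.make_one_shot_iterator().get_next()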
Example 14: __init__
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def __init__(self, config, batch_size, one_hot=False):
    self.lookup = None
    reader = tf.TextLineReader()
    filename_queue = tf.train.string_input_producer(["chargan.txt"])
    key, x = reader.read(filename_queue)
    vocabulary = self.get_vocabulary()
    table = tf.contrib.lookup.string_to_index_table_from_tensor(
        mapping=vocabulary, default_value=0)

    # Pad each line with spaces, keep exactly the first 64 characters,
    # split into single characters and map them to vocabulary ids.
    x = tf.string_join([x, tf.constant(" " * 64)])
    x = tf.substr(x, [0], [64])
    x = tf.string_split(x, delimiter='')
    x = tf.sparse_tensor_to_dense(x, default_value=' ')
    x = tf.reshape(x, [64])
    x = table.lookup(x)

    self.one_hot = one_hot
    if one_hot:
        x = tf.one_hot(x, len(vocabulary))
        x = tf.cast(x, dtype=tf.float32)
        x = tf.reshape(x, [1, int(x.get_shape()[0]), int(x.get_shape()[1]), 1])
    else:
        x = tf.cast(x, dtype=tf.float32)
        x -= len(vocabulary) / 2.0
        x /= len(vocabulary) / 2.0
        x = tf.reshape(x, [1, 1, 64, 1])

    num_preprocess_threads = 8
    x = tf.train.shuffle_batch(
        [x],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=5000,
        min_after_dequeue=500,
        enqueue_many=True)

    self.x = x
    self.table = table
Example 15: make_preprocessing_fn
# Module to import: import tensorflow [as alias]
# Or: from tensorflow import string_split [as alias]
def make_preprocessing_fn(frequency_threshold):
    """Creates a preprocessing function for reddit.

    Args:
      frequency_threshold: The frequency_threshold used when generating
        vocabularies for categorical and text features.

    Returns:
      A preprocessing function.
    """
    def preprocessing_fn(inputs):
        """User defined preprocessing function for reddit columns.

        Args:
          inputs: dictionary of input `tensorflow_transform.Column`.

        Returns:
          A dictionary of `tensorflow_transform.Column` representing the
          transformed columns.
        """
        # TODO(b/35001605) Make this "passthrough" more DRY.
        result = {'score': inputs['score'], 'toplevel': inputs['toplevel']}
        result['subreddit_id'] = tft.string_to_int(
            inputs['subreddit'], frequency_threshold=frequency_threshold)
        for name in ('author', 'comment_body', 'comment_parent_body'):
            words = tf.string_split(inputs[name])
            # TODO(b/33467613) Translate these to bag-of-words style sparse features.
            result[name + '_bow'] = tft.string_to_int(
                words, frequency_threshold=frequency_threshold)
        return result

    return preprocessing_fn