This article collects typical usage examples of the `string_split` function from `tensorflow.python.ops.string_ops` in Python. If you have been wondering what exactly `string_split` does and how to use it, the curated code examples below should help.
The following presents 12 code examples of the `string_split` function, ordered by popularity.
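Before the examples, here is a minimal usage sketch of my own (not taken from the examples below). It assumes the TensorFlow 1.x graph API that the examples use, where `tf.string_split` is the public alias of `string_ops.string_split`; the input strings are purely illustrative:

import tensorflow as tf  # assumes TensorFlow 1.x graph mode, matching the examples below

# string_split tokenizes a batch of strings on whitespace by default and
# returns a SparseTensor with (indices, values, dense_shape) components.
tokens = tf.string_split(["hello world", "a b c"])

with tf.Session() as sess:
  indices, values, shape = sess.run(tokens)

# values  -> [b"hello", b"world", b"a", b"b", b"c"]
# indices -> [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
# shape   -> [2, 3]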
Example 1: testCaptureHashTable
def testCaptureHashTable(self):
  # NOTE(mrry): We must use the V2 variants of `HashTable`
  # etc. because these produce a `tf.resource`-typed output that is
  # compatible with the in-graph function implementation.
  default_val = -1
  keys = constant_op.constant(["brain", "salad", "surgery"])
  values = constant_op.constant([0, 1, 2], dtypes.int64)
  table = lookup_ops.HashTable(
      lookup_ops.KeyValueTensorInitializer(keys, values), default_val)

  input_sentences = dataset_ops.Dataset.from_tensor_slices(
      ["brain brain tank salad surgery", "surgery brain"])

  iterator = (input_sentences
              .map(lambda x: string_ops.string_split([x]).values)
              .map(table.lookup)
              .make_initializable_iterator())
  init_op = iterator.initializer
  get_next = iterator.get_next()

  with self.cached_session() as sess:
    sess.run(table.initializer)
    sess.run(init_op)
    sess.run(get_next)
    sess.run(get_next)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
Example 2: testCaptureHashTableInSharedIterator
def testCaptureHashTableInSharedIterator(self):
  worker, _ = test_util.create_local_cluster(1, 1)

  # NOTE(mrry): We must use the V2 variants of `HashTable`
  # etc. because these produce a `tf.resource`-typed output that is
  # compatible with the in-graph function implementation.
  default_val = -1
  keys = constant_op.constant(["brain", "salad", "surgery"])
  values = constant_op.constant([0, 1, 2], dtypes.int64)
  table = lookup_ops.HashTable(
      lookup_ops.KeyValueTensorInitializer(keys, values),
      default_val,
      shared_name="shared_table")

  input_sentences = dataset_ops.Dataset.from_tensor_slices(
      ["brain brain tank salad surgery", "surgery brain"])

  iterator = (
      input_sentences.map(lambda x: string_ops.string_split([x]).values).map(
          table.lookup)
      .make_initializable_iterator(shared_name="shared_iterator"))
  init_op = iterator.initializer
  get_next = iterator.get_next()

  with session.Session(worker[0].target) as sess:
    self.evaluate(table.initializer)
    self.evaluate(init_op)
    self.assertAllEqual([0, 0, -1, 1, 2], self.evaluate(get_next))

  with session.Session(worker[0].target) as sess:
    self.assertAllEqual([2, 0], self.evaluate(get_next))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
Example 3: testStringSplit
def testStringSplit(self):
  strings = ["pigs on the wing", "animals"]

  with self.cached_session() as sess:
    tokens = string_ops.string_split(strings)
    indices, values, shape = self.evaluate(tokens)
    self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
    self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"])
    self.assertAllEqual(shape, [2, 4])
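Example 3 unpacks the three `SparseTensor` components directly. When a padded dense batch is more convenient, one possible follow-up (a sketch of mine, assuming the same test-module context plus an extra `sparse_ops` import) is a sparse-to-dense conversion:

from tensorflow.python.ops import sparse_ops

# Densify the split result; shorter rows are padded with the default value.
dense = sparse_ops.sparse_tensor_to_dense(tokens, default_value="")
# dense -> [[b"pigs", b"on", b"the", b"wing"],
#           [b"animals", b"", b"", b""]]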
Example 4: testStringSplitWithNoSkipEmpty
def testStringSplitWithNoSkipEmpty(self):
  strings = ["#a", "b#", "#c#"]

  with self.cached_session() as sess:
    tokens = string_ops.string_split(strings, "#", skip_empty=False)
    indices, values, shape = self.evaluate(tokens)
    self.assertAllEqual(indices, [[0, 0], [0, 1],
                                  [1, 0], [1, 1],
                                  [2, 0], [2, 1], [2, 2]])
    self.assertAllEqual(values, [b"", b"a", b"b", b"", b"", b"c", b""])
    self.assertAllEqual(shape, [3, 3])

  with self.cached_session() as sess:
    tokens = string_ops.string_split(strings, "#")
    indices, values, shape = self.evaluate(tokens)
    self.assertAllEqual(values, [b"a", b"b", b"c"])
    self.assertAllEqual(indices, [[0, 0], [1, 0], [2, 0]])
    self.assertAllEqual(shape, [3, 1])
Example 5: testStringSplitEmptyToken
def testStringSplitEmptyToken(self):
  strings = [" hello ", "", "world "]

  with self.test_session() as sess:
    tokens = string_ops.string_split(strings)
    indices, values, shape = sess.run(tokens)
    self.assertAllEqual(indices, [[0, 0], [2, 0]])
    self.assertAllEqual(values, [b"hello", b"world"])
    self.assertAllEqual(shape, [3, 1])
Example 6: string_split
def string_split(source, sep=None, skip_empty=True, delimiter=None,
                 result_type="SparseTensor", name=None):  # pylint: disable=invalid-name
  """Split elements of `source` based on `delimiter`.

  Let N be the size of `source` (typically N will be the batch size). Split
  each element of `source` based on `delimiter` and return a `SparseTensor`
  or `RaggedTensor` containing the split tokens. Empty tokens are ignored.

  If `sep` is an empty string, each element of `source` is split into
  individual strings, each containing one byte. (This includes splitting
  multibyte sequences of UTF-8.) If `delimiter` contains multiple bytes, it
  is treated as a set of delimiters, with each byte considered a potential
  split point.

  Examples:

  ```python
  >>> tf.strings.split(['hello world', 'a b c'])
  tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]],
                  values=['hello', 'world', 'a', 'b', 'c'],
                  dense_shape=[2, 3])

  >>> tf.strings.split(['hello world', 'a b c'], result_type="RaggedTensor")
  <tf.RaggedTensor [['hello', 'world'], ['a', 'b', 'c']]>
  ```

  Args:
    source: `1-D` string `Tensor`, the strings to split.
    sep: `0-D` string `Tensor`, the delimiter character. The string should
      have length 0 or 1. Defaults to ' '.
    skip_empty: A `bool`. If `True`, skip the empty strings from the result.
    delimiter: deprecated alias for `sep`.
    result_type: The tensor type of the result: one of `"RaggedTensor"` or
      `"SparseTensor"`.
    name: A name for the operation (optional).

  Raises:
    ValueError: If `delimiter` is not a string.

  Returns:
    A `SparseTensor` or `RaggedTensor` of rank `2`, the strings split
    according to the delimiter. The first column of the indices corresponds
    to the row in `source`, and the second column corresponds to the index
    of the split component in this row.
  """
  with ops.name_scope(name, "StringSplit", [source]):
    sparse_result = string_ops.string_split(
        source, sep=sep, skip_empty=skip_empty, delimiter=delimiter)
    if result_type == "SparseTensor":
      return sparse_result
    elif result_type == "RaggedTensor":
      return ragged_tensor.RaggedTensor.from_value_rowids(
          values=sparse_result.values,
          value_rowids=sparse_result.indices[:, 0],
          nrows=sparse_result.dense_shape[0],
          validate=False)
    else:
      raise ValueError("result_type must be 'RaggedTensor' or 'SparseTensor'.")
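As a usage sketch of the wrapper above (the input strings are illustrative, and it assumes the wrapper's module with its imports is available), `result_type` selects between the two return representations:

st = string_split(["pigs on the wing", "animals"])  # default: SparseTensor
rt = string_split(["pigs on the wing", "animals"], result_type="RaggedTensor")
# st.values      -> [b"pigs", b"on", b"the", b"wing", b"animals"]
# st.dense_shape -> [2, 4]
# rt             -> <tf.RaggedTensor [[b'pigs', b'on', b'the', b'wing'], [b'animals']]>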
Example 7: testStringSplitOnSetEmptyToken
def testStringSplitOnSetEmptyToken(self):
  strings = ["", " a", "b ", " c", " ", " d ", ". e", "f .", " .g. ", " ."]

  with self.cached_session() as sess:
    tokens = string_ops.string_split(strings, delimiter=" .")
    indices, values, shape = self.evaluate(tokens)
    self.assertAllEqual(
        indices,
        [[1, 0], [2, 0], [3, 0], [5, 0], [6, 0], [7, 0], [8, 0]])
    self.assertAllEqual(values, [b"a", b"b", b"c", b"d", b"e", b"f", b"g"])
    self.assertAllEqual(shape, [10, 1])
Example 8: testStringSplitEmptyToken
def testStringSplitEmptyToken(self):
  strings = ["", " a", "b ", " c", " ", " d ", " e", "f ", " g ", " "]

  with self.test_session() as sess:
    tokens = string_ops.string_split(strings)
    indices, values, shape = sess.run(tokens)
    self.assertAllEqual(
        indices,
        [[1, 0], [2, 0], [3, 0], [5, 0], [6, 0], [7, 0], [8, 0]])
    self.assertAllEqual(values, [b"a", b"b", b"c", b"d", b"e", b"f", b"g"])
    self.assertAllEqual(shape, [10, 1])
Example 9: testStringSplitWithDelimiter
def testStringSplitWithDelimiter(self):
  strings = ["hello|world", "hello world"]

  with self.cached_session() as sess:
    self.assertRaises(
        ValueError, string_ops.string_split, strings, delimiter=["|", ""])
    self.assertRaises(
        ValueError, string_ops.string_split, strings, delimiter=["a"])

    tokens = string_ops.string_split(strings, delimiter="|")
    indices, values, shape = self.evaluate(tokens)
    self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
    self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
    self.assertAllEqual(shape, [2, 2])

    tokens = string_ops.string_split(strings, delimiter="| ")
    indices, values, shape = self.evaluate(tokens)
    self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0], [1, 1]])
    self.assertAllEqual(values, [b"hello", b"world", b"hello", b"world"])
    self.assertAllEqual(shape, [2, 2])
Example 10: testStringSplitEmptyDelimiter
def testStringSplitEmptyDelimiter(self):
  strings = ["hello", "hola", b"\xF0\x9F\x98\x8E"]  # Last string is U+1F60E

  with self.cached_session() as sess:
    tokens = string_ops.string_split(strings, delimiter="")
    indices, values, shape = self.evaluate(tokens)
    self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
                                  [1, 0], [1, 1], [1, 2], [1, 3], [2, 0],
                                  [2, 1], [2, 2], [2, 3]])
    expected = np.array(
        [
            "h", "e", "l", "l", "o", "h", "o", "l", "a", b"\xf0", b"\x9f",
            b"\x98", b"\x8e"
        ],
        dtype="|S1")
    self.assertAllEqual(values.tolist(), expected)
    self.assertAllEqual(shape, [3, 5])
Example 11: testStringSplitWithDelimiterTensor
def testStringSplitWithDelimiterTensor(self):
  strings = ["hello|world", "hello world"]

  with self.cached_session() as sess:
    delimiter = array_ops.placeholder(dtypes.string)

    tokens = string_ops.string_split(strings, delimiter=delimiter)

    with self.assertRaises(errors_impl.InvalidArgumentError):
      sess.run(tokens, feed_dict={delimiter: ["a", "b"]})
    with self.assertRaises(errors_impl.InvalidArgumentError):
      sess.run(tokens, feed_dict={delimiter: ["a"]})
    indices, values, shape = sess.run(tokens, feed_dict={delimiter: "|"})

    self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
    self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
    self.assertAllEqual(shape, [2, 2])
Example 12: feature_engineering_fn
def feature_engineering_fn(features, labels):
  # GitHub #12205: raise a TypeError if called twice.
  _ = string_ops.string_split(features["x"])
  features["x"] = constant_op.constant([9.])
  labels["y"] = constant_op.constant([99.])
  return features, labels