This article collects typical usage examples of tensorflow.RaggedTensor in Python. If you are unsure how to use tensorflow.RaggedTensor, or are looking for concrete examples of it in real code, the curated samples below may help. You can also explore further usage examples from the enclosing tensorflow module.
The following presents 15 code examples that use tensorflow.RaggedTensor, sorted by popularity by default.
Example 1: tokenize
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def tokenize(self, text):
"""Tokenizes text.
Args:
text: A string or batch of strings to tokenize as a ``tf.Tensor`` or
Python values.
Returns:
- If :obj:`text` is a Python string, a list of Python strings.
    - If :obj:`text` is a list of Python strings, a list of lists of Python
strings.
- If :obj:`text` is a 0-D ``tf.Tensor``, a 1-D ``tf.Tensor``.
- If :obj:`text` is a 1-D ``tf.Tensor``, a 2-D ``tf.RaggedTensor``.
Raises:
ValueError: if the rank of :obj:`text` is greater than 1.
"""
with tf.device("cpu:0"):
return self._tokenize(text)
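A hedged usage sketch (not part of the original snippet): it assumes an OpenNMT-tf tokenizer that implements the interface above, here opennmt.tokenizers.SpaceTokenizer; substitute the tokenizer you actually use.

import tensorflow as tf
import opennmt

tokenizer = opennmt.tokenizers.SpaceTokenizer()

# Python string in -> list of Python strings out.
print(tokenizer.tokenize("Hello world !"))  # ['Hello', 'world', '!']

# 1-D string tf.Tensor in -> 2-D tf.RaggedTensor out (ragged because the
# sentences in the batch have different lengths).
batch = tf.constant(["Hello world !", "How are you ?"])
print(tokenizer.tokenize(batch))  # <tf.RaggedTensor [[b'Hello', ...], [b'How', ...]]>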
Example 2: detokenize
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def detokenize(self, tokens, sequence_length=None):
"""Detokenizes tokens.
The Tensor version supports batches of tokens.
Args:
tokens: Tokens or batch of tokens as a ``tf.Tensor``, ``tf.RaggedTensor``,
or Python values.
sequence_length: The length of each sequence. Required if :obj:`tokens`
is a dense 2-D ``tf.Tensor``.
Returns:
    - If :obj:`tokens` is a list of lists of Python strings, a list of Python strings.
- If :obj:`tokens` is a list of Python strings, a Python string.
- If :obj:`tokens` is a N-D ``tf.Tensor`` (or ``tf.RaggedTensor``), a
(N-1)-D ``tf.Tensor``.
Raises:
ValueError: if the rank of :obj:`tokens` is greater than 2.
ValueError: if :obj:`tokens` is a 2-D dense ``tf.Tensor`` and
:obj:`sequence_length` is not set.
"""
with tf.device("cpu:0"):
return self._detokenize(tokens, sequence_length)
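Usage sketch (not part of the original snippet), continuing with the SpaceTokenizer assumed in the previous sketch, to show the calling conventions from the docstring:

# Batch of tokens as a 2-D tf.RaggedTensor: no sequence_length needed.
ragged = tf.ragged.constant([["Hello", "world", "!"], ["Bye", "!"]])
print(tokenizer.detokenize(ragged))  # 1-D string tensor: b'Hello world !', b'Bye !'

# Same batch as a padded dense 2-D tf.Tensor: sequence_length becomes mandatory.
print(tokenizer.detokenize(ragged.to_tensor(default_value=""),
                           sequence_length=ragged.row_lengths()))

# Python lists of strings are also accepted and return Python strings.
print(tokenizer.detokenize([["Hello", "world", "!"], ["Bye", "!"]]))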
Example 3: _detokenize
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _detokenize(self, tokens, sequence_length):
if isinstance(tokens, tf.RaggedTensor):
rank = len(tokens.shape)
if rank == 1:
return self._detokenize_tensor(tokens.values)
elif rank == 2:
return self._detokenize_ragged_tensor(tokens)
else:
raise ValueError("Unsupported RaggedTensor rank %d for detokenization" % rank)
elif tf.is_tensor(tokens):
rank = len(tokens.shape)
if rank == 1:
return self._detokenize_tensor(tokens)
elif rank == 2:
if sequence_length is None:
raise ValueError("sequence_length is required for Tensor detokenization")
return self._detokenize_batch_tensor(tokens, sequence_length)
else:
raise ValueError("Unsupported tensor rank %d for detokenization" % rank)
elif isinstance(tokens, list) and tokens and isinstance(tokens[0], list):
return list(map(self.detokenize, tokens))
else:
tokens = [tf.compat.as_text(token) for token in tokens]
return self._detokenize_string(tokens)
Example 4: testRaggedTensor
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def testRaggedTensor(self, tensor_representation_textpb, record_batch,
expected_type_spec, expected_ragged_tensor):
tensor_representation = text_format.Parse(tensor_representation_textpb,
schema_pb2.TensorRepresentation())
adapter = tensor_adapter.TensorAdapter(
tensor_adapter.TensorAdapterConfig(record_batch.schema,
{"output": tensor_representation}))
converted = adapter.ToBatchTensors(record_batch)
self.assertLen(converted, 1)
self.assertIn("output", converted)
actual_output = converted["output"]
self.assertIsInstance(
actual_output, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue))
if tf.executing_eagerly():
self.assertTrue(
expected_type_spec.is_compatible_with(actual_output),
"{} is not compatible with spec {}".format(actual_output,
expected_type_spec))
self.assertRaggedAllEqual(actual_output, expected_ragged_tensor)
self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
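For orientation only, here is a hedged sketch of the kind of parameters this parameterized test receives; these are not the test's real fixtures. The textproto field names follow tensorflow_metadata's TensorRepresentation proto, the record batch uses pyarrow, and "ragged_feature" is a made-up column name.

import pyarrow as pa
import tensorflow as tf

# Illustrative only: a RaggedTensor representation reading one list column.
tensor_representation_textpb = """
  ragged_tensor {
    feature_path { step: "ragged_feature" }
  }
"""
record_batch = pa.RecordBatch.from_arrays(
    [pa.array([[1, 2], [], [3]], type=pa.list_(pa.int64()))],
    ["ragged_feature"])
expected_ragged_tensor = tf.ragged.constant([[1, 2], [], [3]], dtype=tf.int64)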
Example 5: _check_tensor
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _check_tensor(tensor, name, error_label='feature'):
"""Check that passed `tensor` is a Tensor or SparseTensor or RaggedTensor."""
if not (isinstance(tensor, tf.Tensor) or
isinstance(tensor, tf.sparse.SparseTensor) or
isinstance(tensor, tf.RaggedTensor)):
fmt_name = ' {}'.format(name) if name else ''
value_error = ValueError('{}{} must be a Tensor, SparseTensor, or '
'RaggedTensor.'.format(error_label, fmt_name))
# NOTE(ericmc): This if-else block is a specific carve-out for
# LabeledTensor, which has a `.tensor` attribute and which is
# convertible to tf.Tensor via ops.convert_to_tensor.
# Allowing all types convertible to tf.Tensor is considered by soergel@
# to be too permissive.
# TODO(soergel): accept any type convertible to Tensor,
# as in cl/193238295 snapshot #6.
if hasattr(tensor, 'tensor'):
try:
ops.convert_to_tensor(tensor)
except TypeError:
raise value_error
else:
raise value_error
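A quick sketch of how the check behaves (the feature names below are made up for illustration):

import tensorflow as tf

_check_tensor(tf.constant([1, 2, 3]), "dense_feature")              # passes
_check_tensor(tf.ragged.constant([[1], [2, 3]]), "ragged_feature")  # passes
try:
    _check_tensor([1, 2, 3], "plain_list")  # a plain Python list is rejected
except ValueError as err:
    print(err)  # feature plain_list must be a Tensor, SparseTensor, or RaggedTensor.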
Example 6: add_sequence_controls
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def add_sequence_controls(ids, length, start_id=None, end_id=None):
"""Adds sequence control tokens.
Args:
ids: Sequence of ids as 1D or 2D (batch) tensor.
length: Sequence length as 0D or 1D (batch) tensor.
start_id: Id to prepend to the sequence (set ``None`` to disable).
end_id: Id to append to the sequence (set ``None`` to disable).
Returns:
A tuple ``(ids, length)``.
"""
rank = ids.shape.rank
if rank not in (1, 2):
raise ValueError("Unsupported rank %d (expected 1 or 2)" % rank)
batch_size = tf.shape(ids)[0] if rank == 2 else None
def _make_column(value):
value = tf.constant(value, dtype=ids.dtype)
if batch_size is not None:
value = tf.fill([batch_size], value)
return tf.expand_dims(value, -1)
if start_id is not None:
    start_ids = _make_column(start_id)
ids = tf.concat([start_ids, ids], axis=-1)
length += 1
if end_id is not None:
    end_ids = _make_column(end_id)
if batch_size is not None:
# Run concat on RaggedTensor to handle sequences with variable length.
ids = tf.RaggedTensor.from_tensor(ids, lengths=length)
ids = tf.concat([ids, end_ids], axis=-1)
if batch_size is not None:
ids = ids.to_tensor()
length += 1
return ids, length
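A hedged usage sketch of the batched case. The id values 1 and 2 stand in for OpenNMT-tf's START_OF_SENTENCE_ID and END_OF_SENTENCE_ID and are assumptions here; 0 is padding.

import tensorflow as tf

ids = tf.constant([[3, 4, 5], [6, 7, 0]], dtype=tf.int64)   # padded batch
length = tf.constant([3, 2], dtype=tf.int32)
ids, length = add_sequence_controls(ids, length, start_id=1, end_id=2)
# ids    -> [[1, 3, 4, 5, 2], [1, 6, 7, 2, 0]]
# length -> [5, 4]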
Example 7: make_features
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def make_features(self, element=None, features=None, training=None):
"""Tokenizes raw text."""
if features is None:
features = {}
if "tokens" in features:
return features
if "text" in features:
element = features.pop("text")
tokens = self.tokenizer.tokenize(element)
if isinstance(tokens, tf.RaggedTensor):
length = tokens.row_lengths()
tokens = tokens.to_tensor()
else:
length = tf.shape(tokens)[0]
if training and self.noiser is not None:
noisy_tokens, noisy_length = self.noiser(tokens, keep_shape=False)
if self.in_place_noise:
tokens, length = tf.cond(
tf.random.uniform([]) < self.noise_probability,
true_fn=lambda: (noisy_tokens, noisy_length),
false_fn=lambda: (tokens, length))
else:
# Call make_features again to fill the remaining noisy features.
noisy_features = dict(tokens=noisy_tokens, length=noisy_length)
noisy_features = self.make_features(features=noisy_features, training=training)
for key, value in noisy_features.items():
features["noisy_%s" % key] = value
features["length"] = length
features["tokens"] = tokens
return features
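A minimal sketch of the ragged branch above, reproduced outside the class; the tokenizer is assumed to return a tf.RaggedTensor for batched input, as in Example 1.

import tensorflow as tf

tokens = tf.ragged.constant([["Hello", "world", "!"], ["Bye", "!"]])
length = tokens.row_lengths()   # [3, 2]
tokens = tokens.to_tensor()     # dense [2, 3] batch, padded with b''
features = {"tokens": tokens, "length": length}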
Example 8: _detokenize_ragged_tensor
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _detokenize_ragged_tensor(self, tokens):
"""Detokenizes a batch of tokens as a ``tf.RaggedTensor``
When not overriden, this default implementation calls _detokenize_batch_tensor
on the dense representation.
Args:
tokens: A 2-D ``tf.RaggedTensor``.
Returns:
A 1-D string ``tf.Tensor``.
"""
return self._detokenize_batch_tensor(tokens.to_tensor(), tokens.row_lengths())
Example 9: _detokenize_batch_tensor
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _detokenize_batch_tensor(self, tokens, sequence_length):
ragged = tf.RaggedTensor.from_tensor(tokens, lengths=sequence_length)
return self._detokenize_ragged_tensor(ragged)
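Note that the defaults in Examples 8 and 9 delegate to each other, so a concrete tokenizer must override at least one of them. Below is a minimal, hedged sketch of such an override, assuming the OpenNMT-tf base class opennmt.tokenizers.Tokenizer and a space-joined detokenization; only the batched detokenization side is shown, a real tokenizer would also implement the tokenization methods.

import tensorflow as tf
import opennmt

class SpaceJoinDetokenizer(opennmt.tokenizers.Tokenizer):
    def _detokenize_ragged_tensor(self, tokens):
        # Join each ragged row with spaces -> 1-D string tensor of detokenized texts.
        return tf.strings.reduce_join(tokens, axis=1, separator=" ")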
Example 10: tokens_to_chars
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def tokens_to_chars(tokens):
"""Splits tokens into unicode characters.
Example:
>>> opennmt.data.tokens_to_chars(["hello", "world"])
<tf.RaggedTensor [[b'h', b'e', b'l', b'l', b'o'], [b'w', b'o', b'r', b'l', b'd']]>
Args:
tokens: A string ``tf.Tensor`` of shape :math:`[T]`.
Returns:
The characters as a 2D string ``tf.RaggedTensor``.
"""
return tf.strings.unicode_split(tokens, "UTF-8")
Example 11: tokens_to_words
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def tokens_to_words(tokens, subword_token="￭", is_spacer=None):
"""Converts a sequence of tokens to a sequence of words.
Example:
>>> opennmt.data.tokens_to_words(["He@@", "llo", "W@@", "orld", "@@!"], subword_token="@@")
<tf.RaggedTensor [[b'He@@', b'llo'], [b'W@@', b'orld', b'@@!']]>
Args:
tokens: A 1D string ``tf.Tensor``.
subword_token: The special token used by the subword tokenizer.
is_spacer: Whether :obj:`subword_token` is used as a spacer (as in
SentencePiece) or a joiner (as in BPE). If ``None``, will infer
directly from :obj:`subword_token`.
Returns:
The words as a 2D string ``tf.RaggedTensor``.
"""
if is_spacer is None:
is_spacer = subword_token == "▁"
if is_spacer:
# First token implicitly starts with a spacer.
left_and_single = tf.logical_or(
tf.strings.regex_full_match(tokens, "%s.*" % subword_token),
tf.one_hot(0, tf.shape(tokens)[0], on_value=True, off_value=False))
right = tf.strings.regex_full_match(tokens, ".+%s" % subword_token)
word_start = tf.logical_or(tf.roll(right, shift=1, axis=0), left_and_single)
else:
right = tf.strings.regex_full_match(tokens, ".*%s" % subword_token)
left = tf.strings.regex_full_match(tokens, "%s.*" % subword_token)
subword = tf.logical_or(tf.roll(right, shift=1, axis=0), left)
word_start = tf.logical_not(subword)
start_indices = tf.squeeze(tf.where(word_start), -1)
return tf.RaggedTensor.from_row_starts(tokens, start_indices)
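For spacer-style subwords (the SentencePiece convention, where "▁" marks the start of a word), a hedged sketch of the same call; the pieces below are made up for illustration.

import tensorflow as tf

pieces = tf.constant(["▁He", "llo", "▁W", "orld", "!"])
words = tokens_to_words(pieces, subword_token="▁", is_spacer=True)
# words -> [["▁He", "llo"], ["▁W", "orld", "!"]]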
Example 12: _testTokenizerOnBatchTensor
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _testTokenizerOnBatchTensor(self, tokenizer, text, ref_tokens):
text = tf.constant(text)
tokens = tokenizer.tokenize(text)
self.assertIsInstance(tokens, tf.RaggedTensor)
self.assertAllEqual(tokens.to_list(), tf.nest.map_structure(tf.compat.as_bytes, ref_tokens))
Example 13: word_count
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def word_count(tokens, name=None):
"""Find the token count of each document/row.
`tokens` is either a `RaggedTensor` or `SparseTensor`, representing tokenized
  strings. This function simply returns the size of each row, so the dtype is not
constrained to string.
Args:
tokens: either
(1) a two-dimensional `SparseTensor`, or
(2) a `RaggedTensor` with ragged rank of 1, non-ragged rank of 1
of dtype `tf.string` containing tokens to be counted
name: (Optional) A name for this operation.
Returns:
    A one-dimensional `Tensor` containing the token count of each row.
Raises:
ValueError: if tokens is neither sparse nor ragged
"""
with tf.compat.v1.name_scope(name, 'word_count'):
if isinstance(tokens, tf.RaggedTensor):
return tokens.row_lengths()
elif isinstance(tokens, tf.SparseTensor):
result = tf.sparse.reduce_sum(
tf.SparseTensor(indices=tokens.indices,
values=tf.ones_like(tokens.values, dtype=tf.int64),
dense_shape=tokens.dense_shape),
axis=1)
result.set_shape([tokens.shape[0]])
return result
else:
raise ValueError('Invalid token tensor')
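A hedged usage sketch showing that the ragged and sparse paths produce the same per-row counts:

import tensorflow as tf

ragged_tokens = tf.ragged.constant([["the", "cat"], ["sat"], []])
print(word_count(ragged_tokens))   # [2 1 0], dtype=int64

# Empty strings are treated as missing entries when building the SparseTensor.
sparse_tokens = tf.sparse.from_dense(
    tf.constant([["the", "cat"], ["sat", ""], ["", ""]]))
print(word_count(sparse_tokens))   # [2 1 0]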
Example 14: test_ragged_roundtrip
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def test_ragged_roundtrip(self):
if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
self.skipTest('This version of TensorFlow does not support '
                  'CompositeTensors in TensorInfo.')
export_path = os.path.join(tempfile.mkdtemp(), 'export')
with tf.compat.v1.Graph().as_default():
with tf.compat.v1.Session().as_default() as session:
input_float = tf.compat.v1.ragged.placeholder(tf.float32, ragged_rank=1,
value_shape=[])
output = input_float / 2.0
inputs = {'input': input_float}
outputs = {'output': output}
saved_transform_io.write_saved_transform_from_session(
session, inputs, outputs, export_path)
with tf.compat.v1.Graph().as_default():
with tf.compat.v1.Session().as_default() as session:
splits = np.array([0, 2, 3], dtype=np.int64)
values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
input_ragged = tf.RaggedTensor.from_row_splits(values, splits)
# Using a computed input gives confidence that the graphs are fused
inputs = {'input': input_ragged * 10}
_, outputs = (
saved_transform_io.partially_apply_saved_transform_internal(
export_path, inputs))
output_ragged = outputs['output']
self.assertIsInstance(output_ragged, tf.RaggedTensor)
result = session.run(output_ragged)
      # indices and shape unchanged; values multiplied by 10 and divided by 2
self.assertAllEqual(splits, result.row_splits)
self.assertEqual([5.0, 10.0, 20.0], result.values.tolist())
Example 15: test_ragged_roundtrip
# Required import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def test_ragged_roundtrip(self):
if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
self.skipTest('This version of TensorFlow does not support '
                  'CompositeTensors in TensorInfo.')
export_path = os.path.join(tempfile.mkdtemp(), 'export')
with tf.compat.v1.Graph().as_default():
with tf.compat.v1.Session().as_default() as session:
input_float = tf.compat.v1.ragged.placeholder(tf.float32, ragged_rank=1,
value_shape=[])
output = input_float / 2.0
inputs = {'input': input_float}
outputs = {'output': output}
saved_transform_io.write_saved_transform_from_session(
session, inputs, outputs, export_path)
splits = np.array([0, 2, 3], dtype=np.int64)
values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
input_ragged = tf.RaggedTensor.from_row_splits(values, splits)
# Using a computed input gives confidence that the graphs are fused
inputs = {'input': input_ragged * 10}
saved_model_loader = saved_transform_io_v2.SavedModelLoader(export_path)
outputs = saved_model_loader.apply_v1_transform_model_in_v2(inputs)
result = outputs['output']
self.assertIsInstance(result, tf.RaggedTensor)
  # indices and shape unchanged; values multiplied by 10 and divided by 2
self.assertAllEqual(splits, result.row_splits)
self.assertEqual([5.0, 10.0, 20.0], result.values.numpy().tolist())