

Python tensorflow.RaggedTensor usage examples

This article collects typical usage examples of tensorflow.RaggedTensor in Python. If you are wondering how exactly tensorflow.RaggedTensor is used, how to call it, or what real-world examples look like, the curated code examples below may help. You can also explore further usage examples from the tensorflow package it belongs to.


Below are 15 code examples of tensorflow.RaggedTensor, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the site recommend better Python code examples.
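
Before diving into the examples, here is a minimal sketch of what a tf.RaggedTensor is; the values below are made up purely for illustration:

import tensorflow as tf

# A RaggedTensor stores rows of varying length as a flat `values` tensor
# plus row-partition information (here, `row_splits`).
rt = tf.RaggedTensor.from_row_splits(
    values=tf.constant([1, 2, 3, 4, 5]),
    row_splits=[0, 2, 2, 5])
print(rt)                # <tf.RaggedTensor [[1, 2], [], [3, 4, 5]]>
print(rt.row_lengths())  # [2, 0, 3]
print(rt.to_tensor())    # dense [[1, 2, 0], [0, 0, 0], [3, 4, 5]], padded with 0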

Example 1: tokenize

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def tokenize(self, text):
    """Tokenizes text.

    Args:
      text: A string or batch of strings to tokenize as a ``tf.Tensor`` or
        Python values.

    Returns:
      - If :obj:`text` is a Python string, a list of Python strings.
      - If :obj:`text` is a list of Python strings, a list of list of Python
        strings.
      - If :obj:`text` is a 0-D ``tf.Tensor``, a 1-D ``tf.Tensor``.
      - If :obj:`text` is a 1-D ``tf.Tensor``, a 2-D ``tf.RaggedTensor``.

    Raises:
      ValueError: if the rank of :obj:`text` is greater than 1.
    """
    with tf.device("cpu:0"):
      return self._tokenize(text) 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 21, Source file: tokenizer.py
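
As a hedged illustration of the shape contract described in the docstring above (using tf.strings.split as a stand-in tokenizer, not OpenNMT-tf's actual implementation): a 0-D string tensor yields a 1-D token tensor, and a 1-D batch yields a 2-D tf.RaggedTensor.

import tensorflow as tf

# Stand-in tokenizer: whitespace splitting (an assumption for illustration only).
single = tf.strings.split(tf.constant("Hello world !"))   # 1-D tf.Tensor of tokens
batch = tf.strings.split(tf.constant(["Hello world !", "How are you ?"]))
print(isinstance(batch, tf.RaggedTensor))  # True
print(batch.row_lengths())                 # [3, 4]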

Example 2: detokenize

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def detokenize(self, tokens, sequence_length=None):
    """Detokenizes tokens.

    The Tensor version supports batches of tokens.

    Args:
      tokens: Tokens or batch of tokens as a ``tf.Tensor``, ``tf.RaggedTensor``,
        or Python values.
      sequence_length: The length of each sequence. Required if :obj:`tokens`
        is a dense 2-D ``tf.Tensor``.

    Returns:
      - If :obj:`tokens` is a list of list of Python strings, a list of Python strings.
      - If :obj:`tokens` is a list of Python strings, a Python string.
      - If :obj:`tokens` is a N-D ``tf.Tensor`` (or ``tf.RaggedTensor``), a
        (N-1)-D ``tf.Tensor``.

    Raises:
      ValueError: if the rank of :obj:`tokens` is greater than 2.
      ValueError: if :obj:`tokens` is a 2-D dense ``tf.Tensor`` and
        :obj:`sequence_length` is not set.
    """
    with tf.device("cpu:0"):
      return self._detokenize(tokens, sequence_length) 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 26, Source file: tokenizer.py

Example 3: _detokenize

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _detokenize(self, tokens, sequence_length):
    if isinstance(tokens, tf.RaggedTensor):
      rank = len(tokens.shape)
      if rank == 1:
        return self._detokenize_tensor(tokens.values)
      elif rank == 2:
        return self._detokenize_ragged_tensor(tokens)
      else:
        raise ValueError("Unsupported RaggedTensor rank %d for detokenization" % rank)
    elif tf.is_tensor(tokens):
      rank = len(tokens.shape)
      if rank == 1:
        return self._detokenize_tensor(tokens)
      elif rank == 2:
        if sequence_length is None:
          raise ValueError("sequence_length is required for Tensor detokenization")
        return self._detokenize_batch_tensor(tokens, sequence_length)
      else:
        raise ValueError("Unsupported tensor rank %d for detokenization" % rank)
    elif isinstance(tokens, list) and tokens and isinstance(tokens[0], list):
      return list(map(self.detokenize, tokens))
    else:
      tokens = [tf.compat.as_text(token) for token in tokens]
      return self._detokenize_string(tokens) 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 26, Source file: tokenizer.py
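
A hedged sketch of one plausible way the ragged path can detokenize, joining each ragged row with tf.strings.reduce_join; this is an assumption for illustration, not OpenNMT-tf's actual _detokenize_ragged_tensor.

import tensorflow as tf

tokens = tf.ragged.constant([["Hello", "world", "!"], ["Hi"]])
# Reducing over the last (ragged) axis joins the tokens of each row.
detok = tf.strings.reduce_join(tokens, axis=-1, separator=" ")
print(detok)  # [b'Hello world !', b'Hi']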

Example 4: testRaggedTensor

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def testRaggedTensor(self, tensor_representation_textpb, record_batch,
                       expected_type_spec, expected_ragged_tensor):
    tensor_representation = text_format.Parse(tensor_representation_textpb,
                                              schema_pb2.TensorRepresentation())
    adapter = tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(record_batch.schema,
                                           {"output": tensor_representation}))
    converted = adapter.ToBatchTensors(record_batch)
    self.assertLen(converted, 1)
    self.assertIn("output", converted)
    actual_output = converted["output"]
    self.assertIsInstance(
        actual_output, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue))
    if tf.executing_eagerly():
      self.assertTrue(
          expected_type_spec.is_compatible_with(actual_output),
          "{} is not compatible with spec {}".format(actual_output,
                                                     expected_type_spec))

    self.assertRaggedAllEqual(actual_output, expected_ragged_tensor)
    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch) 
Developer ID: tensorflow, Project: tfx-bsl, Lines of code: 23, Source file: tensor_adapter_test.py

Example 5: _check_tensor

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _check_tensor(tensor, name, error_label='feature'):
  """Check that passed `tensor` is a Tensor or SparseTensor or RaggedTensor."""
  if not (isinstance(tensor, tf.Tensor) or
          isinstance(tensor, tf.sparse.SparseTensor) or
          isinstance(tensor, tf.RaggedTensor)):
    fmt_name = ' {}'.format(name) if name else ''
    value_error = ValueError('{}{} must be a Tensor, SparseTensor, or '
                             'RaggedTensor.'.format(error_label, fmt_name))
    # NOTE(ericmc): This if-else block is a specific carve-out for
    # LabeledTensor, which has a `.tensor` attribute and which is
    # convertible to tf.Tensor via ops.convert_to_tensor.
    # Allowing all types convertible to tf.Tensor is considered by soergel@
    # to be too permissive.
    # TODO(soergel): accept any type convertible to Tensor,
    # as in cl/193238295 snapshot #6.
    if hasattr(tensor, 'tensor'):
      try:
        ops.convert_to_tensor(tensor)
      except TypeError:
        raise value_error
    else:
      raise value_error 
Developer ID: tensorflow, Project: estimator, Lines of code: 24, Source file: export.py
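
A hedged, self-contained re-creation of this check (the function name is illustrative and not part of the Estimator API):

import tensorflow as tf

def check_feature_tensor(tensor, name):
  """Raises ValueError unless `tensor` is a Tensor, SparseTensor, or RaggedTensor."""
  if not isinstance(tensor, (tf.Tensor, tf.sparse.SparseTensor, tf.RaggedTensor)):
    raise ValueError('feature %s must be a Tensor, SparseTensor, or RaggedTensor.' % name)

check_feature_tensor(tf.ragged.constant([[1], [2, 3]]), 'tokens')   # passes
# check_feature_tensor([[1], [2, 3]], 'tokens')                     # would raise ValueError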

Example 6: add_sequence_controls

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def add_sequence_controls(ids, length, start_id=None, end_id=None):
  """Adds sequence control tokens.

  Args:
    ids: Sequence of ids as 1D or 2D (batch) tensor.
    length: Sequence length as 0D or 1D (batch) tensor.
    start_id: Id to prepend to the sequence (set ``None`` to disable).
    end_id: Id to append to the sequence (set ``None`` to disable).

  Returns:
    A tuple ``(ids, length)``.
  """
  rank = ids.shape.rank
  if rank not in (1, 2):
    raise ValueError("Unsupported rank %d (expected 1 or 2)" % rank)
  batch_size = tf.shape(ids)[0] if rank == 2 else None

  def _make_column(value):
    value = tf.constant(value, dtype=ids.dtype)
    if batch_size is not None:
      value = tf.fill([batch_size], value)
    return tf.expand_dims(value, -1)

  if start_id is not None:
    start_ids = _make_column(start_id)
    ids = tf.concat([start_ids, ids], axis=-1)
    length += 1

  if end_id is not None:
    end_ids = _make_column(end_id)
    if batch_size is not None:
      # Run concat on RaggedTensor to handle sequences with variable length.
      ids = tf.RaggedTensor.from_tensor(ids, lengths=length)
    ids = tf.concat([ids, end_ids], axis=-1)
    if batch_size is not None:
      ids = ids.to_tensor()
    length += 1

  return ids, length 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 41, Source file: text_inputter.py
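
A hedged usage sketch, assuming add_sequence_controls as defined above is in scope; the batch and the control ids 1 and 2 are made up:

import tensorflow as tf

ids = tf.constant([[5, 6, 7],
                   [8, 9, 0]])            # 0 is padding in this made-up batch
length = tf.constant([3, 2])
ids, length = add_sequence_controls(ids, length, start_id=1, end_id=2)
# ids    -> [[1, 5, 6, 7, 2],
#            [1, 8, 9, 2, 0]]             # end id inserted before the padding
# length -> [5, 4]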

Example 7: make_features

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def make_features(self, element=None, features=None, training=None):
    """Tokenizes raw text."""
    if features is None:
      features = {}
    if "tokens" in features:
      return features
    if "text" in features:
      element = features.pop("text")
    tokens = self.tokenizer.tokenize(element)
    if isinstance(tokens, tf.RaggedTensor):
      length = tokens.row_lengths()
      tokens = tokens.to_tensor()
    else:
      length = tf.shape(tokens)[0]
    if training and self.noiser is not None:
      noisy_tokens, noisy_length = self.noiser(tokens, keep_shape=False)
      if self.in_place_noise:
        tokens, length = tf.cond(
            tf.random.uniform([]) < self.noise_probability,
            true_fn=lambda: (noisy_tokens, noisy_length),
            false_fn=lambda: (tokens, length))
      else:
        # Call make_features again to fill the remaining noisy features.
        noisy_features = dict(tokens=noisy_tokens, length=noisy_length)
        noisy_features = self.make_features(features=noisy_features, training=training)
        for key, value in noisy_features.items():
          features["noisy_%s" % key] = value
    features["length"] = length
    features["tokens"] = tokens
    return features 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 32, Source file: text_inputter.py
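
The ragged-to-dense pattern used above, in isolation (a hedged sketch with made-up tokens):

import tensorflow as tf

tokens = tf.ragged.constant([["hello", "world"], ["hi"]])
length = tokens.row_lengths()   # [2, 1]
dense = tokens.to_tensor()      # [[b'hello', b'world'], [b'hi', b'']], padded with b''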

Example 8: _detokenize_ragged_tensor

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _detokenize_ragged_tensor(self, tokens):
    """Detokenizes a batch of tokens as a ``tf.RaggedTensor``

    When not overriden, this default implementation calls _detokenize_batch_tensor
    on the dense representation.

    Args:
      tokens: A 2-D ``tf.RaggedTensor``.

    Returns:
      A 1-D string ``tf.Tensor``.
    """
    return self._detokenize_batch_tensor(tokens.to_tensor(), tokens.row_lengths()) 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 15, Source file: tokenizer.py

Example 9: _detokenize_batch_tensor

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _detokenize_batch_tensor(self, tokens, sequence_length):
    ragged = tf.RaggedTensor.from_tensor(tokens, lengths=sequence_length)
    return self._detokenize_ragged_tensor(ragged) 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 5, Source file: tokenizer.py
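
The opposite conversion, a padded dense batch plus per-row lengths back to a tf.RaggedTensor, behaves as below (hedged sketch with made-up values):

import tensorflow as tf

dense = tf.constant([["hello", "world", ""], ["hi", "", ""]])
lengths = [2, 1]
ragged = tf.RaggedTensor.from_tensor(dense, lengths=lengths)
print(ragged)  # <tf.RaggedTensor [[b'hello', b'world'], [b'hi']]>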

Example 10: tokens_to_chars

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def tokens_to_chars(tokens):
  """Splits tokens into unicode characters.

  Example:

    >>> opennmt.data.tokens_to_chars(["hello", "world"])
    <tf.RaggedTensor [[b'h', b'e', b'l', b'l', b'o'], [b'w', b'o', b'r', b'l', b'd']]>

  Args:
    tokens: A string ``tf.Tensor`` of shape :math:`[T]`.

  Returns:
    The characters as a 2D string ``tf.RaggedTensor``.
  """
  return tf.strings.unicode_split(tokens, "UTF-8") 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 17, Source file: text.py

Example 11: tokens_to_words

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def tokens_to_words(tokens, subword_token="■", is_spacer=None):
  """Converts a sequence of tokens to a sequence of words.

  Example:

    >>> opennmt.data.tokens_to_words(["He@@", "llo", "W@@", "orld", "@@!"], subword_token="@@")
    <tf.RaggedTensor [[b'He@@', b'llo'], [b'W@@', b'orld', b'@@!']]>

  Args:
    tokens: A 1D string ``tf.Tensor``.
    subword_token: The special token used by the subword tokenizer.
    is_spacer: Whether :obj:`subword_token` is used as a spacer (as in
      SentencePiece) or a joiner (as in BPE). If ``None``, will infer
      directly from :obj:`subword_token`.

  Returns:
    The words as a 2D string ``tf.RaggedTensor``.
  """
  if is_spacer is None:
    is_spacer = subword_token == "▁"
  if is_spacer:
    # First token implicitly starts with a spacer.
    left_and_single = tf.logical_or(
        tf.strings.regex_full_match(tokens, "%s.*" % subword_token),
        tf.one_hot(0, tf.shape(tokens)[0], on_value=True, off_value=False))
    right = tf.strings.regex_full_match(tokens, ".+%s" % subword_token)
    word_start = tf.logical_or(tf.roll(right, shift=1, axis=0), left_and_single)
  else:
    right = tf.strings.regex_full_match(tokens, ".*%s" % subword_token)
    left = tf.strings.regex_full_match(tokens, "%s.*" % subword_token)
    subword = tf.logical_or(tf.roll(right, shift=1, axis=0), left)
    word_start = tf.logical_not(subword)
  start_indices = tf.squeeze(tf.where(word_start), -1)
  return tf.RaggedTensor.from_row_starts(tokens, start_indices) 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 36, Source file: text.py
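
For the SentencePiece-style spacer case, a hedged usage sketch with made-up tokens, assuming tokens_to_words as defined above is in scope:

import tensorflow as tf

words = tokens_to_words(
    tf.constant(["▁He", "llo", "▁World", "!"]), subword_token="▁")
# is_spacer is inferred as True because the subword token is "▁":
# -> [["▁He", "llo"], ["▁World", "!"]] (as a 2-D string tf.RaggedTensor)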

Example 12: _testTokenizerOnBatchTensor

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def _testTokenizerOnBatchTensor(self, tokenizer, text, ref_tokens):
    text = tf.constant(text)
    tokens = tokenizer.tokenize(text)
    self.assertIsInstance(tokens, tf.RaggedTensor)
    self.assertAllEqual(tokens.to_list(), tf.nest.map_structure(tf.compat.as_bytes, ref_tokens)) 
Developer ID: OpenNMT, Project: OpenNMT-tf, Lines of code: 7, Source file: tokenizer_test.py

Example 13: word_count

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def word_count(tokens, name=None):
  """Find the token count of each document/row.

  `tokens` is either a `RaggedTensor` or `SparseTensor`, representing tokenized
  strings. This function simply returns the size of each row, so the dtype is not
  constrained to string.

  Args:
    tokens: either
      (1) a two-dimensional `SparseTensor`, or
      (2) a `RaggedTensor` with ragged rank of 1, non-ragged rank of 1
      of dtype `tf.string` containing tokens to be counted
    name: (Optional) A name for this operation.

  Returns:
    A one-dimensional `Tensor` of the token counts of each row.

  Raises:
    ValueError: if tokens is neither sparse nor ragged
  """
  with tf.compat.v1.name_scope(name, 'word_count'):
    if isinstance(tokens, tf.RaggedTensor):
      return tokens.row_lengths()
    elif isinstance(tokens, tf.SparseTensor):
      result = tf.sparse.reduce_sum(
          tf.SparseTensor(indices=tokens.indices,
                          values=tf.ones_like(tokens.values, dtype=tf.int64),
                          dense_shape=tokens.dense_shape),
          axis=1)
      result.set_shape([tokens.shape[0]])
      return result
    else:
      raise ValueError('Invalid token tensor') 
Developer ID: tensorflow, Project: transform, Lines of code: 35, Source file: mappers.py
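
A hedged usage sketch covering both accepted input kinds, assuming word_count as defined above is in scope:

import tensorflow as tf

ragged = tf.ragged.constant([["a", "b"], ["c"]])
print(word_count(ragged))   # [2, 1]

sparse = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                         values=["a", "b", "c"],
                         dense_shape=[2, 2])
print(word_count(sparse))   # [2, 1]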

Example 14: test_ragged_roundtrip

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def test_ragged_roundtrip(self):
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTensors in TensorInfo.')
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        input_float = tf.compat.v1.ragged.placeholder(tf.float32, ragged_rank=1,
                                                      value_shape=[])
        output = input_float / 2.0
        inputs = {'input': input_float}
        outputs = {'output': output}
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, export_path)

    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        splits = np.array([0, 2, 3], dtype=np.int64)
        values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
        input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

        # Using a computed input gives confidence that the graphs are fused
        inputs = {'input': input_ragged * 10}
        _, outputs = (
            saved_transform_io.partially_apply_saved_transform_internal(
                export_path, inputs))
        output_ragged = outputs['output']
        self.assertIsInstance(output_ragged, tf.RaggedTensor)
        result = session.run(output_ragged)

        # indices and shape unchanged; values multiplied by 10 and divided by 2
        self.assertAllEqual(splits, result.row_splits)
        self.assertEqual([5.0, 10.0, 20.0], result.values.tolist()) 
Developer ID: tensorflow, Project: transform, Lines of code: 36, Source file: saved_transform_io_test.py

Example 15: test_ragged_roundtrip

# Module to import: import tensorflow [as alias]
# Or: from tensorflow import RaggedTensor [as alias]
def test_ragged_roundtrip(self):
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTensors in TensorInfo.')
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        input_float = tf.compat.v1.ragged.placeholder(tf.float32, ragged_rank=1,
                                                      value_shape=[])
        output = input_float / 2.0
        inputs = {'input': input_float}
        outputs = {'output': output}
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, export_path)

    splits = np.array([0, 2, 3], dtype=np.int64)
    values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
    input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

    # Using a computed input gives confidence that the graphs are fused
    inputs = {'input': input_ragged * 10}
    saved_model_loader = saved_transform_io_v2.SavedModelLoader(export_path)
    outputs = saved_model_loader.apply_v1_transform_model_in_v2(inputs)
    result = outputs['output']
    self.assertIsInstance(result, tf.RaggedTensor)

    # indices and shape unchanged; values multiplied by 10 and divided by 2
    self.assertAllEqual(splits, result.row_splits)
    self.assertEqual([5.0, 10.0, 20.0], result.values.numpy().tolist()) 
Developer ID: tensorflow, Project: transform, Lines of code: 32, Source file: saved_transform_io_v2_test.py
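
The property that both roundtrip tests rely on - elementwise math on a tf.RaggedTensor only touches `values` and leaves `row_splits` unchanged - can be observed directly (hedged sketch):

import numpy as np
import tensorflow as tf

splits = np.array([0, 2, 3], dtype=np.int64)
values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
rt = tf.RaggedTensor.from_row_splits(values, splits)
out = (rt * 10) / 2.0                     # mirrors the exported x / 2 graph fed with x * 10
print(out.values.numpy().tolist())        # [5.0, 10.0, 20.0]
print(out.row_splits.numpy().tolist())    # [0, 2, 3] - row partition unchanged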


Note: The tensorflow.RaggedTensor examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as Github/MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the License of the corresponding project. Do not reproduce without permission.