當前位置: 首頁>>代碼示例>>Python>>正文


Python special_words.EOS屬性代碼示例

本文整理匯總了Python中skip_thoughts.data.special_words.EOS屬性的典型用法代碼示例。如果您正苦於以下問題:Python special_words.EOS屬性的具體用法?Python special_words.EOS怎麽用?Python special_words.EOS使用的例子?那麽, 這裏精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在skip_thoughts.data.special_words的用法示例。


在下文中一共展示了special_words.EOS屬性的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: _preprocess

# 需要導入模塊: from skip_thoughts.data import special_words [as 別名]
# 或者: from skip_thoughts.data.special_words import EOS [as 別名]
def _preprocess(self, data, use_eos):
    """Converts raw input strings into sequences of word embeddings.

    Each string is tokenized with `self._tokenize`; when requested, the
    end-of-sentence word is appended to the token list, and every token is
    then mapped through `self._word_to_embedding`.

    Args:
      data: A list of input strings.
      use_eos: Whether to append the end-of-sentence word to each sentence.

    Returns:
      embeddings: A list with one entry per input string, each entry being the
        list of word embeddings for that string's tokens.
    """
    def _embed_sentence(sentence):
      # The token list is extended in place, exactly as the tokenizer
      # returned it.
      words = self._tokenize(sentence)
      if use_eos:
        words.append(special_words.EOS)
      return [self._word_to_embedding(word) for word in words]

    return [_embed_sentence(sentence) for sentence in data]
開發者ID:ringringyi,項目名稱:DOTA_models,代碼行數:20,代碼來源:skip_thoughts_encoder.py

示例2: _build_vocabulary

# 需要導入模塊: from skip_thoughts.data import special_words [as 別名]
# 或者: from skip_thoughts.data.special_words import EOS [as 別名]
def _build_vocabulary(input_files):
  """Loads or builds the model vocabulary.

  If FLAGS.vocab_file is set, the vocabulary is read from that file: one word
  per line, with the zero-based line number used as the word id.

  Otherwise word frequencies are counted over `input_files`, the special words
  EOS and UNK are given their reserved ids, and the FLAGS.num_words - 2 most
  frequent words receive ids starting at 2. In that case "vocab.txt" and
  "word_counts.txt" are also written to FLAGS.output_dir.

  Args:
    input_files: List of pre-tokenized input .txt files.

  Returns:
    vocab: A dictionary of word to id.
  """
  if FLAGS.vocab_file:
    tf.logging.info("Loading existing vocab file.")
    vocab = collections.OrderedDict()
    with tf.gfile.GFile(FLAGS.vocab_file, mode="r") as f:
      for i, line in enumerate(f):
        # The line may arrive as bytes (Python 2) or as already-decoded text
        # (Python 3); only bytes need decoding.
        if isinstance(line, bytes):
          line = line.decode("utf-8")
        word = line.strip()
        assert word not in vocab, "Attempting to add word twice: %s" % word
        vocab[word] = i
    tf.logging.info("Read vocab of size %d from %s",
                    len(vocab), FLAGS.vocab_file)
    return vocab

  tf.logging.info("Creating vocabulary.")
  num = 0
  wordcount = collections.Counter()
  for input_file in input_files:
    tf.logging.info("Processing file: %s", input_file)
    # Use a context manager so each input file is closed once it is counted.
    with tf.gfile.FastGFile(input_file) as f:
      for sentence in f:
        wordcount.update(sentence.split())

        num += 1
        if num % 1000000 == 0:
          tf.logging.info("Processed %d sentences", num)

  tf.logging.info("Processed %d sentences total", num)

  # Materialize as lists: on Python 3, keys()/values() are views that can
  # neither be indexed nor be handed to np.argsort as a sequence.
  words = list(wordcount.keys())
  freqs = list(wordcount.values())
  sorted_indices = np.argsort(freqs)[::-1]  # Most frequent first.

  vocab = collections.OrderedDict()
  vocab[special_words.EOS] = special_words.EOS_ID
  vocab[special_words.UNK] = special_words.UNK_ID
  for w_id, w_index in enumerate(sorted_indices[0:FLAGS.num_words - 2]):
    vocab[words[w_index]] = w_id + 2  # 0: EOS, 1: UNK.

  tf.logging.info("Created vocab with %d words", len(vocab))

  vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
  with tf.gfile.FastGFile(vocab_file, "w") as f:
    f.write("\n".join(vocab.keys()))
  tf.logging.info("Wrote vocab file to %s", vocab_file)

  # The counts file lists every word seen, not only those kept in the vocab.
  word_counts_file = os.path.join(FLAGS.output_dir, "word_counts.txt")
  with tf.gfile.FastGFile(word_counts_file, "w") as f:
    for i in sorted_indices:
      f.write("%s %d\n" % (words[i], freqs[i]))
  tf.logging.info("Wrote word counts file to %s", word_counts_file)

  return vocab
開發者ID:ringringyi,項目名稱:DOTA_models,代碼行數:61,代碼來源:preprocess_dataset.py

示例3: _process_input_file

# 需要導入模塊: from skip_thoughts.data import special_words [as 別名]
# 或者: from skip_thoughts.data.special_words import EOS [as 別名]
def _process_input_file(filename, vocab, stats):
  """Processes the sentences in an input file.

  Slides a window of three consecutive sentences (predecessor, current,
  successor) over the file and serializes each window with
  `_create_serialized_example`. A window is skipped when any of its sentences
  is empty, which also covers the first two lines of the file, or when any
  sentence has FLAGS.max_sentence_length words or more.

  Args:
    filename: Path to a pre-tokenized input .txt file.
    vocab: A dictionary of word to id.
    stats: A Counter object for statistics. The keys "sentences_seen",
      "sentences_considered", "sentences_too_long" and "sentences_output" are
      incremented here. The FLAGS.max_sentences limit is checked against
      stats["sentences_output"], so counts already present in `stats` count
      towards that limit.

  Returns:
    processed: A list of serialized Example protos
  """
  tf.logging.info("Processing input file: %s", filename)
  processed = []

  predecessor = None  # Predecessor sentence (list of words).
  current = None  # Current sentence (list of words).

  # Open through a context manager so the file is closed on every exit path,
  # including the early break once FLAGS.max_sentences is reached.
  with tf.gfile.FastGFile(filename) as input_file:
    for successor_str in input_file:
      stats.update(["sentences_seen"])
      successor = successor_str.split()  # Successor sentence (list of words).

      # The first 2 sentences per file will be skipped.
      if predecessor and current and successor:
        stats.update(["sentences_considered"])

        # Note that we are going to insert <EOS> later, so we only allow
        # sentences with strictly less than max_sentence_length to pass.
        if FLAGS.max_sentence_length and (
            len(predecessor) >= FLAGS.max_sentence_length or
            len(current) >= FLAGS.max_sentence_length or
            len(successor) >= FLAGS.max_sentence_length):
          stats.update(["sentences_too_long"])
        else:
          serialized = _create_serialized_example(predecessor, current,
                                                  successor, vocab)
          processed.append(serialized)
          stats.update(["sentences_output"])

      # Shift the window forward by one sentence.
      predecessor = current
      current = successor

      sentences_seen = stats["sentences_seen"]
      sentences_output = stats["sentences_output"]
      if sentences_seen and sentences_seen % 100000 == 0:
        tf.logging.info("Processed %d sentences (%d output)", sentences_seen,
                        sentences_output)
      if FLAGS.max_sentences and sentences_output >= FLAGS.max_sentences:
        break

  tf.logging.info("Completed processing file %s", filename)
  return processed
開發者ID:ringringyi,項目名稱:DOTA_models,代碼行數:54,代碼來源:preprocess_dataset.py

示例4: _build_vocabulary

# 需要導入模塊: from skip_thoughts.data import special_words [as 別名]
# 或者: from skip_thoughts.data.special_words import EOS [as 別名]
def _build_vocabulary(input_files):
  """Loads or builds the model vocabulary.

  If FLAGS.vocab_file is set, the vocabulary is read from that file: one word
  per line, with the zero-based line number used as the word id.

  Otherwise word frequencies are counted over `input_files`, the special words
  EOS and UNK are given their reserved ids, and the FLAGS.num_words - 2 most
  frequent words receive ids starting at 2. In that case "vocab.txt" and
  "word_counts.txt" are also written to FLAGS.output_dir.

  Args:
    input_files: List of pre-tokenized input .txt files.

  Returns:
    vocab: A dictionary of word to id.
  """
  if FLAGS.vocab_file:
    tf.logging.info("Loading existing vocab file.")
    vocab = collections.OrderedDict()
    with tf.gfile.GFile(FLAGS.vocab_file, mode="r") as f:
      for i, line in enumerate(f):
        # The line may arrive as bytes (Python 2) or as already-decoded text
        # (Python 3); only bytes need decoding.
        if isinstance(line, bytes):
          line = line.decode("utf-8")
        word = line.strip()
        assert word not in vocab, "Attempting to add word twice: %s" % word
        vocab[word] = i
    tf.logging.info("Read vocab of size %d from %s",
                    len(vocab), FLAGS.vocab_file)
    return vocab

  tf.logging.info("Creating vocabulary.")
  num = 0
  wordcount = collections.Counter()
  for input_file in input_files:
    tf.logging.info("Processing file: %s", input_file)
    # Use a context manager so each input file is closed once it is counted.
    with tf.gfile.FastGFile(input_file) as f:
      for sentence in f:
        wordcount.update(sentence.split())

        num += 1
        if num % 1000000 == 0:
          tf.logging.info("Processed %d sentences", num)

  tf.logging.info("Processed %d sentences total", num)

  # Lists (not dict views) so the entries can be indexed by position below.
  words = list(wordcount)
  freqs = list(wordcount.values())
  sorted_indices = np.argsort(freqs)[::-1]  # Most frequent first.

  vocab = collections.OrderedDict()
  vocab[special_words.EOS] = special_words.EOS_ID
  vocab[special_words.UNK] = special_words.UNK_ID
  for w_id, w_index in enumerate(sorted_indices[0:FLAGS.num_words - 2]):
    vocab[words[w_index]] = w_id + 2  # 0: EOS, 1: UNK.

  tf.logging.info("Created vocab with %d words", len(vocab))

  vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
  with tf.gfile.FastGFile(vocab_file, "w") as f:
    f.write("\n".join(vocab.keys()))
  tf.logging.info("Wrote vocab file to %s", vocab_file)

  # The counts file lists every word seen, not only those kept in the vocab.
  word_counts_file = os.path.join(FLAGS.output_dir, "word_counts.txt")
  with tf.gfile.FastGFile(word_counts_file, "w") as f:
    for i in sorted_indices:
      f.write("%s %d\n" % (words[i], freqs[i]))
  tf.logging.info("Wrote word counts file to %s", word_counts_file)

  return vocab
開發者ID:generalized-iou,項目名稱:g-tensorflow-models,代碼行數:61,代碼來源:preprocess_dataset.py


注:本文中的skip_thoughts.data.special_words.EOS屬性示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。