本文整理汇总了Python中data.article2ids方法的典型用法代码示例。如果您正苦于以下问题:Python data.article2ids方法的具体用法?Python data.article2ids怎么用?Python data.article2ids使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类data
的用法示例。
在下文中一共展示了data.article2ids方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import article2ids [as 别名]
def __init__(self, article, abstract_sentences, vocab):
# Get ids of special tokens
start_decoding = vocab.word2id(data.START_DECODING)
stop_decoding = vocab.word2id(data.STOP_DECODING)
# Process the article
article_words = article.split()
if len(article_words) > config.max_enc_steps:
article_words = article_words[:config.max_enc_steps]
self.enc_len = len(article_words) # store the length after truncation but before padding
self.enc_input = [vocab.word2id(w) for w in article_words] # list of word ids; OOVs are represented by the id for UNK token
# Process the abstract
abstract = ' '.join(abstract_sentences)
abstract_words = abstract.split() # list of strings
abs_ids = [vocab.word2id(w) for w in abstract_words] # list of word ids; OOVs are represented by the id for UNK token
# Get the decoder input sequence and target sequence
self.dec_input, self.target = self.get_dec_inp_targ_seqs(abs_ids, config.max_dec_steps, start_decoding, stop_decoding)
self.dec_len = len(self.dec_input)
# If using pointer-generator mode, we need to store some extra info
if config.pointer_gen:
# Store a version of the enc_input where in-article OOVs are represented by their temporary OOV id; also store the in-article OOVs words themselves
self.enc_input_extend_vocab, self.article_oovs = data.article2ids(article_words, vocab)
# Get a verison of the reference summary where in-article OOVs are represented by their temporary article OOV id
abs_ids_extend_vocab = data.abstract2ids(abstract_words, vocab, self.article_oovs)
# Overwrite decoder target sequence so it uses the temp article OOV ids
# NOTE: dec_input does not contain article OOV ids!!!!
_, self.target = self.get_dec_inp_targ_seqs(abs_ids_extend_vocab, config.max_dec_steps, start_decoding, stop_decoding)
# Store the original strings
self.original_article = article
self.original_abstract = abstract
self.original_abstract_sents = abstract_sentences
示例2: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import article2ids [as 别名]
def __init__(self, article, abstract_sentences, vocab):
# Get ids of special tokens
start_decoding = vocab.word2id(data.START_DECODING)
stop_decoding = vocab.word2id(data.STOP_DECODING)
# Process the article
article_words = article.split()
if len(article_words) > config.max_enc_steps:
article_words = article_words[:config.max_enc_steps]
self.enc_len = len(article_words) # store the length after truncation but before padding
self.enc_input = [vocab.word2id(w) for w in article_words] # list of word ids; OOVs are represented by the id for UNK token
# Process the abstract
abstract = ' '.join(abstract_sentences) # string
abstract_words = abstract.split() # list of strings
abs_ids = [vocab.word2id(w) for w in abstract_words] # list of word ids; OOVs are represented by the id for UNK token
# Get the decoder input sequence and target sequence
self.dec_input, self.target = self.get_dec_inp_targ_seqs(abs_ids, config.max_dec_steps, start_decoding, stop_decoding)
self.dec_len = len(self.dec_input)
# If using pointer-generator mode, we need to store some extra info
if config.pointer_gen:
# Store a version of the enc_input where in-article OOVs are represented by their temporary OOV id; also store the in-article OOVs words themselves
self.enc_input_extend_vocab, self.article_oovs = data.article2ids(article_words, vocab)
# Get a verison of the reference summary where in-article OOVs are represented by their temporary article OOV id
abs_ids_extend_vocab = data.abstract2ids(abstract_words, vocab, self.article_oovs)
# Overwrite decoder target sequence so it uses the temp article OOV ids
_, self.target = self.get_dec_inp_targ_seqs(abs_ids_extend_vocab, config.max_dec_steps, start_decoding, stop_decoding)
# Store the original strings
self.original_article = article
self.original_abstract = abstract
self.original_abstract_sents = abstract_sentences
示例3: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import article2ids [as 别名]
def __init__(self, article, abstract_sentences, vocab, hps):
"""Initializes the Example, performing tokenization and truncation to produce the encoder, decoder and target sequences, which are stored in self.
Args:
article: source text; a string. each token is separated by a single space.
abstract_sentences: list of strings, one per abstract sentence. In each sentence, each token is separated by a single space.
vocab: Vocabulary object
hps: hyperparameters
"""
self.hps = hps
# Get ids of special tokens
start_decoding = vocab.word2id(data.START_DECODING)
stop_decoding = vocab.word2id(data.STOP_DECODING)
# Process the article
article_words = article.split()
if len(article_words) > hps.max_enc_steps:
article_words = article_words[:hps.max_enc_steps]
self.enc_len = len(article_words) # store the length after truncation but before padding
self.enc_input = [vocab.word2id(w) for w in article_words] # list of word ids; OOVs are represented by the id for UNK token
# Process the abstract
abstract = ' '.join(abstract_sentences) # string
abstract_words = abstract.split() # list of strings
abs_ids = [vocab.word2id(w) for w in abstract_words] # list of word ids; OOVs are represented by the id for UNK token
# Get the decoder input sequence and target sequence
self.dec_input, self.target = self.get_dec_inp_targ_seqs(abs_ids, hps.max_dec_steps, start_decoding, stop_decoding)
self.dec_len = len(self.dec_input)
# If using pointer-generator mode, we need to store some extra info
if hps.pointer_gen:
# Store a version of the enc_input where in-article OOVs are represented by their temporary OOV id; also store the in-article OOVs words themselves
self.enc_input_extend_vocab, self.article_oovs = data.article2ids(article_words, vocab)
# Get a verison of the reference summary where in-article OOVs are represented by their temporary article OOV id
abs_ids_extend_vocab = data.abstract2ids(abstract_words, vocab, self.article_oovs)
# Overwrite decoder target sequence so it uses the temp article OOV ids
_, self.target = self.get_dec_inp_targ_seqs(abs_ids_extend_vocab, hps.max_dec_steps, start_decoding, stop_decoding)
# Store the original strings
self.original_article = article
self.original_abstract = abstract
self.original_abstract_sents = abstract_sentences
示例4: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import article2ids [as 别名]
def __init__(self, article, abstract_sentences, vocab, hps):
"""Initializes the Example, performing tokenization and truncation to produce the encoder, decoder and target sequences, which are stored in self.
Args:
article: source text; a string. each token is separated by a single space.
abstract_sentences: list of strings, one per abstract sentence. In each sentence, each token is separated by a single space.
vocab: Vocabulary object
hps: hyperparameters
"""
self.hps = hps
# Get ids of special tokens
start_decoding = vocab.word2id(data.START_DECODING)
stop_decoding = vocab.word2id(data.STOP_DECODING)
# Process the article
article_words = article.split()
if len(article_words) > hps.max_enc_steps:
article_words = article_words[:hps.max_enc_steps]
self.enc_len = len(article_words) # store the length after truncation but before padding
self.enc_input = [vocab.word2id(w) for w in
article_words] # list of word ids; OOVs are represented by the id for UNK token
# Process the abstract
abstract = ' '.join(abstract_sentences) # string
abstract_words = abstract.split() # list of strings
abs_ids = [vocab.word2id(w) for w in
abstract_words] # list of word ids; OOVs are represented by the id for UNK token
# Get the decoder input sequence and target sequence
self.dec_input, self.target = self.get_dec_inp_targ_seqs(abs_ids, hps.max_dec_steps, start_decoding,
stop_decoding)
self.dec_len = len(self.dec_input)
# If using pointer-generator mode, we need to store some extra info
if hps.pointer_gen:
# Store a version of the enc_input where in-article OOVs are represented by their temporary OOV id; also store the in-article OOVs words themselves
self.enc_input_extend_vocab, self.article_oovs = data.article2ids(article_words, vocab)
# Get a verison of the reference summary where in-article OOVs are represented by their temporary article OOV id
abs_ids_extend_vocab = data.abstract2ids(abstract_words, vocab, self.article_oovs)
# Overwrite decoder target sequence so it uses the temp article OOV ids
_, self.target = self.get_dec_inp_targ_seqs(abs_ids_extend_vocab, hps.max_dec_steps, start_decoding,
stop_decoding)
# Store the original strings
self.original_article = article
self.original_abstract = abstract
self.original_abstract_sents = abstract_sentences