本文整理匯總了Python中data.START_DECODING屬性的典型用法代碼示例。如果您正苦於以下問題:Python data.START_DECODING屬性的具體用法?Python data.START_DECODING怎麼用?Python data.START_DECODING使用的例子?那麼,這裡精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類data的用法示例。
在下文中一共展示了data.START_DECODING屬性的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: __init__
# 需要導入模塊: import data [as 別名]
# 或者: from data import START_DECODING [as 別名]
def __init__(self, article, abstract_sentences, vocab):
    """Turn one (article, abstract) pair into encoder/decoder id sequences.

    Args:
        article: source text as one string; tokenized with ``str.split()``.
        abstract_sentences: list of strings, joined with single spaces to
            form the abstract.
        vocab: object whose ``word2id(word)`` maps a token to its id.

    The truncation limits (``max_enc_steps``, ``max_dec_steps``) and the
    ``pointer_gen`` switch are read from the ``config`` name in the
    enclosing scope.
    """
    # Ids of the special tokens that open and close a decoder sequence.
    start_id = vocab.word2id(data.START_DECODING)
    stop_id = vocab.word2id(data.STOP_DECODING)

    # Encoder side: tokenize, then cap the article at max_enc_steps tokens.
    enc_tokens = article.split()
    enc_limit = config.max_enc_steps
    if len(enc_tokens) > enc_limit:
        enc_tokens = enc_tokens[:enc_limit]
    self.enc_len = len(enc_tokens)  # length after truncation, before any padding
    # Per the original notes, out-of-vocabulary words come back as the UNK id.
    self.enc_input = list(map(vocab.word2id, enc_tokens))

    # Decoder side: the abstract is the sentences joined by single spaces.
    abstract = ' '.join(abstract_sentences)
    abstract_tokens = abstract.split()
    abstract_ids = list(map(vocab.word2id, abstract_tokens))

    # Decoder input sequence and the target sequence it is trained against.
    self.dec_input, self.target = self.get_dec_inp_targ_seqs(
        abstract_ids, config.max_dec_steps, start_id, stop_id)
    self.dec_len = len(self.dec_input)

    # Pointer-generator mode needs views that use temporary per-article OOV ids.
    if config.pointer_gen:
        # Encoder ids with in-article OOVs mapped to temporary ids, plus the
        # OOV words themselves.
        self.enc_input_extend_vocab, self.article_oovs = data.article2ids(enc_tokens, vocab)

        # Reference summary expressed with the same temporary OOV ids.
        extended_abstract_ids = data.abstract2ids(abstract_tokens, vocab, self.article_oovs)

        # Only the target is replaced; dec_input deliberately keeps the
        # plain vocabulary ids (no article OOV ids).
        _, self.target = self.get_dec_inp_targ_seqs(
            extended_abstract_ids, config.max_dec_steps, start_id, stop_id)

    # Keep the raw text alongside the id sequences.
    self.original_article = article
    self.original_abstract = abstract
    self.original_abstract_sents = abstract_sentences
示例2: __init__
# 需要導入模塊: import data [as 別名]
# 或者: from data import START_DECODING [as 別名]
def __init__(self, article, abstract_sentences, vocab):
    """Create an example holding id sequences for the encoder and decoder.

    Args:
        article: source text (string); split on whitespace into tokens.
        abstract_sentences: list of abstract sentences (strings).
        vocab: vocabulary object providing ``word2id``.

    Limits and the pointer-generator flag come from ``config``, which this
    method expects to find in the enclosing scope.
    """
    word_to_id = vocab.word2id

    # Special-token ids used to frame the decoder sequences.
    start_decoding = word_to_id(data.START_DECODING)
    stop_decoding = word_to_id(data.STOP_DECODING)

    # --- Article -----------------------------------------------------
    article_words = article.split()
    if len(article_words) > config.max_enc_steps:
        # split() returned a fresh list, so trimming it in place is safe.
        del article_words[config.max_enc_steps:]
    # Length is recorded after truncation but before padding.
    self.enc_len = len(article_words)
    # Word ids; the original notes say OOVs map to the UNK token id.
    self.enc_input = [word_to_id(word) for word in article_words]

    # --- Abstract ----------------------------------------------------
    abstract = ' '.join(abstract_sentences)
    abstract_words = abstract.split()
    abs_ids = [word_to_id(word) for word in abstract_words]

    # --- Decoder input / target ----------------------------------------
    seqs = self.get_dec_inp_targ_seqs(abs_ids, config.max_dec_steps, start_decoding, stop_decoding)
    self.dec_input, self.target = seqs
    self.dec_len = len(self.dec_input)

    # --- Extra data for pointer-generator mode ---------------------------
    if config.pointer_gen:
        # Encoder input where in-article OOVs carry temporary OOV ids, and
        # the list of those OOV words.
        self.enc_input_extend_vocab, self.article_oovs = data.article2ids(article_words, vocab)
        # The reference summary re-encoded with the same temporary ids.
        abs_ids_extend_vocab = data.abstract2ids(abstract_words, vocab, self.article_oovs)
        # Replace the target (only) with the extended-vocabulary version.
        seqs = self.get_dec_inp_targ_seqs(
            abs_ids_extend_vocab, config.max_dec_steps, start_decoding, stop_decoding)
        self.target = seqs[1]

    # --- Original strings ------------------------------------------------
    self.original_article = article
    self.original_abstract = abstract
    self.original_abstract_sents = abstract_sentences
示例3: __init__
# 需要導入模塊: import data [as 別名]
# 或者: from data import START_DECODING [as 別名]
def __init__(self, review, label, vocab, hps):
    """Build one example from a review: per-sentence id lists plus raw sentences.

    Args:
        review: review text; split into sentences with ``sent_tokenize``.
        label: stored unchanged on ``self.label``.
        vocab: object whose ``word2id(word)`` maps a token to its id.
        hps: hyperparameters; ``max_enc_sen_num`` caps how many sentences
            are kept and ``max_enc_seq_len`` caps the tokens kept per
            sentence.
    """
    # Ids of the special tokens handed to get_dec_inp_targ_seqs.
    start_decoding = vocab.word2id(data.START_DECODING)
    stop_decoding = vocab.word2id(data.STOP_DECODING)

    self.hps = hps
    self.label = label

    # Keep only the leading sentences. max(..., 0) preserves the previous
    # loop's behaviour of keeping nothing when the limit is not positive.
    review_sentenc_orig = sent_tokenize(review)[:max(hps.max_enc_sen_num, 0)]

    # Whitespace-tokenize every kept sentence and cap its length.
    article_words = [sentence.split()[:hps.max_enc_seq_len]
                     for sentence in review_sentenc_orig]

    # One list of word ids per sentence; the original notes say OOVs are
    # represented by the id of the UNK token.
    abs_ids = [[vocab.word2id(w) for w in sen] for sen in article_words]

    # Decoder input and target sequences.
    # Arguments: ids, max sentence count, max sentence length, start id, stop id.
    self.dec_input, self.target = self.get_dec_inp_targ_seqs(
        abs_ids, hps.max_enc_sen_num, hps.max_enc_seq_len, start_decoding, stop_decoding)
    self.dec_len = len(self.dec_input)
    self.dec_sen_len = [len(sentence) for sentence in self.target]

    # Attribute spelling ("reivew") is kept as-is because callers may read it.
    self.original_reivew = review_sentenc_orig
示例4: __init__
# 需要導入模塊: import data [as 別名]
# 或者: from data import START_DECODING [as 別名]
def __init__(self, article, abstract_sentences, vocab, hps):
    """Tokenize and truncate one article/abstract pair and store the results on self.

    Produces the encoder input, the decoder input and the decoder target.

    Args:
        article: source text; a string whose tokens are separated by spaces.
        abstract_sentences: list of strings, one per abstract sentence, with
            space-separated tokens.
        vocab: Vocabulary object.
        hps: hyperparameters (``max_enc_steps``, ``max_dec_steps`` and
            ``pointer_gen`` are read here).
    """
    self.hps = hps

    # Special-token ids for the decoder sequences.
    start_id = vocab.word2id(data.START_DECODING)
    stop_id = vocab.word2id(data.STOP_DECODING)

    # Article: split into tokens and keep at most max_enc_steps of them.
    tokens = article.split()
    if len(tokens) > hps.max_enc_steps:
        tokens = tokens[:hps.max_enc_steps]
    self.enc_len = len(tokens)  # after truncation, before padding
    # Word ids; per the original notes OOVs become the UNK token id.
    self.enc_input = [vocab.word2id(tok) for tok in tokens]

    # Abstract: one string, then its tokens, then their ids.
    abstract = ' '.join(abstract_sentences)
    summary_tokens = abstract.split()
    summary_ids = [vocab.word2id(tok) for tok in summary_tokens]

    # Decoder input and target built from the plain vocabulary ids.
    dec_input, target = self.get_dec_inp_targ_seqs(
        summary_ids, hps.max_dec_steps, start_id, stop_id)
    self.dec_input = dec_input
    self.target = target
    self.dec_len = len(self.dec_input)

    if hps.pointer_gen:
        # Pointer-generator mode: also keep the encoder input in which
        # in-article OOVs use temporary OOV ids, and the OOV words.
        self.enc_input_extend_vocab, self.article_oovs = data.article2ids(tokens, vocab)

        # Reference summary using those temporary article OOV ids.
        summary_ids_extended = data.abstract2ids(summary_tokens, vocab, self.article_oovs)

        # The target is overwritten with the extended-id version; the
        # decoder input from above is left untouched.
        _, self.target = self.get_dec_inp_targ_seqs(
            summary_ids_extended, hps.max_dec_steps, start_id, stop_id)

    # Original strings, kept for later reference.
    self.original_article = article
    self.original_abstract = abstract
    self.original_abstract_sents = abstract_sentences
示例5: __init__
# 需要導入模塊: import data [as 別名]
# 或者: from data import START_DECODING [as 別名]
def __init__(self, article, abstract_sentences, vocab, hps):
    """Initialize the Example with encoder, decoder and target sequences.

    The article and abstract are tokenized on whitespace, the article is
    truncated, and the resulting id sequences are stored on self.

    Args:
        article: source text; a string with space-separated tokens.
        abstract_sentences: list of strings, one per abstract sentence;
            tokens within a sentence are space-separated.
        vocab: Vocabulary object.
        hps: hyperparameters.
    """
    def to_ids(words):
        # Map every word to its id via the vocabulary. The original notes
        # state that OOVs are represented by the UNK token id.
        return [vocab.word2id(word) for word in words]

    self.hps = hps

    # Ids of the special start/stop tokens.
    start_decoding = vocab.word2id(data.START_DECODING)
    stop_decoding = vocab.word2id(data.STOP_DECODING)

    # Article processing.
    article_words = article.split()
    too_long = len(article_words) > hps.max_enc_steps
    if too_long:
        article_words = article_words[:hps.max_enc_steps]
    # Stored after truncation but before padding.
    self.enc_len = len(article_words)
    self.enc_input = to_ids(article_words)

    # Abstract processing.
    abstract = ' '.join(abstract_sentences)
    abstract_words = abstract.split()
    abs_ids = to_ids(abstract_words)

    # Decoder input sequence and target sequence.
    self.dec_input, self.target = self.get_dec_inp_targ_seqs(
        abs_ids,
        hps.max_dec_steps,
        start_decoding,
        stop_decoding,
    )
    self.dec_len = len(self.dec_input)

    # Extra information that is only needed in pointer-generator mode.
    if hps.pointer_gen:
        # enc_input variant where in-article OOVs get temporary OOV ids,
        # together with the in-article OOV words themselves.
        self.enc_input_extend_vocab, self.article_oovs = data.article2ids(article_words, vocab)

        # Reference summary where in-article OOVs use their temporary ids.
        abs_ids_extend_vocab = data.abstract2ids(abstract_words, vocab, self.article_oovs)

        # Overwrite the decoder target so it uses the temporary OOV ids.
        _, self.target = self.get_dec_inp_targ_seqs(
            abs_ids_extend_vocab,
            hps.max_dec_steps,
            start_decoding,
            stop_decoding,
        )

    # Store the original strings.
    self.original_article = article
    self.original_abstract = abstract
    self.original_abstract_sents = abstract_sentences