本文整理匯總了Python中fuel.schemes.ConstantScheme類的典型用法代碼示例。如果您正苦於以下問題:Python schemes.ConstantScheme類的具體用法?Python schemes.ConstantScheme怎麼用?Python schemes.ConstantScheme使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊fuel.schemes的用法示例。
在下文中一共展示了schemes.ConstantScheme類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: setup_datastream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def setup_datastream(path, vocab_file, config):
    """Build the QA dataset and a padded batch stream over it.

    The following attributes are read from ``config``: ``n_entities``,
    ``concat_ctx_and_question``, ``shuffle_questions``,
    ``concat_question_before``, ``batch_size`` and ``sort_batch_count``.

    Returns:
        A ``(dataset, stream)`` tuple.
    """
    concat = config.concat_ctx_and_question

    dataset = QADataset(path, vocab_file, config.n_entities,
                        need_sep_token=concat)
    question_order = QAIterator(path, shuffle=config.shuffle_questions)
    stream = DataStream(dataset, iteration_scheme=question_order)

    if concat:
        sep_token = dataset.reverse_vocab['<SEP>']
        stream = ConcatCtxAndQuestion(
            stream, config.concat_question_before, sep_token)

    # Gather several batches' worth of examples, sort that group, then cut it
    # back into regular batches so that each batch holds similarly sized items.
    group_size = config.batch_size * config.sort_batch_count
    stream = Batch(stream, iteration_scheme=ConstantScheme(group_size))
    sort_source = 'question' if concat else 'context'
    comparison = _balanced_batch_helper(stream.sources.index(sort_source))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)
    stream = Batch(stream, iteration_scheme=ConstantScheme(config.batch_size))

    stream = Padding(stream,
                     mask_sources=['context', 'question', 'candidates'],
                     mask_dtype='int32')
    return dataset, stream
示例2: setup_datastream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    """Load the ``.npy`` arrays under ``path`` and build a padded batch stream.

    Args:
        path: Directory holding the ``train_*`` / ``valid_*`` ``.npy`` files.
        batch_size: Number of examples in each final batch.
        sort_batch_count: Number of batches gathered together before sorting.
        valid: Load the ``valid_*`` files instead of the ``train_*`` ones.

    Returns:
        A ``(dataset, stream)`` tuple.
    """
    if valid:
        x_name = 'valid_x_raw.npy'
        phn_name = 'valid_phn.npy'
        seq_name = 'valid_seq_to_phn.npy'
    else:
        x_name = 'train_x_raw.npy'
        phn_name = 'train_phn.npy'
        seq_name = 'train_seq_to_phn.npy'

    x_raw = numpy.load(os.path.join(path, x_name))
    phn = numpy.load(os.path.join(path, phn_name))
    seq_to_phn = numpy.load(os.path.join(path, seq_name))

    # Every row of seq_to_phn gives a start/stop row range into phn; only
    # column 2 of the selected rows is kept as that example's output.
    outputs = [phn[bounds[0]:bounds[1], 2] for bounds in seq_to_phn]
    n_examples = len(x_raw)

    dataset = IndexableDataset({'input': x_raw, 'output': outputs})
    stream = DataStream(dataset,
                        iteration_scheme=ShuffledExampleScheme(n_examples))

    # Sort groups of several batches so each final batch has similar inputs.
    group_scheme = ConstantScheme(batch_size * sort_batch_count)
    stream = Batch(stream, iteration_scheme=group_scheme)
    comparison = _balanced_batch_helper(stream.sources.index('input'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)

    batch_scheme = ConstantScheme(batch_size, num_examples=n_examples)
    stream = Batch(stream, iteration_scheme=batch_scheme)
    stream = Padding(stream, mask_sources=['input', 'output'])
    return dataset, stream
示例3: obtain_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def obtain_stream(dataset, batch_size, size=1):
    """Return one padded batch stream, or a list of ``size`` such streams.

    Args:
        dataset: Object providing ``get_example_stream()``.
        batch_size: Number of examples per batch.
        size: When 1, a single stream is returned; otherwise a list holding
            ``size`` independently built streams is returned.

    Returns:
        A padded stream, or a list of padded streams.
    """
    def _build_stream():
        # One complete pipeline: examples -> fixed-size batches -> padding.
        stream = dataset.get_example_stream()
        stream = transformers.Batch(
            stream, iteration_scheme=schemes.ConstantScheme(batch_size))
        # mask_sources must be a collection of source names. The previous
        # ``('data')`` was just the string 'data', which only worked because
        # membership tests on a string do substring matching.
        return transformers.Padding(stream, mask_sources=('data',))

    if size == 1:
        return _build_stream()
    return [_build_stream() for _ in range(size)]
示例4: output_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def output_stream(dataset, batch_size, size=1):
    """Return the dataset's example stream grouped into fixed-size batches.

    No padding is applied. ``size`` is accepted but not used.
    """
    examples = dataset.get_example_stream()
    # Padding is intentionally left out: per the original author's warning,
    # with multiple outputs it raises "ValueError: All dimensions except
    # length must be equal", so padding has to be done manually by the caller.
    return transformers.Batch(
        examples, iteration_scheme=schemes.ConstantScheme(batch_size))
示例5: test_example_iteration_scheme
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def test_example_iteration_scheme(self):
    """A dataset exposes its class-level ``_example_iteration_scheme`` as-is."""
    expected_scheme = ConstantScheme(2)

    class MinimalDataset(Dataset):
        provides_sources = ('data',)
        _example_iteration_scheme = expected_scheme

        def get_data(self, state=None, request=None):
            # Never called by this test; only needed to complete the class.
            return None

    dataset = MinimalDataset()
    assert dataset.example_iteration_scheme is expected_scheme
示例6: setup_datastream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def setup_datastream(batch_size, **kwargs):
    """Create a ``ToyDataset`` and a padded batch stream over it.

    Args:
        batch_size: Number of examples per batch.
        **kwargs: Forwarded to ``ToyDataset``; must contain ``nb_examples``,
            which is also used as the length of the sequential scheme.

    Returns:
        A ``(dataset, stream)`` tuple.
    """
    dataset = ToyDataset(**kwargs)
    example_order = SequentialExampleScheme(kwargs['nb_examples'])
    examples = DataStream(dataset, iteration_scheme=example_order)
    batches = Batch(examples, iteration_scheme=ConstantScheme(batch_size))
    padded = Padding(batches, mask_sources=['input', 'output'])
    return dataset, padded
示例7: get_data_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def get_data_stream(iterable):
    """Return a ``Batch`` stream pairing numbers with their square roots.

    ``iterable`` is converted to a ``floatX`` array and exposed as the
    ``numbers`` source; its element-wise square root is the ``roots`` source.
    Each iteration of the returned stream yields a batch of 20 examples.
    """
    numbers = numpy.asarray(iterable, dtype=floatX)
    roots = numpy.sqrt(numbers)
    dataset = IterableDataset({'numbers': numbers, 'roots': roots})
    examples = dataset.get_example_stream()
    return Batch(examples, ConstantScheme(20))
示例8: output_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def output_stream(dataset, batch_size, size=1):
    """Return the dataset's example stream, batched and padded.

    Examples are grouped into batches of ``batch_size`` and padding with
    masks is requested for the ``source`` and ``target`` sources.
    ``size`` is accepted but not used.
    """
    examples = dataset.get_example_stream()
    batched = transformers.Batch(
        examples, iteration_scheme=schemes.ConstantScheme(batch_size))
    # Add padding and masks for the listed sources.
    return transformers.Padding(batched, mask_sources=('source', 'target'))
示例9: output_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def output_stream(dataset, batch_size, size=1):
    """Return the dataset's example stream, batched and padded.

    Examples are grouped into batches of ``batch_size`` and padding with
    masks is requested for the ``source``, ``target`` and ``target_c``
    sources. ``size`` is accepted but not used.
    """
    examples = dataset.get_example_stream()
    batched = transformers.Batch(
        examples, iteration_scheme=schemes.ConstantScheme(batch_size))
    # Add padding and masks for the listed sources.
    return transformers.Padding(
        batched, mask_sources=('source', 'target', 'target_c'))
示例10: obtain_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def obtain_stream(dataset, batch_size, size=1):
    """Return one padded batch stream, or a list of ``size`` such streams.

    Args:
        dataset: Object providing ``get_example_stream()``.
        batch_size: Number of examples per batch.
        size: When 1, a single stream is returned; otherwise a list holding
            ``size`` independently built streams is returned.

    Returns:
        A padded stream, or a list of padded streams.
    """
    def _build_stream():
        # One complete pipeline: examples -> fixed-size batches -> padding.
        stream = dataset.get_example_stream()
        stream = transformers.Batch(
            stream, iteration_scheme=schemes.ConstantScheme(batch_size))
        # mask_sources must be a collection of source names. The previous
        # ``('data')`` was just the string 'data', which only worked because
        # membership tests on a string do substring matching.
        return transformers.Padding(stream, mask_sources=('data',))

    if size == 1:
        return _build_stream()
    # ``range`` replaces the Python-2-only ``xrange``; it works on both
    # Python 2 and Python 3.
    return [_build_stream() for _ in range(size)]
示例11: test_data_driven_epochs
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def test_data_driven_epochs():
    """Epoch boundaries are driven by the dataset, not by the scheme.

    The dataset below holds two inner lists; each one is served as one epoch,
    and after the last one ``next_epoch`` starts over from the first.
    """
    class TestDataset(IterableDataset):
        sources = ('data',)

        def __init__(self):
            self.axis_labels = None
            self.data = [[1, 2, 3, 4],
                         [5, 6, 7, 8]]

        def open(self):
            # State is (iterator over epochs, iterator over current epoch).
            epoch_iter = iter(self.data)
            data_iter = iter(next(epoch_iter))
            return (epoch_iter, data_iter)

        def next_epoch(self, state):
            epoch_iter = state[0]
            try:
                data_iter = iter(next(epoch_iter))
            except StopIteration:
                # All epochs consumed: wrap around to the first one.
                return self.open()
            return (epoch_iter, data_iter)

        def get_data(self, state, request):
            # ``request`` is the number of items to pull; StopIteration from
            # an exhausted epoch iterator propagates to the caller.
            data_iter = state[1]
            data = [next(data_iter) for _ in range(request)]
            return (data,)

    single_item_epochs = [
        [([1],), ([2],), ([3],), ([4],)],
        [([5],), ([6],), ([7],), ([8],)],
    ]
    stream = DataStream(TestDataset(), iteration_scheme=ConstantScheme(1))
    # Third pass wraps around to the first epoch again.
    for expected in (single_item_epochs[0],
                     single_item_epochs[1],
                     single_item_epochs[0]):
        assert list(stream.get_epoch_iterator()) == expected

    stream.reset()
    for expected, epoch in zip(single_item_epochs, stream.iterate_epochs()):
        assert list(epoch) == expected

    # test scheme resetting between epochs
    class TestScheme(BatchSizeScheme):
        def get_request_iterator(self):
            return iter([1, 2, 1, 3])

    variable_size_epochs = [
        [([1],), ([2, 3],), ([4],)],
        [([5],), ([6, 7],), ([8],)],
    ]
    stream = DataStream(TestDataset(), iteration_scheme=TestScheme())
    for expected, epoch in zip(variable_size_epochs, stream.iterate_epochs()):
        assert list(epoch) == expected
示例12: get_src_trg_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def get_src_trg_stream(cg, config, src_datasets=None, trg_datasets=None,
                       is_training=True, src_vocabs=None, trg_vocabs=None,
                       logprob_datasets=None):
    """Build the masked source/target stream for computational graph ``cg``.

    Args:
        cg: Key identifying the encoder/decoder pair; ``p_(cg)`` yields the
            ``(eid, did)`` ids used to index the per-side config entries.
        config: Mapping with vocab sizes, eos indices, batch sizes, etc.
        src_datasets, trg_datasets: Per-``cg`` datasets, used when training.
        is_training: Selects the training pipeline (filtering and sorted
            batches) or the log-probability pipeline (plain batches).
        src_vocabs, trg_vocabs: Vocabularies, used for the logprob pipeline.
        logprob_datasets: Per-``cg`` (source, target) file pair, used for the
            logprob pipeline.

    Returns:
        The padded stream after ``_remapWordIdx`` has been applied.
    """
    eid, did = p_(cg)

    def _map_oov(examples):
        # Both pipelines replace out-of-vocabulary ids in the same way.
        return Mapping(examples, _oov_to_unk(
            src_vocab_size=config['src_vocab_sizes'][eid],
            trg_vocab_size=config['trg_vocab_sizes'][did],
            unk_id=config['unk_id']))

    if is_training:
        src_set = src_datasets[cg]
        logger.info(' ... src:[{}] - [{}]'.format(eid, src_set.files[0]))
        trg_set = trg_datasets[cg]
        logger.info(' ... trg:[{}] - [{}]'.format(did, trg_set.files[0]))

        stream = Merge([src_set.get_example_stream(),
                        trg_set.get_example_stream()],
                       ('source', 'target'))

        too_long = _too_long(config['src_seq_len'], config['tgt_seq_len'])
        stream = Filter(stream, predicate=too_long)
        if 'min_seq_lens' in config and config['min_seq_lens'][cg] > 0:
            too_short = _too_short(config['min_seq_lens'][cg])
            stream = Filter(stream, predicate=too_short)

        stream = _map_oov(stream)

        # Read several batches ahead, sort them with ``_length``, then
        # re-split into batches of the configured size.
        read_ahead = ConstantScheme(
            config['batch_sizes'][cg] * config['sort_k_batches'])
        stream = Batch(stream, iteration_scheme=read_ahead)
        stream = Mapping(stream, SortMapping(_length))
        stream = Unpack(stream)
        stream = Batch(
            stream,
            iteration_scheme=ConstantScheme(config['batch_sizes'][cg]))
    else:  # logprob stream
        src_dataset = TextFile([logprob_datasets[cg][0]],
                               src_vocabs[p_(cg)[0]], None)
        trg_dataset = TextFile([logprob_datasets[cg][1]],
                               trg_vocabs[p_(cg)[1]], None)
        stream = Merge([src_dataset.get_example_stream(),
                        trg_dataset.get_example_stream()],
                       ('source', 'target'))
        stream = _map_oov(stream)

        # Batch size: 100 unless config['log_prob_bs'] is set, in which case
        # it is either a per-cg dict or a single value.
        log_prob_bs = config['log_prob_bs'] if 'log_prob_bs' in config else 100
        if isinstance(log_prob_bs, dict):
            bs = log_prob_bs[cg]
        else:
            bs = log_prob_bs
        stream = Batch(stream, iteration_scheme=ConstantScheme(bs))

    # Shared tail of both pipelines.
    # NOTE(review): indices 0 and 2 presumably address the padded source and
    # target arrays (1 and 3 being their masks) -- confirm with _remapWordIdx.
    remap = _remapWordIdx(
        [(0, 0, config['src_eos_idxs'][eid]),
         (2, 0, config['trg_eos_idxs'][did])])
    masked_stream = Mapping(Padding(stream), remap)
    return masked_stream
示例13: get_logprob_streams
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def get_logprob_streams(config):
    """Build one masked log-probability stream per computational graph.

    Args:
        config: Mapping with ``cgs``, ``src_vocabs``/``trg_vocabs`` (paths to
            pickled vocabularies), eos indices, vocab sizes, ``unk_id`` and
            optionally ``log_prob_sets`` and ``log_prob_bs``.

    Returns:
        ``None`` when ``config`` has no ``log_prob_sets`` entry; otherwise a
        dict mapping each ``cg`` present in ``config['log_prob_sets']`` to
        its masked stream.
    """
    if 'log_prob_sets' not in config:
        return None

    def _load_vocabs(paths, eos_idxs):
        # Load each pickled vocabulary and force the special tokens in.
        # The file is closed as soon as it has been read (it used to leak).
        # NOTE: pickle must only be used on trusted vocabulary files.
        # NOTE(review): opened in the default text mode, as before; a
        # Python 3 port would need mode 'rb'.
        vocabs = {}
        for key, vocab_path in paths.items():
            with open(vocab_path) as vocab_file:
                vocab = cPickle.load(vocab_file)
            vocab['<S>'] = 0
            vocab['</S>'] = eos_idxs[key]
            vocab['<UNK>'] = config['unk_id']
            vocabs[key] = vocab
        return vocabs

    cgs = config['cgs']
    # NOTE(review): the returned ids are not used below; the call is kept in
    # case it validates ``cgs`` -- confirm and drop if it has no side effects.
    enc_ids, dec_ids = get_enc_dec_ids(cgs)
    datasets = config['log_prob_sets']

    # Prepare source and target vocabs, make sure special tokens are there
    src_vocabs = _load_vocabs(config['src_vocabs'], config['src_eos_idxs'])
    trg_vocabs = _load_vocabs(config['trg_vocabs'], config['trg_eos_idxs'])

    # Build the preprocessing pipeline for individual streams
    ind_streams = {}
    for cg in cgs:
        eid, did = p_(cg)
        if cg not in datasets:
            continue
        logger.info('Building logprob stream for cg:[{}]'.format(cg))
        src_dataset = TextFile([datasets[cg][0]], src_vocabs[p_(cg)[0]], None)
        trg_dataset = TextFile([datasets[cg][1]], trg_vocabs[p_(cg)[1]], None)
        stream = Merge([src_dataset.get_example_stream(),
                        trg_dataset.get_example_stream()],
                       ('source', 'target'))

        stream = Mapping(stream, _oov_to_unk(
            src_vocab_size=config['src_vocab_sizes'][eid],
            trg_vocab_size=config['trg_vocab_sizes'][did],
            unk_id=config['unk_id']))

        # Batch size: 100 unless config['log_prob_bs'] is set, in which case
        # it is either a per-cg dict or a single value.
        bs = 100
        if 'log_prob_bs' in config:
            if isinstance(config['log_prob_bs'], dict):
                bs = config['log_prob_bs'][cg]
            else:
                bs = config['log_prob_bs']

        stream = Batch(
            stream,
            iteration_scheme=ConstantScheme(
                bs, num_examples=get_num_lines(datasets[cg][0])))

        masked_stream = Padding(stream)
        masked_stream = Mapping(
            masked_stream, _remapWordIdx(
                [(0, 0, config['src_eos_idxs'][eid]),
                 (2, 0, config['trg_eos_idxs'][did])]))
        ind_streams[cg] = masked_stream

    return ind_streams
示例14: get_log_prob_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def get_log_prob_stream(cg, config):
    """Build the masked log-probability stream for computational graph ``cg``.

    Args:
        cg: Key identifying the encoder/decoder pair; ``p_(cg)`` yields the
            ``(eid, did)`` ids used to index the per-side config entries.
        config: Mapping with ``log_prob_sets``, ``src_vocabs``/``trg_vocabs``
            (paths to pickled vocabularies), eos indices, vocab sizes,
            ``unk_id`` and optionally ``log_prob_bs``.

    Returns:
        The padded stream after ``_remapWordIdx`` has been applied.
    """
    eid, did = p_(cg)
    dataset = config['log_prob_sets'][cg]

    # Prepare source vocabs and files, make sure special tokens are there.
    # The file is closed right after loading (it used to be left open).
    # NOTE: pickle must only be used on trusted vocabulary files.
    with open(config['src_vocabs'][eid]) as vocab_file:
        src_vocab = cPickle.load(vocab_file)
    src_vocab['<S>'] = 0
    src_vocab['</S>'] = config['src_eos_idxs'][eid]
    src_vocab['<UNK>'] = config['unk_id']

    # Prepare target vocabs and files, make sure special tokens are there
    with open(config['trg_vocabs'][did]) as vocab_file:
        trg_vocab = cPickle.load(vocab_file)
    trg_vocab['<S>'] = 0
    trg_vocab['</S>'] = config['trg_eos_idxs'][did]
    trg_vocab['<UNK>'] = config['unk_id']

    # Build the preprocessing pipeline for individual streams
    logger.info('Building logprob stream for cg:[{}]'.format(cg))
    src_dataset = TextFile([dataset[0]], src_vocab, None)
    trg_dataset = TextFile([dataset[1]], trg_vocab, None)
    stream = Merge([src_dataset.get_example_stream(),
                    trg_dataset.get_example_stream()],
                   ('source', 'target'))

    stream = Mapping(stream, _oov_to_unk(
        src_vocab_size=config['src_vocab_sizes'][eid],
        trg_vocab_size=config['trg_vocab_sizes'][did],
        unk_id=config['unk_id']))

    # Batch size: 100 unless config['log_prob_bs'] is set, in which case it
    # is either a per-cg dict or a single value.
    bs = 100
    if 'log_prob_bs' in config:
        if isinstance(config['log_prob_bs'], dict):
            bs = config['log_prob_bs'][cg]
        else:
            bs = config['log_prob_bs']

    stream = Batch(
        stream,
        iteration_scheme=ConstantScheme(
            bs, num_examples=get_num_lines(dataset[0])))

    masked_stream = Padding(stream)
    masked_stream = Mapping(
        masked_stream, _remapWordIdx(
            [(0, 0, config['src_eos_idxs'][eid]),
             (2, 0, config['trg_eos_idxs'][did])]))
    return masked_stream
示例15: get_tr_stream
# 需要導入模塊: from fuel import schemes [as 別名]
# 或者: from fuel.schemes import ConstantScheme [as 別名]
def get_tr_stream(src_vocab, trg_vocab, src_data, trg_data,
                  src_vocab_size=30000, trg_vocab_size=30000, unk_id=1,
                  seq_len=50, batch_size=80, sort_k_batches=12, **kwargs):
    """Prepares the training data stream.

    Args:
        src_vocab, trg_vocab: Either a vocabulary dict or the path to a
            pickled one.
        src_data, trg_data: Source and target text files.
        src_vocab_size, trg_vocab_size: Vocabulary sizes; ``size - 1`` is
            used as the end-of-sequence index on each side.
        unk_id: Index used for out-of-vocabulary tokens.
        seq_len: Passed to ``_too_long`` to filter out long sequences.
        batch_size: Number of examples per final batch.
        sort_k_batches: Number of batches read ahead and sorted together.
        **kwargs: Accepted but not used.

    Returns:
        The stream padded by ``PaddingWithEOS``.
    """
    # Load dictionaries and ensure special tokens exist. Pickled files are
    # opened in a ``with`` block so the handle is closed after loading (it
    # used to be left open).
    # NOTE: pickle must only be used on trusted vocabulary files.
    if not isinstance(src_vocab, dict):
        with open(src_vocab) as vocab_file:
            src_vocab = cPickle.load(vocab_file)
    src_vocab = _ensure_special_tokens(
        src_vocab,
        bos_idx=0, eos_idx=src_vocab_size - 1, unk_idx=unk_id)

    if not isinstance(trg_vocab, dict):
        with open(trg_vocab) as vocab_file:
            trg_vocab = cPickle.load(vocab_file)
    trg_vocab = _ensure_special_tokens(
        trg_vocab,
        bos_idx=0, eos_idx=trg_vocab_size - 1, unk_idx=unk_id)

    # Get text files from both source and target
    src_dataset = TextFile([src_data], src_vocab, None)
    trg_dataset = TextFile([trg_data], trg_vocab, None)

    # Merge them to get a source, target pair
    stream = Merge([src_dataset.get_example_stream(),
                    trg_dataset.get_example_stream()],
                   ('source', 'target'))

    # Filter sequences that are too long
    stream = Filter(stream,
                    predicate=_too_long(seq_len=seq_len))

    # Replace out of vocabulary tokens with unk token
    stream = Mapping(stream,
                     _oov_to_unk(src_vocab_size=src_vocab_size,
                                 trg_vocab_size=trg_vocab_size,
                                 unk_id=unk_id))

    # Build a batched version of stream to read k batches ahead
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(
                       batch_size * sort_k_batches))

    # Sort all samples in the read-ahead batch
    stream = Mapping(stream, SortMapping(_length))

    # Convert it into a stream again
    stream = Unpack(stream)

    # Construct batches from the stream with specified batch size
    stream = Batch(
        stream, iteration_scheme=ConstantScheme(batch_size))

    # Pad sequences that are short
    masked_stream = PaddingWithEOS(
        stream, [src_vocab_size - 1, trg_vocab_size - 1])

    return masked_stream