本文整理汇总了Python中fuel.streams.DataStream方法的典型用法代码示例。如果您正苦于以下问题:Python streams.DataStream方法的具体用法?Python streams.DataStream怎么用?Python streams.DataStream使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类fuel.streams
的用法示例。
在下文中一共展示了streams.DataStream方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_imgs
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def load_imgs(ntrain=None, ntest=None, batch_size=128, data_file=None):
    """Load an HDF5 image dataset and build shuffled train/test streams.

    Parameters
    ----------
    ntrain : int or None
        Number of training examples to use; clamped to the dataset size.
        ``None`` means use every training example.
    ntest : int or None
        Number of test examples to use; clamped to the dataset size.
        ``None`` means use every test example.
    batch_size : int
        Minibatch size for both streams.
    data_file : str
        Path to an HDF5 file containing 'train' and 'test' sets.

    Returns
    -------
    (tr_data, te_data, tr_stream, te_stream, ntrain, ntest)
    """
    t = time()
    print('LOADING DATASET...')
    # os.path.join() with a single argument is a no-op; use the path as-is.
    path = data_file
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))
    # Clamp the requested counts to what the dataset actually contains.
    if ntrain is None:
        ntrain = tr_data.num_examples
    else:
        ntrain = min(ntrain, tr_data.num_examples)
    if ntest is None:
        ntest = te_data.num_examples
    else:
        ntest = min(ntest, te_data.num_examples)
    print('name = %s, ntrain = %d, ntest = %d' % (data_file, ntrain, ntest))
    # Both streams are shuffled (matches the original behavior).
    tr_scheme = ShuffledScheme(examples=ntrain, batch_size=batch_size)
    tr_stream = DataStream(tr_data, iteration_scheme=tr_scheme)
    te_scheme = ShuffledScheme(examples=ntest, batch_size=batch_size)
    te_stream = DataStream(te_data, iteration_scheme=te_scheme)
    print('%.2f secs to load data' % (time() - t))
    return tr_data, te_data, tr_stream, te_stream, ntrain, ntest
示例2: load_imgs_seq
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def load_imgs_seq(ntrain=None, ntest=None, batch_size=128, data_file=None):
    """Load an HDF5 image dataset and build *sequential* train/test streams.

    Same contract as ``load_imgs`` but with deterministic, in-order
    iteration (SequentialScheme) instead of shuffling.

    Parameters
    ----------
    ntrain, ntest : int or None
        Example counts; ``None`` means the full split. Counts are clamped
        to the split size (fix: the original did not clamp, unlike
        ``load_imgs``, so an oversized request produced an invalid scheme).
    batch_size : int
        Minibatch size for both streams.
    data_file : str
        Path to an HDF5 file containing 'train' and 'test' sets.

    Returns
    -------
    (tr_data, te_data, tr_stream, te_stream, ntrain, ntest)
    """
    t = time()
    print('LOADING DATASET...')
    path = os.path.join(data_file)
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))
    # Clamp requested counts to the split sizes, consistent with load_imgs.
    if ntrain is None:
        ntrain = tr_data.num_examples
    else:
        ntrain = min(ntrain, tr_data.num_examples)
    if ntest is None:
        ntest = te_data.num_examples
    else:
        ntest = min(ntest, te_data.num_examples)
    tr_scheme = SequentialScheme(examples=ntrain, batch_size=batch_size)
    tr_stream = DataStream(tr_data, iteration_scheme=tr_scheme)
    te_scheme = SequentialScheme(examples=ntest, batch_size=batch_size)
    te_stream = DataStream(te_data, iteration_scheme=te_scheme)
    print('name = %s, ntrain = %d, ntest = %d' % (data_file, ntrain, ntest))
    print('%.2f seconds to load data' % (time() - t))
    return tr_data, te_data, tr_stream, te_stream, ntrain, ntest
示例3: faces
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def faces(ntrain=None, nval=None, ntest=None, batch_size=128):
    """Build train/validation/test streams over the 128px faces dataset.

    Counts default to the full split sizes. The training stream is
    shuffled; the validation and test streams iterate sequentially.

    NOTE(review): the validation stream draws from ``tr_data`` while its
    size ``nval`` defaults to the *test* split's example count — confirm
    this asymmetry is intentional.

    Returns
    -------
    (tr_data, te_data, tr_stream, val_stream, te_stream)
    """
    path = os.path.join(data_dir, 'faces_364293_128px.hdf5')
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))
    ntrain = tr_data.num_examples if ntrain is None else ntrain
    ntest = te_data.num_examples if ntest is None else ntest
    nval = te_data.num_examples if nval is None else nval
    tr_stream = DataStream(
        tr_data,
        iteration_scheme=ShuffledScheme(examples=ntrain,
                                        batch_size=batch_size))
    te_stream = DataStream(
        te_data,
        iteration_scheme=SequentialScheme(examples=ntest,
                                          batch_size=batch_size))
    val_stream = DataStream(
        tr_data,
        iteration_scheme=SequentialScheme(examples=nval,
                                          batch_size=batch_size))
    return tr_data, te_data, tr_stream, val_stream, te_stream
示例4: setup_datastream
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def setup_datastream(path, vocab_file, config):
    """Create the QA dataset and its batched, length-sorted, padded stream.

    Returns
    -------
    (dataset, stream) : the underlying QADataset and the final transformer
    pipeline ending with Padding.
    """
    dataset = QADataset(path, vocab_file, config.n_entities,
                        need_sep_token=config.concat_ctx_and_question)
    scheme = QAIterator(path, shuffle=config.shuffle_questions)
    stream = DataStream(dataset, iteration_scheme=scheme)

    if config.concat_ctx_and_question:
        stream = ConcatCtxAndQuestion(stream, config.concat_question_before,
                                      dataset.reverse_vocab['<SEP>'])

    # Group several batches worth of examples, sort them by length so each
    # final batch holds similarly-sized examples, then re-batch.
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(
                       config.batch_size * config.sort_batch_count))
    sort_source = ('question' if config.concat_ctx_and_question
                   else 'context')
    comparison = _balanced_batch_helper(stream.sources.index(sort_source))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)
    stream = Batch(stream, iteration_scheme=ConstantScheme(config.batch_size))

    # Zero-pad variable-length sources and emit int32 masks for them.
    stream = Padding(stream,
                     mask_sources=['context', 'question', 'candidates'],
                     mask_dtype='int32')
    return dataset, stream
示例5: setup_datastream
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    """Build a shuffled, length-sorted, padded stream over raw .npy data.

    ``valid`` selects the validation files instead of the training ones.

    Returns
    -------
    (dataset, stream) : the IndexableDataset and the final Padding stream.
    """
    raw_x = numpy.load(os.path.join(
        path, ('valid_x_raw.npy' if valid else 'train_x_raw.npy')))
    phn = numpy.load(os.path.join(
        path, ('valid_phn.npy' if valid else 'train_phn.npy')))
    seq_to_phn = numpy.load(os.path.join(
        path, ('valid_seq_to_phn.npy' if valid else 'train_seq_to_phn.npy')))
    # For each sequence, slice out its span of phoneme labels (column 2).
    targets = [phn[row[0]:row[1], 2] for row in seq_to_phn]

    dataset = IndexableDataset({'input': raw_x, 'output': targets})
    stream = DataStream(dataset,
                        iteration_scheme=ShuffledExampleScheme(len(raw_x)))

    # Group several batches, sort by input length for similarly-sized
    # batches, then re-batch at the requested size.
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(
                       batch_size * sort_batch_count))
    comparison = _balanced_batch_helper(stream.sources.index('input'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(batch_size,
                                                   num_examples=len(raw_x)))
    stream = Padding(stream, mask_sources=['input', 'output'])
    return dataset, stream
示例6: get_dev_streams
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def get_dev_streams(config):
    """Setup development set stream if necessary.

    For every computation graph in ``config['cgs']`` that has an entry in
    ``config['val_sets']``, load its source vocabulary, patch the special
    token ids, and wrap the validation text file in a DataStream.

    Returns
    -------
    dict mapping cg name -> DataStream (empty when no val sets configured).
    """
    dev_streams = {}
    for cg in config['cgs']:
        if 'val_sets' in config and cg in config['val_sets']:
            logger.info('Building development stream for cg:[{}]'.format(cg))
            eid = p_(cg)[0]
            dev_file = config['val_sets'][cg]
            # Load the vocabulary; a context manager closes the file handle
            # (the original leaked it via cPickle.load(open(...))).
            with open(config['src_vocabs'][eid]) as vocab_f:
                dictionary = cPickle.load(vocab_f)
            # Fix special-token ids: BOS, UNK and EOS.
            dictionary['<S>'] = 0
            dictionary['<UNK>'] = config['unk_id']
            dictionary['</S>'] = config['src_eos_idxs'][eid]
            # Get as a text file and convert it into a stream
            dev_dataset = TextFile([dev_file], dictionary, None)
            dev_streams[cg] = DataStream(dev_dataset)
    return dev_streams
示例7: get_stream
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def get_stream(hdf5_file, which_set, batch_size=None):
    """Open one split of an HDF5 dataset as a shuffled, transposed stream.

    Parameters
    ----------
    hdf5_file : str
        Path to the HDF5 file.
    which_set : str
        Split name, e.g. 'train' or 'test'.
    batch_size : int or None
        Minibatch size; ``None`` means a single batch holding the whole
        split.

    Returns
    -------
    A Mapping stream whose batches are transposed to [sequence, batch,
    features], as required by Recurrent bricks.
    """
    dataset = H5PYDataset(
        hdf5_file, which_sets=(which_set,), load_in_memory=True)
    # Fix: compare against None with `is`, not `==` (PEP 8; `==` can be
    # hijacked by a custom __eq__).
    if batch_size is None:
        batch_size = dataset.num_examples
    stream = DataStream(dataset=dataset, iteration_scheme=ShuffledScheme(
        examples=dataset.num_examples, batch_size=batch_size))
    # Required because Recurrent bricks receive as input [sequence, batch,
    # features]
    return Mapping(stream, transpose_stream)
示例8: test_mean_aggregator
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def test_mean_aggregator():
    """Mean aggregation should average per-batch statistics across batches.

    Two sequential batches of two examples each:
      batch 1: [[0, 3], [2, 9]] -> mean of squares = [2.0, 45.0]
      batch 2: [[2, 4], [5, 1]] -> mean of squares = [14.5, 8.5]
    With unit denominators, the aggregate of ``y`` is [8.25, 26.75] and
    the aggregate of its sum ``z`` is 35.
    """
    features = numpy.array([[0, 3],
                            [2, 9],
                            [2, 4],
                            [5, 1]], dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    scheme = SequentialScheme(4, 2)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    x = tensor.matrix('features')
    y = (x ** 2).mean(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'
    # Aggregate both quantities as plain means over batches.
    for variable in (y, z):
        variable.tag.aggregation_scheme = Mean(variable, 1.)

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([8.25, 26.75], dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([35], dtype=theano.config.floatX))
示例9: get_example_stream
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def get_example_stream(self):
    """Return a DataStream that iterates this dataset example by example."""
    scheme = self.example_iteration_scheme
    return DataStream(self, iteration_scheme=scheme)
示例10: setUp
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def setUp(self):
    """Create a small iterable dataset and a plain stream over it."""
    self.data = [1, 2, 3]
    dataset = IterableDataset(self.data)
    self.stream = DataStream(dataset)
示例11: test_default_transformer
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def test_default_transformer(self):
    """A dataset's default transformer should wrap the stream it is given."""
    class DoublingDataset(IterableDataset):
        # Default transformer doubles every value in every source tuple.
        def apply_default_transformer(self, stream):
            return Mapping(
                stream,
                lambda sources: tuple(2 * value for value in sources))

    dataset = DoublingDataset(self.data)
    transformed = dataset.apply_default_transformer(DataStream(dataset))
    assert_equal(list(transformed.get_epoch_iterator()), [(2,), (4,), (6,)])
示例12: test_sources_selection
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def test_sources_selection():
    """A stream exposes all sources by default, or only those requested."""
    features = [5, 6, 7, 1]
    targets = [1, 0, 1, 1]

    # With both sources, each example is a (feature, target) pair.
    full_stream = DataStream(IterableDataset(OrderedDict(
        [('features', features), ('targets', targets)])))
    assert list(full_stream.get_epoch_iterator()) == \
        list(zip(features, targets))

    # Restricting to 'targets' yields one-element tuples.
    targets_only = DataStream(IterableDataset(
        {'features': features, 'targets': targets},
        sources=('targets',)))
    assert list(targets_only.get_epoch_iterator()) == list(zip(targets))
示例13: test_sources_setter
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def test_sources_setter(self):
    """Assigning to ``sources`` after construction should take effect."""
    data_stream = DataStream(self.dataset)
    data_stream.sources = ('features',)
    assert_equal(data_stream.sources, ('features',))
示例14: test_no_axis_labels
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def test_no_axis_labels(self):
    """Without labels on the dataset, the stream reports ``None``."""
    data_stream = DataStream(self.dataset)
    assert data_stream.axis_labels is None
示例15: test_axis_labels_on_produces_examples
# 需要导入模块: from fuel import streams [as 别名]
# 或者: from fuel.streams import DataStream [as 别名]
def test_axis_labels_on_produces_examples(self):
    """An example-wise stream drops the leading 'batch' axis label."""
    self.dataset.axis_labels = {'data': ('batch', 'features')}
    data_stream = DataStream(self.dataset)
    assert_equal(data_stream.axis_labels, {'data': ('features',)})