本文整理汇总了Python中cntk.io.MinibatchSource.next_minibatch方法的典型用法代码示例。如果您正苦于以下问题:Python MinibatchSource.next_minibatch方法的具体用法?Python MinibatchSource.next_minibatch怎么用?Python MinibatchSource.next_minibatch使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cntk.io.MinibatchSource
的用法示例。
在下文中一共展示了MinibatchSource.next_minibatch方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_max_samples
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_max_samples(tmpdir):
    """A source capped at max_samples=1 yields one sample, then nothing."""
    source = MinibatchSource(
        create_ctf_deserializer(tmpdir), max_samples=1)
    stream_map = {'features': source['features']}

    # Even though 10 samples are requested, the cap limits us to a single one.
    batch = source.next_minibatch(10, stream_map)
    assert 'features' in batch
    assert batch['features'].num_samples == 1
    assert not batch['features'].end_of_sweep

    # The sample budget is spent, so the next request returns an empty result.
    batch = source.next_minibatch(10, stream_map)
    assert not batch
示例2: test_text_format
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_text_format(tmpdir):
    """Read a CTF file with one sparse and one dense stream (unrandomized) and
    verify minibatch shapes, sweep flags, sample/sequence counts and values.

    Assumes MBDATA_SPARSE holds 2 sequences: 7 'x' samples (max seq len 4)
    and 2 'y' samples — TODO confirm against the module-level fixture.
    """
    tmpfile = _write_data(tmpdir, MBDATA_SPARSE)

    input_dim = 1000
    num_output_classes = 5

    mb_source = MinibatchSource(CTFDeserializer(tmpfile, StreamDefs(
        features=StreamDef(field='x', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='y', shape=num_output_classes, is_sparse=False)
    )), randomize=False)
    assert isinstance(mb_source, MinibatchSource)

    features_si = mb_source.stream_info('features')
    labels_si = mb_source.stream_info('labels')

    # Ask for 7 samples: this drains exactly one full sweep of the data.
    mb = mb_source.next_minibatch(7)

    features = mb[features_si]
    # 2 samples, max seq len 4, 1000 dim
    assert features.shape == (2, 4, input_dim)
    assert features.end_of_sweep
    assert features.num_sequences == 2
    assert features.num_samples == 7
    assert features.is_sparse

    labels = mb[labels_si]
    # 2 samples, max seq len 1, 5 dim
    assert labels.shape == (2, 1, num_output_classes)
    assert labels.end_of_sweep
    assert labels.num_sequences == 2
    assert labels.num_samples == 2
    assert not labels.is_sparse

    # The dense label stream round-trips to the expected one-hot rows.
    label_data = labels.asarray()
    assert np.allclose(label_data,
                       np.asarray([
                           [[1., 0., 0., 0., 0.]],
                           [[0., 1., 0., 0., 0.]]
                       ]))

    # A second, smaller minibatch starts a new sweep, so the flag is cleared.
    mb = mb_source.next_minibatch(1)
    features = mb[features_si]
    labels = mb[labels_si]

    assert not features.end_of_sweep
    assert not labels.end_of_sweep
    assert features.num_samples < 7
    assert labels.num_samples == 1
示例3: test_large_minibatch
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_large_minibatch(tmpdir):
    """A request larger than the data set spans several sweeps; counts must be
    whole multiples of the sweep size.

    Assumes MBDATA_DENSE_2 holds 7 'S0' samples and 5 'S1' samples per sweep
    — TODO confirm against the module-level fixture.
    """
    tmpfile = _write_data(tmpdir, MBDATA_DENSE_2)

    mb_source = MinibatchSource(CTFDeserializer(tmpfile, StreamDefs(
        features = StreamDef(field='S0', shape=1),
        labels = StreamDef(field='S1', shape=1))),
        randomization_window_in_chunks=0)
    features_si = mb_source.stream_info('features')
    labels_si = mb_source.stream_info('labels')

    mb = mb_source.next_minibatch(1000)
    features = mb[features_si]
    labels = mb[labels_si]

    # Actually, the minibatch spans over multiple sweeps,
    # not sure if this is an artificial situation, but
    # maybe instead of a boolean flag we should indicate
    # the largest sweep index the data was taken from.
    assert features.end_of_sweep
    assert labels.end_of_sweep

    # Only whole sweeps are delivered: 142 sweeps of 7 feature samples each.
    assert features.num_samples == 1000 - 1000 % 7
    assert labels.num_samples == 5 * (1000 // 7)

    assert mb[features_si].num_sequences == (1000 // 7)
    assert mb[labels_si].num_sequences == (1000 // 7)
示例4: test_MinibatchData_and_Value_as_input
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_MinibatchData_and_Value_as_input(tmpdir):
    """eval() must accept the same minibatch in every supported wrapping:
    an input map, a bare MinibatchData, its Value, and its NumPy value."""
    mbdata = r'''0 |S0 100'''

    tmpfile = str(tmpdir/'mbtest.txt')
    with open(tmpfile, 'w') as f:
        f.write(mbdata)

    defs = StreamDefs(f1 = StreamDef(field='S0', shape=1))
    mb_source = MinibatchSource(CTFDeserializer(tmpfile, defs),
                                randomize=False)
    f1_si = mb_source.stream_info('f1')
    mb = mb_source.next_minibatch(1)

    f1 = input_variable(shape=(1,),
                        needs_gradient=True,
                        name='f')
    # Trivial graph: doubling the single sample 100 must always give 200.
    res = f1 * 2

    assert res.eval({f1: mb[f1_si]}) == [[200]]
    # Test MinibatchData
    assert res.eval(mb[f1_si]) == [[200]]
    # Test Value
    assert res.eval(mb[f1_si].data) == [[200]]
    # Test NumPy (converted back from MinibatchData)
    assert res.eval(mb[f1_si].value) == [[200]]
    # Test Value (NOTE(review): duplicates the earlier Value check — likely
    # intentional re-evaluation after the NumPy path, but worth confirming)
    assert res.eval(mb[f1_si].data) == [[200]]
示例5: test_eval_sparse_dense
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_eval_sparse_dense(tmpdir, device_id):
    """Evaluating sparse CTF data read through a MinibatchSource must match
    evaluating the same sequences fed directly as CSR matrices and as
    one-hot batches."""
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.device import cpu, gpu, set_default_device
    from cntk.ops import input_variable, times
    from scipy.sparse import csr_matrix

    input_vocab_dim = label_vocab_dim = 69

    # Two sequences (ids 0 and 2) of one-hot indices; must stay byte-exact,
    # the reader parses this text verbatim.
    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features = StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels = StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=False, epoch_size = 2)

    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    # NOTE(review): label_seq_axis is never used below — kept for parity with
    # the original example.
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=input_vocab_dim, dynamic_axes=input_dynamic_axes,
        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input : mbs.streams.features})

    # Multiplying by the identity turns the sparse one-hot rows back into
    # dense vectors, giving a reference result to compare against.
    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid)

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [
        [3, 4, 5, 4, 7, 12, 1],
        [60, 61]
    ]
    data = [csr_matrix(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in
            one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a,b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = one_hot(one_hot_data, num_classes=input_vocab_dim)
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a,b in zip(e_reader, e_hot)])
示例6: test_mlf_binary_files
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_mlf_binary_files():
    """Smoke test: a binary MLF label file loads next to HTK features."""
    os.chdir(data_path)

    context = 2
    feature_stream = StreamDef(shape=33, context=(context, context),
                               scp="glob_0000.scp")
    label_stream = StreamDef(shape=132, mlf=e2e_data_path + "mlf2.bin")

    feature_deserializer = HTKFeatureDeserializer(
        StreamDefs(amazing_features=feature_stream))
    label_deserializer = HTKMLFBinaryDeserializer(
        StreamDefs(awesome_labels=label_stream))

    # Reading a single minibatch is all this test demands.
    source = MinibatchSource([feature_deserializer, label_deserializer])
    source.next_minibatch(1)

    os.chdir(abs_path)
示例7: compare_cbf_and_ctf
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def compare_cbf_and_ctf(num_mbs, mb_size, randomize):
    """Read `num_mbs` minibatches of `mb_size` from a CTF file and its binary
    CBF counterpart and assert that both sources produce identical data.

    Relies on enclosing-scope names `tmpfile`, `streams` and `device`
    (presumably set up by the surrounding test — verify against the caller).
    """
    ctf = MinibatchSource(CTFDeserializer(tmpfile, streams), randomize=randomize)
    cbf = MinibatchSource(CBFDeserializer(tmpfile+'.bin', streams), randomize=randomize)

    # Both sources must expose the same set of stream names.
    ctf_stream_names = sorted([x.m_name for x in ctf.stream_infos()])
    cbf_stream_names = sorted([x.m_name for x in cbf.stream_infos()])
    assert(ctf_stream_names == cbf_stream_names)

    for _ in range(num_mbs):
        ctf_mb = ctf.next_minibatch(mb_size, device=device)
        cbf_mb = cbf.next_minibatch(mb_size, device=device)

        for name in cbf_stream_names:
            ctf_data = ctf_mb[ctf[name]]
            cbf_data = cbf_mb[cbf[name]]

            # Metadata must agree exactly for every stream.
            assert ctf_data.num_samples == cbf_data.num_samples
            assert ctf_data.num_sequences == cbf_data.num_sequences
            assert ctf_data.shape == cbf_data.shape
            assert ctf_data.end_of_sweep == cbf_data.end_of_sweep
            assert ctf_data.is_sparse == cbf_data.is_sparse
            assert ctf_data.data.masked_count() == cbf_data.data.masked_count()

            # XXX:
            # assert(ctf_data.asarray() == cbf_data.asarray()).all()
            # not using asarray because for sparse values it fails with
            # some strange exception "sum of the rank of the mask and Variable
            # rank does not equal the Value's rank".
            assert C.cntk_py.are_equal(ctf_data.data.data, cbf_data.data.data)

            if (ctf_data.data.masked_count() > 0):
                assert (ctf_data.data.mask == cbf_data.data.mask).all()
            # XXX: if mask_count is zero, mb_data.data.mask fails with
            # "AttributeError: 'Value' object has no attribute 'mask'"!

            # XXX: without invoking erase, next_minibatch will fail with:
            # "Resize: Cannot resize the matrix because it is a view."
            ctf_data.data.erase()
            cbf_data.data.erase()
示例8: test_max_samples_over_several_sweeps
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_max_samples_over_several_sweeps(tmpdir):
    """max_samples=11 spans multiple sweeps; delivery stops after sample 11."""
    source = MinibatchSource(
        create_ctf_deserializer(tmpdir), max_samples=11)
    stream_map = {'features': source['features']}

    # Two full requests of 5 samples each; both cross a sweep boundary.
    for _ in range(2):
        batch = source.next_minibatch(5, stream_map)
        assert 'features' in batch
        assert batch['features'].num_samples == 5
        assert batch['features'].end_of_sweep

    # Only one sample of the 11-sample budget remains.
    batch = source.next_minibatch(5, stream_map)
    assert 'features' in batch
    assert batch['features'].num_samples == 1
    assert not batch['features'].end_of_sweep

    # Budget exhausted: the source yields nothing further.
    batch = source.next_minibatch(1, stream_map)
    assert not batch
示例9: test_max_sweeps
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_max_sweeps(tmpdir):
    """With max_sweeps=3 (12 samples in total) the source stops after the
    third pass over the data."""
    # set max sweeps to 3 (12 samples altogether).
    source = MinibatchSource(
        create_ctf_deserializer(tmpdir), max_sweeps=3)
    stream_map = {'features': source['features']}

    # Two requests of 5 each consume 10 samples, crossing sweep boundaries.
    for _ in range(2):
        batch = source.next_minibatch(5, stream_map)
        assert 'features' in batch
        assert batch['features'].num_samples == 5
        assert batch['features'].end_of_sweep

    # The last 2 samples complete the third and final sweep...
    batch = source.next_minibatch(5, stream_map)
    assert 'features' in batch
    assert batch['features'].num_samples == 2
    assert batch['features'].end_of_sweep

    # ...after which the source is exhausted.
    batch = source.next_minibatch(1, stream_map)
    assert not batch
示例10: test_htk_deserializers
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_htk_deserializers():
    """End-to-end smoke test: train an LSTM acoustic model for three
    minibatches from HTK feature/label deserializers and verify nothing
    crashes."""
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2  # frames of left/right acoustic context

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))
    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    # Input width covers the context window: (2*context+1) stacked frames.
    features = C.input_variable(((2*context+1)*feature_dim))
    labels = C.input_variable((num_classes))

    # 3 stacked LSTM layers followed by a dense classification layer.
    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error (z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True

    os.chdir(abs_path)
示例11: test_multiple_mlf_files
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_multiple_mlf_files():
    """Labels may be split across several MLF files passed in as a list."""
    os.chdir(data_path)

    context = 2
    label_sources = ["glob_0000.mlf", e2e_data_path + "glob_00001.mlf"]

    feature_deserializer = HTKFeatureDeserializer(StreamDefs(
        amazing_features=StreamDef(shape=33, context=(context, context),
                                   scp="glob_0000.scp")))
    label_deserializer = HTKMLFDeserializer("state.list", StreamDefs(
        awesome_labels=StreamDef(shape=132, mlf=label_sources)))

    # Make sure we can read at least one minibatch.
    source = MinibatchSource([feature_deserializer, label_deserializer])
    source.next_minibatch(1)

    os.chdir(abs_path)
示例12: test_crop_dimensionality
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_crop_dimensionality(tmpdir):
    """Cropping to crop_size=(20, 10) must yield (1, 1, 3, 10, 20) batches and
    produce identical output whether or not an explicit scale precedes the
    crop (the images are already 40x20)."""
    import io; from PIL import Image
    np.random.seed(1)

    file_mapping_path = str(tmpdir / 'file_mapping.txt')
    with open(file_mapping_path, 'w') as file_mapping:
        for i in range(5):
            # Random 20x40 RGB image, PNG-encoded in memory.
            data = np.random.randint(0, 2**8, (20, 40, 3))
            image = Image.fromarray(data.astype('uint8'), "RGB")
            buf = io.BytesIO()
            image.save(buf, format='PNG')
            assert image.width == 40 and image.height == 20

            label = str(i)
            # save to mapping + png file
            file_name = label + '.png'
            with open(str(tmpdir/file_name), 'wb') as f:
                f.write(buf.getvalue())
            file_mapping.write('.../%s\t%s\n' % (file_name, label))

    # Same crop, with and without a (no-op) scale to the images' actual size.
    transforms1 = [
        xforms.scale(width=40, height=20, channels=3),
        xforms.crop(crop_type='randomside',
                    crop_size=(20, 10), side_ratio=(0.2, 0.5),
                    jitter_type='uniratio')]

    transforms2 = [
        xforms.crop(crop_type='randomside',
                    crop_size=(20, 10), side_ratio=(0.2, 0.5),
                    jitter_type='uniratio')]

    d1 = ImageDeserializer(file_mapping_path,
                           StreamDefs(
                               images1=StreamDef(field='image', transforms=transforms1),
                               labels1=StreamDef(field='label', shape=10)))

    d2 = ImageDeserializer(file_mapping_path,
                           StreamDefs(
                               images2=StreamDef(field='image', transforms=transforms2),
                               labels2=StreamDef(field='label', shape=10)))

    mbs = MinibatchSource([d1, d2])
    for j in range(5):
        mb = mbs.next_minibatch(1)
        images1 = mb[mbs.streams.images1].asarray()
        images2 = mb[mbs.streams.images2].asarray()
        assert images1.shape == (1, 1, 3, 10, 20)
        assert (images1 == images2).all()
示例13: test_multiple_streams_in_htk
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_multiple_streams_in_htk():
    """Two HTK feature streams backed by the same .scp file must deliver
    identical data."""
    os.chdir(data_path)

    feature_dim = 33
    context = 2

    def make_stream():
        # Both streams are configured identically on purpose.
        return StreamDef(shape=feature_dim, context=(context, context),
                         scp="glob_0000.scp")

    deserializer = HTKFeatureDeserializer(StreamDefs(
        amazing_features=make_stream(),
        amazing_features2=make_stream()))

    source = MinibatchSource([deserializer])
    batch = source.next_minibatch(1)
    first = batch[source.streams.amazing_features].asarray()
    second = batch[source.streams.amazing_features2].asarray()
    assert (first == second).all()

    os.chdir(abs_path)
示例14: test_base64_is_equal_image
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_base64_is_equal_image(tmpdir):
    """The Base64ImageDeserializer and the file-based ImageDeserializer must
    produce identical data for the same PNG images."""
    import io, base64; from PIL import Image
    np.random.seed(1)

    file_mapping_path = str(tmpdir / 'file_mapping.txt')
    base64_mapping_path = str(tmpdir / 'base64_mapping.txt')

    # Write each random image twice: base64-inline and as a PNG on disk.
    with open(file_mapping_path, 'w') as file_mapping:
        with open(base64_mapping_path, 'w') as base64_mapping:
            for i in range(10):
                data = np.random.randint(0, 2**8, (5,7,3))
                image = Image.fromarray(data.astype('uint8'), "RGB")
                buf = io.BytesIO()
                image.save(buf, format='PNG')
                assert image.width == 7 and image.height == 5

                label = str(i)

                # save to base 64 mapping file
                encoded = base64.b64encode(buf.getvalue()).decode('ascii')
                base64_mapping.write('%s\t%s\n' % (label, encoded))

                # save to mapping + png file
                file_name = label + '.png'
                with open(str(tmpdir/file_name), 'wb') as f:
                    f.write(buf.getvalue())
                file_mapping.write('.../%s\t%s\n' % (file_name, label))

    transforms = [xforms.scale(width=7, height=5, channels=3)]
    b64_deserializer = Base64ImageDeserializer(base64_mapping_path,
                                               StreamDefs(
                                                   images1=StreamDef(field='image', transforms=transforms),
                                                   labels1=StreamDef(field='label', shape=10)))

    file_image_deserializer = ImageDeserializer(file_mapping_path,
                                                StreamDefs(
                                                    images2=StreamDef(field='image', transforms=transforms),
                                                    labels2=StreamDef(field='label', shape=10)))

    # Read both streams in lock-step; every minibatch must match exactly.
    mb_source = MinibatchSource([b64_deserializer, file_image_deserializer])
    for j in range(20):
        mb = mb_source.next_minibatch(1)

        images1_stream = mb_source.streams['images1']
        images1 = mb[images1_stream].asarray()
        images2_stream = mb_source.streams['images2']
        images2 = mb[images2_stream].asarray()
        assert(images1 == images2).all()
示例15: test_full_sweep_minibatch
# 需要导入模块: from cntk.io import MinibatchSource [as 别名]
# 或者: from cntk.io.MinibatchSource import next_minibatch [as 别名]
def test_full_sweep_minibatch(tmpdir):
    """With max_sweeps=1 and no randomization, one oversized request returns
    exactly the whole data set; verify sequences, values and masks.

    Assumes MBDATA_DENSE_1 holds the 2 feature/label sequences asserted
    below — TODO confirm against the module-level fixture.
    """
    tmpfile = _write_data(tmpdir, MBDATA_DENSE_1)

    mb_source = MinibatchSource(CTFDeserializer(tmpfile, StreamDefs(
        features = StreamDef(field='S0', shape=1),
        labels = StreamDef(field='S1', shape=1))),
        randomization_window_in_chunks=0, max_sweeps=1)

    features_si = mb_source.stream_info('features')
    labels_si = mb_source.stream_info('labels')

    # 1000 >> data size, so this returns the full (single) sweep.
    mb = mb_source.next_minibatch(1000)

    assert mb[features_si].num_sequences == 2
    assert mb[labels_si].num_sequences == 2

    features = mb[features_si]
    assert features.end_of_sweep
    assert len(features.as_sequences()) == 2
    expected_features = \
        [
            [[0], [1], [2], [3]],
            [[4], [5], [6]]
        ]

    for res, exp in zip(features.as_sequences(), expected_features):
        assert np.allclose(res, exp)

    # Mask: 2 marks a sequence start, 1 a continuation, 0 padding.
    assert np.allclose(features.data.mask,
                       [[2, 1, 1, 1],
                        [2, 1, 1, 0]])

    labels = mb[labels_si]
    assert labels.end_of_sweep
    assert len(labels.as_sequences()) == 2
    expected_labels = \
        [
            [[0],[1],[3]],
            [[1],[2]]
        ]
    for res, exp in zip(labels.as_sequences(), expected_labels):
        assert np.allclose(res, exp)

    assert np.allclose(labels.data.mask,
                       [[2, 1, 1],
                        [2, 1, 0]])