当前位置: 首页>>代码示例>>Python>>正文


Python cntk.io.MinibatchSource类代码示例

本文整理汇总了Python中cntk.io.MinibatchSource的典型用法代码示例。如果您正苦于以下问题：Python MinibatchSource类的具体用法？Python MinibatchSource怎么用？Python MinibatchSource使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了MinibatchSource类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_large_minibatch

def test_large_minibatch(tmpdir):
    """Ask for 1000 samples in one call and check the resulting counts.

    The CTF file is produced from ``MBDATA_DENSE_2`` and read through a
    ``MinibatchSource`` created with ``randomization_window_in_chunks=0``.
    """
    ctf_path = _write_data(tmpdir, MBDATA_DENSE_2)

    stream_defs = StreamDefs(
        features=StreamDef(field='S0', shape=1),
        labels=StreamDef(field='S1', shape=1))
    source = MinibatchSource(CTFDeserializer(ctf_path, stream_defs),
                             randomization_window_in_chunks=0)

    feat_info = source.stream_info('features')
    label_info = source.stream_info('labels')

    batch = source.next_minibatch(1000)
    feat_data = batch[feat_info]
    label_data = batch[label_info]

    # The minibatch actually spans several sweeps. It is unclear whether
    # that is an artificial situation; a boolean flag may be too coarse and
    # the largest sweep index the data came from could be reported instead.
    assert feat_data.end_of_sweep
    assert label_data.end_of_sweep

    expected_sequences = 1000 // 7
    assert feat_data.num_samples == 1000 - 1000 % 7
    assert label_data.num_samples == 5 * expected_sequences

    assert batch[feat_info].num_sequences == expected_sequences
    assert batch[label_info].num_sequences == expected_sequences
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:27,代码来源:io_tests.py

示例2: test_MinibatchData_and_Value_as_input

def test_MinibatchData_and_Value_as_input(tmpdir):
    """Evaluate ``f1 * 2`` on a single CTF sample handed over in several forms.

    The one-line CTF file holds the value 100 in stream ``S0``; every
    evaluation below is expected to yield ``[[200]]``.
    """
    ctf_text = r'''0  |S0 100'''

    ctf_path = str(tmpdir/'mbtest.txt')
    with open(ctf_path, 'w') as out:
        out.write(ctf_text)

    stream_defs = StreamDefs(f1=StreamDef(field='S0', shape=1))
    source = MinibatchSource(CTFDeserializer(ctf_path, stream_defs),
                             randomize=False)

    info = source.stream_info('f1')
    batch = source.next_minibatch(1)

    f1 = input_variable(shape=(1,), needs_gradient=True, name='f')
    doubled = f1 * 2
    expected = [[200]]

    # Explicit {variable: MinibatchData} mapping
    assert doubled.eval({f1: batch[info]}) == expected
    # Bare MinibatchData
    assert doubled.eval(batch[info]) == expected
    # Value object
    assert doubled.eval(batch[info].data) == expected
    # NumPy array (converted back from MinibatchData)
    assert doubled.eval(batch[info].value) == expected
    # Value object again, after the NumPy conversion above
    # NOTE(review): presumably checks that .data is still usable - confirm
    assert doubled.eval(batch[info].data) == expected
开发者ID:BorisJineman,项目名称:CNTK,代码行数:30,代码来源:function_tests.py

示例3: test_eval_sparse_dense

def test_eval_sparse_dense(tmpdir, device_id):
    """Compare evaluation of sparse reader data with CSR and one-hot input.

    The same two sequences are fed to ``times(raw_input, I)`` three ways:
    read from a CTF file, as SciPy CSR matrices, and via ``one_hot``.
    All three results must agree.
    """
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.device import cpu, gpu, set_default_device
    from cntk.ops import input_variable, times
    from scipy.sparse import csr_matrix

    in_dim = out_dim = 69

    ctf_text = '''\
0	|S0 3:1 |# <s>	|S1 3:1 |# <s>
0	|S0 4:1 |# A	|S1 32:1 |# ~AH
0	|S0 5:1 |# B	|S1 36:1 |# ~B
0	|S0 4:1 |# A	|S1 31:1 |# ~AE
0	|S0 7:1 |# D	|S1 38:1 |# ~D
0	|S0 12:1 |# I	|S1 47:1 |# ~IY
0	|S0 1:1 |# </s>	|S1 1:1 |# </s>
2	|S0 60:1 |# <s>	|S1 3:1 |# <s>
2	|S0 61:1 |# A	|S1 32:1 |# ~AH
'''
    ctf_path = str(tmpdir/'2seqtest.txt')
    with open(ctf_path, 'w') as out:
        out.write(ctf_text)

    stream_defs = StreamDefs(
        features=StreamDef(field='S0', shape=in_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=out_dim, is_sparse=True))
    mbs = MinibatchSource(CTFDeserializer(ctf_path, stream_defs),
                          randomize=False, epoch_size=2)

    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    raw_input = input_variable(
        shape=in_dim,
        dynamic_axes=[batch_axis, input_seq_axis],
        name='raw_input',
        is_sparse=True)

    reader_batch = mbs.next_minibatch(
        minibatch_size_in_samples=100,
        input_map={raw_input: mbs.streams.features})

    # Multiplying by the identity exposes the input values as the output.
    z = times(raw_input, np.eye(in_dim))
    from_reader = z.eval(reader_batch)

    def _all_close(lhs, rhs):
        # Pairwise comparison of the per-sequence results.
        return np.all([np.allclose(a, b) for a, b in zip(lhs, rhs)])

    # Indices matching the S0 entries written to ctf_text above
    token_ids = [
        [3, 4, 5, 4, 7, 12, 1],
        [60, 61],
    ]

    # CSR matrices built from rows of a float32 identity matrix
    identity = np.eye(in_dim, dtype=np.float32)
    csr_batch = [csr_matrix(identity[ids]) for ids in token_ids]
    from_csr = z.eval({raw_input: csr_batch}, device=cntk_device(device_id))
    assert _all_close(from_reader, from_csr)

    # One-hot encoding of the same indices
    hot_batch = one_hot(token_ids, num_classes=in_dim)
    from_hot = z.eval({raw_input: hot_batch}, device=cntk_device(device_id))
    assert _all_close(from_reader, from_hot)
开发者ID:jplu,项目名称:CNTK,代码行数:58,代码来源:trainer_test.py

示例4: test_user_deserializer_sequence_mode

def test_user_deserializer_sequence_mode():
    """Drive a MinibatchSource backed by GenDeserializer in four setups.

    Two streams are declared ('x' dense with shape (2, 3), 'y' sparse with
    shape (3,)). For "big" chunks (100 sequences each) and "small" chunks
    (1 sequence each), both with and without randomization, the source is
    read to exhaustion while periodically checkpointing and restoring, and
    the number of sequences seen per value is compared to the expectation.
    """
    # NOTE: `sp` is imported but not referenced anywhere in this function.
    import scipy.sparse as sp
    streams = [StreamInformation('x', 0, 'dense', np.float32, (2, 3)), 
               StreamInformation('y', 1, 'sparse', np.float32, (3,))]

    def run_minibatch_source(minibatch_source, num_chunks, num_sequences_per_value):
        """Read the source until it is empty and verify per-value counts.

        Args:
            minibatch_source: source to read minibatches of size 20 from.
            num_chunks: length of the counter arrays (one slot per value).
            num_sequences_per_value: count every slot must reach for both
                streams once the source is exhausted.
        """
        sequence_x_values = np.zeros(num_chunks, dtype=np.int32)
        sequence_y_values = np.zeros(num_chunks, dtype=np.int32)
        mb_count = 0
        while True:
            # On iterations 1, 11, 21, ...: snapshot the state, read three
            # minibatches that are thrown away, then restore the snapshot.
            # NOTE(review): presumably the restore rewinds the source so the
            # discarded data is read again - the final counts rely on that.
            if mb_count % 10 == 1: # perform checkpointing
                checkpoint_state = minibatch_source.get_checkpoint_state()
                for i in range(3): 
                    minibatch_source.next_minibatch(20)
                minibatch_source.restore_from_checkpoint(checkpoint_state)
                mb_count +=1
                continue            

            mb = minibatch_source.next_minibatch(20)
            mb_count += 1
            # An empty (falsy) minibatch ends the loop.
            if not mb:
                break

            # The first element of each dense sequence is used as the index
            # of the counter to bump.
            for sequence in mb[minibatch_source.streams.x].asarray():
                sequence_x_values[int(sequence[0][0][0])] +=1

            # Same for the sparse stream, after converting each sequence to
            # a dense array.
            for sequence in mb[minibatch_source.streams.y].as_sequences(C.sequence.input_variable((3,), True)):             
                sequence_y_values[int(sequence.toarray()[0][0])] += 1
            # Drop the reference to the minibatch before fetching the next.
            mb = None

        expected_values = np.full(num_chunks, fill_value=num_sequences_per_value, dtype=np.int32)
        assert (sequence_x_values == expected_values).all()
        assert (sequence_y_values == expected_values).all()

    # Big chunks
    # 200 expected per value = 100 sequences x 2 sweeps.
    # NOTE(review): assumes GenDeserializer (not shown here) stores the chunk
    # index in every sequence of that chunk - confirm.
    d = GenDeserializer(stream_infos=streams, num_chunks=15, 
                        num_sequences=100, max_sequence_len=10)
    mbs = MinibatchSource([d], randomize=False, max_sweeps=2)
    # Checkpoint/restore round trip before any data has been read.
    state = mbs.get_checkpoint_state()
    mbs.restore_from_checkpoint(state)
    run_minibatch_source(mbs, num_chunks=15, num_sequences_per_value=200)
    # Randomized
    mbs = MinibatchSource([d], randomize=True, max_sweeps=2, randomization_window_in_chunks=5)
    state = mbs.get_checkpoint_state()
    mbs.restore_from_checkpoint(state)
    run_minibatch_source(mbs, num_chunks=15, num_sequences_per_value=200)

    # Small chunks of 1
    # 3 expected per value = 1 sequence x 3 sweeps.
    d = GenDeserializer(stream_infos=streams, num_chunks=15,
                        num_sequences=1, max_sequence_len=10)
    mbs = MinibatchSource([d], randomize=False, max_sweeps=3)
    run_minibatch_source(mbs, num_chunks=15, num_sequences_per_value=3)
    # Randomized
    mbs = MinibatchSource([d], randomize=True, max_sweeps=3, randomization_window_in_chunks=5)
    run_minibatch_source(mbs, num_chunks=15, num_sequences_per_value=3)
开发者ID:AllanYiin,项目名称:CNTK,代码行数:55,代码来源:io_tests.py

示例5: test_max_samples

def test_max_samples(tmpdir):
    """With ``max_samples=1`` one sample is delivered, then nothing more."""
    source = MinibatchSource(create_ctf_deserializer(tmpdir), max_samples=1)
    stream_map = {'features': source['features']}

    # First request: 10 samples asked for, exactly one delivered.
    batch = source.next_minibatch(10, stream_map)
    assert 'features' in batch
    assert batch['features'].num_samples == 1
    assert not batch['features'].end_of_sweep

    # Second request: the sample budget is used up, the result is empty.
    batch = source.next_minibatch(10, stream_map)
    assert not batch
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:14,代码来源:io_tests.py

示例6: test_htk_deserializers

def test_htk_deserializers():
    """Smoke test: train three minibatches read through HTK deserializers.

    Features come from an ``HTKFeatureDeserializer`` and labels from an
    ``HTKMLFDeserializer``; both are combined in one ``MinibatchSource`` and
    fed to a small recurrent model. The test passes if nothing raises.

    The working directory is switched to ``data_path`` for the duration of
    the test and is always switched back to ``abs_path``, even when a step
    fails, so that a failure here cannot affect tests that run afterwards.
    """
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    # File names are given relative to data_path.
    # NOTE(review): presumably resolved against the current directory,
    # which is why the chdir below is needed - confirm.
    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    os.chdir(data_path)
    try:
        fd = HTKFeatureDeserializer(StreamDefs(
            amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

        ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
            awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

        reader = MinibatchSource([fd,ld])

        # Input width covers the frame plus `context` frames on each side.
        features = C.input_variable(((2*context+1)*feature_dim))
        labels = C.input_variable((num_classes))

        model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                            Dense(num_classes)])
        z = model(features)
        ce = C.cross_entropy_with_softmax(z, labels)
        errs = C.classification_error    (z, labels)

        learner = C.adam_sgd(z.parameters,
                        lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                        momentum=C.momentum_as_time_constant_schedule(1000),
                        low_memory=True,
                        gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
        trainer = C.Trainer(z, (ce, errs), learner)

        input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

        pp = C.ProgressPrinter(freq=0)
        # just run and verify it doesn't crash
        for i in range(3):
            mb_data = reader.next_minibatch(mbsize, input_map=input_map)
            trainer.train_minibatch(mb_data)
            pp.update_with_trainer(trainer, with_metric=True)
    finally:
        # Restore the working directory whether or not the body succeeded.
        os.chdir(abs_path)
开发者ID:rlugojr,项目名称:CNTK,代码行数:49,代码来源:htk_deserializer_test.py

示例7: test_text_format

def test_text_format(tmpdir):
    """Read a sparse feature stream and a dense label stream from CTF text.

    A first minibatch of 7 samples is checked in detail (shape, counts,
    sparsity, label values); a second minibatch of 1 sample is checked for
    its sweep flag and sample counts.
    """
    ctf_path = _write_data(tmpdir, MBDATA_SPARSE)

    input_dim = 1000
    num_output_classes = 5

    stream_defs = StreamDefs(
        features=StreamDef(field='x', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='y', shape=num_output_classes, is_sparse=False))
    source = MinibatchSource(CTFDeserializer(ctf_path, stream_defs),
                             randomize=False)

    assert isinstance(source, MinibatchSource)

    feat_info = source.stream_info('features')
    label_info = source.stream_info('labels')

    batch = source.next_minibatch(7)

    feats = batch[feat_info]
    # 2 sequences, longest has 4 samples, each sample is 1000-dimensional
    assert feats.shape == (2, 4, input_dim)
    assert feats.end_of_sweep
    assert feats.num_sequences == 2
    assert feats.num_samples == 7
    assert feats.is_sparse

    labs = batch[label_info]
    # 2 sequences, one sample each, each sample is 5-dimensional
    assert labs.shape == (2, 1, num_output_classes)
    assert labs.end_of_sweep
    assert labs.num_sequences == 2
    assert labs.num_samples == 2
    assert not labs.is_sparse

    expected_labels = np.asarray([
        [[1.,  0.,  0.,  0.,  0.]],
        [[0.,  1.,  0.,  0.,  0.]]
    ])
    assert np.allclose(labs.asarray(), expected_labels)

    # Second, smaller request.
    batch = source.next_minibatch(1)
    feats = batch[feat_info]
    labs = batch[label_info]

    assert not feats.end_of_sweep
    assert not labs.end_of_sweep
    assert feats.num_samples < 7
    assert labs.num_samples == 1
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:49,代码来源:io_tests.py

示例8: test_crop_dimensionality

def test_crop_dimensionality(tmpdir):
    """Check that crop output is identical with and without a prior scale.

    Five random 40x20 RGB PNGs are written together with a mapping file.
    Two ImageDeserializers read them, one scaling to 40x20 before the crop
    and one cropping directly; every minibatch must have shape
    (1, 1, 3, 10, 20) and both image streams must be equal.
    """
    import io
    from PIL import Image
    np.random.seed(1)

    mapping_path = str(tmpdir / 'file_mapping.txt')
    with open(mapping_path, 'w') as mapping:
        for idx in range(5):
            pixels = np.random.randint(0, 2**8, (20, 40, 3))
            picture = Image.fromarray(pixels.astype('uint8'), "RGB")
            png_buffer = io.BytesIO()
            picture.save(png_buffer, format='PNG')
            assert picture.width == 40 and picture.height == 20

            label = str(idx)
            # Write the PNG next to the mapping file and record it there.
            png_name = label + '.png'
            with open(str(tmpdir/png_name), 'wb') as png_file:
                png_file.write(png_buffer.getvalue())
            mapping.write('.../%s\t%s\n' % (png_name, label))

    def _random_side_crop():
        # A fresh crop transform per call, same settings for both readers.
        return xforms.crop(crop_type='randomside',
                           crop_size=(20, 10), side_ratio=(0.2, 0.5),
                           jitter_type='uniratio')

    scale_then_crop = [
        xforms.scale(width=40, height=20, channels=3),
        _random_side_crop()]
    crop_only = [_random_side_crop()]

    scaled_reader = ImageDeserializer(
        mapping_path,
        StreamDefs(
            images1=StreamDef(field='image', transforms=scale_then_crop),
            labels1=StreamDef(field='label', shape=10)))

    plain_reader = ImageDeserializer(
        mapping_path,
        StreamDefs(
            images2=StreamDef(field='image', transforms=crop_only),
            labels2=StreamDef(field='label', shape=10)))

    mbs = MinibatchSource([scaled_reader, plain_reader])
    for _ in range(5):
        batch = mbs.next_minibatch(1)
        scaled = batch[mbs.streams.images1].asarray()
        plain = batch[mbs.streams.images2].asarray()
        assert scaled.shape == (1, 1, 3, 10, 20)
        assert (scaled == plain).all()
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:48,代码来源:io_tests.py

示例9: test_multiple_streams_in_htk

def test_multiple_streams_in_htk():
    """Two HTK feature streams over the same .scp file must yield equal data.

    The working directory is switched to ``data_path`` while the test runs
    and is always switched back to ``abs_path``, even if reading or the
    assertion fails, so later tests are not left in the wrong directory.
    """
    feature_dim = 33
    context = 2

    # Given relative to data_path.
    features_file = "glob_0000.scp"

    os.chdir(data_path)
    try:
        fd = HTKFeatureDeserializer(StreamDefs(
            amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file),
            amazing_features2 = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

        mbs = MinibatchSource([fd])
        mb = mbs.next_minibatch(1)
        assert (mb[mbs.streams.amazing_features].asarray() == mb[mbs.streams.amazing_features2].asarray()).all()
    finally:
        # Restore the working directory whether or not the body succeeded.
        os.chdir(abs_path)
开发者ID:fly-fisher,项目名称:CNTK,代码行数:16,代码来源:htk_deserializer_test.py

示例10: test_base64_is_equal_image

def test_base64_is_equal_image(tmpdir):
    """Base64-encoded images and PNG files must deserialize identically.

    Ten random 7x5 RGB images are stored twice: base64-encoded inside a
    mapping file and as PNG files referenced from a second mapping file.
    Both are read through one MinibatchSource and compared sample by sample.
    """
    import io
    import base64
    from PIL import Image
    np.random.seed(1)

    png_mapping_path = str(tmpdir / 'file_mapping.txt')
    b64_mapping_path = str(tmpdir / 'base64_mapping.txt')

    with open(png_mapping_path, 'w') as png_mapping, \
            open(b64_mapping_path, 'w') as b64_mapping:
        for idx in range(10):
            pixels = np.random.randint(0, 2**8, (5,7,3))
            picture = Image.fromarray(pixels.astype('uint8'), "RGB")
            png_buffer = io.BytesIO()
            picture.save(png_buffer, format='PNG')
            assert picture.width == 7 and picture.height == 5

            label = str(idx)
            # Entry for the base64 mapping file: label, then encoded PNG.
            b64_text = base64.b64encode(png_buffer.getvalue()).decode('ascii')
            b64_mapping.write('%s\t%s\n' % (label, b64_text))

            # PNG file on disk plus its entry in the file mapping.
            png_name = label + '.png'
            with open(str(tmpdir/png_name), 'wb') as png_file:
                png_file.write(png_buffer.getvalue())
            png_mapping.write('.../%s\t%s\n' % (png_name, label))

    transforms = [xforms.scale(width=7, height=5, channels=3)]
    b64_reader = Base64ImageDeserializer(
        b64_mapping_path,
        StreamDefs(
            images1=StreamDef(field='image', transforms=transforms),
            labels1=StreamDef(field='label', shape=10)))

    png_reader = ImageDeserializer(
        png_mapping_path,
        StreamDefs(
            images2=StreamDef(field='image', transforms=transforms),
            labels2=StreamDef(field='label', shape=10)))

    source = MinibatchSource([b64_reader, png_reader])
    for _ in range(20):
        batch = source.next_minibatch(1)

        from_b64 = batch[source.streams['images1']].asarray()
        from_png = batch[source.streams['images2']].asarray()
        assert(from_b64 == from_png).all()
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:47,代码来源:io_tests.py

示例11: test_full_sweep_minibatch

def test_full_sweep_minibatch(tmpdir):
    """Read a whole sweep in one minibatch and check sequences and masks.

    The source is limited to one sweep (``max_sweeps=1``); requesting 1000
    samples must return both sequences of each stream, flagged as end of
    sweep, with the expected values and masks.
    """
    ctf_path = _write_data(tmpdir, MBDATA_DENSE_1)

    stream_defs = StreamDefs(
        features=StreamDef(field='S0', shape=1),
        labels=StreamDef(field='S1', shape=1))
    source = MinibatchSource(CTFDeserializer(ctf_path, stream_defs),
                             randomization_window_in_chunks=0, max_sweeps=1)

    feat_info = source.stream_info('features')
    label_info = source.stream_info('labels')

    batch = source.next_minibatch(1000)

    assert batch[feat_info].num_sequences == 2
    assert batch[label_info].num_sequences == 2

    def _check_stream(stream_data, expected_sequences, expected_mask):
        # Sweep flag, sequence count, per-sequence values, then the mask.
        assert stream_data.end_of_sweep
        assert len(stream_data.as_sequences()) == 2
        for actual, wanted in zip(stream_data.as_sequences(),
                                  expected_sequences):
            assert np.allclose(actual, wanted)
        assert np.allclose(stream_data.data.mask, expected_mask)

    _check_stream(
        batch[feat_info],
        expected_sequences=[
            [[0], [1], [2], [3]],
            [[4], [5], [6]],
        ],
        expected_mask=[[2, 1, 1, 1],
                       [2, 1, 1, 0]])

    _check_stream(
        batch[label_info],
        expected_sequences=[
            [[0], [1], [3]],
            [[1], [2]],
        ],
        expected_mask=[[2, 1, 1],
                       [2, 1, 0]])
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:46,代码来源:io_tests.py

示例12: test_mlf_binary_files

def test_mlf_binary_files():
    """Check that one minibatch can be read with a binary MLF label file.

    Features come from an ``HTKFeatureDeserializer`` and labels from an
    ``HTKMLFBinaryDeserializer`` pointing at ``mlf2.bin`` under
    ``e2e_data_path``. The working directory is switched to ``data_path``
    while the test runs and is always switched back to ``abs_path``, even
    if reading fails, so later tests are not left in the wrong directory.
    """
    feature_dim = 33
    num_classes = 132
    context = 2

    # Given relative to data_path.
    features_file = "glob_0000.scp"

    os.chdir(data_path)
    try:
        fd = HTKFeatureDeserializer(StreamDefs(
            amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

        ld = HTKMLFBinaryDeserializer(StreamDefs(awesome_labels = StreamDef(shape=num_classes, mlf=e2e_data_path + "mlf2.bin")))

        # Make sure we can read at least one minibatch.
        mbsource = MinibatchSource([fd,ld])
        mbsource.next_minibatch(1)
    finally:
        # Restore the working directory whether or not the body succeeded.
        os.chdir(abs_path)
开发者ID:fly-fisher,项目名称:CNTK,代码行数:19,代码来源:htk_deserializer_test.py

示例13: test_prefetch_with_unpacking

def test_prefetch_with_unpacking(tmpdir):
    """Unpack every minibatch of a ten-sample file while prefetch is running.

    Minibatches of three samples are read until the source is empty. Each
    one is converted with ``asarray()`` and compared to the repeating
    1/2/3 feature pattern written to the file.
    """
    import time

    ctf_text = r'''0  |S0 1 1 1 1   |S1 1000
1   |S0 2 2 2 2  |S1 100
2   |S0 3 3 3 3  |S1 100
3   |S0 1 1 1 1  |S1 10
4   |S0 2 2 2 2  |S1 1
5   |S0 3 3 3 3  |S1 2000
6   |S0 1 1 1 1  |S1 200
7   |S0 2 2 2 2  |S1 200
8   |S0 3 3 3 3  |S1 20
9   |S0 1 1 1 1  |S1 2
'''
    ctf_path = _write_data(tmpdir, ctf_text)

    input_dim = 4
    num_output_classes = 1

    stream_defs = StreamDefs(
        features=StreamDef(field='S0', shape=input_dim, is_sparse=False),
        labels=StreamDef(field='S1', shape=num_output_classes, is_sparse=False))
    source = MinibatchSource(CTFDeserializer(ctf_path, stream_defs),
                             randomize=False, max_samples=FULL_DATA_SWEEP)

    input_map = {'S0': source.streams.features, 'S1': source.streams.labels}
    mb_size = 3
    expected_rows = np.array([[[1, 1, 1, 1]],
                              [[2, 2, 2, 2]],
                              [[3, 3, 3, 3]]], dtype=np.float32)

    # The final minibatch holds only 10 % 3 = 1 sample, so a resize is
    # triggered for it.
    while True:
        batch = source.next_minibatch(mb_size, input_map=input_map)
        time.sleep(1)  # make sure the prefetch kicks in
        if not batch:
            break
        # Force unpacking to check that it does not break the prefetch.
        actual_size = batch['S0'].shape[0]
        assert (batch['S0'].asarray() == expected_rows[0:actual_size]).all()
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:40,代码来源:io_tests.py

示例14: test_max_samples_over_several_sweeps

def test_max_samples_over_several_sweeps(tmpdir):
    """A budget of 11 samples is delivered as 5 + 5 + 1, then nothing."""
    source = MinibatchSource(create_ctf_deserializer(tmpdir), max_samples=11)
    stream_map = {'features': source['features']}

    # Two full minibatches, each reported as reaching the end of a sweep.
    for _ in range(2):
        batch = source.next_minibatch(5, stream_map)

        assert 'features' in batch
        assert batch['features'].num_samples == 5
        assert batch['features'].end_of_sweep

    # The remaining single sample; no end-of-sweep flag this time.
    batch = source.next_minibatch(5, stream_map)

    assert 'features' in batch
    assert batch['features'].num_samples == 1
    assert not batch['features'].end_of_sweep

    # Budget exhausted: the next request comes back empty.
    batch = source.next_minibatch(1, stream_map)

    assert not batch
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:22,代码来源:io_tests.py

示例15: test_max_sweeps

def test_max_sweeps(tmpdir):
    """Three sweeps (12 samples altogether) arrive as 5 + 5 + 2, then nothing."""
    source = MinibatchSource(create_ctf_deserializer(tmpdir), max_sweeps=3)
    stream_map = {'features': source['features']}

    # Two full minibatches, each reported as reaching the end of a sweep.
    for _ in range(2):
        batch = source.next_minibatch(5, stream_map)

        assert 'features' in batch
        assert batch['features'].num_samples == 5
        assert batch['features'].end_of_sweep

    # The last two samples, which also end the final sweep.
    batch = source.next_minibatch(5, stream_map)

    assert 'features' in batch
    assert batch['features'].num_samples == 2
    assert batch['features'].end_of_sweep

    # All sweeps consumed: the next request comes back empty.
    batch = source.next_minibatch(1, stream_map)

    assert not batch
开发者ID:junaidnaseer,项目名称:CNTK,代码行数:23,代码来源:io_tests.py


注:本文中的cntk.io.MinibatchSource类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。