

Python schemes.ConstantScheme Method Code Examples

This article collects typical usage examples of the Python method fuel.schemes.ConstantScheme, gathered from open-source projects. If you are unsure what schemes.ConstantScheme is for or how to use it, the curated code examples below should help; you can also explore further usage examples from the containing module, fuel.schemes.


The sections below present 15 code examples of schemes.ConstantScheme, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code examples.
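
Before diving into the examples, here is a minimal, self-contained sketch of the core pattern they all share: a ConstantScheme tells a Batch transformer how many examples to request per iteration. The toy dataset below is made up for illustration.

import numpy
from fuel.datasets import IterableDataset
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch

# A toy dataset with a single 'data' source of 10 examples.
dataset = IterableDataset({'data': numpy.arange(10)})

# ConstantScheme(3) requests 3 examples at a time; Batch groups the
# underlying example stream into batches of that size.
stream = Batch(dataset.get_example_stream(),
               iteration_scheme=ConstantScheme(3))

for (batch,) in stream.get_epoch_iterator():
    print(batch)  # lists of up to 3 items; the final batch is shorter

The epoch ends when the underlying example stream is exhausted; by default, Batch returns the incomplete final batch rather than dropping it.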

Example 1: setup_datastream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def setup_datastream(path, vocab_file, config):
    ds = QADataset(path, vocab_file, config.n_entities, need_sep_token=config.concat_ctx_and_question)
    it = QAIterator(path, shuffle=config.shuffle_questions)

    stream = DataStream(ds, iteration_scheme=it)

    if config.concat_ctx_and_question:
        stream = ConcatCtxAndQuestion(stream, config.concat_question_before, ds.reverse_vocab['<SEP>'])

    # Group several batches' worth of examples so they can be sorted by length and re-batched into similarly sized batches (see the sketch after this example)
    stream = Batch(stream, iteration_scheme=ConstantScheme(config.batch_size * config.sort_batch_count))
    comparison = _balanced_batch_helper(stream.sources.index('question' if config.concat_ctx_and_question else 'context'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)

    stream = Batch(stream, iteration_scheme=ConstantScheme(config.batch_size))
    stream = Padding(stream, mask_sources=['context', 'question', 'candidates'], mask_dtype='int32')

    return ds, stream 
Author: thomasmesnard, Project: DeepMind-Teaching-Machines-to-Read-and-Comprehend, Lines: 21, Source: data.py
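
Example 1's group-sort-unpack-rebatch pipeline is a recurring trick in the examples below: reading several batches' worth of examples, sorting them by length, and re-batching means each batch needs little padding. Here is a minimal sketch of just that pattern, with a hypothetical sort key standing in for the _balanced_batch_helper helper (whose definition is not shown in the snippet above):

import numpy
from fuel.datasets import IterableDataset
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch, Mapping, Padding, SortMapping, Unpack

batch_size, sort_batch_count = 4, 3

# Made-up variable-length sequences for illustration.
sequences = [numpy.arange(n) for n in (3, 7, 2, 9, 5, 1, 8, 6, 4, 10, 2, 5)]
stream = IterableDataset({'input': sequences}).get_example_stream()

# 1) Read sort_batch_count batches' worth of examples at once.
stream = Batch(stream,
               iteration_scheme=ConstantScheme(batch_size * sort_batch_count))

# 2) Sort each oversized batch by sequence length (hypothetical key,
#    playing the role of _balanced_batch_helper).
input_index = stream.sources.index('input')
stream = Mapping(stream, SortMapping(lambda example: len(example[input_index])))

# 3) Flatten back into single examples, then 4) re-batch at the real size.
stream = Unpack(stream)
stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size))

# 5) Pad; neighbours now have similar lengths, so little padding is wasted.
stream = Padding(stream, mask_sources=('input',))

for padded, mask in stream.get_epoch_iterator():
    print(padded.shape)  # expected: (4, 3), (4, 6), (4, 10)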

Example 2: setup_datastream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    # Raw inputs, phoneme annotations, and per-utterance (start, end)
    # index rows into the phoneme array
    A = numpy.load(os.path.join(path, ('valid_x_raw.npy' if valid else 'train_x_raw.npy')))
    B = numpy.load(os.path.join(path, ('valid_phn.npy' if valid else 'train_phn.npy')))
    C = numpy.load(os.path.join(path, ('valid_seq_to_phn.npy' if valid else 'train_seq_to_phn.npy')))

    D = [B[x[0]:x[1], 2] for x in C]  # phoneme label sequence per utterance

    ds = IndexableDataset({'input': A, 'output': D})
    stream = DataStream(ds, iteration_scheme=ShuffledExampleScheme(len(A)))

    stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size * sort_batch_count))
    comparison = _balanced_batch_helper(stream.sources.index('input'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)

    stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size, num_examples=len(A)))
    stream = Padding(stream, mask_sources=['input', 'output'])

    return ds, stream 
Author: thomasmesnard, Project: CTC-LSTM, Lines: 21, Source: timit.py
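
One detail worth noting in Example 2's second Batch call: ConstantScheme also accepts num_examples, which makes the request iterator finite and allows a smaller final request. A quick sketch of the assumed behavior:

from fuel.schemes import ConstantScheme

# Unbounded: yields the constant batch size forever; the epoch only ends
# when the wrapped stream runs out of examples.
unbounded = ConstantScheme(20)

# Bounded: the requests sum to num_examples, so the last one is smaller.
bounded = ConstantScheme(20, num_examples=50)
print(list(bounded.get_request_iterator()))  # expected: [20, 20, 10]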

Example 3: obtain_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def obtain_stream(dataset, batch_size, size=1):
    if size == 1:
        data_stream = dataset.get_example_stream()
        data_stream = transformers.Batch(data_stream, iteration_scheme=schemes.ConstantScheme(batch_size))

        # Add padding and masks to the dataset; mask_sources should be the
        # one-element tuple ('data',) rather than the bare string ('data')
        data_stream = transformers.Padding(data_stream, mask_sources=('data',))
        return data_stream
    else:
        data_streams = [dataset.get_example_stream() for _ in range(size)]
        data_streams = [transformers.Batch(data_stream, iteration_scheme=schemes.ConstantScheme(batch_size))
                        for data_stream in data_streams]
        data_streams = [transformers.Padding(data_stream, mask_sources=('data',)) for data_stream in data_streams]
        return data_streams 
Author: memray, Project: seq2seq-keyphrase, Lines: 16, Source: build_dataset.py

Example 4: output_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))

    # add padding and masks to the dataset
    # Warning: with multiple output sources, Padding raises ValueError ('All dimensions except length must be equal'); such sources need manual padding
    # data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target', 'target_c'))
    # data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))
    return data_stream 
Author: memray, Project: seq2seq-keyphrase, Lines: 12, Source: keyphrase_copynet.py

Example 5: test_example_iteration_scheme

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def test_example_iteration_scheme(self):
        scheme = ConstantScheme(2)

        class MinimalDataset(Dataset):
            provides_sources = ('data',)
            _example_iteration_scheme = scheme

            def get_data(self, state=None, request=None):
                pass

        assert MinimalDataset().example_iteration_scheme is scheme 
Author: rizar, Project: attention-lvcsr, Lines: 13, Source: test_datasets.py

Example 6: setup_datastream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def setup_datastream(batch_size, **kwargs):
    ds = ToyDataset(**kwargs)
    stream = DataStream(ds, iteration_scheme=SequentialExampleScheme(kwargs['nb_examples']))

    stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size))
    stream = Padding(stream, mask_sources=['input', 'output'])

    return ds, stream 
Author: thomasmesnard, Project: CTC-LSTM, Lines: 10, Source: toy_dataset.py

Example 7: get_data_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def get_data_stream(iterable):
    """Returns a 'fuel.Batch' datastream of
    [x~input~numbers, y~targets~roots], with each iteration returning a
    batch of 20 training examples
    """
    numbers = numpy.asarray(iterable, dtype=floatX)
    dataset = IterableDataset(
        {'numbers': numbers, 'roots': numpy.sqrt(numbers)})
    return Batch(dataset.get_example_stream(), ConstantScheme(20)) 
Author: mila-iqia, Project: blocks-examples, Lines: 11, Source: __init__.py
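
For context, a hypothetical way to consume the stream built by get_data_stream above (floatX is assumed to come from the surrounding Theano setup, e.g. 'float32'; fuel streams also accept as_dict=True to index batches by source name):

import numpy

stream = get_data_stream(numpy.arange(1, 101))
batch = next(stream.get_epoch_iterator(as_dict=True))
print(len(batch['numbers']))   # 20 examples per batch
print(batch['roots'][:3])      # square roots of the first three numbers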

Example 8: output_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def output_stream(dataset, batch_size, size=1):
        data_stream = dataset.get_example_stream()
        data_stream = transformers.Batch(data_stream,
                                         iteration_scheme=schemes.ConstantScheme(batch_size))

        # add padding and masks to the dataset
        data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))
        return data_stream 
Author: MultiPath, Project: CopyNet, Lines: 10, Source: weibo_vest.py

Example 9: output_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def output_stream(dataset, batch_size, size=1):
        data_stream = dataset.get_example_stream()
        data_stream = transformers.Batch(data_stream,
                                         iteration_scheme=schemes.ConstantScheme(batch_size))

        # add padding and masks to the dataset
        data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target', 'target_c'))
        return data_stream 
Author: MultiPath, Project: CopyNet, Lines: 10, Source: lcsts_test.py

Example 10: obtain_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def obtain_stream(dataset, batch_size, size=1):
    if size == 1:
        data_stream = dataset.get_example_stream()
        data_stream = transformers.Batch(data_stream, iteration_scheme=schemes.ConstantScheme(batch_size))

        # Add padding and masks to the dataset; mask_sources should be the
        # one-element tuple ('data',) rather than the bare string ('data')
        data_stream = transformers.Padding(data_stream, mask_sources=('data',))
        return data_stream
    else:
        data_streams = [dataset.get_example_stream() for _ in range(size)]  # range rather than the Python 2-only xrange
        data_streams = [transformers.Batch(data_stream, iteration_scheme=schemes.ConstantScheme(batch_size))
                        for data_stream in data_streams]
        data_streams = [transformers.Padding(data_stream, mask_sources=('data',)) for data_stream in data_streams]
        return data_streams 
Author: MultiPath, Project: CopyNet, Lines: 16, Source: build_dataset.py

Example 11: test_data_driven_epochs

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def test_data_driven_epochs():
    class TestDataset(IterableDataset):
        sources = ('data',)

        def __init__(self):
            self.axis_labels = None
            self.data = [[1, 2, 3, 4],
                         [5, 6, 7, 8]]

        def open(self):
            epoch_iter = iter(self.data)
            data_iter = iter(next(epoch_iter))
            return (epoch_iter, data_iter)

        def next_epoch(self, state):
            try:
                data_iter = iter(next(state[0]))
                return (state[0], data_iter)
            except StopIteration:
                return self.open()

        def get_data(self, state, request):
            data = []
            for i in range(request):
                data.append(next(state[1]))
            return (data,)

    epochs = []
    epochs.append([([1],), ([2],), ([3],), ([4],)])
    epochs.append([([5],), ([6],), ([7],), ([8],)])
    stream = DataStream(TestDataset(), iteration_scheme=ConstantScheme(1))
    assert list(stream.get_epoch_iterator()) == epochs[0]
    assert list(stream.get_epoch_iterator()) == epochs[1]
    assert list(stream.get_epoch_iterator()) == epochs[0]

    stream.reset()
    for i, epoch in zip(range(2), stream.iterate_epochs()):
        assert list(epoch) == epochs[i]

    # test scheme resetting between epochs
    class TestScheme(BatchSizeScheme):

        def get_request_iterator(self):
            return iter([1, 2, 1, 3])

    epochs = []
    epochs.append([([1],), ([2, 3],), ([4],)])
    epochs.append([([5],), ([6, 7],), ([8],)])
    stream = DataStream(TestDataset(), iteration_scheme=TestScheme())
    for i, epoch in zip(range(2), stream.iterate_epochs()):
        assert list(epoch) == epochs[i] 
Author: rizar, Project: attention-lvcsr, Lines: 53, Source: test_datasets.py

Example 12: get_src_trg_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def get_src_trg_stream(cg, config, src_datasets=None, trg_datasets=None,
                       is_training=True, src_vocabs=None, trg_vocabs=None,
                       logprob_datasets=None):
    eid, did = p_(cg)
    if is_training:
        logger.info(' ... src:[{}] - [{}]'.format(
            eid, src_datasets[cg].files[0]))
        logger.info(' ... trg:[{}] - [{}]'.format(
            did, trg_datasets[cg].files[0]))
        stream = Merge([src_datasets[cg].get_example_stream(),
                        trg_datasets[cg].get_example_stream()],
                       ('source', 'target'))
        stream = Filter(stream, predicate=_too_long(config['src_seq_len'],
                                                    config['tgt_seq_len']))

        if 'min_seq_lens' in config and config['min_seq_lens'][cg] > 0:
            stream = Filter(stream,
                            predicate=_too_short(config['min_seq_lens'][cg]))

        stream = Mapping(stream, _oov_to_unk(
                         src_vocab_size=config['src_vocab_sizes'][eid],
                         trg_vocab_size=config['trg_vocab_sizes'][did],
                         unk_id=config['unk_id']))
        stream = Batch(
            stream, iteration_scheme=ConstantScheme(
                config['batch_sizes'][cg]*config['sort_k_batches']))

        stream = Mapping(stream, SortMapping(_length))
        stream = Unpack(stream)
        stream = Batch(stream, iteration_scheme=ConstantScheme(
            config['batch_sizes'][cg]))
    else:  # logprob stream
        src_dataset = TextFile([logprob_datasets[cg][0]],
                               src_vocabs[p_(cg)[0]], None)
        trg_dataset = TextFile([logprob_datasets[cg][1]],
                               trg_vocabs[p_(cg)[1]], None)
        stream = Merge([src_dataset.get_example_stream(),
                        trg_dataset.get_example_stream()],
                       ('source', 'target'))
        stream = Mapping(stream, _oov_to_unk(
                         src_vocab_size=config['src_vocab_sizes'][eid],
                         trg_vocab_size=config['trg_vocab_sizes'][did],
                         unk_id=config['unk_id']))
        bs = 100
        if 'log_prob_bs' in config:
            if isinstance(config['log_prob_bs'], dict):
                bs = config['log_prob_bs'][cg]
            else:
                bs = config['log_prob_bs']
        stream = Batch(stream, iteration_scheme=ConstantScheme(bs))

    masked_stream = Padding(stream)
    masked_stream = Mapping(
        masked_stream, _remapWordIdx(
            [(0, 0, config['src_eos_idxs'][eid]),
             (2, 0, config['trg_eos_idxs'][did])]))
    return masked_stream 
Author: nyu-dl, Project: dl4mt-multi, Lines: 59, Source: stream.py

Example 13: get_logprob_streams

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def get_logprob_streams(config):
    if 'log_prob_sets' not in config:
        return None

    cgs = config['cgs']
    enc_ids, dec_ids = get_enc_dec_ids(cgs)
    datasets = config['log_prob_sets']

    # Prepare source vocabs and files, make sure special tokens are there
    src_vocabs = {k: cPickle.load(open(v))
                  for k, v in config['src_vocabs'].iteritems()}
    for k in src_vocabs.keys():
        src_vocabs[k]['<S>'] = 0
        src_vocabs[k]['</S>'] = config['src_eos_idxs'][k]
        src_vocabs[k]['<UNK>'] = config['unk_id']

    # Prepare target vocabs and files, make sure special tokens are there
    trg_vocabs = {k: cPickle.load(open(v))
                  for k, v in config['trg_vocabs'].iteritems()}
    for k in trg_vocabs.keys():
        trg_vocabs[k]['<S>'] = 0
        trg_vocabs[k]['</S>'] = config['trg_eos_idxs'][k]
        trg_vocabs[k]['<UNK>'] = config['unk_id']

    # Build the preprocessing pipeline for individual streams
    ind_streams = {}
    for cg in cgs:
        eid, did = p_(cg)
        if cg not in datasets:
            continue
        logger.info('Building logprob stream for cg:[{}]'.format(cg))
        src_dataset = TextFile([datasets[cg][0]], src_vocabs[p_(cg)[0]], None)
        trg_dataset = TextFile([datasets[cg][1]], trg_vocabs[p_(cg)[1]], None)
        stream = Merge([src_dataset.get_example_stream(),
                        trg_dataset.get_example_stream()],
                       ('source', 'target'))

        stream = Mapping(stream, _oov_to_unk(
                         src_vocab_size=config['src_vocab_sizes'][eid],
                         trg_vocab_size=config['trg_vocab_sizes'][did],
                         unk_id=config['unk_id']))
        bs = 100
        if 'log_prob_bs' in config:
            if isinstance(config['log_prob_bs'], dict):
                bs = config['log_prob_bs'][cg]
            else:
                bs = config['log_prob_bs']

        stream = Batch(
            stream,
            iteration_scheme=ConstantScheme(
                bs, num_examples=get_num_lines(datasets[cg][0])))

        masked_stream = Padding(stream)
        masked_stream = Mapping(
            masked_stream, _remapWordIdx(
                [(0, 0, config['src_eos_idxs'][eid]),
                 (2, 0, config['trg_eos_idxs'][did])]))
        ind_streams[cg] = masked_stream

    return ind_streams 
Author: nyu-dl, Project: dl4mt-multi, Lines: 63, Source: stream.py

Example 14: get_log_prob_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def get_log_prob_stream(cg, config):
    eid, did = p_(cg)
    dataset = config['log_prob_sets'][cg]

    # Prepare source vocabs and files, make sure special tokens are there
    src_vocab = cPickle.load(open(config['src_vocabs'][eid]))
    src_vocab['<S>'] = 0
    src_vocab['</S>'] = config['src_eos_idxs'][eid]
    src_vocab['<UNK>'] = config['unk_id']

    # Prepare target vocabs and files, make sure special tokens are there
    trg_vocab = cPickle.load(open(config['trg_vocabs'][did]))
    trg_vocab['<S>'] = 0
    trg_vocab['</S>'] = config['trg_eos_idxs'][did]
    trg_vocab['<UNK>'] = config['unk_id']

    # Build the preprocessing pipeline for individual streams
    logger.info('Building logprob stream for cg:[{}]'.format(cg))
    src_dataset = TextFile([dataset[0]], src_vocab, None)
    trg_dataset = TextFile([dataset[1]], trg_vocab, None)
    stream = Merge([src_dataset.get_example_stream(),
                    trg_dataset.get_example_stream()],
                   ('source', 'target'))

    stream = Mapping(stream, _oov_to_unk(
                     src_vocab_size=config['src_vocab_sizes'][eid],
                     trg_vocab_size=config['trg_vocab_sizes'][did],
                     unk_id=config['unk_id']))
    bs = 100
    if 'log_prob_bs' in config:
        if isinstance(config['log_prob_bs'], dict):
            bs = config['log_prob_bs'][cg]
        else:
            bs = config['log_prob_bs']
    stream = Batch(
        stream,
        iteration_scheme=ConstantScheme(
            bs, num_examples=get_num_lines(dataset[0])))

    masked_stream = Padding(stream)
    masked_stream = Mapping(
        masked_stream, _remapWordIdx(
            [(0, 0, config['src_eos_idxs'][eid]),
             (2, 0, config['trg_eos_idxs'][did])]))

    return masked_stream 
Author: nyu-dl, Project: dl4mt-multi, Lines: 48, Source: stream.py

Example 15: get_tr_stream

# Required imports: from fuel import schemes [as alias]
# Or: from fuel.schemes import ConstantScheme [as alias]
def get_tr_stream(src_vocab, trg_vocab, src_data, trg_data,
                  src_vocab_size=30000, trg_vocab_size=30000, unk_id=1,
                  seq_len=50, batch_size=80, sort_k_batches=12, **kwargs):
    """Prepares the training data stream."""

    # Load dictionaries and ensure special tokens exist
    src_vocab = _ensure_special_tokens(
        src_vocab if isinstance(src_vocab, dict)
        else cPickle.load(open(src_vocab)),
        bos_idx=0, eos_idx=src_vocab_size - 1, unk_idx=unk_id)
    trg_vocab = _ensure_special_tokens(
        trg_vocab if isinstance(trg_vocab, dict) else
        cPickle.load(open(trg_vocab)),
        bos_idx=0, eos_idx=trg_vocab_size - 1, unk_idx=unk_id)

    # Get text files from both source and target
    src_dataset = TextFile([src_data], src_vocab, None)
    trg_dataset = TextFile([trg_data], trg_vocab, None)

    # Merge them to get a source, target pair
    stream = Merge([src_dataset.get_example_stream(),
                    trg_dataset.get_example_stream()],
                   ('source', 'target'))

    # Filter sequences that are too long
    stream = Filter(stream,
                    predicate=_too_long(seq_len=seq_len))

    # Replace out of vocabulary tokens with unk token
    stream = Mapping(stream,
                     _oov_to_unk(src_vocab_size=src_vocab_size,
                                 trg_vocab_size=trg_vocab_size,
                                 unk_id=unk_id))

    # Build a batched version of stream to read k batches ahead
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(
                       batch_size*sort_k_batches))

    # Sort all samples in the read-ahead batch
    stream = Mapping(stream, SortMapping(_length))

    # Convert it into a stream again
    stream = Unpack(stream)

    # Construct batches from the stream with specified batch size
    stream = Batch(
        stream, iteration_scheme=ConstantScheme(batch_size))

    # Pad sequences that are short
    masked_stream = PaddingWithEOS(
        stream, [src_vocab_size - 1, trg_vocab_size - 1])

    return masked_stream 
Author: mila-iqia, Project: blocks-examples, Lines: 56, Source: stream.py
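
A hypothetical invocation of get_tr_stream: the vocabulary pickle and tokenized parallel text file names below are assumptions, and the four output sources follow from PaddingWithEOS masking both 'source' and 'target':

train_stream = get_tr_stream(
    src_vocab='vocab.en.pkl', trg_vocab='vocab.fr.pkl',
    src_data='train.en.tok', trg_data='train.fr.tok',
    src_vocab_size=30000, trg_vocab_size=30000, unk_id=1,
    seq_len=50, batch_size=80, sort_k_batches=12)

# Each iteration yields padded index batches plus masks, in the order
# (source, source_mask, target, target_mask).
source, source_mask, target, target_mask = next(
    train_stream.get_epoch_iterator())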


Note: The fuel.schemes.ConstantScheme method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by many developers; copyright belongs to the original authors, and distribution or use should follow each project's License. Do not reproduce without permission.