

Python dataset.load_data Method: Code Examples

This article collects typical usage examples of the Python dataset.load_data method, gathered from open-source projects. If you are unsure what dataset.load_data does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the dataset module.


Eight code examples of dataset.load_data are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system surface better Python code samples.
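A note on signatures before the examples: dataset.load_data is not one function but a per-project convention, and the snippets below call it in several different ways (with a split name in Examples 1 and 4-6, with a parsed JSON object in Examples 2 and 7, with a config in Example 3, and with a path plus options in Example 8). As a point of reference only, here is a minimal sketch of the split-name style; the data/ directory layout, the pickle format, and the plain return value are assumptions for illustration, not any project's actual implementation.

import os
import pickle

def load_data(split):
    """Hypothetical loader: return the samples stored for one dataset split.

    Assumes pre-pickled files named train.pkl / validation.pkl in a data/
    directory; real projects typically wrap the result in a class exposing
    e.g. a next_batch() method, as the BetaElephant examples below expect.
    """
    path = os.path.join('data', '%s.pkl' % split)
    with open(path, 'rb') as f:
        return pickle.load(f)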

Example 1: __init__

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import sys; import tensorflow as tf (TF1 API)
def __init__(self, model_folder, checkpoint_file):
    # Make the model folder importable, then pull in its model/dataset modules.
    sys.path.append(model_folder)

    from model import get_model
    from dataset import load_data

    self.dataset = load_data('validation')

    self.sess = tf.InteractiveSession()
    self.model = get_model('policy')

    # Restore trained weights from the given checkpoint.
    saver = tf.train.Saver()
    saver.restore(self.sess, checkpoint_file)
Author: milkpku | Project: BetaElephant | Lines: 15 | Source: model_eval.py
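Appending model_folder to sys.path is what makes the deferred from model import get_model and from dataset import load_data resolve against that specific model directory, so each experiment folder can ship its own model.py and dataset.py without being installed as a package. The trade-off is that two evaluators in the same process would collide on the module names.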

Example 2: reload_state

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import json; import torch; plus the project's
# BidafModel, get_optimizer, tokenize_data and get_loader helpers.
def reload_state(checkpoint, training_state, config, args):
    """
    Reload state when resuming training.
    """
    model, id_to_token, id_to_char = BidafModel.from_checkpoint(
        config['bidaf'], checkpoint)
    if torch.cuda.is_available() and args.cuda:
        model.cuda()
    model.train()

    optimizer = get_optimizer(model, config, training_state)

    token_to_id = {tok: id_ for id_, tok in id_to_token.items()}
    char_to_id = {char: id_ for id_, char in id_to_char.items()}

    len_tok_voc = len(token_to_id)
    len_char_voc = len(char_to_id)

    with open(args.data) as f_o:
        data, _ = load_data(json.load(f_o),
                            span_only=True, answered_only=True)
    limit_passage = config.get('training', {}).get('limit')
    data = tokenize_data(data, token_to_id, char_to_id, limit_passage)

    data = get_loader(data, config)

    assert len(token_to_id) == len_tok_voc
    assert len(char_to_id) == len_char_voc

    return model, id_to_token, id_to_char, optimizer, data 
Author: spacemanidol | Project: MSMARCO | Lines: 32 | Source: train.py
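The two assert statements at the end are the interesting part: tokenize_data is expected to reuse the vocabularies recovered from the checkpoint without growing them, since the restored embedding matrices were sized for exactly len_tok_voc tokens and len_char_voc characters. Any new symbol introduced while tokenizing the resumed data would otherwise index past the embedding tables.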

Example 3: __init__

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import yaml; from argparse import Namespace;
# plus the project's build_model and misc_utils helpers.
def __init__(self, config, **opt):
    # Load the config used for training and merge it with testing options;
    # keyword options passed at test time override the training values.
    with open(config, "r") as f:
        self.config = yaml.safe_load(f)  # safe_load avoids bare yaml.load's unsafe default
    self.config = Namespace(**{**self.config, **opt})

    # Load training data.pkl for src and tgt vocabs
    self.data = load_data(self.config)

    # Load trained model checkpoints
    device, devices_ids = misc_utils.set_cuda(self.config)
    self.model, _ = build_model(None, self.config, device)
    self.model.eval()
Author: THUDM | Project: KOBE | Lines: 14 | Source: api.py
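Merging the YAML config with the runtime options via Namespace(**{**self.config, **opt}) is a compact override pattern: dictionary unpacking lets any test-time keyword (say, a beam size or checkpoint path) shadow the value saved at training time, while everything else is inherited unchanged, so training and inference read the same attribute-style config object.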

Example 4: train

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import logging; import tensorflow as tf (TF1 API);
# plus the project's Config and get_model.
def train(args):

    device = args.device
    load_path = args.load_path
    # load data
    train_data = load_data('train')
    val_data = load_data('validation')

    # load model
    with tf.device('/gpu:%d' % device):
        model = get_model('train')

    # trainer init
    optimizer = Config.optimizer
    train_step = optimizer.minimize(model.loss)

    # init session and saver
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    if load_path is None:
        # originally tf.initialize_all_variables(), deprecated and later removed
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, load_path)
        print("Model restored from %s" % load_path)

    # accuracy: flatten predictions/labels over the 9x10 board x 16 planes
    pred = tf.reshape(model.pred, [-1, 9 * 10 * 16])
    label = tf.reshape(model.label, [-1, 9 * 10 * 16])
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    logging.basicConfig(filename='log.txt', level=logging.DEBUG)
    # train steps
    for i in range(Config.n_epoch):

        # training step
        batch_data, batch_label = train_data.next_batch(Config.minibatch_size)

        input_dict = {model.label: batch_label}
        for var, data in zip(model.inputs, batch_data):
            input_dict[var] = data

        sess.run(train_step, feed_dict=input_dict)

        # evaluation step
        if (i + 1) % Config.evalue_point == 0:
            batch_data, batch_label = val_data.next_batch(Config.minibatch_size)
            val_dict = {model.label: batch_label}
            for var, data in zip(model.inputs, batch_data):
                val_dict[var] = data
            score = accuracy.eval(feed_dict=val_dict)
            print("epoch %d, accuracy is %.2f" % (i, score))
            logging.info("epoch %d, accuracy is %.2f" % (i, score))

        # save step
        if (i + 1) % Config.check_point == 0:
            save_path = saver.save(sess, "%s/epoch-%d" % (Config.save_path, i))
            print("Model saved in file: %s" % save_path)
            logging.info("Model saved in file: %s" % save_path)
Author: milkpku | Project: BetaElephant | Lines: 62 | Source: trainer.py
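The [-1, 9*10*16] reshape deserves a remark: BetaElephant plays Chinese chess, so the 9x10 factors are presumably the Xiangqi board's files and ranks, with 16 a per-square piece/move encoding. That turns move prediction into a single flat softmax classification whose argmax can be compared directly against the one-hot label.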

Example 5: train

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import tensorflow as tf (TF1 API); plus the
# project's Config and get_model.
def train(args):

    device = args.device
    load_path = args.load_path
    # load data
    train_data = load_data('train')
    val_data = load_data('validation')

    # load model
    with tf.device('/gpu:%d' % device):
        model = get_model('train')

    # trainer init
    optimizer = Config.optimizer
    train_step = optimizer.minimize(model.loss)

    # init session and saver
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    if load_path is None:
        # originally tf.initialize_all_variables(), deprecated and later removed
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, load_path)
        print("Model restored from %s" % load_path)

    # accuracy: flatten predictions/labels over the 9x10 board x 16 planes
    pred = tf.reshape(model.pred, [-1, 9 * 10 * 16])
    label = tf.reshape(model.label, [-1, 9 * 10 * 16])
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # train steps
    for i in range(Config.n_epoch):

        # training step
        batch_data, batch_label = train_data.next_batch(Config.minibatch_size)

        input_dict = {model.label: batch_label}
        for var, data in zip(model.inputs, batch_data):
            input_dict[var] = data

        sess.run(train_step, feed_dict=input_dict)

        # evaluation step
        if (i + 1) % Config.evalue_point == 0:
            batch_data, batch_label = val_data.next_batch(Config.minibatch_size)
            val_dict = {model.label: batch_label}
            for var, data in zip(model.inputs, batch_data):
                val_dict[var] = data
            score = accuracy.eval(feed_dict=val_dict)
            print("epoch %d, accuracy is %.2f" % (i, score))

        # save step
        if (i + 1) % Config.check_point == 0:
            save_path = saver.save(sess, "%s/epoch-%d" % (Config.save_path, i))
            print("Model saved in file: %s" % save_path)
Author: milkpku | Project: BetaElephant | Lines: 59 | Source: trainer.py
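This variant is identical to Example 4 apart from dropping the log.txt logging; only the console print statements remain.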

Example 6: train

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import logging; import tensorflow as tf (TF1 API);
# plus the project's Config and get_model.
def train(args):

    device = args.device
    load_path = args.load_path
    # load data
    train_data = load_data('train')
    val_data = load_data('validation')

    # load model
    with tf.device('/gpu:%d' % device):
        model = get_model('policy')

    # trainer init
    optimizer = Config.optimizer
    train_step = optimizer.minimize(model.loss)

    # init session and saver
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    if load_path is None:
        # originally tf.initialize_all_variables(), deprecated and later removed
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, load_path)
        print("Model restored from %s" % load_path)

    # accuracy: flatten predictions/labels over the 9x10 board x 16 planes
    pred = tf.reshape(model.pred, [-1, 9 * 10 * 16])
    label = tf.reshape(model.label, [-1, 9 * 10 * 16])
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    logging.basicConfig(filename='log.txt', level=logging.DEBUG)
    # train steps
    for i in range(Config.n_epoch):

        # training step
        batch_data, batch_label = train_data.next_batch(Config.minibatch_size)

        input_dict = {model.label: batch_label}
        for var, data in zip(model.inputs, batch_data):
            input_dict[var] = data

        sess.run(train_step, feed_dict=input_dict)

        # evaluation step
        if (i + 1) % Config.evalue_point == 0:
            batch_data, batch_label = val_data.next_batch(Config.minibatch_size)
            val_dict = {model.label: batch_label}
            for var, data in zip(model.inputs, batch_data):
                val_dict[var] = data
            score = accuracy.eval(feed_dict=val_dict)
            print("epoch %d, accuracy is %.2f" % (i, score))
            logging.info("epoch %d, accuracy is %.2f" % (i, score))

        # save step
        if (i + 1) % Config.check_point == 0:
            save_path = saver.save(sess, "%s/epoch-%d" % (Config.save_path, i))
            print("Model saved in file: %s" % save_path)
            logging.info("Model saved in file: %s" % save_path)
Author: milkpku | Project: BetaElephant | Lines: 62 | Source: trainer.py
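Relative to Example 4, the only change here is get_model('policy'): the same training loop is reused to fit the policy network, with the loss, accuracy, and checkpointing logic untouched.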

Example 7: init_state

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import json; import numpy as np; import torch;
# plus the project's BidafModel, tokenize_data, get_loader, get_optimizer,
# SymbolEmbSourceText, SymbolEmbSourceNorm, and symbol_injection helpers.
def init_state(config, args):
    token_to_id = {'': 0}  # reserve id 0 for the empty/padding symbol
    char_to_id = {'': 0}
    print('Loading data...')
    with open(args.data) as f_o:
        data, _ = load_data(json.load(f_o), span_only=True, answered_only=True)
    print('Tokenizing data...')
    data = tokenize_data(data, token_to_id, char_to_id)
    data = get_loader(data, config)

    id_to_token = {id_: tok for tok, id_ in token_to_id.items()}
    id_to_char = {id_: char for char, id_ in char_to_id.items()}

    print('Creating model...')
    model = BidafModel.from_config(config['bidaf'], id_to_token, id_to_char)

    if args.word_rep:
        print('Loading pre-trained embeddings...')
        with open(args.word_rep) as f_o:
            pre_trained = SymbolEmbSourceText(
                    f_o,
                    set(tok for id_, tok in id_to_token.items() if id_ != 0))
        mean, cov = pre_trained.get_norm_stats(args.use_covariance)
        rng = np.random.RandomState(2)
        oovs = SymbolEmbSourceNorm(mean, cov, rng, args.use_covariance)

        model.embedder.embeddings[0].embeddings.weight.data = torch.from_numpy(
            symbol_injection(
                id_to_token, 0,
                model.embedder.embeddings[0].embeddings.weight.data.numpy(),
                pre_trained, oovs))
    else:
        pass  # No pretraining, just keep the random values.

    # Char embeddings are already random, so we don't need to update them.

    if torch.cuda.is_available() and args.cuda:
        model.cuda()
    model.train()

    optimizer = get_optimizer(model, config, state=None)
    return model, id_to_token, id_to_char, optimizer, data 
Author: spacemanidol | Project: MSMARCO | Lines: 44 | Source: train.py
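The embedding setup is the notable step here: known tokens take their pre-trained vectors from SymbolEmbSourceText, while out-of-vocabulary tokens are drawn from SymbolEmbSourceNorm, a random source parameterized by the mean (and optionally covariance) of the pre-trained embeddings, so OOV vectors live on the same scale as the real ones. The fixed RandomState(2) makes that initialization reproducible across runs.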

Example 8: main

# Required import: import dataset
# Or: from dataset import load_data
# This snippet additionally assumes: import torch; import torch.nn as nn;
# import torch.optim as optim; from torch.utils.data import DataLoader;
# from torch.optim.lr_scheduler import StepLR; from tqdm import tqdm; plus the
# project's RecSysDataset, collate_fn, NARM, validate, trainForEpoch, args and device.
def main():
    print('Loading data...')
    train, valid, test = load_data(args.dataset_path, valid_portion=args.valid_portion)
    
    train_data = RecSysDataset(train)
    valid_data = RecSysDataset(valid)
    test_data = RecSysDataset(test)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)

    if args.dataset_path.split('/')[-2] == 'diginetica':
        n_items = 43098
    elif args.dataset_path.split('/')[-2] in ['yoochoose1_64', 'yoochoose1_4']:
        n_items = 37484
    else:
        raise Exception('Unknown Dataset!')

    model = NARM(n_items, args.hidden_size, args.embed_dim, args.batch_size).to(device)

    if args.test:
        ckpt = torch.load('latest_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        recall, mrr = validate(test_loader, model)
        print("Test: Recall@{}: {:.4f}, MRR@{}: {:.4f}".format(args.topk, recall, args.topk, mrr))
        return

    optimizer = optim.Adam(model.parameters(), args.lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    for epoch in tqdm(range(args.epoch)):
        # train for one epoch
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr=200)

        recall, mrr = validate(valid_loader, model)
        print('Epoch {} validation: Recall@{}: {:.4f}, MRR@{}: {:.4f} \n'.format(epoch, args.topk, recall, args.topk, mrr))

        # save the latest model checkpoint (overwritten every epoch)
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar') 
Author: Wang-Shuo | Project: Neural-Attentive-Session-Based-Recommendation-PyTorch | Lines: 49 | Source: main.py
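Note how n_items is hard-coded by inspecting the dataset path: 43098 items for diginetica and 37484 for the yoochoose splits. The embedding layer of NARM must match the item-id space of the preprocessed data exactly, so an unrecognized directory name fails fast with an exception rather than training against a mis-sized vocabulary.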


Note: the dataset.load_data examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and you should consult the corresponding project's license before using or redistributing them. Do not republish without permission.