

Python Dataset.get_data Method Code Examples

This article collects and summarizes typical usage examples of the Python method dataset.Dataset.get_data. If you are wondering how exactly Dataset.get_data is used, or are looking for working examples of it, the hand-picked code samples below may help. You can also explore further usage examples of the enclosing class, dataset.Dataset.


The following shows 4 code examples of the Dataset.get_data method, sorted by popularity by default.
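
Before diving into the examples, here is a minimal sketch of the call pattern they all share: get_data(split) returns a pair of samples and labels for the requested split. It assumes the project-specific dataset module used below; the constructor arguments shown are illustrative placeholders, not values from any of the original projects.

# Minimal usage sketch; 'some_config' and nr_clusters=128 are placeholders.
from dataset import Dataset

dataset = Dataset('some_config', nr_clusters=128)
train_samples, train_labels = dataset.get_data('train')   # samples and labels of the train split
test_samples, test_labels = dataset.get_data('test')      # same interface for the test split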

Example 1: master

# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    D = 64

    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [str(sample) for sample in dataset.get_data('train')[0] +
               dataset.get_data('test')[0]]

    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(
                dataset.FEAT_DIR + suffix_in, 'gmm',
                'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None

    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp' + suffix)

    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)

    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE

    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm}

    if nr_processes > 1:
        nr_samples_per_process = len(samples) // nr_processes + 1
        for ii in xrange(nr_processes):
            mp.Process(target=worker,
                       args=(samples[
                           ii * nr_samples_per_process:
                           (ii + 1) * nr_samples_per_process], ),
                       kwargs=kwargs).start()
    else:
        worker(samples, **kwargs)
Developer: PierreHao, Project: fisher_vectors, Lines of code: 50, Source file: merge_n_slices.py
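
When nr_processes > 1, master splits the sample list into contiguous chunks of len(samples) // nr_processes + 1 items and hands one chunk to each worker process. The standalone sketch below (made-up sample names, no processes started) only illustrates how that slicing partitions the list:

# Standalone sketch of the chunking used in master; the sample names are made up.
samples = ['clip_%02d' % ii for ii in range(10)]
nr_processes = 3
nr_samples_per_process = len(samples) // nr_processes + 1   # 10 // 3 + 1 == 4

chunks = [samples[ii * nr_samples_per_process:(ii + 1) * nr_samples_per_process]
          for ii in range(nr_processes)]
# -> three chunks of sizes 4, 4 and 2; every sample appears in exactly one chunk.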

Example 2: evaluate_main

# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def evaluate_main(src_cfg, analytical_fim, nr_slices_to_aggregate, verbose):

    dataset = Dataset(CFG[src_cfg]['dataset_name'], **CFG[src_cfg]['dataset_params'])
    te_samples, _ = dataset.get_data('test')
    nr_parts = int(np.ceil(float(len(te_samples)) / CFG[src_cfg]['samples_chunk']))

    preds_path = os.path.join(
        CACHE_PATH, "%s_predictions_afim_%s_pi_%s_sqrt_nr_descs_%s_nagg_%d_part_%s.dat" % (
            src_cfg, analytical_fim, False, False, nr_slices_to_aggregate, "%d"))

    true_labels = None

    for part in xrange(nr_parts):

        # Loads scores from file.
        with open(preds_path % part, 'r') as ff:
            tl = cPickle.load(ff)
            pd = cPickle.load(ff)

        # Prepares labels.
        if true_labels is None:
            true_labels = tl
            predictions = pd
        else:
            for cls in true_labels.keys():
                true_labels[cls] = np.hstack((true_labels[cls], tl[cls])).squeeze()
                predictions[cls] = np.hstack((predictions[cls], pd[cls])).squeeze()

    # Remove scores of duplicate samples.
    str_te_samples = map(str, te_samples)
    idxs = [str_te_samples.index(elem) for elem in set(str_te_samples)]

    for cls in true_labels.keys():
        true_labels[cls] = true_labels[cls][idxs]
        predictions[cls] = predictions[cls][idxs]

    # Scores results.
    metric = CFG[src_cfg]['metric']
    if metric == 'average_precision':
        compute_average_precision(true_labels, predictions, verbose=verbose)
    elif metric == 'accuracy':
        compute_accuracy(true_labels, predictions, verbose=verbose)
    else:
        assert False, "Unknown metric %s." % metric
Developer: danoneata, Project: approx_norm_fv, Lines of code: 46, Source file: ssqrt_l2_approx.py
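
The deduplication step in evaluate_main keeps, for every distinct test sample, the index of its first occurrence in te_samples, and then uses those indices to filter both the labels and the predictions. A small self-contained illustration with made-up sample names:

# Illustration of the duplicate-removal idiom used above; sample names are made up.
str_te_samples = ['a', 'b', 'a', 'c', 'b']
idxs = [str_te_samples.index(elem) for elem in set(str_te_samples)]
# Each index points at the first occurrence of a unique sample;
# sorted(idxs) == [0, 1, 3] (the order of idxs itself follows set iteration order).
unique_samples = [str_te_samples[ii] for ii in idxs]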

Example 3: predict_main

# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def predict_main(
    src_cfg, sqrt_type, empirical_standardizations, l2_norm_type,
    prediction_type, analytical_fim, part, nr_slices_to_aggregate=1,
    nr_threads=4, verbose=0):

    dataset = Dataset(CFG[src_cfg]['dataset_name'], **CFG[src_cfg]['dataset_params'])
    D, K = dataset.D, dataset.VOC_SIZE

    if verbose:
        print "Loading train data."

    tr_outfile = os.path.join(
        CACHE_PATH, "%s_train_afim_%s_pi_%s_sqrt_nr_descs_%s.dat" % (
            src_cfg, analytical_fim, False, False))
    tr_video_data, tr_video_labels, tr_scalers = load_normalized_tr_data(
        dataset, nr_slices_to_aggregate, l2_norm_type,
        empirical_standardizations, sqrt_type, analytical_fim, tr_outfile,
        verbose)

    # Computing kernel.
    tr_kernel = np.dot(tr_video_data, tr_video_data.T)

    if verbose > 1:
        print '\tTrain data:   %dx%d.' % tr_video_data.shape
        print '\tTrain kernel: %dx%d.' % tr_kernel.shape

    if verbose:
        print "Training classifier."

    eval = Evaluation(CFG[src_cfg]['eval_name'], **CFG[src_cfg]['eval_params'])
    eval.fit(tr_kernel, tr_video_labels)
    clfs = [
        compute_weights(eval.get_classifier(cls), tr_video_data, tr_std=None)
        for cls in xrange(eval.nr_classes)]

    if verbose:
        print "Loading test data."

    te_samples, _ = dataset.get_data('test')
    visual_word_mask = build_visual_word_mask(D, K)

    te_outfile = os.path.join(
        CACHE_PATH, "%s_test_afim_%s_pi_%s_sqrt_nr_descs_%s_part_%s.dat" % (
            src_cfg, analytical_fim, False, False, "%d"))

    low = CFG[src_cfg]['samples_chunk'] * part
    high = np.minimum(CFG[src_cfg]['samples_chunk'] * (part + 1), len(te_samples))

    if verbose:
        print "\tPart %3d from %5d to %5d." % (part, low, high)
        print "\tEvaluating on %d threads." % nr_threads

    te_outfile_ii = te_outfile % part
    fisher_vectors, counts, nr_descs, nr_slices, _, te_labels = load_slices(
        dataset, te_samples[low: high], analytical_fim, outfile=te_outfile_ii,
        verbose=verbose)
    slice_data = SliceData(fisher_vectors, counts, nr_descs)

    agg_slice_data = slice_aggregator(slice_data, nr_slices, nr_slices_to_aggregate)
    agg_slice_data = agg_slice_data._replace(
        fisher_vectors=(agg_slice_data.fisher_vectors *
                        agg_slice_data.nr_descriptors[:, np.newaxis]))

    video_mask = build_aggregation_mask(
        sum([[ii] * int(np.ceil(float(nn) / nr_slices_to_aggregate))
             for ii, nn in enumerate(nr_slices)],
            []))

    if verbose:
        print "\tTest data: %dx%d." % agg_slice_data.fisher_vectors.shape

    # Scale the FVs in the main program, to avoid blowing up the memory
    # when doing multi-threading, since each thread will make a copy of the
    # data when transforming the data.
    if prediction_type == 'approx':
        for tr_scaler in tr_scalers:
            if tr_scaler is None:
                continue
            agg_slice_data = agg_slice_data._replace(
                fisher_vectors=tr_scaler.transform(
                    agg_slice_data.fisher_vectors))

    eval_args = [
        (ii, clfs[ii][0], clfs[ii][1], tr_scalers, agg_slice_data, video_mask,
         visual_word_mask, prediction_type, verbose)
        for ii in xrange(eval.nr_classes)]
    evaluator = threads.ParallelIter(nr_threads, eval_args, evaluate_worker)

    if verbose > 1:
        print "\t\tClasses:",

    true_labels = {}
    predictions = {}

    for ii, pd in evaluator:
        tl = eval.lb.transform(te_labels)[:, ii]
        true_labels[ii] = tl
        predictions[ii] = pd

    if verbose > 1:
#......... part of the code omitted here .........
Developer: danoneata, Project: approx_norm_fv, Lines of code: 103, Source file: ssqrt_l2_approx.py
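
The video_mask construction in predict_main repeats each test video's index once per aggregated slice, i.e. ceil(nr_slices / nr_slices_to_aggregate) times per video, before passing the flat list to build_aggregation_mask. A small numeric sketch of just that list comprehension (the slice counts are made up and build_aggregation_mask itself is not reproduced here):

# Sketch of the index list fed to build_aggregation_mask; slice counts are made up.
import numpy as np

nr_slices = [5, 2, 3]            # hypothetical number of slices per test video
nr_slices_to_aggregate = 2

mask_ids = sum([[ii] * int(np.ceil(float(nn) / nr_slices_to_aggregate))
                for ii, nn in enumerate(nr_slices)],
               [])
# mask_ids == [0, 0, 0, 1, 2, 2]: video 0 yields 3 aggregated slices,
# video 1 yields 1 and video 2 yields 2.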

Example 4: compute_statistics

# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def compute_statistics(src_cfg, **kwargs):
    """ Computes sufficient statistics needed for the bag-of-words or
    Fisher vector model.

    """
    # Default parameters.
    ip_type = kwargs.get("ip_type", "dense5.track15mbh")
    suffix = kwargs.get("suffix", "")
    dataset = Dataset(src_cfg, ip_type=ip_type, suffix=suffix)

    nr_clusters = kwargs.get("nr_clusters", 128)
    dataset.VOC_SIZE = nr_clusters

    model_type = kwargs.get("model_type", "fv")
    worker_type = kwargs.get("worker_type", "normal")

    outfilename = kwargs.get("outfilename", "stats.tmp")
    if worker_type == "normal":
        worker = compute_statistics_from_video_worker
        outfilename = kwargs.get("outfilename", "stats.tmp")
    elif worker_type == "per_slice":
        from per_slice.compute_sstats_worker import compute_statistics_worker

        worker = compute_statistics_worker

    if kwargs.has_key("spm"):
        from spatial_pyramids import compute_statistics_worker

        worker = compute_statistics_worker
        outfilename = "stats.tmp_spm%d%d%d" % kwargs.get("spm")

    fn_pca = os.path.join(dataset.FEAT_DIR, "pca", "pca_64.pkl")
    pca = kwargs.get("pca", load_pca(fn_pca))

    fn_gmm = os.path.join(dataset.FEAT_DIR, "gmm", "gmm_%d" % nr_clusters)
    gmm = kwargs.get("gmm", load_gmm(fn_gmm))
    descs_to_sstats = Model(model_type, gmm).descs_to_sstats

    nr_processes = kwargs.get("nr_processes", multiprocessing.cpu_count())

    train_samples, train_labels = dataset.get_data("train")
    test_samples, test_labels = dataset.get_data("test")
    _samples = train_samples + test_samples
    _labels = train_labels + test_labels
    samples, labels = get_tupled_data(_samples, _labels)

    sstats_out = SstatsMap(os.path.join(dataset.FEAT_DIR, "statistics_k_%d" % nr_clusters, outfilename))

    if nr_processes > 1:
        import multiprocessing as mp

        processes = []
        nr_samples_per_process = len(samples) // nr_processes + 1
        for ii in xrange(nr_processes):
            process = mp.Process(
                target=worker,
                args=(
                    dataset,
                    samples[ii * nr_samples_per_process : (ii + 1) * nr_samples_per_process],
                    labels[ii * nr_samples_per_process : (ii + 1) * nr_samples_per_process],
                    sstats_out,
                    descs_to_sstats,
                    pca,
                    gmm,
                ),
                kwargs=kwargs,
            )
            processes.append(process)
            process.start()
        # Wait for jobs to finish.
        for process in processes:
            process.join()
    else:
        # We use this special case, because it makes possible to debug.
        worker(dataset, samples, labels, sstats_out, descs_to_sstats, pca, gmm, **kwargs)
Developer: danoneata, Project: fisher_vectors, Lines of code: 77, Source file: compute_sstats.py
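
For reference, a hypothetical invocation of compute_statistics that spells out the keyword arguments read via kwargs.get above; the configuration name and the concrete values are placeholders, not taken from the original project:

# Hypothetical call; 'some_config' and the parameter values are placeholders.
compute_statistics(
    'some_config',                    # src_cfg
    ip_type='dense5.track15mbh',      # descriptor type (the default used above)
    nr_clusters=128,                  # vocabulary size, i.e. number of GMM components
    model_type='fv',                  # 'fv' for Fisher vectors
    worker_type='normal',             # or 'per_slice'
    outfilename='stats.tmp',
    nr_processes=4)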


Note: The dataset.Dataset.get_data examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; the copyright of the source code belongs to the original authors, and any distribution or use should follow the corresponding project's License. Do not reproduce without permission.