This article collects typical usage examples of the Python method dataset.Dataset.get_data. If you have been wondering how to use Dataset.get_data, or are looking for real-world examples of it, the hand-picked code examples below should help. You can also browse further usage examples of the containing class, dataset.Dataset.
Four code examples of the Dataset.get_data method are shown below, sorted by popularity by default. You can upvote the ones you like or find useful; your ratings help the system recommend better Python code examples.
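Before the examples, here is a minimal sketch of the call pattern they all share: get_data(split) returns a pair (samples, labels) for the requested split. The constructor arguments differ from example to example, so the ones used here are placeholders, not values taken from the page:

# Minimal sketch of the shared pattern (Python 2, like the examples below);
# the constructor arguments are placeholders and depend on the project config.
from dataset import Dataset

dataset = Dataset('my_dataset_cfg', nr_clusters=128)      # hypothetical config
train_samples, train_labels = dataset.get_data('train')   # (samples, labels)
test_samples, test_labels = dataset.get_data('test')
all_samples = [str(sample) for sample in train_samples + test_samples]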
Example 1: master
# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    D = 64
    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [str(sample) for sample in dataset.get_data('train')[0] +
               dataset.get_data('test')[0]]
    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(
                dataset.FEAT_DIR + suffix_in, 'gmm',
                'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None
    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp' + suffix)
    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)
    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE
    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm}
    if nr_processes > 1:
        nr_samples_per_process = len(samples) / nr_processes + 1
        for ii in xrange(nr_processes):
            mp.Process(target=worker,
                       args=(samples[
                           ii * nr_samples_per_process:
                           (ii + 1) * nr_samples_per_process], ),
                       kwargs=kwargs).start()
    else:
        worker(samples, **kwargs)
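A note on the worker split above: the chunk size uses Python 2 integer division (len(samples) / nr_processes + 1), so every process gets a contiguous slice of samples and the last slice may be shorter or even empty. The sketch below illustrates the arithmetic with made-up values; the sample list and process count are placeholders, not taken from the example:

# Illustration of the chunking above, with placeholder values (Python 2 semantics).
samples = range(10)                      # stand-in for the real sample list
nr_processes = 3
nr_samples_per_process = len(samples) / nr_processes + 1   # 10 / 3 + 1 == 4
parts = [samples[ii * nr_samples_per_process:(ii + 1) * nr_samples_per_process]
         for ii in xrange(nr_processes)]
# parts == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]] -- the last chunk is shorter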
Example 2: evaluate_main
# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def evaluate_main(src_cfg, analytical_fim, nr_slices_to_aggregate, verbose):
    dataset = Dataset(CFG[src_cfg]['dataset_name'], **CFG[src_cfg]['dataset_params'])
    te_samples, _ = dataset.get_data('test')
    nr_parts = int(np.ceil(float(len(te_samples)) / CFG[src_cfg]['samples_chunk']))
    preds_path = os.path.join(
        CACHE_PATH, "%s_predictions_afim_%s_pi_%s_sqrt_nr_descs_%s_nagg_%d_part_%s.dat" % (
            src_cfg, analytical_fim, False, False, nr_slices_to_aggregate, "%d"))
    true_labels = None
    for part in xrange(nr_parts):
        # Loads scores from file.
        with open(preds_path % part, 'r') as ff:
            tl = cPickle.load(ff)
            pd = cPickle.load(ff)
        # Prepares labels.
        if true_labels is None:
            true_labels = tl
            predictions = pd
        else:
            for cls in true_labels.keys():
                true_labels[cls] = np.hstack((true_labels[cls], tl[cls])).squeeze()
                predictions[cls] = np.hstack((predictions[cls], pd[cls])).squeeze()
    # Remove scores of duplicate samples.
    str_te_samples = map(str, te_samples)
    idxs = [str_te_samples.index(elem) for elem in set(str_te_samples)]
    for cls in true_labels.keys():
        true_labels[cls] = true_labels[cls][idxs]
        predictions[cls] = predictions[cls][idxs]
    # Scores results.
    metric = CFG[src_cfg]['metric']
    if metric == 'average_precision':
        compute_average_precision(true_labels, predictions, verbose=verbose)
    elif metric == 'accuracy':
        compute_accuracy(true_labels, predictions, verbose=verbose)
    else:
        assert False, "Unknown metric %s." % metric
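The duplicate-removal step above works because list.index returns the position of the first occurrence of each distinct sample name, so only one score per sample survives; since iterating over a set is unordered, the surviving rows are not in the original test order (which does not matter for the metrics computed afterwards). A tiny illustration with made-up sample names:

# Behaviour of the de-duplication above, with placeholder sample names.
str_te_samples = ['vid_a', 'vid_b', 'vid_a', 'vid_c']
idxs = [str_te_samples.index(elem) for elem in set(str_te_samples)]
# Each unique name maps to its first occurrence, e.g. idxs == [0, 1, 3]
# (the order depends on set iteration and is not guaranteed).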
Example 3: predict_main
# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def predict_main(
        src_cfg, sqrt_type, empirical_standardizations, l2_norm_type,
        prediction_type, analytical_fim, part, nr_slices_to_aggregate=1,
        nr_threads=4, verbose=0):
    dataset = Dataset(CFG[src_cfg]['dataset_name'], **CFG[src_cfg]['dataset_params'])
    D, K = dataset.D, dataset.VOC_SIZE
    if verbose:
        print "Loading train data."
    tr_outfile = os.path.join(
        CACHE_PATH, "%s_train_afim_%s_pi_%s_sqrt_nr_descs_%s.dat" % (
            src_cfg, analytical_fim, False, False))
    tr_video_data, tr_video_labels, tr_scalers = load_normalized_tr_data(
        dataset, nr_slices_to_aggregate, l2_norm_type,
        empirical_standardizations, sqrt_type, analytical_fim, tr_outfile,
        verbose)
    # Computing kernel.
    tr_kernel = np.dot(tr_video_data, tr_video_data.T)
    if verbose > 1:
        print '\tTrain data: %dx%d.' % tr_video_data.shape
        print '\tTrain kernel: %dx%d.' % tr_kernel.shape
    if verbose:
        print "Training classifier."
    eval = Evaluation(CFG[src_cfg]['eval_name'], **CFG[src_cfg]['eval_params'])
    eval.fit(tr_kernel, tr_video_labels)
    clfs = [
        compute_weights(eval.get_classifier(cls), tr_video_data, tr_std=None)
        for cls in xrange(eval.nr_classes)]
    if verbose:
        print "Loading test data."
    te_samples, _ = dataset.get_data('test')
    visual_word_mask = build_visual_word_mask(D, K)
    te_outfile = os.path.join(
        CACHE_PATH, "%s_test_afim_%s_pi_%s_sqrt_nr_descs_%s_part_%s.dat" % (
            src_cfg, analytical_fim, False, False, "%d"))
    low = CFG[src_cfg]['samples_chunk'] * part
    high = np.minimum(CFG[src_cfg]['samples_chunk'] * (part + 1), len(te_samples))
    if verbose:
        print "\tPart %3d from %5d to %5d." % (part, low, high)
        print "\tEvaluating on %d threads." % nr_threads
    te_outfile_ii = te_outfile % part
    fisher_vectors, counts, nr_descs, nr_slices, _, te_labels = load_slices(
        dataset, te_samples[low: high], analytical_fim, outfile=te_outfile_ii,
        verbose=verbose)
    slice_data = SliceData(fisher_vectors, counts, nr_descs)
    agg_slice_data = slice_aggregator(slice_data, nr_slices, nr_slices_to_aggregate)
    agg_slice_data = agg_slice_data._replace(
        fisher_vectors=(agg_slice_data.fisher_vectors *
                        agg_slice_data.nr_descriptors[:, np.newaxis]))
    video_mask = build_aggregation_mask(
        sum([[ii] * int(np.ceil(float(nn) / nr_slices_to_aggregate))
             for ii, nn in enumerate(nr_slices)],
            []))
    if verbose:
        print "\tTest data: %dx%d." % agg_slice_data.fisher_vectors.shape
    # Scale the FVs in the main program, to avoid blowing up the memory
    # when doing multi-threading, since each thread will make a copy of the
    # data when transforming the data.
    if prediction_type == 'approx':
        for tr_scaler in tr_scalers:
            if tr_scaler is None:
                continue
            agg_slice_data = agg_slice_data._replace(
                fisher_vectors=tr_scaler.transform(
                    agg_slice_data.fisher_vectors))
    eval_args = [
        (ii, clfs[ii][0], clfs[ii][1], tr_scalers, agg_slice_data, video_mask,
         visual_word_mask, prediction_type, verbose)
        for ii in xrange(eval.nr_classes)]
    evaluator = threads.ParallelIter(nr_threads, eval_args, evaluate_worker)
    if verbose > 1:
        print "\t\tClasses:",
    true_labels = {}
    predictions = {}
    for ii, pd in evaluator:
        tl = eval.lb.transform(te_labels)[:, ii]
        true_labels[ii] = tl
        predictions[ii] = pd
        if verbose > 1:
            # ......... rest of this example omitted here .........
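The tail of this example is omitted above. Judging from the per-part prediction files that evaluate_main in Example 2 reads back (the "%s_predictions_afim_..._part_%d.dat" cache), the omitted portion presumably serializes the per-class true_labels and predictions dictionaries with cPickle; this is an inference from the two examples, not code shown on the page.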
Example 4: compute_statistics
# Required import: from dataset import Dataset [as alias]
# Or: from dataset.Dataset import get_data [as alias]
def compute_statistics(src_cfg, **kwargs):
    """ Computes sufficient statistics needed for the bag-of-words or
    Fisher vector model.
    """
    # Default parameters.
    ip_type = kwargs.get("ip_type", "dense5.track15mbh")
    suffix = kwargs.get("suffix", "")
    dataset = Dataset(src_cfg, ip_type=ip_type, suffix=suffix)
    nr_clusters = kwargs.get("nr_clusters", 128)
    dataset.VOC_SIZE = nr_clusters
    model_type = kwargs.get("model_type", "fv")
    worker_type = kwargs.get("worker_type", "normal")
    outfilename = kwargs.get("outfilename", "stats.tmp")
    if worker_type == "normal":
        worker = compute_statistics_from_video_worker
        outfilename = kwargs.get("outfilename", "stats.tmp")
    elif worker_type == "per_slice":
        from per_slice.compute_sstats_worker import compute_statistics_worker
        worker = compute_statistics_worker
    if kwargs.has_key("spm"):
        from spatial_pyramids import compute_statistics_worker
        worker = compute_statistics_worker
        outfilename = "stats.tmp_spm%d%d%d" % kwargs.get("spm")
    fn_pca = os.path.join(dataset.FEAT_DIR, "pca", "pca_64.pkl")
    pca = kwargs.get("pca", load_pca(fn_pca))
    fn_gmm = os.path.join(dataset.FEAT_DIR, "gmm", "gmm_%d" % nr_clusters)
    gmm = kwargs.get("gmm", load_gmm(fn_gmm))
    descs_to_sstats = Model(model_type, gmm).descs_to_sstats
    nr_processes = kwargs.get("nr_processes", multiprocessing.cpu_count())
    train_samples, train_labels = dataset.get_data("train")
    test_samples, test_labels = dataset.get_data("test")
    _samples = train_samples + test_samples
    _labels = train_labels + test_labels
    samples, labels = get_tupled_data(_samples, _labels)
    sstats_out = SstatsMap(os.path.join(dataset.FEAT_DIR, "statistics_k_%d" % nr_clusters, outfilename))
    if nr_processes > 1:
        import multiprocessing as mp
        processes = []
        nr_samples_per_process = len(samples) // nr_processes + 1
        for ii in xrange(nr_processes):
            process = mp.Process(
                target=worker,
                args=(
                    dataset,
                    samples[ii * nr_samples_per_process : (ii + 1) * nr_samples_per_process],
                    labels[ii * nr_samples_per_process : (ii + 1) * nr_samples_per_process],
                    sstats_out,
                    descs_to_sstats,
                    pca,
                    gmm,
                ),
                kwargs=kwargs,
            )
            processes.append(process)
            process.start()
        # Wait for jobs to finish.
        for process in processes:
            process.join()
    else:
        # We use this special case because it makes it possible to debug.
        worker(dataset, samples, labels, sstats_out, descs_to_sstats, pca, gmm, **kwargs)
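To show how the keyword arguments above fit together, here is a hypothetical invocation; every value (the configuration name, cluster count, and so on) is an assumption chosen for illustration, not taken from the page:

# Hypothetical call; all argument values are illustrative assumptions.
compute_statistics(
    'hollywood2',                     # src_cfg: placeholder dataset configuration
    ip_type='dense5.track15mbh',      # same default as read from kwargs above
    nr_clusters=256,
    model_type='fv',
    worker_type='per_slice',
    nr_processes=8,
    outfilename='stats.tmp')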