本文整理匯總了Python中mvpa2.misc.attrmap.AttributeMap類的典型用法代碼示例。如果您正苦於以下問題:Python AttributeMap類的具體用法?Python AttributeMap怎麼用?Python AttributeMap使用的例子?那麼,這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了AttributeMap類的11個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_attrmap_conflicts
def test_attrmap_conflicts():
    # Literals 'a' and 'c' share the numeric value 1, so the reverse
    # (numeric -> literal) direction is ambiguous.  The three maps below
    # differ only in the requested `collisions_resolution`.
    am_n = AttributeMap({'a': 1, 'b': 2, 'c': 1})
    am_t = AttributeMap({'a': 1, 'b': 2, 'c': 1},
                        collisions_resolution='tuple')
    am_l = AttributeMap({'a': 1, 'b': 2, 'c': 1},
                        collisions_resolution='lucky')

    query = ['a', 'b', 'a', 'c']
    # literal -> numeric mapping must not depend on the resolution strategy
    for left, right in ((am_n, am_t), (am_t, am_l)):
        ok_(np.all(left.to_numeric(query) == right.to_numeric(query)))

    # without an explicit strategy the reverse mapping is expected to fail
    assert_raises(ValueError, am_n.to_literal, [2])

    # with a strategy the reverse mapping only has to run; the results are
    # computed but not checked any further
    r_t = am_t.to_literal([2, 1])
    r_l = am_l.to_literal([2, 1])
示例2: _call
def _call(self, dataset):
    """Compute sensitivities and put literal labels into their targets sa.

    The computation itself is delegated to the parent class.  If the result
    carries the classifier's targets attribute among its sample attributes,
    the stored numeric values are translated back into literals, using
    -1 for the classifier's `neglabels` and +1 for its `poslabels`.

    Returns the (possibly relabelled) sensitivities.
    """
    # NOTE(review): super(self.__class__, self) recurses endlessly as soon
    # as this class gets subclassed; kept as-is because the enclosing class
    # name is not visible here.
    sens = super(self.__class__, self)._call(dataset)
    slave = self.clf
    space = slave.get_space()
    if space not in sens.sa:
        # nothing to translate
        return sens

    # labels are present -- transform them into meaningful tuples
    # (or not if just a single beast); on a clash poslabels win since
    # they are entered last
    sign_of_label = {}
    for label in slave.neglabels:
        sign_of_label[label] = -1
    for label in slave.poslabels:
        sign_of_label[label] = +1
    am = AttributeMap(sign_of_label)

    # XXX here we still can get a sensitivity per each label
    #     (e.g. with SMLR as the slave clf), so I guess we should
    #     tune up Multiclass...Analyzer to add an additional sa
    #     And here we might need to check if asobjarray call is necessary
    #     and should be actually done
    numeric = sens.sa[space].value
    sens.sa[space] = am.to_literal(numeric, recurse=True)
    return sens
示例3: _test_gpr_model_selection
def _test_gpr_model_selection(self):  # pragma: no cover
    """Smoke test for running model selection while getting GPRWeights

    TODO: DISABLED because setting of hyperparameters was not adopted for 0.6 (yet)
    """
    if not externals.exists('openopt'):
        return

    # GPR has to be fed numbers, so targets go through an attribute map
    label_map = AttributeMap()
    # alternative data source: data_generators.linear1d_gaussian_noise()
    dataset = datasets['uni2small'].copy()
    numeric_targets = label_map.to_numeric(dataset.targets)
    dataset.targets = numeric_targets.astype(float)

    kernel = GeneralizedLinearKernel()
    clf = GPR(kernel, enable_ca=['log_marginal_likelihood'])
    # should be regular weights
    sa = clf.get_sensitivity_analyzer()
    # with model selection
    sa_ms = clf.get_sensitivity_analyzer(flavor='model_select')

    def prints():
        # debugging helper (not called by the test itself)
        print("%s %s %s" % (clf.ca.log_marginal_likelihood,
                            clf.kernel.Sigma_p,
                            clf.kernel.sigma_0))

    # log marginal likelihood after the plain analyzer ...
    sa(dataset)
    lml = clf.ca.log_marginal_likelihood

    # ... and after the analyzer performing model selection
    sa_ms(dataset)
    lml_ms = clf.ca.log_marginal_likelihood

    self.assertTrue(lml_ms > lml)
示例4: __init__
def __init__(self, space=None, **kwargs):
    # Initialize the classifier on top of Learner.
    #
    # space  -- name of the sample attribute used for training/testing;
    #           'targets' is substituted when None is given
    # kwargs -- handed over to Learner.__init__ untouched
    Learner.__init__(self,
                     space='targets' if space is None else space,
                     **kwargs)

    # XXX
    # Mapping of literal to numerical labels (and back).  It sits in the
    # base class because some classifiers also have this nasty
    # 'regression' mode, and the code in this class has to convert the
    # regression output into discrete labels.  Preferably, however, the
    # mapping should be kept in the respective low-level implementations
    # that need it.
    self._attrmap = AttributeMap()

    # Number of features for which the classifier was trained.
    # If 0 -- it wasn't trained at all
    self.__trainednfeatures = 0

    self._set_retrainable(self.params.retrainable, force=True)
示例5: Classifier
class Classifier(Learner):
"""Abstract classifier class to be inherited by all classifiers
"""
# Kept separate from doc to don't pollute help(clf), especially if
# we including help for the parent class
_DEV__doc__ = """
Required behavior:
For every classifier is has to be possible to be instantiated without
having to specify the training pattern.
Repeated calls to the train() method with different training data have to
result in a valid classifier, trained for the particular dataset.
It must be possible to specify all classifier parameters as keyword
arguments to the constructor.
Recommended behavior:
Derived classifiers should provide access to *estimates* -- i.e. that
information that is finally used to determine the predicted class label.
Michael: Maybe it works well if each classifier provides a 'estimates'
state member. This variable is a list as long as and in same order
as Dataset.uniquetargets (training data). Each item in the list
corresponds to the likelyhood of a sample to belong to the
respective class. However the semantics might differ between
classifiers, e.g. kNN would probably store distances to class-
neighbors, where PLR would store the raw function value of the
logistic function. So in the case of kNN low is predictive and for
PLR high is predictive. Don't know if there is the need to unify
that.
As the storage and/or computation of this information might be
demanding its collection should be switchable and off be default.
Nomenclature
* predictions : result of the last call to .predict()
* estimates : might be different from predictions if a classifier's predict()
makes a decision based on some internal value such as
probability or a distance.
"""
# Dict that contains the parameters of a classifier.
# This shall provide an interface to plug generic parameter optimizer
# on all classifiers (e.g. grid- or line-search optimizer)
# A dictionary is used because Michael thinks that access by name is nicer.
# Additionally Michael thinks ATM that additional information might be
# necessary in some situations (e.g. reasonably predefined parameter range,
# minimal iteration stepsize, ...), therefore the value to each key should
# also be a dict or we should use mvpa2.base.param.Parameter'...
training_stats = ConditionalAttribute(enabled=False,
doc="Confusion matrix of learning performance")
predictions = ConditionalAttribute(enabled=True,
doc="Most recent set of predictions")
estimates = ConditionalAttribute(enabled=True,
doc="Internal classifier estimates the most recent " +
"predictions are based on")
predicting_time = ConditionalAttribute(enabled=True,
doc="Time (in seconds) which took classifier to predict")
__tags__ = []
"""Describes some specifics about the classifier -- is that it is
doing regression for instance...."""
# TODO: make it available only for actually retrainable classifiers
retrainable = Parameter(False, allowedtype='bool',
doc="""Either to enable retraining for 'retrainable' classifier.""",
index=1002)
def __init__(self, space=None, **kwargs):
    # Initialize the classifier on top of Learner.
    #
    # space  -- name of the sample attribute used for training/testing;
    #           'targets' is substituted when None is given
    # kwargs -- handed over to Learner.__init__ untouched
    Learner.__init__(self,
                     space='targets' if space is None else space,
                     **kwargs)

    # XXX
    # Mapping of literal to numerical labels (and back).  It sits in the
    # base class because some classifiers also have this nasty
    # 'regression' mode, and the code in this class has to convert the
    # regression output into discrete labels.  Preferably, however, the
    # mapping should be kept in the respective low-level implementations
    # that need it.
    self._attrmap = AttributeMap()

    # Number of features for which the classifier was trained.
    # If 0 -- it wasn't trained at all
    self.__trainednfeatures = 0

    self._set_retrainable(self.params.retrainable, force=True)
# deprecate
#self.__trainedidhash = None
#.........這裏部分代碼省略.........
示例6: _train
def _train(self, dataset):
"""Train SVM
"""
# XXX watchout
# self.untrain()
newkernel, newsvm = False, False
# local bindings for faster lookup
params = self.params
retrainable = self.params.retrainable
targets_sa_name = self.get_space() # name of targets sa
targets_sa = dataset.sa[targets_sa_name] # actual targets sa
if retrainable:
_changedData = self._changedData
# LABELS
ul = None
self.__traindataset = dataset
# OK -- we have to map labels since
# binary ones expect -1/+1
# Multiclass expect labels starting with 0, otherwise they puke
# when ran from ipython... yikes
if __debug__:
debug("SG_", "Creating labels instance")
if self.__is_regression__:
labels_ = np.asarray(targets_sa.value, dtype='double')
else:
ul = targets_sa.unique
# ul.sort()
if len(ul) == 2:
# assure that we have -1/+1
_labels_dict = {ul[0]:-1.0, ul[1]:+1.0}
elif len(ul) < 2:
raise FailedToTrainError, \
"We do not have 1-class SVM brought into SG yet"
else:
# can't use plain enumerate since we need them swapped
_labels_dict = dict([ (ul[i], i) for i in range(len(ul))])
# Create SG-customized attrmap to assure -1 / +1 if necessary
self._attrmap = AttributeMap(_labels_dict, mapnumeric=True)
if __debug__:
debug("SG__", "Mapping labels using dict %s" % _labels_dict)
labels_ = self._attrmap.to_numeric(targets_sa.value).astype(float)
labels = shogun.Features.Labels(labels_)
_setdebug(labels, 'Labels')
# KERNEL
# XXX cruel fix for now... whole retraining business needs to
# be rethought
if retrainable:
_changedData['kernel_params'] = _changedData.get('kernel_params', False)
# TODO: big RF to move non-kernel classifiers away
if 'kernel-based' in self.__tags__ and (not retrainable
or _changedData['traindata'] or _changedData['kernel_params']):
# If needed compute or just collect arguments for SVM and for
# the kernel
if retrainable and __debug__:
if _changedData['traindata']:
debug("SG",
"Re-Creating kernel since training data has changed")
if _changedData['kernel_params']:
debug("SG",
"Re-Creating kernel since params %s has changed" %
_changedData['kernel_params'])
k = self.params.kernel
k.compute(dataset)
self.__kernel = kernel = k.as_raw_sg()
newkernel = True
self.kernel_params.reset() # mark them as not-changed
#_setdebug(kernel, 'Kernels')
#self.__condition_kernel(kernel)
if retrainable:
if __debug__:
debug("SG_", "Resetting test kernel for retrainable SVM")
self.__kernel_test = None
# TODO -- handle _changedData['params'] correctly, ie without recreating
# whole SVM
Cs = None
if not retrainable or self.__svm is None or _changedData['params']:
# SVM
if self.params.has_key('C'):
#.........這裏部分代碼省略.........
示例7: SVM
#.........這裏部分代碼省略.........
if retrainable:
_changedData = self._changedData
# LABELS
ul = None
self.__traindataset = dataset
# OK -- we have to map labels since
# binary ones expect -1/+1
# Multiclass expect labels starting with 0, otherwise they puke
# when ran from ipython... yikes
if __debug__:
debug("SG_", "Creating labels instance")
if self.__is_regression__:
labels_ = np.asarray(targets_sa.value, dtype='double')
else:
ul = targets_sa.unique
# ul.sort()
if len(ul) == 2:
# assure that we have -1/+1
_labels_dict = {ul[0]:-1.0, ul[1]:+1.0}
elif len(ul) < 2:
raise FailedToTrainError, \
"We do not have 1-class SVM brought into SG yet"
else:
# can't use plain enumerate since we need them swapped
_labels_dict = dict([ (ul[i], i) for i in range(len(ul))])
# Create SG-customized attrmap to assure -1 / +1 if necessary
self._attrmap = AttributeMap(_labels_dict, mapnumeric=True)
if __debug__:
debug("SG__", "Mapping labels using dict %s" % _labels_dict)
labels_ = self._attrmap.to_numeric(targets_sa.value).astype(float)
labels = shogun.Features.Labels(labels_)
_setdebug(labels, 'Labels')
# KERNEL
# XXX cruel fix for now... whole retraining business needs to
# be rethought
if retrainable:
_changedData['kernel_params'] = _changedData.get('kernel_params', False)
# TODO: big RF to move non-kernel classifiers away
if 'kernel-based' in self.__tags__ and (not retrainable
or _changedData['traindata'] or _changedData['kernel_params']):
# If needed compute or just collect arguments for SVM and for
# the kernel
if retrainable and __debug__:
if _changedData['traindata']:
debug("SG",
"Re-Creating kernel since training data has changed")
if _changedData['kernel_params']:
debug("SG",
"Re-Creating kernel since params %s has changed" %
_changedData['kernel_params'])
示例8: plot_decision_boundary_2d
def plot_decision_boundary_2d(dataset, clf=None,
targets=None, regions=None, maps=None,
maps_res=50, vals=None,
data_callback=None):
"""Plot a scatter of a classifier's decision boundary and data points
Assumes data is 2d (no way to visualize otherwise!!)
Parameters
----------
dataset : `Dataset`
Data points to visualize (might be the data `clf` was train on, or
any novel data).
clf : `Classifier`, optional
Trained classifier
targets : string, optional
What samples attributes to use for targets. If None and clf is
provided, then `clf.params.targets_attr` is used.
regions : string, optional
Plot regions (polygons) around groups of samples with the same
attribute (and target attribute) values. E.g. chunks.
maps : string in {'targets', 'estimates'}, optional
Either plot underlying colored maps, such as clf predictions
within the spanned regions, or estimates from the classifier
(might not work for some).
maps_res : int, optional
Number of points in each direction to evaluate.
Points are between axis limits, which are set automatically by
matplotlib. Higher number will yield smoother decision lines but come
at the cost of O^2 classifying time/memory.
vals : array of floats, optional
Where to draw the contour lines if maps='estimates'
data_callback : callable, optional
Callable object to preprocess the new data points.
Classified points of the form samples = data_callback(xysamples).
I.e. this can be a function to normalize them, or cache them
before they are classified.
"""
if vals is None:
vals = [-1, 0, 1]
if False:
## from mvpa2.misc.data_generators import *
## from mvpa2.clfs.svm import *
## from mvpa2.clfs.knn import *
## ds = dumb_feature_binary_dataset()
dataset = normal_feature_dataset(nfeatures=2, nchunks=5,
snr=10, nlabels=4, means=[ [0,1], [1,0], [1,1], [0,0] ])
dataset.samples += dataset.sa.chunks[:, None]*0.1 # slight shifts for chunks ;)
#dataset = normal_feature_dataset(nfeatures=2, nlabels=3, means=[ [0,1], [1,0], [1,1] ])
#dataset = normal_feature_dataset(nfeatures=2, nlabels=2, means=[ [0,1], [1,0] ])
#clf = LinearCSVMC(C=-1)
clf = kNN(4)#LinearCSVMC(C=-1)
clf.train(dataset)
#clf = None
#plot_decision_boundary_2d(ds, clf)
targets = 'targets'
regions = 'chunks'
#maps = 'estimates'
maps = 'targets'
#maps = None #'targets'
res = 50
vals = [-1, 0, 1]
data_callback=None
pl.clf()
if dataset.nfeatures != 2:
raise ValueError('Can only plot a decision boundary in 2D')
Pioff()
a = pl.gca() # f.add_subplot(1,1,1)
attrmap = None
if clf:
estimates_were_enabled = clf.ca.is_enabled('estimates')
clf.ca.enable('estimates')
if targets is None:
targets = clf.get_space()
# Lets reuse classifiers attrmap if it is good enough
attrmap = clf._attrmap
predictions = clf.predict(dataset)
targets_sa_name = targets # bad Yarik -- will rebind targets to actual values
targets_lit = dataset.sa[targets_sa_name].value
utargets_lit = dataset.sa[targets_sa_name].unique
if not (attrmap is not None
and len(attrmap)
and set(clf._attrmap.keys()).issuperset(utargets_lit)):
# create our own
attrmap = AttributeMap(mapnumeric=True)
targets = attrmap.to_numeric(targets_lit)
utargets = attrmap.to_numeric(utargets_lit)
vmin = min(utargets)
vmax = max(utargets)
cmap = pl.cm.RdYlGn # argument
#.........這裏部分代碼省略.........
示例9: AttributeMap
except ValueError, e:
print "Sorry - plotting of estimates isn't full supported for %s. " \
"Got exception %s" % (clf, e)
elif maps == 'targets':
map_values = attrmap.to_numeric(predictions_new).reshape(x.shape)
a.imshow(map_values.T, **imshow_kwargs)
#CS = a.contour(x, y, map_values, vals, zorder=6,
# linestyles=linestyles, extent=extent, colors='k')
# Plot regions belonging to the same pair of attribute given
# (e.g. chunks) and targets attribute
if regions:
chunks_sa = dataset.sa[regions]
chunks_lit = chunks_sa.value
uchunks_lit = chunks_sa.value
chunks_attrmap = AttributeMap(mapnumeric=True)
chunks = chunks_attrmap.to_numeric(chunks_lit)
uchunks = chunks_attrmap.to_numeric(uchunks_lit)
from matplotlib.delaunay.triangulate import Triangulation
from matplotlib.patches import Polygon
# Lets figure out convex halls for each chunk/label pair
for target in utargets:
t_mask = targets == target
for chunk in uchunks:
tc_mask = np.logical_and(t_mask,
chunk == chunks)
tc_samples = dataset.samples[tc_mask]
tr = Triangulation(tc_samples[:, 0],
tc_samples[:, 1])
poly = pl.fill(tc_samples[tr.hull, 0],
示例10: to_lightsvm_format
def to_lightsvm_format(dataset, out, targets_attr='targets',
                       domain=None, am=None):
    """Export dataset into LightSVM format

    Parameters
    ----------
    dataset : Dataset
    out
      Anything understanding .write(string), such as `File`
    targets_attr : string, optional
      Name of the samples attribute to be output
    domain : {None, 'regression', 'binary', 'multiclass'}, optional
      What domain dataset belongs to.  If `None`, it would be deduced
      depending on the datatype ('regression' if float, classification
      in case of int or string, with 'binary'/'multiclass' depending on
      the number of unique targets)
    am : `AttributeMap` or None, optional
      Which mapping to use for storing the non-conformant targets. If
      None was provided, new one would be automagically generated
      depending on the given/deduced domain.

    Returns
    -------
    am
      The mapping applied to the targets: the provided one, a newly
      generated one, or None if the targets were written unmapped.

    Raises
    ------
    ValueError
      If `domain` is 'binary' but the targets do not have exactly two
      unique values, or if the provided `am` does not map the unique
      targets onto the labels required by the domain.

    Notes
    -----
    LightSVM format is an ASCII representation with a single sample per
    each line::

      output featureIndex:featureValue ... featureIndex:featureValue

    where ``output`` is specific for a given domain:

    regression
      float number
    binary
      integer labels from {-1, 1}
    multiclass
      integer labels from {1..ds.targets_attr.nunique}
    """
    targets_a = dataset.sa[targets_attr]
    targets = targets_a.value

    # XXX this all below
    #  * might become cleaner
    #  * might be RF to become more generic to be used may be elsewhere as well

    if domain is None:
        # byte/unicode strings and (un)signed integers denote classes,
        # everything else is taken as a regression target
        if targets.dtype.kind in ('S', 'U', 'i', 'u'):
            if len(targets_a.unique) == 2:
                domain = 'binary'
            else:
                domain = 'multiclass'
        else:
            domain = 'regression'

    if domain in ('multiclass', 'binary'):
        # check if labels are appropriate and provide mapping if necessary
        utargets = targets_a.unique
        if domain == 'binary':
            conformant = [-1, 1]
        else:
            conformant = list(range(1, len(utargets) + 1))

        if set(utargets) != set(conformant):
            # need mapping
            if domain == 'binary' and len(utargets) != 2:
                raise ValueError(
                    "We need 2 unique targets in %s of %s. Got targets "
                    "from set %s" % (targets_attr, dataset, utargets))
            if am is None:
                # mapnumeric=True: integer targets must be remapped too,
                # not only literal ones
                am = AttributeMap(dict(zip(utargets, conformant)),
                                  mapnumeric=True)
            elif set(am.to_numeric(utargets)) != set(conformant):
                # validate what the map actually produces for these
                # targets (its keys are the literals, not the labels)
                if domain == 'binary':
                    raise ValueError(
                        "Provided %s doesn't map into binary "
                        "labels -1,+1" % (am,))
                raise ValueError(
                    "Provided %s doesn't map into multiclass "
                    "range 1..N" % (am, ))

    if am is not None:
        # map the targets
        targets = am.to_numeric(targets)

    # feature indices are 1-based and the same for every sample
    findices = range(1, dataset.nfeatures + 1)
    for target, sample in zip(targets, dataset.samples):
        features = ' '.join('%i:%.8g' % (i, v)
                            for i, v in zip(findices, sample))
        out.write('%g %s\n' % (target, features))

    out.flush()                # push it out
    return am
示例11: test_attrmap
def test_attrmap():
    # Walk through AttributeMap: automatically generated and custom maps,
    # recursive numeric -> literal mapping, error conditions and the
    # dict-like interface.
    map_default = {'eins': 0, 'zwei': 2, 'sieben': 1}  # for reference only
    custom_map = {'eins': 11, 'zwei': 22, 'sieben': 33}
    words = ['eins', 'zwei', 'sieben', 'eins', 'sieben', 'eins']
    foreign_words = ['uno', 'dos', 'tres']
    codes_auto = [0, 2, 1, 0, 1, 0]
    codes_custom = [11, 22, 33, 11, 33, 11]

    # no custom mapping given: the map starts out empty ...
    am = AttributeMap()
    assert_false(am)
    ok_(len(am) == 0)
    # ... and is populated by the first literal -> numeric conversion
    assert_array_equal(am.to_numeric(words), codes_auto)
    assert_array_equal(am.to_literal(codes_auto), words)
    ok_(am)
    ok_(len(am) == 3)

    #
    # Tests for recursive mapping + preserving datatype
    class myarray(np.ndarray):
        pass

    # nested input is rejected unless recursion is requested
    assert_raises(KeyError, am.to_literal, [(1, 2), 2, 0])
    literal_fancy = [(1, 2), 2, [0], np.array([0, 1]).view(myarray)]
    literal_fancy_tuple = tuple(literal_fancy)
    literal_fancy_array = np.array(literal_fancy, dtype=object)

    for container in (literal_fancy,
                      literal_fancy_tuple,
                      literal_fancy_array):
        res = am.to_literal(container, recurse=True)
        assert_equal(res[0], ('sieben', 'zwei'))
        assert_equal(res[1], 'zwei')
        assert_equal(res[2], ['eins'])
        assert_array_equal(res[3], ['eins', 'sieben'])

        # types of result and subsequences should be preserved
        ok_(isinstance(res, container.__class__))
        for position, kind in enumerate((tuple, str, list, myarray)):
            ok_(isinstance(res[position], kind))

    # yet another example
    boxed = np.empty(1, dtype=object)
    boxed[0] = (0, 1)
    res = am.to_literal(boxed, recurse=True)
    ok_(isinstance(res[0], tuple))

    #
    # with custom mapping
    am = AttributeMap(map=custom_map)
    assert_array_equal(am.to_numeric(words), codes_custom)
    assert_array_equal(am.to_literal(codes_custom), words)

    # numeric input is handed back unmapped
    assert_array_equal(am.to_numeric(codes_custom), codes_custom)
    # even if the map doesn't fit
    assert_array_equal(am.to_numeric(codes_auto), codes_auto)

    # need to_numeric first
    am = AttributeMap()
    assert_raises(RuntimeError, am.to_literal, [1, 2, 3])

    # stupid args
    assert_raises(ValueError, AttributeMap, map=codes_custom)

    # map mismatch
    am = AttributeMap(map=custom_map)
    if __debug__:
        # checked only in __debug__
        assert_raises(KeyError, am.to_numeric, foreign_words)
    # needs reset and should work afterwards
    am.clear()
    assert_array_equal(am.to_numeric(foreign_words), [2, 0, 1])

    # and now reverse
    am = AttributeMap(map=custom_map)
    assert_raises(KeyError, am.to_literal, codes_auto)

    # dict-like interface
    am = AttributeMap()
    pairs = [(key, value) for key, value in am.iteritems()]
    ok_(pairs == [])