This article collects typical usage examples of the numpy.unique function in Python. If you have been puzzling over questions like: what exactly does numpy.unique do, what is it for, and how do you use it? Then congratulations, the hand-picked code examples below may be just the help you need.
The following presents 15 code examples of the unique function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: check_classifiers_classes
def check_classifiers_classes(name, Classifier):
    X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non-negative data, for things
    # like NMF
    X -= X.min() - .1
    y_names = np.array(["one", "two", "three"])[y]

    for y_names in [y_names, y_names.astype('O')]:
        if name in ["LabelPropagation", "LabelSpreading"]:
            # TODO some complication with -1 label
            y_ = y
        else:
            y_ = y_names

        classes = np.unique(y_)
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name == 'BernoulliNB':
            classifier.set_params(binarize=X.mean())
        set_fast_parameters(classifier)
        # fit
        classifier.fit(X, y_)

        y_pred = classifier.predict(X)
        # training set performance
        assert_array_equal(np.unique(y_), np.unique(y_pred))
        if np.any(classifier.classes_ != classes):
            print("Unexpected classes_ attribute for %r: "
                  "expected %s, got %s" %
                  (classifier, classes, classifier.classes_))
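Here np.unique does double duty: before fitting it extracts the expected class set, and after prediction the same call on y_pred lets assert_array_equal compare the two label sets directly, because np.unique always returns a sorted array. A minimal standalone sketch of that idiom (the label arrays below are invented for illustration):

import numpy as np

y_true = np.array(["two", "one", "two", "three", "one"])
y_pred = np.array(["one", "three", "two", "two", "one"])

# np.unique returns the distinct values in sorted order, so two label
# arrays can be compared as sets with a plain array comparison.
assert np.array_equal(np.unique(y_true), np.unique(y_pred))
print(np.unique(y_true))  # ['one' 'three' 'two'] -- lexicographic order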
Example 2: by_lblimg
def by_lblimg(self, lbldata):
    """
    Get specific template regions from ROIs given by the user.
    All regions overlapping a specific label region will be included.

    Parameters:
    -----------
    lbldata: ROIs given by the user

    Return:
    -------
    out_template: new template containing the selected regions.
                  If lbldata contains several different ROIs, the new
                  template extracts the regions matching each ROI.

    Example:
    --------
    >>> glr_cls = GetLblRegion(template)
    >>> out_template = glr_cls.by_lblimg(lbldata)
    """
    assert lbldata.shape == self._template.shape, "the shape of template should be equal to the shape of lbldata"
    labels = np.sort(np.unique(lbldata)[1:]).astype('int')
    out_template = np.zeros_like(lbldata)
    out_template = out_template[..., np.newaxis]
    out_template = np.tile(out_template, (1, len(labels)))
    for i, lbl in enumerate(labels):
        lbldata_tmp = tools.get_specificroi(lbldata, lbl)
        lbldata_tmp[lbldata_tmp != 0] = 1
        part_template = self._template * lbldata_tmp
        template_lbl = np.sort(np.unique(part_template)[1:])
        out_template[..., i] = tools.get_specificroi(self._template, template_lbl)
    return out_template
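A detail worth noting above is np.unique(lbldata)[1:]: since np.unique returns sorted values, slicing off the first element drops the smallest label, typically the 0 background, in one step. A small self-contained sketch of the same trick on a toy label image (data invented for illustration):

import numpy as np

lbldata = np.array([[0, 0, 2],
                    [1, 2, 0],
                    [0, 1, 3]])

# Sorted unique values put the background label 0 first; [1:] discards it.
labels = np.unique(lbldata)[1:].astype('int')
print(labels)  # [1 2 3]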
Example 3: _pick_sources
def _pick_sources(self, data, include, exclude, eid):
    """Aux method."""
    fast_dot = _get_fast_dot()
    if exclude is None:
        exclude = self.exclude
    else:
        exclude = list(set(list(self.exclude) + list(exclude)))

    logger.info('Transforming to Xdawn space')

    # Apply unmixing
    sources = fast_dot(self.filters_[eid].T, data)

    if include not in (None, []):
        mask = np.ones(len(sources), dtype=bool)  # np.bool is deprecated; use the builtin
        mask[np.unique(include)] = False
        sources[mask] = 0.
        logger.info('Zeroing out %i Xdawn components' % mask.sum())
    elif exclude not in (None, []):
        exclude_ = np.unique(exclude)
        sources[exclude_] = 0.
        logger.info('Zeroing out %i Xdawn components' % len(exclude_))
    logger.info('Inverse transforming to sensor space')
    data = fast_dot(self.patterns_[eid], sources)
    return data
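np.unique(include) serves here as cheap input sanitation: duplicated component indices supplied by the caller would be harmless for the mask itself but misleading for the logged count. A minimal sketch of the masking pattern, with invented component data:

import numpy as np

sources = np.arange(12, dtype=float).reshape(4, 3)  # 4 components x 3 samples
include = [2, 0, 2]  # user-supplied, possibly with duplicates

# Deduplicate first, then zero out everything that is *not* included.
mask = np.ones(len(sources), dtype=bool)
mask[np.unique(include)] = False
sources[mask] = 0.
print(int(mask.sum()), "components zeroed")  # 2 components zeroed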
Example 4: makeThresholdMap
def makeThresholdMap(image, findCars, scales=[1.5], percentOfHeapmapToToss=.5):
    print("scales:", scales, ", type:", type(scales), "image.shape:", image.shape, ", dtype:", image.dtype, ", percentOfHeapmapToToss:", percentOfHeapmapToToss)
    boundingBoxList = []
    boundingBoxWeights = []
    for scale in scales:
        listOfBoundingBoxes, listOfWeights = findCars(image, scale)
        boundingBoxList += listOfBoundingBoxes
        boundingBoxWeights += listOfWeights
    if USEBOUNDINGBOXWEIGHTS:
        unNormalizedHeatMap = addWeightedHeat(image.shape, boundingBoxList, boundingBoxWeights)
    else:
        unNormalizedHeatMap = addHeat(image.shape, boundingBoxList)
    if USESTACKOFHEATMAPS:
        unNormalizedHeatMap, _ = totalHeatmapStack(unNormalizedHeatMap)
    unNormalizedHeatMapCounts = np.unique(unNormalizedHeatMap, return_counts=True)
    if TESTING: print("makeThresholdMap-unNormalizedHeatMapCounts:", unNormalizedHeatMapCounts, ", len(unNormalizedHeatMapCounts):", len(unNormalizedHeatMapCounts), ", len(unNormalizedHeatMapCounts[0]):", len(unNormalizedHeatMapCounts[0]))
    unNormalizedHeatMapMidpoint = unNormalizedHeatMapCounts[0][int(round(len(unNormalizedHeatMapCounts[0]) * percentOfHeapmapToToss))]
    thresholdMap = applyThreshold(unNormalizedHeatMap, unNormalizedHeatMapMidpoint)
    print("makeThresholdMap-max(thresholdMap):", np.max(thresholdMap), ", min(thresholdMap):", np.min(thresholdMap))
    if TESTING: print("makeThresholdMap-thresholdMap counts:", (np.unique(thresholdMap, return_counts=True)), ", len(thresholdMap):", len(thresholdMap), ", len(thresholdMap[0]):", len(thresholdMap[0]))
    normalizedMap = normalizeMap(thresholdMap)
    if TESTING: print("makeThresholdMap-normalizedMap counts:", (np.unique(normalizedMap, return_counts=True)), ", len(normalizedMap):", len(normalizedMap), ", len(normalizedMap[0]):", len(normalizedMap[0]))
    print("makeThresholdMap-max(normalizedMap):", np.max(normalizedMap), ", min(normalizedMap):", np.min(normalizedMap))
    return normalizedMap, boundingBoxList, unNormalizedHeatMap, boundingBoxWeights
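The thresholding step above indexes unNormalizedHeatMapCounts[0], the sorted unique heat values, at a fractional position: it effectively takes a percentile over the distinct values rather than over the pixels. A stripped-down sketch of that selection (toy heatmap, invented numbers):

import numpy as np

heat = np.array([0, 0, 1, 1, 1, 2, 3, 5, 5, 8])
values, counts = np.unique(heat, return_counts=True)
print(values, counts)  # [0 1 2 3 5 8] [2 3 1 1 2 1]

# Pick the unique value sitting halfway through the sorted value list
# and use it as a threshold.
toss = 0.5
threshold = values[int(round(len(values) * toss))]
print(threshold)                              # 3
print(np.where(heat >= threshold, heat, 0))   # zero out everything below it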
Example 5: plot_decision_regions
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=cmap(idx),
                    marker=markers[idx], label=cl)

    # Highlight test samples
    if test_idx:
        X_test, y_test = X[test_idx, :], y[test_idx]
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    c='none',  # '' is rejected by recent matplotlib; 'none' keeps the markers unfilled
                    alpha=1.0,
                    linewidths=1,
                    marker='o',
                    s=55, label='test set')
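enumerate(np.unique(y)) is what keeps markers and colors stable here: because np.unique sorts, class cl always maps to the same idx no matter how y is ordered. A compact sketch of the per-class split (synthetic data):

import numpy as np

X = np.array([[0.0, 1.0], [1.0, 0.0], [2.0, 2.0], [3.0, 1.0]])
y = np.array([1, 0, 1, 0])

# np.unique(y) is sorted, so the idx -> class mapping is deterministic.
for idx, cl in enumerate(np.unique(y)):
    pts = X[y == cl]
    print(f"class {cl} (marker #{idx}): {len(pts)} points")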
Example 6: seems_like_discrete_data
def seems_like_discrete_data(arr, dictionary=None):
    if numpy.issubdtype(arr.dtype, numpy.bool_):
        #print('seems_like_discrete_data? YES bool')
        return True
    else:
        pass
        #print('seems_like_discrete_data? not bool but', arr.dtype)
    if dictionary is None:
        if len(numpy.unique(arr[:100])) < 6:
            if len(numpy.unique(arr[:1000])) < 6:
                if len(numpy.unique(arr)) < 6:
                    #print('seems_like_discrete_data? YES uniques < 6')
                    return True
        #print('seems_like_discrete_data? too many and no dictionary')
    else:
        uniq = numpy.unique(arr)
        not_in_dict = 0
        for i in uniq:
            if i not in dictionary:
                not_in_dict += 1
        if not_in_dict > 2:
            #print(f'seems_like_discrete_data? dictionary but {not_in_dict} missing keys')
            return False
        else:
            #print(f'seems_like_discrete_data? dictionary with {not_in_dict} missing keys')
            return True
    return False
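The nested checks above are a performance trick: np.unique on the first 100 (then 1000) elements is cheap, and only when those small prefixes already look discrete does the function pay for a full-array scan. A minimal sketch of the staged test, assuming an invented large array named arr:

import numpy as np

arr = np.random.default_rng(0).integers(0, 3, size=1_000_000)

# Cheap prefix checks first; the full scan runs only when they pass.
if len(np.unique(arr[:100])) < 6 and len(np.unique(arr[:1000])) < 6:
    print("discrete?", len(np.unique(arr)) < 6)  # discrete? True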
Example 7: test_value_counts_inferred
def test_value_counts_inferred(self):
    klasses = [Index, Series]
    for klass in klasses:
        s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
        s = klass(s_values)
        expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(s.value_counts(), expected)

        if isinstance(s, Index):
            exp = Index(np.unique(np.array(s_values, dtype=np.object_)))
            tm.assert_index_equal(s.unique(), exp)
        else:
            exp = np.unique(np.array(s_values, dtype=np.object_))
            tm.assert_numpy_array_equal(s.unique(), exp)

        assert s.nunique() == 4
        # don't sort, have to sort after the fact as not sorting is
        # platform-dep
        hist = s.value_counts(sort=False).sort_values()
        expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
        tm.assert_series_equal(hist, expected)

        # sort ascending
        hist = s.value_counts(ascending=True)
        expected = Series([1, 2, 3, 4], index=list('cdab'))
        tm.assert_series_equal(hist, expected)

        # relative histogram.
        hist = s.value_counts(normalize=True)
        expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(hist, expected)
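The test leans on a subtle difference: np.unique returns values in sorted order, while pandas' Series.unique preserves first-appearance order, which is why both branches can compare against the np.unique result only because this particular input happens to appear in sorted order. A short sketch of the two orderings:

import numpy as np
import pandas as pd

s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']

print(np.unique(np.array(s_values, dtype=np.object_)))  # ['a' 'b' 'c' 'd'] (sorted)
print(pd.Series(s_values).unique())                     # ['a' 'b' 'c' 'd'] here, but:
print(pd.Series(['b', 'a']).unique(), np.unique(['b', 'a']))  # ['b' 'a'] vs ['a' 'b']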
Example 8: __init__
def __init__(self, filename, diets=False, ctrlgrp=99):
    self.rawdata = pd.read_csv(filename, sep=" ")
    self.rawdata['days'] = self.rawdata['days']/365.0 - 1.0  # scale days
    # select subgroups
    if diets == False:  # select all diet groups
        self.data = self.rawdata
    else:
        self.data = self.rawdata[np.in1d(self.rawdata['diet'], diets)]
    # set parameters
    self.unidays = np.unique(self.data['days'])
    self.unidiets = np.unique(self.data['diet'])
    self.ctrlidx = np.where(self.unidiets == ctrlgrp)[0][0]
    self.uniids = np.unique(self.data['id'])
    self.grp = self.unidiets.size  # total number of diets
    self.ntot = self.uniids.size  # total number of mice
    self.grp_uniids = {}
    self.grp_ntot = {}
    self.grp_dtot = {}
    for g in self.unidiets:
        temp = self.data['id'][self.data['diet'] == g]
        self.grp_uniids.update({g: np.unique(temp)})
        # total number of measurements in a group
        self.grp_dtot.update({g: temp.size})
        # number of unique ids in a group
        self.grp_ntot.update({g: self.grp_uniids[g].size})
    self.id_dtot = {}
    for i in self.uniids:
        temp = self.data['days'][self.data['id'] == i]
        # number of measurements for each id
        self.id_dtot.update({i: temp.size})
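Most of the bookkeeping in this constructor, unique levels plus per-level measurement counts, can also be read directly off np.unique with return_counts=True. A sketch with an invented diet column:

import numpy as np

diet = np.array([1, 1, 2, 2, 2, 99, 99])

levels, dtot = np.unique(diet, return_counts=True)
print(levels)                        # [ 1  2 99]
print(dict(zip(levels, dtot)))       # {1: 2, 2: 3, 99: 2} measurements per group
print(np.where(levels == 99)[0][0])  # index of the control group: 2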
Example 9: fit
def fit(self, X, y=None):
    """Fit a model.

    Parameters
    ----------
    X : pandas dataframe or array-like
        Training samples. A pandas dataframe can hold a dict of features in
        one column, or a set of columns can be converted.
    y : array-like
        Class labels; required for array-like X, presently unused for a
        pandas dataframe.

    Returns
    -------
    self : object
    """
    if isinstance(X, pd.DataFrame):
        df = X
        if self.dict_feature is not None:
            if self.target_readable is not None:
                self.create_class_id_map(df, self.target, self.target_readable)
            (X, y) = self._load_from_dict(df)
            num_class = len(np.unique(y))
        else:
            (X, y, self.vectorizer) = self.convert_numpy(df)
            num_class = len(y.unique())
    else:
        check_X_y(X, y)
        num_class = len(np.unique(y))
    self.clf = xgb.XGBClassifier(**self.params)
    print(self.clf.get_params(deep=True))  # print as a function (Python 3)
    self.clf.fit(X, y, verbose=True)
    return self
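len(np.unique(y)) is the usual way to derive the number of classes when labels may be arbitrary (non-contiguous integers, strings, and so on). A one-liner sketch:

import numpy as np

y = np.array([3, 7, 3, 7, 42])
num_class = len(np.unique(y))
print(num_class)  # 3 -- works even though labels are not 0..n-1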
Example 10: kmeans
def kmeans(xx, centroids, maxIters=20, minclust=30, maxDiff=2):
    # Cluster Assignment step
    ca = np.array([np.argmin([np.dot(x_i - y_k, x_i - y_k) for y_k in centroids]) for x_i in xx])
    # all clusters have at least minclust?
    (unique, counts) = np.unique(ca, return_counts=True)
    for cc in counts:
        if cc < minclust:
            return ("error: too few", np.array(centroids), ca)
    # Move centroids step
    centroids = np.array([xx[ca == k].mean(axis=0) for k in range(centroids.shape[0])])
    iter = 1
    while (iter < maxIters):
        # Cluster Assignment step
        canew = np.array([np.argmin([np.dot(x_i - y_k, x_i - y_k) for y_k in centroids]) for x_i in xx])
        # all clusters have at least minclust?
        (unique, counts) = np.unique(canew, return_counts=True)
        for cc in counts:
            if cc < minclust:
                return ("error: too few", np.array(centroids), canew)
        numdiff = sum(ca != canew)
        if numdiff < maxDiff:
            return ("converged", np.array(centroids), canew)
        ca = canew
        # Move centroids step
        centroids = np.array([xx[ca == k].mean(axis=0) for k in range(centroids.shape[0])])
        iter += 1
    return ("error: not converged", np.array(centroids), ca)
Example 11: _get_obs_index_groups
def _get_obs_index_groups(self):
    " Computes index groups for given observation scheme. "
    J = np.zeros((self._p, self.num_subpops), dtype=bool)

    def any_observed(x):
        return x.size > 0

    # list() is needed on Python 3, where map() returns an iterator
    for i in np.where(list(map(any_observed, self._sub_pops)))[0]:
        J[self._sub_pops[i], i] = 1

    twoexp = np.power(2, np.arange(self.num_subpops))
    hsh = np.sum(J * twoexp, 1)

    lbls = np.unique(hsh)

    idx_grp = []
    for i in range(lbls.size):
        idx_grp.append(np.where(hsh == lbls[i])[0])

    obs_idx = []
    for i in range(self.num_obstime):
        obs_idx.append([])
        for j in np.unique(hsh[np.where(J[:, self._obs_pops[i]] == 1)]):
            obs_idx[i].append(np.where(lbls == j)[0][0])

    return tuple(obs_idx), tuple(idx_grp)
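The hash-then-group pattern above (np.unique(hsh) followed by np.where(hsh == lbls[i]) per label) can be collapsed with return_inverse, which hands back the group id of every row directly. A small equivalent sketch, with an invented hash vector:

import numpy as np

hsh = np.array([3, 1, 3, 2, 1, 3])

lbls, inv = np.unique(hsh, return_inverse=True)
# inv[k] is the index into lbls for row k, so grouping is one comprehension:
idx_grp = [np.where(inv == g)[0] for g in range(lbls.size)]
print(lbls)     # [1 2 3]
print(idx_grp)  # [array([1, 4]), array([3]), array([0, 2, 5])]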
Example 12: evaluateSpeakerDiarization
def evaluateSpeakerDiarization(flags, flagsGT):
    minLength = min(flags.shape[0], flagsGT.shape[0])
    flags = flags[0:minLength]
    flagsGT = flagsGT[0:minLength]

    uFlags = numpy.unique(flags)
    uFlagsGT = numpy.unique(flagsGT)

    # compute contingency table:
    cMatrix = numpy.zeros((uFlags.shape[0], uFlagsGT.shape[0]))
    for i in range(minLength):
        cMatrix[int(numpy.nonzero(uFlags == flags[i])[0]), int(numpy.nonzero(uFlagsGT == flagsGT[i])[0])] += 1.0

    Nc, Ns = cMatrix.shape
    N_s = numpy.sum(cMatrix, axis=0)
    N_c = numpy.sum(cMatrix, axis=1)
    N = numpy.sum(cMatrix)

    purityCluster = numpy.zeros((Nc, ))
    puritySpeaker = numpy.zeros((Ns, ))
    # compute cluster purity:
    for i in range(Nc):
        purityCluster[i] = numpy.max(cMatrix[i, :]) / N_c[i]
    for j in range(Ns):
        puritySpeaker[j] = numpy.max(cMatrix[:, j]) / N_s[j]

    purityClusterMean = numpy.sum(purityCluster * N_c) / N
    puritySpeakerMean = numpy.sum(puritySpeaker * N_s) / N

    return purityClusterMean, puritySpeakerMean
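The per-sample numpy.nonzero(uFlags == flags[i]) lookups make the table construction slow for long recordings; np.unique with return_inverse maps every flag to its row and column index up front, so the contingency table can be filled with a single numpy.add.at. A sketch of that alternative, on invented flag sequences:

import numpy

flags = numpy.array([0, 0, 1, 1, 2, 2])
flagsGT = numpy.array([5, 5, 5, 7, 7, 7])

uFlags, ci = numpy.unique(flags, return_inverse=True)      # row index per sample
uFlagsGT, cj = numpy.unique(flagsGT, return_inverse=True)  # column index per sample

cMatrix = numpy.zeros((uFlags.size, uFlagsGT.size))
numpy.add.at(cMatrix, (ci, cj), 1.0)  # vectorized contingency count
print(cMatrix)  # [[2. 0.] [1. 1.] [0. 2.]]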
Example 13: spatio_temporal_src_connectivity
def spatio_temporal_src_connectivity(src, n_times):
    """Compute connectivity for a source space activation over time

    Parameters
    ----------
    src : source space
        The source space.
    n_times : int
        Number of time instants

    Returns
    -------
    connectivity : sparse COO matrix
        The connectivity matrix describing the spatio-temporal
        graph structure. If N is the number of vertices in the
        source space, the first N nodes in the graph are the
        vertices at time 1, the nodes from N+1 to 2N are the
        vertices at time 2, etc.
    """
    if src[0]['use_tris'] is None:
        raise Exception("The source space does not appear to be an ico "
                        "surface. Connectivity cannot be extracted from "
                        "non-ico source spaces.")
    lh_tris = np.searchsorted(np.unique(src[0]['use_tris']),
                              src[0]['use_tris'])
    rh_tris = np.searchsorted(np.unique(src[1]['use_tris']),
                              src[1]['use_tris'])
    tris = np.concatenate((lh_tris, rh_tris + np.max(lh_tris) + 1))
    return spatio_temporal_tris_connectivity(tris, n_times)
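np.searchsorted(np.unique(v), v) is a classic renumbering trick: it maps each vertex id to its rank among the unique ids, compacting arbitrary ids to 0..N-1. np.unique can do the same in one call via return_inverse (reshaped here, since older numpy returns the inverse flattened). A sketch with invented triangle indices:

import numpy as np

use_tris = np.array([[10, 40, 70],
                     [40, 70, 90]])

ranks = np.searchsorted(np.unique(use_tris), use_tris)
_, inv = np.unique(use_tris, return_inverse=True)

print(ranks)   # [[0 1 2]
               #  [1 2 3]]
print(np.array_equal(ranks, inv.reshape(use_tris.shape)))  # True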
Example 14: check_and_set_idx
def check_and_set_idx(ids, idx, prefix):
    """ Reconciles passed-in IDs and indices and returns indices, as well as unique IDs
    in the order specified by the indices. If only IDs supplied, returns the sort-arg
    as the index. If only indices supplied, returns None for IDs. If both supplied,
    checks that the correspondence is unique and returns unique IDs in the sort order of
    the associated index.

    :param np.ndarray ids: array of IDs
    :param np.ndarray[int] idx: array of indices
    :param str prefix: variable name (for error logging)
    :return: unique IDs and indices (passed in or derived from the IDs)
    :rtype: np.ndarray, np.ndarray
    """
    if ids is None and idx is None:
        raise ValueError('Both {}_ids and {}_idx cannot be None'.format(prefix, prefix))
    if ids is None:
        return None, np.asarray_chkfinite(idx)
    if idx is None:
        return np.unique(ids, return_inverse=True)
    else:
        ids = np.asarray(ids)
        idx = np.asarray_chkfinite(idx)
        if len(idx) != len(ids):
            raise ValueError('{}_ids ({}) and {}_idx ({}) must have the same length'.format(
                prefix, len(ids), prefix, len(idx)))
        uniq_idx, idx_sort_index = np.unique(idx, return_index=True)
        # make sure each unique index corresponds to a unique id
        if not all(len(set(ids[idx == i])) == 1 for i in uniq_idx):
            raise ValueError("Each index must correspond to a unique {}_id".format(prefix))
        return ids[idx_sort_index], idx
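The ids-only branch relies on np.unique(ids, return_inverse=True) returning exactly the pair the function promises: the sorted unique IDs plus, for every original entry, the index of its ID within that sorted array. A tiny sketch:

import numpy as np

ids = np.array(['b', 'a', 'b', 'c'])

uniq, idx = np.unique(ids, return_inverse=True)
print(uniq)  # ['a' 'b' 'c']
print(idx)   # [1 0 1 2]
print(np.array_equal(uniq[idx], ids))  # True -- the inverse reconstructs the input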
Example 15: check_classifiers_classes
def check_classifiers_classes(name, Classifier, X, y, y_names):
    if name in ["LabelPropagation", "LabelSpreading"]:
        # TODO some complication with -1 label
        y_ = y
    else:
        y_ = y_names
    classes = np.unique(y_)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    # fit
    try:
        classifier.fit(X, y_)
    except Exception as e:
        print(e)

    y_pred = classifier.predict(X)
    # training set performance
    assert_array_equal(np.unique(y_), np.unique(y_pred))

    accuracy = accuracy_score(y_, y_pred)
    assert_greater(accuracy, 0.78,
                   "accuracy %f of %s not greater than 0.78"
                   % (accuracy, name))
    #assert_array_equal(
    #    clf.classes_, classes,
    #    "Unexpected classes_ attribute for %r" % clf)
    if np.any(classifier.classes_ != classes):
        print("Unexpected classes_ attribute for %r: "
              "expected %s, got %s" %
              (classifier, classes, classifier.classes_))
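One caution about the final check: np.any(classifier.classes_ != classes) assumes the two arrays have the same length; if a class were silently dropped, the elementwise comparison would not compare the sets cleanly. A shape-safe alternative built on np.unique and np.array_equal (arrays invented for illustration):

import numpy as np

classes = np.unique(np.array(["one", "three", "two"]))
classes_attr = np.array(["one", "two"])  # e.g. a class was silently dropped

# np.array_equal handles mismatched shapes and is order-insensitive here
# because both sides go through np.unique.
if not np.array_equal(np.unique(classes_attr), classes):
    print("Unexpected classes_: expected %s, got %s" % (classes, classes_attr))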