当前位置: 首页>>代码示例>>Python>>正文

Python numpy.unique函数代码示例

本文整理汇总了 Python 中 numpy.unique 函数的典型用法代码示例。如果您正苦于以下问题：Python unique 函数的具体用法？Python unique 怎么用？Python unique 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


示例1: check_classifiers_classes

def check_classifiers_classes(name, Classifier):
    """Check that a classifier copes with string and object class labels.

    A small, well separated three-blob data set is generated, shifted so
    that every feature is strictly positive, and the classifier is fitted
    once with a fixed-width string label array and once with the same labels
    as an ``object`` array.

    Parameters
    ----------
    name : str
        Name of the estimator; used to special-case a few estimators.
    Classifier : class
        Estimator class, instantiated with no arguments.

    Raises
    ------
    AssertionError
        If the set of predicted labels differs from the set of training
        labels.
    """
    X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non negative data, for things
    # like NMF
    X -= X.min() - .1
    y_names = np.array(["one", "two", "three"])[y]

    for labels in [y_names, y_names.astype('O')]:
        if name in ["LabelPropagation", "LabelSpreading"]:
            # TODO some complication with -1 label
            y_ = y
        else:
            y_ = labels

        classes = np.unique(y_)
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name == 'BernoulliNB':
            # NOTE(review): the body of this branch was missing from the
            # snippet; binarizing at the data mean is believed to be what
            # scikit-learn's estimator checks do here - confirm upstream.
            classifier.set_params(binarize=X.mean())
        # fit
        classifier.fit(X, y_)

        y_pred = classifier.predict(X)
        # training set performance
        assert_array_equal(np.unique(y_), np.unique(y_pred))
        if np.any(classifier.classes_ != classes):
            print("Unexpected classes_ attribute for %r: "
                  "expected %s, got %s" %
                  (classifier, classes, classifier.classes_))

示例2: by_lblimg

    def by_lblimg(self, lbldata):
        """
        Get specific template regions by rois given by user.

        All template regions overlapped with a specific label region will
        be covered.

        Parameters
        ----------
        lbldata: rois given by user; must have the same shape as the
                 template.  The smallest value present is skipped
                 (presumably the background label - confirm with callers).

        Returns
        -------
        out_template: new template contains part of regions.
                      If lbldata has multiple different rois, the result has
                      one trailing-axis slice per roi, each holding the
                      template regions that overlap that roi.

        Example
        -------
        >>> glr_cls = GetLblRegion(template)
        >>> out_template = glr_cls.by_lblimg(lbldata)
        """
        assert lbldata.shape == self._template.shape, "the shape of template should be equal to the shape of lbldata"
        # np.unique already returns sorted values, so no extra sort is needed.
        labels = np.unique(lbldata)[1:].astype('int')
        # One output volume per label, stacked along a new last axis.
        out_template = np.zeros(lbldata.shape + (len(labels),),
                                dtype=lbldata.dtype)
        for i, lbl in enumerate(labels):
            # Binary mask of the current roi.
            roi_mask = tools.get_specificroi(lbldata, lbl)
            roi_mask[roi_mask != 0] = 1
            # Template labels that fall inside the roi (first value skipped).
            part_template = self._template * roi_mask
            template_lbl = np.unique(part_template)[1:]
            out_template[..., i] = tools.get_specificroi(self._template, template_lbl)
        return out_template

示例3: _pick_sources

    def _pick_sources(self, data, include, exclude, eid):
        """Aux method: project data to Xdawn space, zero components, project back.

        Parameters
        ----------
        data : array
            Sensor-space data passed to the unmixing filters.
        include : list of int | None
            If not None/empty, every component NOT listed here is zeroed.
        exclude : list of int | None
            Components to zero when ``include`` is not given. Always merged
            with ``self.exclude``; None means "use ``self.exclude`` only".
        eid : hashable
            Key selecting the entry of ``self.filters_`` / ``self.patterns_``.

        Returns
        -------
        data : array
            The data after the inverse transform back through the patterns.
        """
        fast_dot = _get_fast_dot()
        if exclude is None:
            exclude = self.exclude
        else:
            # Union of the instance-level and call-level exclusions.
            exclude = list(set(list(self.exclude) + list(exclude)))

        logger.info('Transforming to Xdawn space')

        # Apply unmixing
        sources = fast_dot(self.filters_[eid].T, data)

        if include not in (None, []):
            # ``np.bool`` was removed in NumPy 1.24; the builtin is equivalent.
            mask = np.ones(len(sources), dtype=bool)
            mask[np.unique(include)] = False
            sources[mask] = 0.
            logger.info('Zeroing out %i Xdawn components' % mask.sum())
        elif exclude not in (None, []):
            exclude_ = np.unique(exclude)
            sources[exclude_] = 0.
            logger.info('Zeroing out %i Xdawn components' % len(exclude_))
        logger.info('Inverse transforming to sensor space')
        data = fast_dot(self.patterns_[eid], sources)

        return data

示例4: makeThresholdMap

def makeThresholdMap(image, findCars, scales=[1.5], percentOfHeapmapToToss=.5):
    """Build a heat map from detections at several scales and threshold it.

    NOTE(review): this snippet appears to be missing lines. As written it
    reads ``boundingBoxList``, ``boundingBoxWeights``,
    ``unNormalizedHeatMapMidpoint`` and ``normalizedMap`` without ever
    assigning them, so it cannot run unless they are module globals.
    ``percentOfHeapmapToToss`` is only printed, never used in a computation.
    ``scales`` has a mutable list default; it is not mutated here.

    Parameters
    ----------
    image : array-like with ``shape`` and ``dtype`` attributes.
    findCars : callable ``(image, scale) -> (boxes, weights)``.
    scales : iterable of scale factors passed to ``findCars``.
    percentOfHeapmapToToss : float, currently informational only.

    Returns
    -------
    tuple ``(normalizedMap, boundingBoxList, unNormalizedHeatMap,
    boundingBoxWeights)``.
    """
    print("scales:", scales, ", type:", type(scales), "image.shape:", image.shape, ", dtype:", image.dtype, ", percentOfHeapmapToToss:", percentOfHeapmapToToss)
    for scale in scales:
        # NOTE(review): the per-scale results are never accumulated into
        # boundingBoxList / boundingBoxWeights in the visible code.
        listOfBoundingBoxes, listOfWeights = findCars(image, scale)

        # NOTE(review): the weighted heat map is immediately overwritten by
        # the unweighted one on the next line - probably two branches of a
        # lost if/else.
        unNormalizedHeatMap=addWeightedHeat(image.shape, boundingBoxList, boundingBoxWeights)
        unNormalizedHeatMap=addHeat(image.shape, boundingBoxList)


    # (values, counts) of the distinct heat levels; used only for diagnostics.
    unNormalizedHeatMapCounts=np.unique(unNormalizedHeatMap, return_counts=True)
    if TESTING: print("makeThresholdMap-unNormalizedHeatMapCounts:", unNormalizedHeatMapCounts, ", len(unNormalizedHeatMapCounts):", len(unNormalizedHeatMapCounts), ", len(unNormalizedHeatMapCounts[0]):", len(unNormalizedHeatMapCounts[0]))
    # NOTE(review): unNormalizedHeatMapMidpoint is not computed anywhere above.
    thresholdMap=applyThreshold(unNormalizedHeatMap, unNormalizedHeatMapMidpoint)
    print("makeThresholdMap-max(thresholdMap):", np.max(thresholdMap), ", min(thresholdMap):", np.min(thresholdMap))
    if TESTING: print("makeThresholdMap-thresholdMap counts:", (np.unique(thresholdMap, return_counts=True)), ", len(thresholdMap):", len(thresholdMap), ", len(thresholdMap[0]):", len(thresholdMap[0]))
    # NOTE(review): normalizedMap is not computed anywhere above.
    if TESTING: print("makeThresholdMap-normalizedMap counts:", (np.unique(normalizedMap, return_counts=True)), ", len(normalizedMap):", len(normalizedMap), ", len(normalizedMap[0]):", len(normalizedMap[0]))
    print("makeThresholdMap-max(normalizedMap):", np.max(normalizedMap), ", min(normalizedMap):", np.min(normalizedMap))
    return normalizedMap, boundingBoxList, unNormalizedHeatMap, boundingBoxWeights

示例5: plot_decision_regions

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Plot a classifier's decision surface over two features.

    Parameters
    ----------
    X : ndarray, shape (n_samples, 2)
        Feature matrix; column 0 is drawn on the x axis, column 1 on y.
    y : ndarray, shape (n_samples,)
        Class labels. At most five distinct classes are supported because
        only five markers/colours are defined.
    classifier : object
        Fitted estimator exposing ``predict``.
    test_idx : sequence of int | None
        Row indices of test samples to highlight. May be a list, a
        ``range`` or a NumPy array; None or empty means no highlighting.
    resolution : float
        Grid step used to sample the decision surface.
    """
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=cmap(idx),
                    marker=markers[idx], label=cl)

    # Highlight test samples.  A plain truth test on ``test_idx`` raises
    # for NumPy arrays with more than one element, so test explicitly.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[test_idx, :]
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    s=55, label='test set')

示例6: seems_like_discrete_data

def seems_like_discrete_data(arr, dictionary=None):
    """Heuristically decide whether ``arr`` holds discrete (categorical) data.

    Parameters
    ----------
    arr : numpy.ndarray
        Data to inspect.
    dictionary : container, optional
        Known value codes. When given, the data counts as discrete if at
        most two of its distinct values are missing from ``dictionary``.

    Returns
    -------
    bool
        True for boolean arrays; without a dictionary, True when the array
        has fewer than six distinct values; with a dictionary, True when no
        more than two distinct values are absent from it.
    """
    if numpy.issubdtype(arr.dtype, numpy.bool_):
        return True

    if dictionary is None:
        # Check growing prefixes first so large, clearly continuous arrays
        # are rejected without a full-array unique().
        for prefix_len in (100, 1000, None):
            if len(numpy.unique(arr[:prefix_len])) >= 6:
                return False
        return True

    not_in_dict = sum(1 for value in numpy.unique(arr)
                      if value not in dictionary)
    return not_in_dict <= 2

示例7: test_value_counts_inferred

    def test_value_counts_inferred(self):
        """value_counts / unique / nunique on string data for Index and Series."""
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            # Index.unique() returns an Index, Series.unique() an ndarray,
            # so each container needs its own comparison helper.
            if isinstance(s, Index):
                exp = Index(np.unique(np.array(s_values, dtype=np.object_)))
                tm.assert_index_equal(s.unique(), exp)
            else:
                exp = np.unique(np.array(s_values, dtype=np.object_))
                tm.assert_numpy_array_equal(s.unique(), exp)

            assert s.nunique() == 4
            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            hist = s.value_counts(sort=False).sort_values()
            expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

示例8: __init__

    def __init__(self, filename, diets = False, ctrlgrp = 99):
        """Load a space-separated measurement table and index it by diet and id.

        Parameters
        ----------
        filename : str
            Path of a space-separated file with at least the columns
            ``days``, ``diet`` and ``id``.
        diets : sequence | False
            Diet codes to keep. ``False`` (default) keeps every diet group.
        ctrlgrp : scalar
            Diet code of the control group; it must be present in the
            selected data, otherwise an IndexError is raised.
        """
        self.rawdata = pd.read_csv(filename, sep=" ")
        self.rawdata['days'] = self.rawdata['days']/365.0 - 1.0 # scale days

        # select subgroups
        if diets is False:  # select all diet groups
            self.data = self.rawdata
        else:
            # np.isin supersedes the deprecated np.in1d.
            self.data = self.rawdata[np.isin(self.rawdata['diet'], diets)]

        # set parameters
        self.unidays = np.unique(self.data['days'])
        self.unidiets = np.unique(self.data['diet'])
        # position of the control group within the sorted unique diets
        self.ctrlidx = np.where(self.unidiets == ctrlgrp)[0][0]
        self.uniids = np.unique(self.data['id'])
        self.grp = self.unidiets.size # total number of diets
        self.ntot = self.uniids.size # total number of mouse
        self.grp_uniids = {}
        self.grp_ntot = {}
        self.grp_dtot = {}
        for g in self.unidiets:
            ids_in_group = self.data['id'][self.data['diet']==g]
            self.grp_uniids[g] = np.unique(ids_in_group)
            # number of total number of measurements in a group
            self.grp_dtot[g] = ids_in_group.size
            # number of unique ids in a group
            self.grp_ntot[g] = self.grp_uniids[g].size
        self.id_dtot = {}
        for i in self.uniids:
            days_of_id = self.data['days'][self.data['id']==i]
            # number of measurements for each ids
            self.id_dtot[i] = days_of_id.size

示例9: fit

    def fit(self,X,y=None):
        """Fit a model:

        NOTE(review): this snippet looks truncated - several ``else:``
        lines, the body of the ``target_readable`` branch and the actual
        training call on ``self.clf`` are not present. The gaps are marked
        below and need to be restored from the upstream project.

        Parameters
        ----------

        X : pandas dataframe or array-like
           training samples. If pandas dataframe can handle dict of feature in one column or convert a set of columns
        y : array like, required for array-like X and not used presently for pandas dataframe
           class labels

        Returns
        -------
        self: object

        """
        if isinstance(X,pd.DataFrame):
            df = X
            if not self.dict_feature is None:
                if not self.target_readable is None:
                # NOTE(review): the body of the branch above is missing.
                (X,y) = self._load_from_dict(df)
                num_class = len(np.unique(y))
                # NOTE(review): an ``else:`` presumably belongs here, so that
                # convert_numpy is the path for frames without a dict feature.
                (X,y,self.vectorizer) = self.convert_numpy(df)
                num_class = len(y.unique())
            # NOTE(review): an ``else:`` for non-DataFrame input presumably
            # belongs here. num_class is computed but never used below.
            num_class = len(np.unique(y))

        self.clf = xgb.XGBClassifier(**self.params)
        # Python 2 print statement; a syntax error on Python 3.
        print self.clf.get_params(deep=True)
        # NOTE(review): no call that trains self.clf on (X, y) is visible.
        return self

示例10: kmeans

def _nearest_centroid(xx, centroids):
    """Return, for every row of ``xx``, the index of the closest centroid."""
    return np.array(
        [np.argmin([np.dot(x_i - y_k, x_i - y_k) for y_k in centroids])
         for x_i in xx],
        dtype=np.intp)


def kmeans(xx, centroids, maxIters = 20, minclust=30, maxDiff = 2):
    """Lloyd-style k-means with a minimum cluster size constraint.

    Parameters
    ----------
    xx : ndarray, shape (n_samples, n_features)
        Points to cluster.
    centroids : array-like, shape (n_clusters, n_features)
        Initial centroids.
    maxIters : int
        Maximum number of reassignment iterations after the initial step.
    minclust : int
        Minimum number of points every cluster must hold.
    maxDiff : int
        Converged when fewer than ``maxDiff`` points change cluster.

    Returns
    -------
    (status, centroids, assignment) : (str, ndarray, ndarray)
        ``status`` is ``"converged"``, ``"error: too few"`` or
        ``"error: not converged"``.
    """
    centroids = np.asarray(centroids)
    n_clusters = centroids.shape[0]

    def has_small_cluster(assignment):
        # bincount (unlike np.unique) also reports clusters with no members.
        sizes = np.bincount(assignment, minlength=n_clusters)
        return bool(np.any(sizes < minclust))

    def recompute(assignment):
        return np.array([xx[assignment == k].mean(axis=0)
                         for k in range(n_clusters)])

    # Cluster Assignment step
    ca = _nearest_centroid(xx, centroids)
    # all clusters have at least minclust?
    if has_small_cluster(ca):
        return ("error: too few", np.array(centroids), ca)
    # Move centroids step
    centroids = recompute(ca)

    for _ in range(maxIters):
        # Cluster Assignment step
        canew = _nearest_centroid(xx, centroids)
        # all clusters have at least minclust?
        if has_small_cluster(canew):
            return ("error: too few", np.array(centroids), canew)
        numdiff = np.count_nonzero(ca != canew)
        if numdiff < maxDiff:
            return ("converged", np.array(centroids), canew)
        ca = canew
        # Move centroids step
        centroids = recompute(ca)

    return ("error: not converged", np.array(centroids), ca)

示例11: _get_obs_index_groups

	def _get_obs_index_groups(self):
		" Computes index groups for given observation scheme. "
		# NOTE(review): this snippet looks truncated - both loops that should
		# fill idx_grp and obs_idx have no body, so as written the method is
		# not valid Python. Restore the bodies from the upstream project.

		# Boolean membership matrix: J[k, i] is set when variable k belongs
		# to subpopulation i.
		J = np.zeros((self._p, self.num_subpops), dtype=bool) 

		def any_observed(x):
			return x.size > 0

		# NOTE(review): on Python 3 ``map`` returns an iterator, so np.where
		# does not see one flag per subpopulation here; this probably needs
		# list(map(...)) - confirm the targeted Python version.
		for i in np.where(map(any_observed, self._sub_pops))[0]:
			J[self._sub_pops[i],i] = 1

		# Encode each row of J as an integer: column i contributes 2**i, so
		# rows with the same membership pattern get the same code.
		twoexp = np.power(2,np.arange(self.num_subpops))
		hsh = np.sum(J*twoexp,1)                     

		# Distinct membership patterns present in the data.
		lbls = np.unique(hsh)

		idx_grp = []
		for i in range(lbls.size):
		# NOTE(review): loop body missing (presumably collects the indices
		# whose code equals lbls[i] - confirm upstream).

		obs_idx = []
		for i in range(self.num_obstime):
			for j in np.unique(hsh[np.where(J[:,self._obs_pops[i]]==1)]):
			# NOTE(review): inner loop body missing.

		return tuple(obs_idx), tuple(idx_grp)

示例12: evaluateSpeakerDiarization

def evaluateSpeakerDiarization(flags, flagsGT):
    """Compute cluster purity and speaker purity of a diarization result.

    Parameters
    ----------
    flags : 1-D numpy array
        Predicted cluster label per frame.
    flagsGT : 1-D numpy array
        Ground-truth speaker label per frame. When the two arrays differ in
        length, both are truncated to the shorter one.

    Returns
    -------
    (purityClusterMean, puritySpeakerMean) : (float, float)
        Purities averaged over clusters / speakers, weighted by their frame
        counts. Both are NaN when there are no frames to compare.
    """
    minLength = min(flags.shape[0], flagsGT.shape[0])
    if minLength == 0:
        return numpy.nan, numpy.nan
    flags = flags[0:minLength]
    flagsGT = flagsGT[0:minLength]

    # The inverse indices give each frame's row / column in the contingency
    # table directly, replacing a per-frame search through the unique labels.
    uFlags, rowIdx = numpy.unique(flags, return_inverse=True)
    uFlagsGT, colIdx = numpy.unique(flagsGT, return_inverse=True)

    # compute contingency table (clusters x speakers); add.at accumulates
    # repeated (row, col) pairs, which plain fancy-index += would not.
    cMatrix = numpy.zeros((uFlags.shape[0], uFlagsGT.shape[0]))
    numpy.add.at(cMatrix, (rowIdx.ravel(), colIdx.ravel()), 1.0)

    N = numpy.sum(cMatrix)
    # A size-weighted mean of per-cluster purities (max / row total) reduces
    # to the sum of row maxima over N; likewise for speakers with columns.
    purityClusterMean = numpy.sum(numpy.max(cMatrix, axis=1)) / N
    puritySpeakerMean = numpy.sum(numpy.max(cMatrix, axis=0)) / N
    return purityClusterMean, puritySpeakerMean

示例13: spatio_temporal_src_connectivity

def spatio_temporal_src_connectivity(src, n_times):
    """Compute connectivity for a source space activation over time

    Parameters
    ----------
    src : source space
        The source space. ``src[0]`` and ``src[1]`` are read as the left and
        right hemisphere, each with a ``'use_tris'`` entry.

    n_times : int
        Number of time instants

    Returns
    -------
    connectivity : sparse COO matrix
        The connectivity matrix describing the spatio-temporal
        graph structure. If N is the number of vertices in the
        source space, the N first nodes in the graph are the
        vertices are time 1, the nodes from 2 to 2N are the vertices
        during time 2, etc.

    Raises
    ------
    Exception
        If a hemisphere has no ``'use_tris'`` triangulation.
    """
    if any(hemi['use_tris'] is None for hemi in src[:2]):
        raise Exception("The source space does not appear to be an ico "
                        "surface. Connectivity cannot be extracted from "
                        "non-ico source spaces.")
    # Relabel the vertex numbers in each triangulation as 0..n-1 by looking
    # every vertex up in the sorted list of vertices that are actually used.
    lh_tris = np.searchsorted(np.unique(src[0]['use_tris']),
                              src[0]['use_tris'])
    rh_tris = np.searchsorted(np.unique(src[1]['use_tris']),
                              src[1]['use_tris'])
    # Right-hemisphere vertices are numbered after the left-hemisphere ones.
    tris = np.concatenate((lh_tris, rh_tris + np.max(lh_tris) + 1))
    return spatio_temporal_tris_connectivity(tris, n_times)

示例14: check_and_set_idx

def check_and_set_idx(ids, idx, prefix):
    """ Reconciles passed-in IDs and indices and returns indices, as well as unique IDs
    in the order specified by the indices.  If only IDs supplied, returns the sort-arg
    as the index.  If only indices supplied, returns None for IDs.  If both supplied,
    checks that the correspondence is unique and returns unique IDs in the sort order of
    the associated index.
    :param np.ndarray ids: array of IDs
    :param np.ndarray[int] idx: array of indices
    :param str prefix: variable name (for error logging)
    :return: unique IDs and indices (passed in or derived from the IDs)
    :rtype: np.ndarray, np.ndarray
    :raises ValueError: if both inputs are None, if their lengths differ, or if
        one index value is paired with more than one ID
    """
    if ids is None and idx is None:
        raise ValueError('Both {}_ids and {}_idx cannot be None'.format(prefix, prefix))
    if ids is None:
        return None, np.asarray_chkfinite(idx)
    if idx is None:
        return np.unique(ids, return_inverse=True)

    # Both supplied: validate that they describe the same mapping.
    ids = np.asarray(ids)
    idx = np.asarray_chkfinite(idx)
    if len(idx) != len(ids):
        raise ValueError('{}_ids ({}) and {}_idx ({}) must have the same length'.format(
            prefix, len(ids), prefix, len(idx)))
    # idx_sort_index[k] is the first position at which the k-th smallest
    # index value occurs, so ids[idx_sort_index] is ordered by index.
    uniq_idx, idx_sort_index = np.unique(idx, return_index=True)
    # make sure each unique index corresponds to a unique id
    if not all(len(set(ids[idx == i])) == 1 for i in uniq_idx):
        raise ValueError("Each index must correspond to a unique {}_id".format(prefix))
    return ids[idx_sort_index], idx

示例15: check_classifiers_classes

def check_classifiers_classes(name, Classifier, X, y, y_names):
    """Check a classifier's predictions and ``classes_`` on named labels.

    Parameters
    ----------
    name : str
        Name of the estimator; used to special-case a few estimators and in
        failure messages.
    Classifier : class
        Estimator class, instantiated with no arguments.
    X : array-like
        Training samples.
    y : array-like
        Numeric class labels (used for the label-propagation estimators).
    y_names : array-like
        Class labels as names; used for every other estimator.

    Raises
    ------
    AssertionError
        If the predicted label set differs from the training label set or
        the training accuracy is not greater than 0.78.
    """
    if name in ["LabelPropagation", "LabelSpreading"]:
        # TODO some complication with -1 label
        y_ = y
    else:
        y_ = y_names

    classes = np.unique(y_)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    # fit
    try:
        classifier.fit(X, y_)
    except Exception as e:
        # Report which estimator failed, then let the failure propagate:
        # continuing with an unfitted estimator would only hide the cause.
        print("%s failed to fit: %r" % (name, e))
        raise

    y_pred = classifier.predict(X)
    # training set performance
    assert_array_equal(np.unique(y_), np.unique(y_pred))
    accuracy = accuracy_score(y_, y_pred)
    assert_greater(accuracy, 0.78,
                   "accuracy %f of %s not greater than 0.78"
                   % (accuracy, name))
    # A mismatching classes_ attribute is reported but not (yet) fatal.
    if np.any(classifier.classes_ != classes):
        print("Unexpected classes_ attribute for %r: "
              "expected %s, got %s" %
              (classifier, classes, classifier.classes_))
