

Python preprocessing.binarize Function Code Examples

This article collects typical usage examples of the Python function sklearn.preprocessing.binarize. If you have been wondering what binarize does and how to call it, the hand-picked examples below should help.


The following presents 15 code examples of the binarize function, ordered by popularity by default.
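Before the numbered examples, here is a minimal, self-contained sketch of the typical binarize call (the array values below are invented purely for illustration). binarize thresholds each element of an array: values strictly greater than threshold map to 1, everything else to 0.

import numpy as np
from sklearn.preprocessing import binarize

X = np.array([[1.5, -0.3, 2.0],
              [0.0,  0.7, -1.2]])

# values > 0.5 become 1, all others become 0
print(binarize(X, threshold=0.5))
# [[1. 0. 1.]
#  [0. 1. 0.]]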

Example 1: ig

def ig(X, y):
    """
    This method calculates the information gain I(X; Y) between the features in X and the class labels y.
    """

    # binarization: from counts to presence/absence
    binarize(X, threshold=0.0, copy=False)

    # one column per class
    Y = LabelBinarizer().fit_transform(y)
    if Y.shape[1] == 1: # binary problem case
        Y = np.append(1-Y, Y, axis=1)

    Y_prob = (np.sum(Y, axis=0, dtype=np.float64) / len(Y)).reshape(-1, 1)

    # calculate the class entropy H(Y)
    class_entropy = _entropy(Y_prob)

    X_y_count = safe_sparse_dot(Y.T, X)
    # TODO: check that this probability is computed correctly
    X_y_prob = \
        X_y_count / np.sum(X_y_count, axis=0, dtype=np.float64)

    # calculate the conditional entropy of the class given the feature H(y|f_i)
    cond_entropy = _entropy(X_y_prob) # TODO: check that the conditional entropy is computed correctly
    print("class:", class_entropy)
    print("cond_entropy:", cond_entropy)

    infogain = class_entropy - cond_entropy

    return infogain, None
Developer: mac2bua, Project: text_feature_selection, Lines: 31, Source: feature_scoring.py
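For reference, the quantity ig computes is the mutual information I(X; Y) = H(Y) - H(Y | X): the class entropy minus the conditional entropy of the class given the feature, which is exactly the final subtraction in the function body.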

Example 2: bns

def bns(X, y):
    """
    Implements the bi-normal separation scoring.
    """

    # binarization: from counts to presence/absence
    binarize(X, threshold=0.0, copy=False)

    # one column per class
    Y = LabelBinarizer().fit_transform(y)
    if Y.shape[1] == 1: # binary problem case
        Y = np.append(1-Y, Y, axis=1)

    pos = np.sum(Y, axis=0)
    neg = Y.shape[0] - pos

    tp = safe_sparse_dot(X.T, Y)
    fp = np.sum(tp, axis=1).reshape(-1, 1) - tp

    tpr = bounded(tp/pos.astype(float))
    fpr = bounded(fp/neg.astype(float))

    bns = np.abs(_z_score(tpr) - _z_score(fpr))

    return bns[:,1], None
Developer: mac2bua, Project: text_feature_selection, Lines: 25, Source: feature_scoring.py
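For context, bi-normal separation scores a feature as |F⁻¹(tpr) - F⁻¹(fpr)|, where F⁻¹ is the inverse of the standard normal CDF; that is presumably what the _z_score helper computes here, with bounded clipping the rates away from 0 and 1 so the inverse CDF stays finite.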

Example 3: test_binarize

    def test_binarize(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.preprocessing.binarize()
        expected = pp.binarize(iris.data)

        self.assertTrue(isinstance(result, pdml.ModelFrame))
        self.assert_numpy_array_almost_equal(result.data.values, expected)
        self.assert_index_equal(result.columns, df.data.columns)

        result = df.preprocessing.binarize(threshold=5)
        expected = pp.binarize(iris.data, threshold=5)

        self.assertTrue(isinstance(result, pdml.ModelFrame))
        self.assert_numpy_array_almost_equal(result.data.values, expected)
        self.assert_index_equal(result.columns, df.data.columns)

        s = df['sepal length (cm)']
        self.assertTrue(isinstance(s, pdml.ModelSeries))
        result = s.preprocessing.binarize()
        expected = pp.binarize(iris.data[:, 0])[0]

        self.assertTrue(isinstance(result, pdml.ModelSeries))
        self.assert_numpy_array_almost_equal(result.values, expected)
        self.assertEqual(result.name, 'sepal length (cm)')

        result = s.preprocessing.binarize(threshold=6)
        expected = pp.binarize(iris.data[:, 0], threshold=6)[0]

        self.assertTrue(isinstance(result, pdml.ModelSeries))
        self.assert_numpy_array_almost_equal(result.values, expected)
        self.assertEqual(result.name, 'sepal length (cm)')
Developer: Sandy4321, Project: pandas-ml, Lines: 33, Source: test_preprocessing.py

Example 4: predict

    def predict(self, X):
        ''' Predict class labels. '''
        if self.mode == 'average':
            return binarize(self.predict_proba(X)[:, [1]], 0.5)
        else:
            res = binarize(X, 0.5)
            return np.apply_along_axis(lambda x: np.bincount(x.astype(int), self.weights).argmax(), axis=1, arr=res)
Developer: amitsingh2783, Project: kaggle, Lines: 7, Source: transform.py

Example 5: get_score

def get_score(X, y, clf, scoring = 'accuracy'):
    from sklearn.preprocessing import binarize

    if scoring == 'accuracy':
        from sklearn.metrics import accuracy_score
        score = accuracy_score(y, binarize(clf.predict(X), 0.5))
    elif scoring == 'f1':
        from sklearn.metrics import f1_score
        score = f1_score(y, binarize(clf.predict(X), 0.5))
    else:
        score = clf.score(X, y)

    return score
Developer: jdnc, Project: ml-project, Lines: 13, Source: classify.py

Example 6: do_transformations

    def do_transformations(self):
        # binarize counts
        if self.transform == 'binarize':
            print "Binarizing"
            self.feature_counts = binarize(self.feature_counts, copy=False)
            #self.feature_counts = sparse.csr_matrix(self.feature_counts > 0, dtype=int)

        elif self.transform == 'tfidf':
            print "Doing tf-idf transform"
            #doc_sums = self.feature_counts.sum(axis=1)
            #if np.min(doc_sums) == 0:
            #    doc_sums[doc_sums == 0] = 1.0
            #tf = sparse.csr_matrix(self.feature_counts.multiply(1.0/doc_sums))

            n_items, n_features = self.feature_counts.shape
            tf = normalize(self.feature_counts, norm='l1', axis=1, copy=False)
            doc_counts = self.vocab.get_all_doc_counts()
            n_docs = doc_counts.max()
            # add one to avoid zeros which might screw up the matrix size
            idf = sparse.csr_matrix(np.log(float(n_docs+1) / doc_counts), dtype=float)
            print(tf.shape, idf.shape)
            self.feature_counts = tf.multiply(idf)
            assert self.feature_counts.shape == (n_items, n_features)

        elif self.transform == 'normalizel1' or self.transform == 'normalize':
            print "Normalizing rows"
            self.feature_counts = normalize(self.feature_counts, norm='l1', axis=1, copy=False)

        elif self.transform == 'normalizel2':
            print "Normalizing rows"
            self.feature_counts = normalize(self.feature_counts, norm='l2', axis=1, copy=False)

        if self.scale_factor is not None:
            self.feature_counts = self.feature_counts * self.scale_factor
Developer: dallascard, Project: guac, Lines: 34, Source: feature_extractor_counts.py

Example 7: resc

def resc(patch):
    """
    :param patch:  [image,mask]
    :return: random rescaling of the pair [image,mask]

    --- Rescaling reinforces axons size diversity ---
    """


    s = random.choice([0.5, 0.75, 1.0, 1.5, 2.0])
    data_rescale = []
    for scale in [s]:  # iterate over the single randomly chosen scale

        image_rescale = rescale(patch[0], scale)
        mask_rescale = rescale(patch[1], scale)
        s_r = mask_rescale.shape[0]
        q_h, r_h = divmod(256-s_r,2)

        if q_h > 0 :
            image_rescale = np.pad(image_rescale,(q_h, q_h+r_h), mode = "reflect")
            mask_rescale = np.pad(mask_rescale,(q_h, q_h+r_h), mode = "reflect")
        else :
            patches = extract_patch(image_rescale,mask_rescale, 256)
            i = np.random.randint(len(patches))  # a plain int index, not a size-1 array
            image_rescale, mask_rescale = patches[i]

        mask_rescale = preprocessing.binarize(np.array(mask_rescale), threshold=0.001)
        data_rescale = [image_rescale, mask_rescale]

    return data_rescale
Developer: vherman3, Project: AxonSegmentation, Lines: 30, Source: input_data.py

Example 8: op_vs_ip

def op_vs_ip(subid, image_types, imagepaths, op_direc, overlays):
	
	
	img_data_group=[]
	img_shape_group=[]
	ol_data_group=[]
	ol_shape_group=[]
	for i, path in enumerate(imagepaths):	

		axial_slice, cor_slice, sag_slice, img_aspect_axial, img_aspect_cor, img_aspect_sag = pull_midslices(path)
		if os.path.isfile(overlays[i]):
			axial_slice_ol, cor_slice_ol, sag_slice_ol, img_aspect_axial_ol, img_aspect_cor_ol, img_aspect_sag_ol = pull_midslices(overlays[i])
			ol_data_group.append([axial_slice_ol, cor_slice_ol, sag_slice_ol])
			ol_shape_group.append([img_aspect_axial_ol, img_aspect_cor_ol, img_aspect_sag_ol])
		else:
			ol_data_group.append(['null','null','null'])
			ol_shape_group.append(['null','null','null'])
		## Append to Matrices
		img_data_group.append([axial_slice, cor_slice, sag_slice])
		img_shape_group.append([img_aspect_axial,img_aspect_cor,img_aspect_sag])
		


	my_cmap=plt.cm.gray


	fig, axarr = plt.subplots(ncols=np.shape(img_shape_group)[1], nrows=np.shape(img_shape_group)[0], figsize=(np.shape(img_shape_group)[0]*5,np.shape(img_shape_group)[1]*5))
	plt.suptitle(subid+' File Comparison', fontsize=20)	
	
	titlearray = ['Axial', 'Coronal', 'Sagittal']
	
	for x in range(0,np.shape(img_shape_group)[0]):
		for y in range(0,np.shape(img_shape_group)[1]):
			im = axarr[x, y].imshow(img_data_group[x][y], cmap=my_cmap, aspect=img_shape_group[x][y])
			axarr[x, y].set_xlabel('(Right) Radiological Convention (Left)', fontsize=10)
			axarr[x, y].set_title(image_types[x]+' '+titlearray[y])
			#divider = make_axes_locatable(axarr[x, y])
			#cax_ = divider.append_axes("right", size="5%", pad=0.05)
			#cbar = plt.colorbar(im, cax=cax_, ticks=MultipleLocator(round(np.max(img_data_group[x][y])/5, 1)))
			axarr[x, y].xaxis.set_visible(False)
			axarr[x, y].yaxis.set_visible(False)




			if os.path.isfile(overlays[x]):
				thresh = 0.25  # default; the original only set thresh for x == 1 and x == 2
				if x == 2:
					thresh = 0.4
				sl = np.array(ol_data_group[x][y]).astype(np.float64)
				sl = filters.sobel(sl)
				sl = preprocessing.binarize(sl, np.max(sl)*thresh)
				sl[sl < 1] = np.nan  # NaN pixels render transparent, leaving only the overlay outline
				axarr[x, y].imshow(sl, cmap='autumn', aspect=ol_shape_group[x][y])

	#plt.show()
	plt.tight_layout()
	plt.autoscale()
	plt.savefig(op_direc)
Developer: DaveOC90, Project: Tissue-Segmentation, Lines: 60, Source: plot_overlay_imgs.py

Example 9: example2

def example2():
    """方法2[推荐]
    """
    X = np.array([[1, -1,  2], ## "f"非常重要,为了标准化,矩阵元素必须是浮点类型
                  [2,  0,  0],
                  [0,  1, -1]], dtype = "f")
    print("binarized X = \n%s\n" % preprocessing.binarize(X, threshold=1.1))
Developer: MacHu-GWU, Project: six-demon-bag, Lines: 7, Source: binarize.py
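With threshold=1.1, only the entries strictly greater than 1.1 (the 2s in this matrix) become 1; everything else, including the exact value 1, becomes 0.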

Example 10: elastic_transform

def elastic_transform(image, gt, alpha, sigma, random_state=None):
    """
    :param image: image
    :param gt: ground truth
    :param alpha: deformation coefficient (high alpha -> strong deformation)
    :param sigma: std of the gaussian filter. (high sigma -> smooth deformation)
    :param random_state:
    :return: deformation of the pair [image,mask]
    """

    if random_state is None:
        random_state = np.random.RandomState(None)

    shape = image.shape

    d = 4
    sub_shape = (shape[0] // d, shape[0] // d)  # integer division keeps the dims ints (assumes square images)

    deformations_x = random_state.rand(*sub_shape) * 2 - 1
    deformations_y = random_state.rand(*sub_shape) * 2 - 1

    deformations_x = np.repeat(np.repeat(deformations_x, d, axis=1), d, axis = 0)
    deformations_y = np.repeat(np.repeat(deformations_y, d, axis=1), d, axis = 0)

    dx = gaussian_filter(deformations_x, sigma, mode="constant", cval=0) * alpha
    dy = gaussian_filter(deformations_y, sigma, mode="constant", cval=0) * alpha

    x, y = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]))
    indices = np.reshape(y+dy, (-1, 1)), np.reshape(x+dx, (-1, 1))

    elastic_image = map_coordinates(image, indices, order=1).reshape(shape)
    elastic_gt = map_coordinates(gt, indices, order=1).reshape(shape)
    elastic_gt = preprocessing.binarize(np.array(elastic_gt), threshold=0.5)

    return [elastic_image, elastic_gt]
Developer: vherman3, Project: AxonSegmentation, Lines: 35, Source: input_data.py

Example 11: test_model

def test_model(LRM, data, TBI=False):

	global GAMMA

	y = list(data.TBResult.values)

	test_recs = list(data.StudyNum.values)

	X = data.drop(['StudyNum','TBResult'],axis=1)

	probs = LRM.predict_proba(X)[:,1]

	"""
	Calculate AUC acc using ROC analysis
	"""
	# Get FPR and TPR for the test set
	fpr, tpr, thresh = roc_curve(y,probs)
	# Calc AUC acc
	auc_acc = auc(fpr,tpr)

	pred = list(map(int, binarize(np.array(probs).reshape(1,-1), threshold=GAMMA)[0]))


	if not TBI:
		ACC,SENS,SPEC = eval_model(pred, y)
		return [ACC,SENS,SPEC,auc_acc]

	else:
		ACC,SENS,SPEC = eval_model(pred, y, probs=probs, test_px=test_recs, TBI=True)
		return [ACC,SENS,SPEC]
Developer: Rendiere, Project: Masters_Round3, Lines: 30, Source: vanilla_5AUG.py

Example 12: load_data

    def load_data(self, features, X_threshold):
        """ Load data into c_data """
        from neurosynth.analysis.reduce import average_within_regions

        # Load Masks by studies matrix

        # ADD FEATURE TO FILTER BY FEATURES
        masks_by_studies = average_within_regions(self.dataset, self.mask_img, threshold = self.thresh)

        study_ids = self.dataset.feature_table.data.index

        print "Loading data from neurosynth..."

        pb = tools.ProgressBar(len(list(masks_by_studies)), start=True)

        self.ids_by_masks = []
        self.data_by_masks = []
        for mask in masks_by_studies:

            m_ids = study_ids[np.where(mask == True)[0]]
            self.ids_by_masks.append(m_ids)
            self.data_by_masks.append(self.dataset.get_feature_data(ids=m_ids))
            pb.next()

        self.mask_num = masks_by_studies.shape[0]    
        self.mask_pairs = list(itertools.permutations(range(0, self.mask_num), 2))

        filename = path.join(mkdtemp(), 'c_data.dat')
        self.c_data = np.memmap(filename, dtype='object',
                                mode='w+', shape=(self.mask_num, self.mask_num))
        # Load data
        for pair in self.mask_pairs:
            reg1_ids = self.ids_by_masks[pair[0]]
            reg2_ids = self.ids_by_masks[pair[1]]

            reg1_set = list(set(reg1_ids) - set(reg2_ids))
            reg2_set = list(set(reg2_ids) - set(reg1_ids))

            x1 = self.data_by_masks[pair[0]]
            x1 = np.array(x1)[np.where(np.in1d(reg1_ids, reg1_set))[0]]

            x2 = self.data_by_masks[pair[1]]
            x2 = np.array(x2)[np.where(np.in1d(reg2_ids, reg2_set))[0]] 

            y = np.array([0]*len(reg1_set) + [1]*len(reg2_set))

            X = np.vstack((x1, x2))

            if X_threshold is not None:
                X = binarize(X, X_threshold)

            from neurosynth.analysis.classify import regularize
            X = regularize(X, method='scale')

            self.c_data[pair] = (X, y)

        if self.memsave:
            self.data_by_masks = []
            self.ids_by_masks = []
Developer: margulies, Project: NS_Classify, Lines: 59, Source: multipleclassifier.py

Example 13: transform

    def transform(self, X):
        """Compute the Jaccard similarity for all pairs of elements in ``X``.

        Rows i in ``X`` are assumed to represent pairs, where
        ``X[i, :n_features]`` and ``X[i, n_features:]`` correspond to their two
        individual elements, each representing a set. Calling ``transform``
        computes the Jaccard similarity between these sets, i.e. such that
        ``Xt[i]`` is the Jaccard similarity of ``X[i, :n_features]`` and
        ``X[i, n_features:]``.

        Parameters
        ----------
        :param X: array-like, shape (n_samples, n_features)
            Input data.

        Returns
        -------
        :returns: Xt array-like, shape (n_samples, 1)
            The transformed data.
        """
        n_samples, n_features_all = X.shape
        n_features = n_features_all // 2

        X = binarize(X)

        sparse = sp.issparse(X)

        if sparse and not sp.isspmatrix_csr(X):
            X = X.tocsr()

        # slice only after any format conversion, so X1/X2 share X's final format
        X1 = X[:, :n_features]
        X2 = X[:, n_features:]

        if sparse:
            if X.data.sum() == 0:
                return np.zeros((n_samples, 1))

            numerator = np.asarray(X1.multiply(X2).sum(axis=1)).ravel()

            X_sum = X1 + X2
            X_sum.data[X_sum.data != 0.] = 1
            M = X_sum.sum(axis=1)
            A = M.getA()
            denominator = A.reshape(-1,)

        else:
            if len(X[X.nonzero()]) == 0.:
                return np.zeros((n_samples, 1))

            numerator = (X1 * X2).sum(axis=1)

            X_sum = X1 + X2
            X_sum[X_sum.nonzero()] = 1
            denominator = X_sum.sum(axis=1)

        with np.errstate(divide="ignore", invalid="ignore"):
            Xt = numerator / denominator
            Xt[np.where(denominator == 0)[0]] = 0.

        return np.array(Xt).reshape(-1, 1)
Developer: jochenklein, Project: beard, Lines: 59, Source: pairs.py

Example 14: eval_model

def eval_model(preds, y_ref, probs = [], test_px = [], TBI = False):

	global GAMMA

	if len(preds) != len(y_ref):
		print("Predicted labels and test labels don't have the same dimensions!")
		print("Predicted:", len(preds), "; Tests:", len(y_ref))
		exit()


	if not TBI:
		CM = confusion_matrix(y_ref, preds)

		TP = CM[1,1]
		TN = CM[0,0]
		FP = CM[0,1]
		FN = CM[1,0]

		ACC = (TP+TN)/float(TP+TN+FP+FN)
		SENS = TP/float(TP+FN)
		SPEC = TN/float(TN+FP)

		return ACC,SENS,SPEC

	else:

		i = np.arange(len(test_px))

		df = pd.DataFrame({"Recording": pd.Series(test_px,index = i),
                            "Prediction": pd.Series(preds,index = i),
                            "Reference": pd.Series(y_ref,index = i),
                            "Probabilities": pd.Series(probs,index = i)
                            }).sort_values(by="Recording")

		y_test_rec 	= []
		TBI_list	= []

		for name, group in df.groupby("Recording"):
			l = group.Reference.iloc[0]
			y_test_rec.append(l)

			TB_prob = sum(group.Probabilities.values) / float(len(group.Probabilities))
			TBI_list.append(TB_prob)


		diagnosis_list = list(map(int, binarize(np.array(TBI_list).reshape(1,-1), threshold=GAMMA)[0]))

		CM = confusion_matrix(y_test_rec, diagnosis_list)

		TP = CM[1,1]
		TN = CM[0,0]
		FP = CM[0,1]
		FN = CM[1,0]

		ACC = (TP+TN)/float(TP+TN+FP+FN)
		SENS = TP/float(TP+FN)
		SPEC = TN/float(TN+FP)

		return ACC, SENS, SPEC
Developer: Rendiere, Project: Masters_Round3, Lines: 59, Source: vanilla_5AUG.py

Example 15: predictClass

    def predictClass(self, threshold=0.5):
        # binarize predicted probabilities into class labels
        # self.pred_y = self.model.predict(self.test_set_X)
        if self.is_keras:
            self.pred_y_prob = self.model.predict_proba(self.test_set_X)[:, 0]
        else:
            self.pred_y_prob = self.model.predict_proba(self.test_set_X)[:, 1]
        self.pred_y = binarize(self.pred_y_prob.reshape(1, -1), threshold)[0].astype(int)
Developer: asalomatov, Project: variants, Lines: 8, Source: train.py
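Closing note: when the same thresholding needs to run inside a scikit-learn Pipeline, the Binarizer transformer wraps the operation that the binarize function performs. A minimal sketch (array values are illustrative):

import numpy as np
from sklearn.preprocessing import Binarizer

X = np.array([[1.5, -0.3, 2.0]])
binarizer = Binarizer(threshold=0.5)  # stateless transformer: fit() is a no-op
print(binarizer.fit_transform(X))     # [[1. 0. 1.]]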


Note: The sklearn.preprocessing.binarize examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and redistribution or use should follow each project's license. Do not reproduce without permission.