当前位置: 首页>>代码示例>>Python>>正文


Python preprocessing.LabelBinarizer方法代码示例

本文整理汇总了Python中sklearn.preprocessing.LabelBinarizer类的典型用法代码示例。如果您正在寻找以下问题的答案:Python preprocessing.LabelBinarizer的具体用法是什么?Python preprocessing.LabelBinarizer怎么用?Python preprocessing.LabelBinarizer有哪些使用示例?那么这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.preprocessing的其他用法示例。


在下文中一共展示了preprocessing.LabelBinarizer类的15个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的Python代码示例。

示例1: mmb_evaluate_model

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def mmb_evaluate_model(self):
        """
        Cross-validate a random forest on the malicious / benign training split.

        The columns named in ``self.features['predictive_features']`` are read
        from ``self.modeldata`` and stored, together with the ``'label'``
        column, on ``self.clf_X`` / ``self.clf_y``.  Only the 80% training part
        of a fixed (``random_state=0``) split is scored; the held-out 20% is
        not used by this method.

        Returns:
            dict: keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
            ``'recall'``, each mapping to the array of 5-fold
            ``cross_val_score`` results (``'f1'`` uses the ``'f1_macro'``
            scorer).
        """
        predictive_features = self.features['predictive_features']
        self.clf_X = self.modeldata[predictive_features].values
        self.clf_y = np.array(self.modeldata['label'])

        # The test portion of the split is intentionally discarded here.
        X_train, _, y_train, _ = train_test_split(
            self.clf_X, self.clf_y, test_size=0.2, random_state=0)

        # Keep the first column of the binarized labels as a flat 0/1 vector.
        y_train = LabelBinarizer().fit_transform(y_train)[:, 0]

        # No up-front fit: cross_val_score fits its own clone of the estimator
        # on every fold, so a pre-fitted model would simply be thrown away.
        eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)

        recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
        precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
        accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
        f1_score = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')

        return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall}
开发者ID:egaus,项目名称:MaliciousMacroBot,代码行数:22,代码来源:mmbot.py

示例2: test_sklearn_labelbin

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def test_sklearn_labelbin(self):
        """Check ``LabelBinarizer.classes_`` after fitting, after a manual
        override, and after a subsequent ``transform`` call."""

        m_probs = np.array([1.0, .81, .85, .81, .85, .81])
        u_probs = np.array([1.0, .23, .50, .23, .30, 0.13])

        # Generate the training comparison vectors (the links are not used).
        X_train, true_links = binary_vectors(
            1000, 500,
            m=m_probs, u=u_probs,
            random_state=535, return_links=True)

        # Fitting on the first column is expected to find a single class.
        binarizer = LabelBinarizer()
        first_column = X_train.iloc[:, 0]
        binarizer.fit(first_column)
        assert len(binarizer.classes_) == 1

        # Overriding classes_ by hand must stick ...
        binarizer.classes_ = np.array([0, 1])
        assert len(binarizer.classes_) == 2

        # ... and transform() must not reset it.
        second_column = X_train.iloc[:, 1]
        binarizer.transform(second_column)
        assert len(binarizer.classes_) == 2
开发者ID:J535D165,项目名称:recordlinkage,代码行数:20,代码来源:test_classify.py

示例3: formatClass

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def formatClass(rootFile, Cl):
    """Binarize the class labels and save a histogram of their distribution.

    Args:
        rootFile: prefix of the output image; the plot is written to
            ``rootFile + '_ClassDistrib.png'``.
        Cl: class labels; each entry must be convertible with ``float()``.

    Returns:
        The output of ``LabelBinarizer().fit_transform(Cl)``.
    """
    import sklearn.preprocessing as pp
    print('==========================================================================\n')
    print(' Running basic TensorFlow. Creating class data in binary form...')
    binarized = pp.LabelBinarizer().fit_transform(Cl)

    import matplotlib.pyplot as plt
    # Convert once and reuse for both the data and the bin edges.
    class_values = [float(x) for x in Cl]
    plt.hist(class_values, bins=np.unique(class_values), edgecolor="black")
    plt.xlabel('Class')
    plt.ylabel('Occurrances')
    plt.title('Class distibution')
    # Save plot
    plt.savefig(rootFile + '_ClassDistrib.png', dpi=160, format='png')

    # Only open an interactive window when the configuration asks for it.
    if tfDef.plotClassDistribTF == True:
        print(' Plotting Class distibution \n')
        plt.show()

    return binarized

#******************************************************************************** 
开发者ID:feranick,项目名称:SpectralMachine,代码行数:21,代码来源:SpectraLearnPredict.py

示例4: test_cross_val_predict

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def test_cross_val_predict():
    """MLPClassifier must be usable inside ``cross_val_predict`` on a
    multiclass problem (iris) and reach a per-class AUC of at least 0.96."""

    features, labels = load_iris(return_X_y=True)
    onehot = LabelBinarizer().fit_transform(labels)
    scaled = StandardScaler().fit_transform(features)

    clf = MLPClassifier(
        n_epochs=10,
        solver_kwargs={'learning_rate': 0.05},
        random_state=4567,
    ).fit(scaled, onehot)

    # Out-of-sample class probabilities from a shuffled 4-fold split.
    folds = KFold(n_splits=4, random_state=457, shuffle=True)
    oos_proba = cross_val_predict(clf, scaled, onehot, cv=folds,
                                  method='predict_proba')

    # average=None yields one AUC per class column.
    per_class_auc = roc_auc_score(onehot, oos_proba, average=None)

    assert np.all(per_class_auc >= 0.96)
开发者ID:civisanalytics,项目名称:muffnn,代码行数:18,代码来源:test_mlp_classifier.py

示例5: bio_classification_report

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def bio_classification_report(y_true, y_pred):
    """
    Classification report for BIOSE-encoded tags.

    It computes token-level metrics for every tag found in ``y_true``,
    including the ``'O'`` tag.  Rows are ordered by the part of the tag after
    the first ``'-'`` and then by the prefix before it, so the variants of one
    entity type appear next to each other.

    :param y_true: gold tags; passed as-is to ``LabelBinarizer.fit_transform``
    :param y_pred: predicted tags; binarized with the classes learned from
        ``y_true``
    :return: the value returned by ``classification_report``
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(y_true)
    y_pred_combined = lb.transform(y_pred)

    # lb.classes_ is already unique; sort so that e.g. 'B-X' and 'I-X' are
    # adjacent ('B-X' -> ['X', 'B']).
    tagset = sorted(lb.classes_, key=lambda tag: tag.split('-', 1)[::-1])
    # Column index of each tag in the binarized matrices.
    class_indices = {
        cls: idx for idx, cls in enumerate(lb.classes_)
    }

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )
开发者ID:baiyyang,项目名称:medical-entity-recognition,代码行数:27,代码来源:crf_unit.py

示例6: eval

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def eval(self, test_x, test_y, crf_model):
        """Tag ``test_x`` with a stored CRF model and print a token-level report.

        Args:
            test_x: iterable of feature sequences, one per sample; each is
                handed to ``tagger.tag``.
            test_y: iterable of gold tag sequences matching ``test_x``.
            crf_model: model location passed to ``pycrfsuite.Tagger.open``.
        """
        tagger = pycrfsuite.Tagger()
        tagger.open(crf_model)

        y_pred = [tagger.tag(feat_list) for feat_list in test_x]

        # Flatten the per-sequence tags into token-level label lists.
        gold_tokens = list(chain.from_iterable(test_y))
        pred_tokens = list(chain.from_iterable(y_pred))

        lb = LabelBinarizer()
        y_true_all = lb.fit_transform(gold_tokens)
        y_pred_all = lb.transform(pred_tokens)

        tagset = sorted(set(lb.classes_))
        class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}
        label_columns = [class_indices[cls] for cls in tagset]

        report = classification_report(
            y_true_all,
            y_pred_all,
            labels=label_columns,
            target_names=tagset,
            digits=5
        )
        print(report)
开发者ID:jiaeyan,项目名称:Jiayan,代码行数:25,代码来源:crf_sent_tagger.py

示例7: train

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def train(self, x, y):
        """
        Fit one estimator per column of the binarized label matrix.

        Args:
            x (numpy.ndarray): input points
            y (numpy.ndarray): input labels
        Raises:
            Exception: if a binarized column holds a single distinct value,
                        i.e. all data points fall on the same side for it
        """
        binarizer = LabelBinarizer(neg_label=0)
        self.label_binarizer_ = binarizer
        indicator = binarizer.fit_transform(y)
        self.classes = binarizer.classes_

        self.estimators = []
        for raw_column in indicator.T:
            column = np.ravel(raw_column)
            # A constant target column gives the estimator nothing to learn.
            if len(np.unique(column)) == 1:
                raise Exception("given all data points are assigned to the same class, "
                                "the prediction would be boring.")
            estimator = self.estimator_cls(*self.params)
            estimator.fit(x, column)
            self.estimators.append(estimator)
开发者ID:Qiskit,项目名称:qiskit-aqua,代码行数:26,代码来源:one_against_rest.py

示例8: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def __init__(self, feature_vector_size, label_words):
        """Create a three-layer OpenCV MLP and a binarizer for the label words.

        Args:
            feature_vector_size: number of inputs (number of centroids used to
                build the feature vectors).
            label_words: sequence of class names; its length is the number of
                outputs.
        """
        self.ann = cv2.ml.ANN_MLP_create()
        # Number of centroids used to build the feature vectors
        input_size = feature_vector_size
        # Number of models to recognize
        output_size = len(label_words)
        # Applying Heaton rules; layer sizes must be whole numbers, so the
        # fractional part is truncated explicitly.
        hidden_size = int((input_size * (2 / 3)) + output_size)
        # int32 instead of uint8: an 8-bit dtype cannot represent a layer
        # wider than 255 units.
        nn_config = np.array([input_size, hidden_size, output_size], dtype=np.int32)
        self.label_words = label_words
        self.ann.setLayerSizes(nn_config)
        # Symmetrical Sigmoid as activation function
        self.ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
        # Map models as tuples of probabilities
        self.le = preprocessing.LabelBinarizer()
        self.le.fit(label_words)  # Label words are ['dress', 'footwear', 'backpack']
开发者ID:PacktPublishing,项目名称:OpenCV-3-x-with-Python-By-Example,代码行数:18,代码来源:training.py

示例9: test_conversion_with_sparse_y

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def test_conversion_with_sparse_y(self):
        """Converting a model fitted on sparse-format y values must raise ValueError."""
        from sklearn.model_selection import train_test_split

        # Only the training portion of the split is needed below.
        parts = train_test_split(
            self.iris_X, self.iris_y, test_size=0.2, train_size=0.8
        )
        X_train, X_test, y_train, y_test = parts

        from sklearn import preprocessing

        # sparse_output=True makes fit_transform return the targets in sparse format.
        sparse_binarizer = preprocessing.LabelBinarizer(sparse_output=True)
        binarized_y = sparse_binarizer.fit_transform(y_train)

        knn = KNeighborsClassifier(algorithm="brute")
        knn.fit(X_train, binarized_y)

        self.assertRaises(ValueError, sklearn.convert, knn)
开发者ID:apple,项目名称:coremltools,代码行数:19,代码来源:test_k_neighbors_classifier.py

示例10: fit

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def fit(self, X, y):
        """
        Accumulate per-class feature counts and derive smoothed
        log-probabilities from them.

        Sets ``classes``, ``class_count``, ``feature_count`` and
        ``feature_log_prob`` on the instance; smoothing uses ``self.alpha``.

        :param X: shape = [n_samples, n_features]
        :param y: shape = [n_samples]
        :return: self
        """
        labelbin = LabelBinarizer()
        Y = labelbin.fit_transform(y)
        self.classes = labelbin.classes_
        if Y.shape[1] == 1:
            # LabelBinarizer emits a single column when there are at most two
            # classes; expand it so there is one indicator column per class.
            if len(self.classes) == 2:
                Y = np.concatenate((1 - Y, Y), axis=1)
            else:
                # Only one class seen: every sample belongs to it.
                Y = np.ones_like(Y)
        self.class_count = np.zeros(Y.shape[1], dtype=np.float64)
        self.feature_count = np.zeros((Y.shape[1], X.shape[1]),
                                      dtype=np.float64)

        # Row k of Y.T @ X sums the feature vectors of the samples in class k.
        self.feature_count += Y.T @ X
        self.class_count += Y.sum(axis=0)
        smoothed_fc = self.feature_count + self.alpha
        smoothed_cc = smoothed_fc.sum(axis=1)

        # Normalise each class row in log space.
        self.feature_log_prob = (np.log(smoothed_fc) -
                                 np.log(smoothed_cc.reshape(-1, 1)))
        return self
开发者ID:WiseDoge,项目名称:plume,代码行数:22,代码来源:naive_bayes.py

示例11: encode_labels

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def encode_labels(self, label_dict, srcids):
        """Binarize the label sequences of ``srcids`` and pad them to ``self.max_len``.

        A ``LabelBinarizer`` is fitted on ``'O'``, on the ``B_``/``I_`` variants
        of the tags in ``brick/tags.json`` (only when ``self.use_brick_flag`` is
        set) and on every label found in ``label_dict`` for the given
        ``srcids``; it is stored on ``self.le``.

        Args:
            label_dict: mapping ``srcid -> {metadata_type: list of labels}``.
            srcids: ids to encode; each must be present in ``label_dict`` and
                ``self.sentence_dict``.

        Returns:
            numpy.ndarray: one zero-padded block of ``self.max_len`` rows per
            (srcid, metadata_type) pair, stacked along a new first axis.
        """
        flat_labels = ['O']
        if self.use_brick_flag:
            with open('brick/tags.json', 'r') as fp:
                brick_tags = json.load(fp)
            flat_labels += ['B_' + tag for tag in brick_tags] + \
                           ['I_' + tag for tag in brick_tags]
        flat_labels += reduce(adder, [reduce(adder, label_dict[srcid].values()) for srcid in srcids])
        self.le = LabelBinarizer().fit(flat_labels)
        # Width of one encoded row, probed from the fitted binarizer so that an
        # empty label list can be padded even when it is the first one seen.
        n_columns = self.le.transform(flat_labels[:1]).shape[1]
        stack = []
        for srcid in srcids:
            for metadata_type in self.sentence_dict[srcid].keys():
                labels = label_dict[srcid][metadata_type]
                if len(labels) == 0:
                    encoded = np.zeros((self.max_len, n_columns))
                else:
                    encoded = self.le.transform(labels)
                    # Pad with all-zero rows up to max_len.
                    encoded = np.vstack([encoded, np.zeros(
                                         (self.max_len - encoded.shape[0],
                                          encoded.shape[1]))])
                stack.append(encoded)
        return np.stack(stack)
开发者ID:plastering,项目名称:plastering,代码行数:26,代码来源:char2ir_gpu.py

示例12: get_mnist_data

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def get_mnist_data():
    """Load the data set returned by ``load_digits()`` with binarized labels.

    Returns
    -------
    X : array-like
        The ``data`` attribute of the loaded data set.

    y : array-like
        The ``target`` attribute after ``LabelBinarizer().fit_transform``,
        i.e. indicator columns rather than the raw label vector.
    """
    dataset = load_digits()
    features = dataset.data
    targets = LabelBinarizer().fit_transform(dataset.target)

    return features, targets
开发者ID:thuijskens,项目名称:production-tools,代码行数:18,代码来源:train_model.py

示例13: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def __init__(self, n_hidden=20, alpha=0.5, rbf_width=1.0,
                 activation_func='tanh', activation_args=None,
                 user_components=None, regressor=None,
                 binarizer=None,
                 random_state=None):
        """Store the classifier settings and forward the rest to the base class.

        Args:
            n_hidden, alpha, rbf_width, activation_func, activation_args,
            user_components, regressor, random_state: forwarded unchanged to
                the parent ``__init__``.
            binarizer: label binarizer to use.  When ``None`` (the default) a
                fresh ``LabelBinarizer(neg_label=-1, pos_label=1)`` is created
                for this instance.
        """
        super(ELMClassifier, self).__init__(n_hidden=n_hidden,
                                            alpha=alpha,
                                            random_state=random_state,
                                            activation_func=activation_func,
                                            activation_args=activation_args,
                                            user_components=user_components,
                                            rbf_width=rbf_width,
                                            regressor=regressor)

        self.classes_ = None
        # Build the default per instance: a binarizer created in the signature
        # would be one shared object, so fitting it through one classifier
        # would change every other classifier that relies on the default.
        # Keyword arguments are used because current scikit-learn releases
        # reject positional ones.
        if binarizer is None:
            binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
        self.binarizer = binarizer
开发者ID:dlmacedo,项目名称:SVM-CNN,代码行数:19,代码来源:elm.py

示例14: make_xgboost_dataframe_mapper

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def make_xgboost_dataframe_mapper(dtypes, missing_value_aware = True):
	"""Construct a DataFrameMapper for feeding complex data into an XGBModel.

	Parameters
	----------

	dtypes: mapping of column to dtype
		Anything exposing ``.items()`` that yields (column, dtype) pairs.

	missing_value_aware: boolean
		If true, categorical columns use PMMLLabelBinarizer; otherwise
		LabelBinarizer. Both are created with sparse output.

	Returns
	-------
	DataFrameMapper
		Categorical columns are mapped to a binarizer, all other columns
		to None.

	"""
	binarizer_cls = PMMLLabelBinarizer if missing_value_aware else LabelBinarizer
	features = [
		([column], binarizer_cls(sparse_output = True) if _is_categorical(dtype) else None)
		for column, dtype in dtypes.items()
	]
	return DataFrameMapper(features)
开发者ID:jpmml,项目名称:sklearn2pmml,代码行数:25,代码来源:xgboost.py

示例15: _check_X_y

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelBinarizer [as 别名]
def _check_X_y(self, X, y):
        """Validate ``X`` / ``y`` and recode the binary target with
        ``LabelBinarizer(pos_label=1, neg_label=-1)``.

        The fitted binarizer is kept on ``self.label_binarizer_``.

        Returns:
            tuple: the validated ``X`` and the recoded target as a flat
            ``np.double`` array.

        Raises:
            TypeError: if ``y`` has two or more columns or is not a binary
                target according to ``type_of_target``.
        """
        # Reject multi-column targets up front with a readable message.
        # NOTE(review): the original comment cites "sklearn < 1.17" as the
        # reason; that version number looks like a typo - confirm.
        has_many_columns = (hasattr(y, 'shape')
                            and len(y.shape) > 1
                            and y.shape[1] >= 2)

        if has_many_columns or type_of_target(y) != 'binary':
            message = ("Only binary targets supported. For training "
                       "multiclass or multilabel models, you may use the "
                       "OneVsRest or OneVsAll metaestimators in "
                       "scikit-learn.")
            raise TypeError(message)

        X, y_validated = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                                   multi_output=False)

        binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
        self.label_binarizer_ = binarizer
        recoded = binarizer.fit_transform(y_validated)
        return X, recoded.ravel().astype(np.double)
开发者ID:scikit-learn-contrib,项目名称:polylearn,代码行数:19,代码来源:base.py


注:本文中的sklearn.preprocessing.LabelBinarizer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。