当前位置: 首页>>代码示例>>Python>>正文


Python LabelBinarizer.fit方法代码示例

本文整理汇总了Python中sklearn.preprocessing.LabelBinarizer.fit方法的典型用法代码示例。如果您正苦于以下问题:Python LabelBinarizer.fit方法的具体用法?Python LabelBinarizer.fit怎么用?Python LabelBinarizer.fit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.LabelBinarizer的用法示例。


在下文中一共展示了LabelBinarizer.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_label_binarizer_multilabel

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def test_label_binarizer_multilabel():
    """Check that LabelBinarizer round-trips multilabel input.

    Three cases are exercised: labels given as a list of tuples, labels
    given as a label indicator matrix, and the two-class multilabel
    regression case.
    """
    binarizer = LabelBinarizer()

    # Case 1: multilabel input given as a list of tuples.
    labels = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([
        [0, 1, 1],
        [1, 0, 0],
        [1, 1, 0],
    ])
    binarized = binarizer.fit_transform(labels)
    assert_array_equal(indicator_mat, binarized)
    assert_equal(binarizer.inverse_transform(binarized), labels)

    # Case 2: input already given as a label indicator matrix.
    binarizer.fit(indicator_mat)
    recovered = binarizer.inverse_transform(indicator_mat)
    assert_array_equal(indicator_mat, recovered)

    # Case 3: regression test for multilabel input with only two classes.
    binarizer = LabelBinarizer()
    labels = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([
        [1, 1],
        [1, 0],
        [0, 1],
        [1, 1],
    ])
    binarized = binarizer.fit_transform(labels)
    assert_array_equal(expected, binarized)
    # Label order inside each sample is not significant, so compare as sets.
    recovered_sets = [set(row) for row in binarizer.inverse_transform(binarized)]
    expected_sets = [set(row) for row in labels]
    assert_equal(recovered_sets, expected_sets)
开发者ID:AlexLerman,项目名称:scikit-learn,代码行数:31,代码来源:test_preprocessing.py

示例2: BaseSGD

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
class BaseSGD(object):
    """Helpers shared by SGD estimators: loss lookup, learning-rate lookup
    and label transformer setup."""

    def _get_loss(self):
        """Return the loss object registered under ``self.loss``.

        Every loss object is constructed on each call (``self.epsilon`` is
        read for the epsilon-based losses); an unknown ``self.loss`` raises
        ``KeyError``.
        """
        registry = dict([
            ("modified_huber", ModifiedHuber()),
            ("hinge", Hinge(1.0)),
            ("perceptron", Hinge(0.0)),
            ("log", Log()),
            ("sparse_log", SparseLog()),
            ("squared", SquaredLoss()),
            ("huber", Huber(self.epsilon)),
            ("epsilon_insensitive", EpsilonInsensitive(self.epsilon)),
        ])
        return registry[self.loss]

    def _get_learning_rate(self):
        """Return the integer code for ``self.learning_rate``.

        An unknown schedule name raises ``KeyError``.
        """
        codes = dict(constant=1, pegasos=2, invscaling=3)
        return codes[self.learning_rate]

    def _set_label_transformers(self, y):
        """Fit the label transformers on ``y``.

        When ``self.multiclass == "natural"`` the labels are first passed
        through a ``LabelEncoder`` (stored as ``label_encoder_``) and cast to
        float64. A ``LabelBinarizer`` with -1/+1 labels is then fitted and
        stored as ``label_binarizer_``; ``classes_`` holds its classes cast
        to int32.

        Returns ``(n_classes, n_vectors)`` where ``n_vectors`` is 1 when
        there are at most two classes and ``n_classes`` otherwise.
        """
        if self.multiclass == "natural":
            encoder = LabelEncoder()
            self.label_encoder_ = encoder
            y = encoder.fit_transform(y).astype(np.float64)

        binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
        self.label_binarizer_ = binarizer
        binarizer.fit(y)
        self.classes_ = binarizer.classes_.astype(np.int32)

        n_classes = len(binarizer.classes_)
        if n_classes <= 2:
            n_vectors = 1
        else:
            n_vectors = n_classes
        return n_classes, n_vectors
开发者ID:Raz0r,项目名称:lightning,代码行数:31,代码来源:sgd.py

示例3: get_abalone19

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def get_abalone19():
    """Load the abalone data and build a binary classification frame.

    Reads ``ABALONE_FILE`` as CSV, one-hot encodes the ``gender`` column
    with a ``LabelBinarizer``, and replaces the ``rings`` column with a
    binary ``label`` column (1 when ``rings > 10``, otherwise 0). When
    ``STANDARDIZE`` is truthy, the columns from position 3 up to (but not
    including) the trailing ``label`` column are standardized to zero mean
    and unit standard deviation.

    The share of positive records is printed before returning.

    Returns
    -------
    pandas.DataFrame
        Encoded gender columns, the remaining feature columns and ``label``.
    """
    raw_data = pd.read_csv(ABALONE_FILE, sep=',')
    # ``DataFrame.ix`` was removed from pandas; use label-based ``.loc``.
    genders = list(raw_data.loc[:, 'gender'])
    cts_data = raw_data.drop(labels='gender', axis=1)

    # initialize & fit preprocessor
    lbz = LabelBinarizer()
    lbz.fit(genders)

    # encode categorical var
    encoded_genders = pd.DataFrame(lbz.transform(genders))
    encoded_genders.columns = ['gender_' + k for k in lbz.classes_]

    # recombine encoded data & return
    new_data = pd.concat(objs=[encoded_genders, cts_data], axis=1)
    new_data['label'] = raw_data['rings'].map(
        lambda k: 1 if k > 10 else 0)               # binary clf task
    new_data = new_data.drop('rings', axis=1)

    # standardize cts features
    if STANDARDIZE:
        # Positional slice (``.iloc``): skips the first three columns and
        # the trailing label. NOTE(review): assumes exactly three gender
        # indicator columns precede the continuous features -- confirm.
        for col in new_data.iloc[:, 3:-1]:
            mean = new_data[col].mean()
            std = new_data[col].std()
            new_data[col] = (new_data[col] - mean) / float(std)

    pos_recs = new_data['label'].sum()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('total pos class pct = {} %\n'.format(
        round(100 * pos_recs / float(len(new_data)), 3)))

    return new_data
开发者ID:Adusei,项目名称:science,代码行数:35,代码来源:grid_cv.py

示例4: fit

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
    def fit(self, X, y):
        """Estimate class priors and per-class feature probabilities.

        ``X`` and ``y`` are converted to numpy arrays; ``X`` must be
        two-dimensional. Sets ``n_features``, ``classes``, ``n_class``,
        ``class_prior`` and ``feature_proba`` on the instance and returns
        ``self``.
        """
        X = np.array(X)
        y = np.array(y)
        samples, self.n_features = X.shape

        # The targets are discrete; a LabelBinarizer is used only to
        # enumerate the distinct classes.
        binarizer = LabelBinarizer()
        binarizer.fit(y)
        self.classes = binarizer.classes_
        self.n_class = self.classes.size

        self.class_prior = np.zeros(self.n_class, dtype=np.float64)
        self.feature_proba = []

        for idx, label in enumerate(self.classes):
            # Rows of X that belong to the current class.
            class_rows = X[y == label]
            n_rows = class_rows[:, 0].size
            self.class_prior[idx] = np.float64(n_rows) / samples

            # Fold every sample of this class into the running feature
            # accumulator via the class's own accumulation rule.
            accumulated = np.zeros(self.n_features)
            total = 0
            for row in class_rows:
                accumulated, total = self._add_features_dens(
                    row, accumulated, total)

            # Turn the accumulated values into probabilities.
            class_proba = self._compute_proba(accumulated, total)
            self.feature_proba.append(class_proba)

        return self
开发者ID:archelan,项目名称:ts,代码行数:34,代码来源:naive_bayes.py

示例5: BusinessCategoriesFeature

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
class BusinessCategoriesFeature(BaseEstimator):
	"""
	Binarized business categories of reviews.

	WARNING: works only with a modified version of LabelBinarizer.
	"""

	def __init__(self, data=None):
		self.data = data

	def __create_labels_list(self, review_list):
		"""Return the 'categories' entry of each review's business, in order."""
		return [
			self.data.get_business_for_review(review)['categories']
			for review in review_list
		]

	def fit(self, X, y):
		"""Fit a fresh binarizer on the categories of the reviews in X."""
		self.binarizer = LabelBinarizer()
		category_labels = self.__create_labels_list(X)
		self.binarizer.fit(category_labels)
		return self

	def transform(self, X):
		"""Return the binarized categories of the reviews in X as floats."""
		category_labels = self.__create_labels_list(X)
		encoded = self.binarizer.transform(category_labels)
		return encoded.astype(float)
开发者ID:MihaiDamian,项目名称:YelpRC,代码行数:30,代码来源:features.py

示例6: train_logreg

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def train_logreg(X, y, test_X, test_y, load_vec=True):
	"""
	Train logistic regression on the feature set and print reports.

	Training and test inputs are featurized together with ``featurize`` and
	then split back at the training-set size. A classification report is
	printed for the training predictions and for the test predictions.

	``X``/``test_X`` and ``y``/``test_y`` are joined with ``+``.
	NOTE(review): this assumes they are lists (concatenation), not numpy
	arrays -- confirm against callers. ``load_vec`` is accepted but not
	used here.
	"""
	full_y = y + test_y

	# NOTE(review): the fitted binarizer is never used below; it is kept
	# only so that fitting on the labels still happens as before.
	lb = LabelBinarizer()
	lb.fit(full_y)

	n_train = len(X)
	print('{} {}'.format(n_train, len(test_X)))
	model = LogisticRegression()
	big_X = X + test_X

	features = featurize(big_X)
	# Split at the actual number of training samples. The former hard-coded
	# 4500 only worked for one specific dataset size.
	X, test_X = features[:n_train], features[n_train:]
	print('{} {}'.format(X.shape, X))

	model.fit(X, y)

	y_pred = model.predict(X)
	print(set(y_pred))
	print(metrics.classification_report(y, y_pred, digits=3))
	y_pred = model.predict(test_X)
	print(set(y_pred))
	print(metrics.classification_report(test_y, y_pred, digits=3))
开发者ID:BinbinBian,项目名称:NNLI,代码行数:27,代码来源:featurizer.py

示例7: display_image_predictions

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def display_image_predictions(features, labels, predictions):
    """Plot images next to bar charts of their top predictions.

    ``labels`` is converted back to label ids with a ``LabelBinarizer``
    fitted on ``range(10)``. ``predictions`` must expose ``indices`` and
    ``values``; one row of a 4x2 subplot grid is drawn per sample, with the
    image on the left and a horizontal bar chart on the right.
    """
    n_classes = 10
    label_names = _load_label_names()
    binarizer = LabelBinarizer()
    binarizer.fit(range(n_classes))
    label_ids = binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)
    width = (1. - 2. * margin) / n_predictions

    samples = zip(features, label_ids, predictions.indices, predictions.values)
    for row_i, (image, true_id, top_ids, top_values) in enumerate(samples):
        top_names = [label_names[class_id] for class_id in top_ids]
        true_name = label_names[true_id]

        # Left cell: the image, titled with its true label.
        image_ax = axies[row_i][0]
        image_ax.imshow(image*255)
        image_ax.set_title(true_name)
        image_ax.set_axis_off()

        # Right cell: bars for the predictions, drawn in reversed order.
        bar_ax = axies[row_i][1]
        bar_ax.barh(ind + margin, top_values[::-1], width)
        bar_ax.set_yticks(ind + margin)
        bar_ax.set_yticklabels(top_names[::-1])
        bar_ax.set_xticks([0, 0.5, 1.0])
开发者ID:lpalum,项目名称:machine-learning,代码行数:30,代码来源:helper.py

示例8: one_hot_encoding

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def one_hot_encoding(y_train, y_test):
    """Binarize train and test labels with one shared LabelBinarizer.

    The binarizer is fitted on ``y_train`` only and then applied to both
    label sets. Returns ``(y_train_encoded, y_test_encoded)``.
    """
    encoder = LabelBinarizer()
    encoder.fit(y_train)

    encoded = [encoder.transform(labels) for labels in (y_train, y_test)]
    return encoded[0], encoded[1]
开发者ID:dzungcamlang,项目名称:Traffic-Signs,代码行数:9,代码来源:util.py

示例9: Encoding

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def Encoding(data, general_matrix=None):
    """Replace every string column of ``data`` by its binarized encoding.

    A column is treated as categorical when the value in its first row is
    exactly of type ``str``. Each such column is encoded with its own
    ``LabelBinarizer`` and the resulting indicator column(s) take the
    place of the original column; all other columns are kept unchanged.

    Parameters
    ----------
    data : 2-D numpy array
        Matrix to encode; must have at least one row.
    general_matrix : 2-D numpy array, optional
        When given, the categories of column ``i`` are taken from
        ``general_matrix[:, i]`` instead of ``data[:, i]``. Its columns must
        line up with the columns of ``data``.

    Returns
    -------
    numpy array
        ``data`` itself when no column needed encoding, otherwise a new
        array with the encoded columns in place.

    Notes
    -----
    Fixes relative to the previous implementation:

    * ``general_matrix=None`` (the default) no longer crashes.
    * Columns are always addressed by their original index, so a column
      that expands into several indicator columns no longer causes later
      string columns to be skipped.
    * A failure while fitting the encoder is no longer silently ignored
      (which previously could reuse the encoder of an earlier column).
    """
    pieces = []
    count = 0
    for i in range(data.shape[1]):
        # Exact type check kept on purpose: numpy string scalars are not
        # plain ``str`` and were not encoded before either.
        if type(data[0, i]) != str:
            pieces.append(data[:, i:i + 1])
            continue

        count += 1
        col = data[:, i]
        source = col if general_matrix is None else general_matrix[:, i]

        # Fresh encoder per column so no state leaks between columns.
        encoder = LabelBinarizer()
        encoder.fit(np.unique(source))
        pieces.append(encoder.transform(col))

    print("count : %d" % count)
    if count == 0:
        return data
    return np.concatenate(pieces, axis=1)
开发者ID:nhanloukiala,项目名称:AppsOfDataAnalysis,代码行数:29,代码来源:cyber_attack_classification.py

示例10: logloss

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def logloss(act, pred):
    """Return the log loss of predictions ``pred`` for true labels ``act``.

    ``pred`` is clipped to ``[1e-15, 1 - 1e-15]`` before taking logs, and
    ``act`` is binarized with a ``LabelBinarizer`` fitted on ``act`` itself.
    The summed loss is divided by the number of rows of ``pred``.
    """
    epsilon = 10 ** -15
    upper = 1 - epsilon
    # Clip from above first, then from below.
    clipped = np.minimum(pred, upper)
    clipped = np.maximum(clipped, epsilon)

    binarizer = LabelBinarizer()
    binarizer.fit(act)
    one_hot = binarizer.transform(act)

    weighted_logs = np.multiply(one_hot, np.log(clipped))
    total = np.sum(weighted_logs)
    return - total / clipped.shape[0]
开发者ID:Lawrence-Liu,项目名称:otto,代码行数:10,代码来源:multilayer_gb.py

示例11: __init__

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
    def __init__(
        self,
        train_file,
        test_file,
        batch_size=32,
        embedding_size=20,
        max_norm=40,
        lr=0.01,
        num_hops=3,
        adj_weight_tying=True,
        linear_start=True,
        **kwargs
    ):
        """Load the data, store hyper-parameters and build the network.

        ``train_file`` and ``test_file`` are read with ``self.get_lines``;
        the vocabulary and maximum lengths come from ``self.get_vocab`` run
        over both sets of lines together. Each set is then converted with
        ``self.process_dataset`` and stored in ``self.data`` under the keys
        "C", "Q" and "Y".

        ``batch_size``, ``embedding_size``, ``max_norm``, ``lr``,
        ``num_hops`` and ``adj_weight_tying`` are stored on the instance.
        When ``linear_start`` is truthy the network is built with no
        nonlinearity, otherwise with lasagne's softmax. ``**kwargs`` is
        accepted but not used in this method.
        """
        train_lines, test_lines = self.get_lines(train_file), self.get_lines(test_file)
        # Vocabulary and length limits are computed over train + test lines.
        lines = np.concatenate([train_lines, test_lines], axis=0)
        vocab, word_to_idx, idx_to_word, max_seqlen, max_sentlen = self.get_vocab(lines)

        self.data = {"train": {}, "test": {}}
        S_train, self.data["train"]["C"], self.data["train"]["Q"], self.data["train"]["Y"] = self.process_dataset(
            train_lines, word_to_idx, max_sentlen, offset=0
        )
        # The test set is processed with an offset equal to the number of
        # training sentences. NOTE(review): presumably so that its indices
        # point past the training rows of S -- confirm in process_dataset.
        S_test, self.data["test"]["C"], self.data["test"]["Q"], self.data["test"]["Y"] = self.process_dataset(
            test_lines, word_to_idx, max_sentlen, offset=len(S_train)
        )
        # Sentence matrix: one all-zero row, then train rows, then test rows.
        S = np.concatenate([np.zeros((1, max_sentlen), dtype=np.int32), S_train, S_test], axis=0)
        # Debug output: the first 10 entries of each test array.
        for i in range(10):
            for k in ["C", "Q", "Y"]:
                print k, self.data["test"][k][i]
        print "batch_size:", batch_size, "max_seqlen:", max_seqlen, "max_sentlen:", max_sentlen
        print "sentences:", S.shape
        print "vocab:", len(vocab), vocab
        # Debug output: shapes of every stored array, one line per split.
        for d in ["train", "test"]:
            print d,
            for k in ["C", "Q", "Y"]:
                print k, self.data[d][k].shape,
            print ""

        # Fit a binarizer on the vocabulary and replace ``vocab`` with the
        # class list the binarizer reports.
        lb = LabelBinarizer()
        lb.fit(list(vocab))
        vocab = lb.classes_.tolist()

        self.batch_size = batch_size
        self.max_seqlen = max_seqlen
        self.max_sentlen = max_sentlen
        self.embedding_size = embedding_size
        # One more class than there are vocabulary entries.
        self.num_classes = len(vocab) + 1
        self.vocab = vocab
        self.adj_weight_tying = adj_weight_tying
        self.num_hops = num_hops
        self.lb = lb
        self.init_lr = lr
        self.lr = self.init_lr
        self.max_norm = max_norm
        self.S = S
        self.idx_to_word = idx_to_word
        # linear_start disables the nonlinearity; otherwise softmax is used.
        self.nonlinearity = None if linear_start else lasagne.nonlinearities.softmax

        self.build_network(self.nonlinearity)
开发者ID:kastnerkyle,项目名称:MemN2N,代码行数:60,代码来源:main.py

示例12: fit

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
    def fit(self, Xt, yt, Xh, yh, callback=None):
        """Fit the model by running ``hoag_lbfgs``.

        ``(Xt, yt)`` feed the inner objective ``h`` and ``(Xh, yh)`` feed
        the outer objective ``g``. Labels are binarized with a
        ``LabelBinarizer`` fitted on ``yt``; a single-column result is
        expanded to two columns. If ``self.alpha0`` is ``None`` it is set to
        a zero vector of length ``n_classes * n_features``.

        Stores the first two entries of the optimizer result as ``coef_``
        and ``alpha_`` and returns ``self``.
        """
        binarizer = LabelBinarizer()
        binarizer.fit(yt)
        Yt_multi = binarizer.transform(yt)
        Yh_multi = binarizer.transform(yh)
        # Uniform sample weights for both data sets.
        train_weights = np.ones(Xt.shape[0])
        test_weights = np.ones(Xh.shape[0])

        if Yt_multi.shape[1] == 1:
            # Single indicator column: add the complementary column so that
            # both classes have a column of their own.
            Yt_multi = np.hstack([1 - Yt_multi, Yt_multi])
            Yh_multi = np.hstack([1 - Yh_multi, Yh_multi])
            print('warning: only two classes detected')

        n_classes = Yt_multi.shape[1]
        n_features = Xt.shape[1]
        n_params = n_classes * n_features

        if self.alpha0 is None:
            self.alpha0 = np.zeros(n_params)
        x0 = np.zeros(n_params)

        def h_func_grad(x, alpha):
            # First two outputs of the multinomial loss on the (Xt, yt)
            # data, with exp(alpha) passed as the penalty argument.
            result = _multinomial_loss_grad(
                x, Xt, Yt_multi, np.exp(alpha), train_weights)
            return result[:2]

        def h_hessian(x, alpha):
            # Second output of the grad/hess helper on the (Xt, yt) data.
            result = _multinomial_grad_hess(
                x, Xt, Yt_multi, np.exp(alpha), train_weights)
            return result[1]

        def g_func_grad(x, alpha):
            # Same loss helper on the (Xh, yh) data, with a zero vector
            # passed in place of exp(alpha).
            result = _multinomial_loss_grad(
                x, Xh, Yh_multi, np.zeros(alpha.size), test_weights)
            return result[:2]

        def h_crossed(x, alpha):
            # Diagonal sparse matrix with exp(alpha) * x on the diagonal.
            diagonal = np.exp(alpha) * x
            return sparse.dia_matrix(
                (diagonal, 0), shape=(n_params, n_params))

        opt = hoag_lbfgs(
            h_func_grad, h_hessian, h_crossed, g_func_grad, x0,
            callback=callback,
            tolerance_decrease=self.tolerance_decrease,
            lambda0=self.alpha0, maxiter=self.max_iter,
            verbose=self.verbose)

        self.coef_, self.alpha_ = opt[0], opt[1]
        return self
开发者ID:dongzhixiang,项目名称:hoag,代码行数:60,代码来源:multilogistic.py

示例13: load_dataset2

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
    def load_dataset2(self):
        """Return shuffled features with the first binarized label column.

        The data comes from ``snippet_reader.toNumpy()``, which must yield
        exactly four items; only the first two (features and labels) are
        used. Returns ``(X, y_bin)`` where ``y_bin`` is the first column of
        the binarized labels, or ``None`` when there is no such column.
        """
        dataset = snippet_reader.toNumpy()
        X, y, X_test, y_test = dataset
        X, y = shuffle(X, y)

        binarizer = LabelBinarizer()
        binarizer.fit(y)

        # Only the first column of the indicator matrix is ever returned.
        columns = iter(binarizer.transform(y).T)
        try:
            first_column = next(columns)
        except StopIteration:
            return None
        return X, first_column
开发者ID:pyongjoo,项目名称:ende,代码行数:11,代码来源:clf_calib.py

示例14: BaseClassifier

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
class BaseClassifier(BaseEstimator):
    """Base class for linear classifiers: prediction, probability
    estimates and label transformer setup."""

    def predict_proba(self, X):
        """Return class probabilities for a binary problem.

        Only available when exactly two classes were seen and ``self.loss``
        is "log" or "modified_huber"; otherwise ``NotImplementedError`` is
        raised. The result has shape ``(X.shape[0], 2)`` with the second
        column holding the estimate derived from the decision function and
        the first column its complement.
        """
        if len(self.classes_) != 2:
            raise NotImplementedError("predict_(log_)proba only supported"
                                      " for binary classification")

        loss = self.loss
        if loss not in ("log", "modified_huber"):
            raise NotImplementedError("predict_(log_)proba only supported when"
                                      " loss='log' or loss='modified_huber' "
                                      "(%s given)" % loss)

        scores = self.decision_function(X).ravel()
        if loss == "log":
            # Logistic sigmoid of the decision value.
            prob = 1.0 / (1.0 + np.exp(-scores))
        else:
            # Clip the decision value to [-1, 1], then map it onto [0, 1].
            prob = np.minimum(1, np.maximum(-1, scores))
            prob += 1
            prob /= 2

        proba = np.zeros((X.shape[0], 2), dtype=np.float64)
        proba[:, 0] = 1 - prob
        proba[:, 1] = prob
        return proba

    def _set_label_transformers(self, y, reencode=False, neg_label=-1):
        """Fit the label transformers on ``y``.

        With ``reencode`` the labels first go through a ``LabelEncoder``
        (stored as ``label_encoder_``); in both cases they are cast to
        int32. A ``LabelBinarizer`` using ``neg_label`` and a positive label
        of 1 is fitted and stored as ``label_binarizer_``.

        Returns ``(y, n_classes, n_vectors)`` where ``n_vectors`` is 1 when
        there are at most two classes and ``n_classes`` otherwise.
        """
        if not reencode:
            y = y.astype(np.int32)
        else:
            encoder = LabelEncoder()
            self.label_encoder_ = encoder
            y = encoder.fit_transform(y).astype(np.int32)

        binarizer = LabelBinarizer(neg_label=neg_label, pos_label=1)
        self.label_binarizer_ = binarizer
        binarizer.fit(y)
        self.classes_ = binarizer.classes_.astype(np.int32)

        n_classes = len(binarizer.classes_)
        if n_classes <= 2:
            n_vectors = 1
        else:
            n_vectors = n_classes

        return y, n_classes, n_vectors

    def decision_function(self, X):
        """Return ``X . coef_.T``, plus ``intercept_`` when it is set."""
        scores = safe_sparse_dot(X, self.coef_.T)
        if not hasattr(self, "intercept_"):
            return scores
        scores += self.intercept_
        return scores

    def predict(self, X):
        """Return predicted labels for ``X``.

        Decision values are mapped back through ``label_binarizer_`` and,
        when a ``label_encoder_`` was fitted, through it as well.
        """
        scores = self.decision_function(X)
        labels = self.label_binarizer_.inverse_transform(scores)

        if not hasattr(self, "label_encoder_"):
            return labels
        return self.label_encoder_.inverse_transform(labels)
开发者ID:aurora1625,项目名称:lightning,代码行数:58,代码来源:base.py

示例15: fit

# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
    def fit(self, X, y=None):
        """Fit one LabelBinarizer per column of the DataFrame ``X``.

        The fitted binarizers are stored in ``self.binarizers_`` as
        ``(column, binarizer)`` pairs in column order. ``y`` is ignored.
        Raises ``RuntimeError`` when ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise RuntimeError("Only works with DataFrames.  Got {}".format(X.__class__))

        # Bind the attribute first so it exists even if a later fit fails.
        self.binarizers_ = fitted = []
        for column in X.columns:
            column_binarizer = LabelBinarizer(self.neg_label, self.pos_label)
            column_binarizer.fit(X[column].values)
            fitted.append((column, column_binarizer))
        return self
开发者ID:hxu,项目名称:kaggle-allstate,代码行数:12,代码来源:classes.py


注:本文中的sklearn.preprocessing.LabelBinarizer.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。