

Python LabelBinarizer.transform Method Code Examples

This article collects typical usage examples of the sklearn.preprocessing.LabelBinarizer.transform method in Python. If you are wondering how exactly to use Python's LabelBinarizer.transform, how to call it, or what working examples look like, the curated code samples below should help. You can also explore further usage examples of the class this method belongs to, sklearn.preprocessing.LabelBinarizer.


The sections below show 15 code examples of the LabelBinarizer.transform method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
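
Before diving into the project-level examples, here is a minimal, self-contained sketch of what LabelBinarizer.transform produces (the toy labels are invented for illustration and are not taken from any of the projects below):

from sklearn.preprocessing import LabelBinarizer

# fit() learns the sorted class set; transform() maps labels to a one-hot
# (binary indicator) matrix with one column per class.
binarizer = LabelBinarizer()
binarizer.fit(["cat", "dog", "fish"])

print(binarizer.classes_)                       # ['cat' 'dog' 'fish']
one_hot = binarizer.transform(["dog", "dog", "fish"])
print(one_hot)
# [[0 1 0]
#  [0 1 0]
#  [0 0 1]]

# inverse_transform maps indicator rows back to the original labels.
print(binarizer.inverse_transform(one_hot))     # ['dog' 'dog' 'fish']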

Example 1: train

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def train():
    tr, va, te = read_dataset('../mnist.pkl.gz')
    binarizer = LabelBinarizer().fit(range(10))

    x = tf.placeholder(tf.float32, [None, 784])
    y = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)
    preds = model.inference(x, keep_prob)
    loss, total_loss = model.loss(preds, y)
    acc = model.evaluation(preds, y)
    # learning rate: 0.1
    train_op = model.training(total_loss, 0.1)

    init = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init)
    for i in xrange(10000):
        batch_xs, batch_ys = tr.next_batch(50)
        if i % 100 == 0:
            train_acc = acc.eval(feed_dict={
                x:batch_xs, y:binarizer.transform(batch_ys),
                keep_prob: 1.0}, session=sess)
            print "step: {0}, training accuracy {1}".format(i, train_acc)
            validation_accuracy = getAccuracy(x, y, keep_prob, binarizer, acc, va, sess)
            print("Validation accuracy : {0}".format(validation_accuracy))
        train_op.run(feed_dict={
            x:batch_xs, y:binarizer.transform(batch_ys), keep_prob: 0.5},
                     session=sess)

    test_accuracy = getAccuracy(x, y, keep_prob, binarizer, acc, te, sess)
    print("Test accuracy : ", test_accuracy)
Author: alexandresablayrolles, Project: ml-ps7-mnist, Lines: 33, Source: train.py

Example 2: Encoding

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def Encoding(data, general_matrix=None):
    encoder = LabelBinarizer()
    count = 0
    # encoding
    for i in range(data.shape[1]):
        if type(data[0, i]) == str:
            count += 1
            col = data[:, i]
            unique = np.unique(col if general_matrix is None else general_matrix[:, i])

            try:
                encoder.fit(unique)
            except:
                pass

            new_col = encoder.transform(col)

            # split at i and i + 1
            before, removed, after = np.hsplit(data, [i, i + 1])
            # concatenate
            data = np.concatenate((before, new_col, after), axis=1)
            before, removed, after = np.hsplit(general_matrix, [i, i + 1])
            general_matrix = np.concatenate((before, encoder.transform(general_matrix[:, i]), after), axis=1)

    print "count : %d" % count
    # return data
    return data
Author: nhanloukiala, Project: AppsOfDataAnalysis, Lines: 29, Source: cyber_attack_classification.py

Example 3: one_hot_encoding

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def one_hot_encoding(y_train, y_test):
    labelBinarizer = LabelBinarizer()
    labelBinarizer.fit(y_train)

    y_train_one_hot = labelBinarizer.transform(y_train)
    y_test_one_hot = labelBinarizer.transform(y_test)
    return y_train_one_hot, y_test_one_hot
Author: dzungcamlang, Project: Traffic-Signs, Lines: 9, Source: util.py
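
A hedged usage sketch for the helper above (the label arrays are invented for illustration; LabelBinarizer is imported as shown in the comments at the top of the example):

import numpy as np

y_train = np.array([0, 1, 2, 1, 0])
y_test = np.array([2, 0, 1])

# The binarizer is fitted on y_train only, so both outputs share the same
# column order: one indicator column per class seen in y_train.
y_train_oh, y_test_oh = one_hot_encoding(y_train, y_test)
print(y_train_oh.shape)   # (5, 3)
print(y_test_oh)
# [[0 0 1]
#  [1 0 0]
#  [0 1 0]]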

Example 4: NN_Classifier

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
class NN_Classifier(NNBase):

  def __init__(self,layers = [], lr=0.01, epochs=None, noisy=None, verbose=False):
    
    super(NN_Classifier, self).__init__(layers=layers, lr=lr, epochs=epochs, noisy=noisy, verbose=verbose)
    self.type = 'C'
    self.error_func = CrossEntropyError
    self.accuracy_score = AccuracyScore
    self.label_binarizer = LabelBinarizer()

  def predict(self, X):
    predictions = []
    for el in X:
      current_prediction = NNBase._predict(self, row(el))
      predictions.append(current_prediction)
    predictions = np.vstack(predictions)
    current_results = coalesce(predictions)
    return self.label_binarizer.inverse_transform(current_results)

  def predict_proba(self, X):
    predictions = []
    for el in X:
      current_prediction = NNBase._predict(self, row(el))
      predictions.append(current_prediction)
    predictions = np.vstack(predictions)
    return predictions

  def fit(self, X, T):
    T_impl = self.label_binarizer.fit_transform(T)
    if not self.epochs:
      self.epochs = 1

    for num in xrange(self.epochs):
      if self.verbose:
        print "Epoch: %d" % num
      for i in xrange(len(X)):
        NNBase._update(self, row(X[i]), row(T_impl[i]))

  def error(self, X, T):
    T_impl = self.label_binarizer.transform(T)
    Y = self.predict_proba(X)
    return self.error_func.func(Y, T_impl)

  def score(self, X, T):
    Y = self.predict(X)
    return self.accuracy_score.func(Y,T)

  def analytical_gradient(self, X, T):
    T_impl = self.label_binarizer.transform(T)
    return NNBase._analytical_gradient(self, X, T_impl)

  def numerical_gradient(self, X, T):
    T_impl = self.label_binarizer.transform(T)
    return NNBase._numerical_gradient(self, X, T_impl)
Author: dcrescim, Project: NN, Lines: 56, Source: Neural.py

Example 5: partb

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def partb():
    def load(file_name):
        file = np.load(file_name)
        X_train =file['X_train'].T
        y_train =file['y_train']
        X_test =file['X_test'].T
        y_test =file['y_test']
        X_cv =file['X_cv'].T
        y_cv =file['y_cv']

        return X_train,y_train,X_cv,y_cv,X_test,y_test

    train_ = [0,0]
    test_ = [0,0]
    overall = []
    for i in range(14):

        X_train,y_train,X_cv,y_cv,X_test,y_test = load('pofa{}.npz'.format(i))

        from sklearn.preprocessing import LabelBinarizer
        binarizer = LabelBinarizer()
        binarizer.fit(y_train)
        Y_train = binarizer.transform(y_train).T
        Y_cv = binarizer.transform(y_cv).T


#nn.forward(X)
#nn.backprop(X,Y,gradient_check=True)

        print(X_train.shape[0], Y_train.shape[0])
        nn = NeuralNetwork([X_train.shape[0],30,Y_train.shape[0]], functions=[sigmoid,softmax], derivatives=[derivative_sigmoid])

        nn.fit(X_train,Y_train,eta=0.01,momentum=0.5,minibatch=16,regularizer=0.15,max_iter=200,gradient_check=False,cv = (X_cv,Y_cv),graphs=False, lbfgs=False)

        output = nn.forward(X_train)

        y_train_output = binarizer.inverse_transform(output.T)
        y_test_output = binarizer.inverse_transform(nn.forward(X_test).T)
        print("Iteration: ",i)
        print((y_train_output==y_train).mean())
        print((y_test_output ==y_test).mean())

        overall.append((y_test == y_test_output).mean())

        train_[0] += (y_train_output==y_train).sum()
        train_[1] += y_train.shape[0]
        test_[0] += (y_test_output==y_test).sum()
        test_[1] += y_test.shape[0]

    print("Average train accuracy: ", train_[0]/train_[1],"Average test accuracy: ",test_[0]/test_[1])
    print(train_,test_)
    overall = np.array(overall)
    print(overall.mean())
Author: sokolov-alex, Project: Neural-Networks, Lines: 55, Source: multilayer_network.py

Example 6: load_dataset

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
    def load_dataset(self):
        X, y, X_test, y_test = dataset = snippet_reader.toNumpy()

        lb = LabelBinarizer()
        lb.fit(y)

        for y_bin in lb.transform(y).T:
            y = y_bin
            break

        for y_bin in lb.transform(y_test).T:
            y_test = y_bin
            break

        return X, y, X_test, y_test
Author: pyongjoo, Project: ende, Lines: 17, Source: clf_calib.py
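
The for ... break idiom above keeps only the first column of the indicator matrix returned by lb.transform(...), reducing the labels to a single binary target (with more than two classes this is a one-vs-rest indicator for the first class). A hedged, equivalent formulation, assuming the same fitted lb:

y = lb.transform(y)[:, 0]            # first indicator column only
y_test = lb.transform(y_test)[:, 0]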

Example 7: bio_classification_report

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def bio_classification_report(y_true, y_pred):
    """
    Classification report for a list of BIO-encoded sequences.
    It computes token-level metrics and discards "O" labels.

    Note that it requires scikit-learn 0.15+ (or a version from github master)
    to calculate averages properly!

    Note: This function was copied from
    http://nbviewer.ipython.org/github/tpeng/python-crfsuite/blob/master/examples/CoNLL%202002.ipynb

    Args:
        y_true: True labels, list of strings
        y_pred: Predicted labels, list of strings
    Returns:
        classification report as string
    """
    lbin = LabelBinarizer()
    y_true_combined = lbin.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lbin.transform(list(chain.from_iterable(y_pred)))

    #tagset = set(lbin.classes_) - {NO_NE_LABEL}
    tagset = set(lbin.classes_)
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lbin.classes_)}

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )
Author: aleju, Project: ner-crf, Lines: 34, Source: test.py
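
A hedged call sketch with invented BIO sequences; it assumes the example's own imports (itertools.chain, sklearn.preprocessing.LabelBinarizer, sklearn.metrics.classification_report) are in scope:

y_true = [["B-PER", "I-PER", "O"], ["B-LOC", "O"]]
y_pred = [["B-PER", "O", "O"], ["B-LOC", "O"]]

# The sequences are flattened token by token before binarization, so the
# report gives per-tag precision/recall/F1 at the token level.
print(bio_classification_report(y_true, y_pred))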

Example 8: test_normalize_option_multilabel_classification

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def test_normalize_option_multilabel_classification():
    # Test in the multilabel case
    n_classes = 4
    n_samples = 100
    _, y_true = make_multilabel_classification(n_features=1, n_classes=n_classes, random_state=0, n_samples=n_samples)
    _, y_pred = make_multilabel_classification(n_features=1, n_classes=n_classes, random_state=1, n_samples=n_samples)

    # Be sure to have at least one empty label
    y_true += ([],)
    y_pred += ([],)
    n_samples += 1

    lb = LabelBinarizer().fit([range(n_classes)])
    y_true_binary_indicator = lb.transform(y_true)
    y_pred_binary_indicator = lb.transform(y_pred)

    for name, metrics in METRICS_WITH_NORMALIZE_OPTION.items():
        # List of list of labels
        measure = metrics(y_true, y_pred, normalize=True)
        assert_greater(measure, 0, msg="We failed to test correctly the normalize option")
        assert_almost_equal(
            metrics(y_true, y_pred, normalize=False) / n_samples, measure, err_msg="Failed with %s" % name
        )

        # Indicator matrix format
        measure = metrics(y_true_binary_indicator, y_pred_binary_indicator, normalize=True)
        assert_greater(measure, 0, msg="We failed to test correctly the normalize option")
        assert_almost_equal(
            metrics(y_true_binary_indicator, y_pred_binary_indicator, normalize=False) / n_samples,
            measure,
            err_msg="Failed with %s" % name,
        )
Author: Clstone, Project: scikit-learn, Lines: 34, Source: test_metrics.py

Example 9: get_abalone19

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def get_abalone19():
    """Loads abalone dataset, maps gender feature to binary features, adds
    new label to create abalone19 imbalanced binary classification dataset."""
    raw_data = pd.read_csv(ABALONE_FILE, sep=',')
    genders = list(raw_data.ix[:, 'gender'])
    cts_data = raw_data.drop(labels='gender', axis=1)

    # initialize & fit preprocesser
    lbz = LabelBinarizer()
    lbz.fit(genders)

    # encode categorical var
    encoded_genders = pd.DataFrame(lbz.transform(genders))
    encoded_genders.columns = ['gender_' + k for k in lbz.classes_]

    # recombine encoded data & return
    new_data = pd.concat(objs=[encoded_genders, cts_data], axis=1)
    new_data['label'] = raw_data['rings'].map(
        lambda k: 1 if k > 10 else 0)               # binary clf task
    new_data = new_data.drop('rings', axis=1)

    # standardize cts features
    if STANDARDIZE:
        for col in new_data.ix[:, 3:-1]:
            mean = new_data[col].mean()
            std = new_data[col].std()
            new_data[col] = new_data[col].map(lambda k: (k - mean) / float(std))

    pos_recs = new_data['label'].sum()
    print 'total pos class pct = {} %\n'.format(
        round(100 * pos_recs / float(len(new_data)), 3))

    return new_data
Author: Adusei, Project: science, Lines: 35, Source: grid_cv.py
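
One hedged caveat about the gender encoding above: LabelBinarizer.transform yields one indicator column per class only when there are three or more classes (the abalone data has three sex categories), whereas with exactly two classes it returns a single 0/1 column, so the 'gender_' + classes_ column naming would no longer line up. A minimal illustration with invented labels:

from sklearn.preprocessing import LabelBinarizer

three = LabelBinarizer().fit(["F", "I", "M"])
print(three.transform(["M", "F"]).shape)   # (2, 3): one column per class

two = LabelBinarizer().fit(["F", "M"])
print(two.transform(["M", "F"]).shape)     # (2, 1): single indicator column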

Example 10: our_classification_report

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def our_classification_report(y_true, y_pred):
    """
    Classification report for a list of BIO-encoded sequences.
    It computes token-level metrics and discards "O" labels.
    
    Note that it requires scikit-learn 0.15+ (or a version from github master)
    to calculate averages properly!
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    # print "Y_true combined", y_true_combined
    # print "Y_pred combined", y_pred_combined
        
    tagset = set(lb.classes_)
    # print "tagset: ", tagset
    tagset = sorted(tagset)
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}
    
    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels = [class_indices[cls] for cls in tagset],
        target_names = tagset
    )
Author: shidanxu, Project: corruption, Lines: 28, Source: crf_rec.py

Example 11: CategoricalToNumerical

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
class CategoricalToNumerical(object):

    def __init__(self, dimensionality_reducer=None, verify=True):
        """Takes in a dimensionality reducer in order to convert categorical features into numerical.
        """
        if dimensionality_reducer is None:
            dimensionality_reducer = RandomizedPCA(1)
        self.dimensionality_reducer = dimensionality_reducer
        self.verify = verify
        self.binarizer = LabelBinarizer()

    def fit(self, X, y=None):
        self._verify(X, self.verify)
        binarized = self.binarizer.fit_transform(X)
        self.dimensionality_reducer.fit(binarized)

    def transform(self, X):
        self._verify(X, False)
        binarized = self.binarizer.transform(X)
        result = self.dimensionality_reducer.transform(binarized).flatten()
        assert X.shape == result.shape
        return result

    def fit_transform(self, X, y=None):
        self.fit(X)
        return self.transform(X)

    def _verify(self, X, verify):
        if verify:
            assert is_categorical(X)
        else:
            assert isinstance(X, np.ndarray)
            assert len(X.shape) == 1
Author: Diviyan-Kalainathan, Project: causal-humans, Lines: 36, Source: convert.py
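
A hedged usage sketch for the wrapper above. RandomizedPCA has since been removed from scikit-learn, so the sketch passes PCA(svd_solver="randomized") explicitly; verify=False skips the project-specific is_categorical check, and the colour labels are invented for illustration:

import numpy as np
from sklearn.decomposition import PCA

colors = np.array(["red", "green", "blue", "green"])
converter = CategoricalToNumerical(
    dimensionality_reducer=PCA(n_components=1, svd_solver="randomized"),
    verify=False)

# One-hot encode the categories, then project them down to a single
# numerical component with the same length as the input.
encoded = converter.fit_transform(colors)
print(encoded.shape)   # (4,)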

Example 12: bio_classification_report

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def bio_classification_report(y_true, y_pred):
    """Evaluates entity extraction accuracy.

    Classification report for a list of BIO-encoded sequences.
    It computes token-level metrics and discards "O" labels.
    Note that it requires scikit-learn 0.15+ (or a version from github master)
    to calculate averages properly!
    Taken from https://github.com/scrapinghub/python-crfsuite/blob/master/examples/CoNLL%202002.ipynb
    """
    from sklearn.preprocessing import LabelBinarizer
    from itertools import chain
    from sklearn.metrics import classification_report

    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    return classification_report(
            y_true_combined,
            y_pred_combined,
            labels=[class_indices[cls] for cls in tagset],
            target_names=tagset,
    )
Author: dhpollack, Project: rasa_nlu, Lines: 29, Source: crf_entity_extractor.py

Example 13: report

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
def report(test_y, pred_y):
    lb = LabelBinarizer()
    test_y_combined = lb.fit_transform(list(chain.from_iterable(test_y)))
    pred_y_combined = lb.transform(list(chain.from_iterable(pred_y)))
    tagset = sorted(set(lb.classes_))
    class_indices = {cls: idx for idx, cls in enumerate(tagset)}
    print(classification_report(test_y_combined, pred_y_combined, labels=[class_indices[cls] for cls in tagset], target_names=tagset))
Author: theeluwin, Project: sscc-1st, Lines: 9, Source: sscc.py

Example 14: bio_classification_report

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
    def bio_classification_report(y_true, y_pred):
        """
        Classification report for a list of BIO-encoded sequences.
        It computes token-level metrics and discards "O" labels.

        Note that it requires scikit-learn 0.15+ (or a version from
        github master) to calculate averages properly!
        """
        lb = LabelBinarizer()
        y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
        y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

        tagset = set(lb.classes_) - {'O'}
        tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
        class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

        labs = [class_indices[cls] for cls in tagset]

        return((precision_recall_fscore_support(y_true_combined,
                                                y_pred_combined,
                                                labels=labs,
                                                average=None,
                                                sample_weight=None)),
               (classification_report(
                   y_true_combined,
                   y_pred_combined,
                   labels=[class_indices[cls] for cls in tagset],
                   target_names=tagset,
               )), labs)
Author: henchc, Project: CLFL_2016, Lines: 31, Source: CLFL_Brill.py

Example 15: BusinessCategoriesFeature

# Required import: from sklearn.preprocessing import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.LabelBinarizer import transform [as alias]
class BusinessCategoriesFeature(BaseEstimator):
	"""
	WARNING!!!
	Works only with a modified version of LabelBinarizer.

	A binarization of the reviews' business categories.
	"""

	def __init__(self, data=None):
		self.data = data

	def __create_labels_list(self, review_list):
		labels = []
		for review in review_list:
			business = self.data.get_business_for_review(review)
			labels.append(business['categories'])
		return labels

	def fit(self, X, y):
		self.binarizer = LabelBinarizer()
		labels = self.__create_labels_list(X)
		self.binarizer.fit(labels)
		return self

	def transform(self, X):
		labels = self.__create_labels_list(X)
		binarized_labels = self.binarizer.transform(labels)
		return binarized_labels.astype(float)
Author: MihaiDamian, Project: YelpRC, Lines: 30, Source: features.py


Note: The sklearn.preprocessing.LabelBinarizer.transform examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors; please consult each project's license before redistributing or reusing the code, and do not republish without permission.