Python preprocessing.label_binarize函数代码示例

本文整理汇总了Python中sklearn.preprocessing.label_binarize函数的典型用法代码示例。如果您正苦于以下问题：Python label_binarize函数的具体用法？Python label_binarize怎么用？Python label_binarize使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了label_binarize函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: PersonWorker

def PersonWorker(person):
    """Fit a LinearSVC for one person and report its train/test AUC.

    The splits come from ``DL.loadPersonEpochDimRedu``; predictions on each
    split are binarized over classes 0-3 and scored with ``UT.auc``.

    Args:
        person: identifier forwarded to ``DL.loadPersonEpochDimRedu``.

    Returns:
        list: ``[train_auc, test_auc]``.
    """
    print('starting on person: ', str(person))

    # data = 40 videos x 32 alpha (csp channel)
    X_train, y_train, X_test, y_test = DL.loadPersonEpochDimRedu(
        person=person,
        featureFunc=featureFunc,
    )

    # http://stackoverflow.com/questions/26963454/lda-ignoring-n-components => only 1 feature :(
    print(np.shape(X_train))

    classifier = LinearSVC()
    classifier.fit(X_train, y_train)

    def binarized_auc(features, targets):
        # Predict, binarize the predictions over the four classes, then score.
        predicted = classifier.predict(features)
        return UT.auc(label_binarize(predicted, classes=[0, 1, 2, 3]), targets)

    train_auc = binarized_auc(X_train, y_train)
    test_auc = binarized_auc(X_test, y_test)

    print('person: ', person,
        ' - train auc: ', str(train_auc),
        ' - test auc: ' , str(test_auc)
    )

    return [train_auc, test_auc]

开发者ID:AndreasDL，项目名称:ir-thesis，代码行数:29，代码来源:promisingpaper.py

示例2: calculate_roc

def calculate_roc(truth, predictions):
    """Compute per-class, micro- and macro-averaged ROC curves and AUC values.

    The last column of ``truth`` and of ``predictions`` is cast to int and
    binarized over ``np.arange(n_classes)``; ``n_classes`` is read from the
    enclosing module.

    Args:
        truth: object supporting ``.iloc[:, -1]`` that holds the true labels.
        predictions: object supporting ``.iloc[:, -1]`` that holds the
            predicted labels.

    Returns:
        tuple: ``(fpr, tpr, roc_auc)`` dicts keyed by class index plus the
        keys ``"micro"`` and ``"macro"``.
    """
    class_ids = np.arange(n_classes)
    # `classes` is passed by keyword: newer scikit-learn releases reject it
    # as a positional argument, and the keyword form also works on old ones.
    lb_truth = label_binarize(truth.iloc[:, -1].astype(int), classes=class_ids)
    lb_prediction = label_binarize(predictions.iloc[:, -1].astype(int), classes=class_ids)

    # Compute ROC curve and ROC area for each class.
    # Iterate over n_classes (not len(letter_set)) so this loop fills exactly
    # the keys that the macro-average below reads back.
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(lb_truth[:, i], lb_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(lb_truth.ravel(), lb_prediction.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    return fpr, tpr, roc_auc

开发者ID:chriszhenghaochen，项目名称:Deep-Neural-Network-Study，代码行数:32，代码来源:sk.py

示例3: fit

    def fit(self, X, y):
        """Fit the network weights on (X, y) using the configured swarms.

        A share of the data (``self.validation_size``) is held out. In every
        generation each particle of each swarm is evaluated and updated, and
        the reconstructed weights with the lowest validation cost so far are
        kept. Training stops once the linear trend of the best score over the
        last ``self.window`` generations is no longer decreasing, or the best
        score falls below 1e-3.

        Returns:
            self, with ``self.W`` and ``self.num_generations`` set.
        """
        self.init_params(X, y)
        self.paths = self.construct_paths()
        num = len(self.paths[0])
        # For every path position, the sorted distinct non-None values found there.
        swarm_paths = [sorted(list(set([s[i] for s in self.paths if s[i] is not None]))) for i in xrange(num)]
        W = self.init_network()
        # Swarms grouped by path position j, then by the path value i at that position.
        self.W_swarms = [[[s for s in self.swarms if s.path[j] == i] for i in swarm_paths[j]] for j in xrange(num)]

        X_train, X_valid, y_train, y_valid = cv.train_test_split(X, y, test_size=self.validation_size,
                                                                 random_state=self.random_state)

        # binarize true values
        if len(self.classes_) > 2:
            y_train = label_binarize(y_train, self.classes_)
            y_valid = label_binarize(y_valid, self.classes_)
        else:
            # NOTE(review): with at most two classes the binarized labels are passed
            # through self.mlb as well -- presumably to get one column per class; confirm.
            y_train = self.mlb.fit_transform(label_binarize(y_train, self.classes_))
            y_valid = self.mlb.fit_transform(label_binarize(y_valid, self.classes_))

        j = 0
        # Seed the score history with a decreasing ramp starting at 1e3, so the
        # regression slope checked below is negative until real scores replace it.
        tmp = [1e3 - float(x * 1e3)/self.window for x in xrange(self.window)]
        window = deque(tmp, maxlen=(self.window * 5))
        self.num_evals = 0
        best_score = np.inf

        if self.verbose:
            print "Fitting network {0}-{1}-{2} with {3} paths".format(self.n_in, self.n_hidden, self.n_out, len(self.swarms))

        while True:
            j += 1
            for s in self.swarms:
                for p_index in xrange(self.num_particles):
                    self.num_evals += 1

                    # evaluate each swarm
                    # (the returned value is overwritten below before it is read)
                    score = s.evaluate(W, X_train, y_train, p_index)

                    # reconstruct gvn
                    Wn = self.reconstruct_gvn(W)

                    # update
                    s.update(self.w, self.c1, self.c2, p_index)

                    # evaluate gvn
                    y_pred = self.forward(Wn, X_valid)
                    score = self.cost(y_valid, y_pred)
                    if score < best_score:
                        # keep a shallow copy of the best weights seen so far
                        W = Wn[:]
                        best_score = score

            window.append(best_score)
            # Slope (r[0]) of a linear fit over the last self.window best scores.
            r = linregress(range(self.window), list(window)[-self.window:])
            if self.verbose:
                print j, best_score

            # Stop when the score trend is flat or rising, or the score is small enough.
            if r[0] >= 0 or best_score < 1e-3:
                self.W = W
                self.num_generations = j
                return self

开发者ID:shehzadqureshi，项目名称:NeuralNetDynamicOSI，代码行数:59，代码来源:NeuralNetBasicOSI.py

示例4: test_sensitivity_specificity_error_multilabels

def test_sensitivity_specificity_error_multilabels():
    """sensitivity_score must raise ValueError when given binarized label matrices."""
    label_space = np.arange(5)
    truth_matrix = label_binarize([1, 3, 3, 2], classes=label_space)
    predicted_matrix = label_binarize([1, 1, 3, 2], classes=label_space)

    with pytest.raises(ValueError):
        sensitivity_score(truth_matrix, predicted_matrix)

开发者ID:chkoar，项目名称:imbalanced-learn，代码行数:8，代码来源:test_classification.py

示例5: __init__

 def __init__(self, file_path, number_features):
     """Load the dataset and store a 60% train split plus the remaining test data.

     Columns ``0..number_features`` of the loaded array are used as inputs and
     column ``number_features + 1`` as the label. Labels are binarized over
     the classes 0, 1, 2 and stored transposed.
     """
     def one_hot_transposed(labels):
         # Binarized labels, transposed so the class axis comes first.
         return np.transpose(label_binarize(labels, classes=[0, 1, 2]))

     data = self.load_dataset(file_path, number_features)
     label_column = number_features + 1
     inputs = data[:, 0:label_column]
     targets = data[:, label_column]
     self.xs, self.xs_test, train_targets, test_targets = train_test_split(
         inputs, targets, train_size=0.6)
     self.ys = one_hot_transposed(train_targets)
     self.ys_test = one_hot_transposed(test_targets)
     # Number of rows in the train and test splits.
     self.m = self.xs.shape[0]
     self.test_set_size = self.xs_test.shape[0]

开发者ID:emersonloureiro，项目名称:tensorflow-examples，代码行数:9，代码来源:input.py

示例6: getROCScore

def getROCScore(X_train, y_train, X_test, y_test, classifierName, depth=None, Cvalue=1, alphaValue=0.0):
    """Fit a one-vs-rest classifier and return its micro/macro ROC AUC.

    Targets are binarized against a fixed list of label values, the chosen
    base estimator is wrapped in ``OneVsRestClassifier`` and fitted, and ROC
    curves are computed from the hard predictions on ``X_test``.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: test features and labels.
        classifierName: one of ``'DecisionTree'``, ``'LogisticRegression'``,
            ``'LinearSVC'``, ``'NaiveBayes'`` or ``'Bagging'``.
        depth: ``max_depth`` for the decision tree.
        Cvalue: ``C`` for logistic regression / linear SVC.
        alphaValue: ``alpha`` for multinomial naive Bayes.

    Returns:
        tuple: ``(micro_auc, macro_auc, fitted_classifier)``.

    Raises:
        ValueError: if ``classifierName`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    # Label values the targets are binarized against; one output column each.
    class_labels = [3, 4, 5, 6, 7, 8, 9, 12, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 999]

    # Binarize the output
    y_train = label_binarize(y_train, classes=class_labels)
    n_classes = y_train.shape[1]
    y_test = label_binarize(y_test, classes=class_labels)

    # Learn to predict each class against the other
    if classifierName == 'DecisionTree':
        base = tree.DecisionTreeClassifier(max_depth=depth)
    elif classifierName == 'LogisticRegression':
        base = linear_model.LogisticRegression(C=Cvalue)
    elif classifierName == 'LinearSVC':
        base = LinearSVC(C=Cvalue)
    elif classifierName == 'NaiveBayes':
        base = MultinomialNB(alpha=alphaValue)
    elif classifierName == 'Bagging':
        # Passed positionally: the keyword was renamed from `base_estimator`
        # to `estimator` in newer scikit-learn, but it is the first parameter
        # in both spellings.
        base = BaggingClassifier(tree.DecisionTreeClassifier())
    else:
        raise ValueError("Unknown classifierName: %r" % (classifierName,))
    classifier = OneVsRestClassifier(base)

    # NOTE(review): the ROC curves are built from hard predictions (`predict`),
    # not from continuous scores -- confirm this is intended.
    y_score = classifier.fit(X_train, y_train).predict(X_test)
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Compute macro-average ROC curve and ROC area

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    return (roc_auc["micro"], roc_auc["macro"], classifier)

开发者ID:biprade，项目名称:Applied_Machine_Learning，代码行数:55，代码来源:ROC_Curve.py

示例7: xval

def xval(clf, x, y, train_index, test_index):
    """Run one cross-validation fold and return its MSE, accuracy and eval count.

    Args:
        clf: classifier exposing ``fit``, ``predict_proba``, ``classes_`` and
            ``get_num_evals``.
        x, y: full feature and label arrays, indexable by the index arrays.
        train_index, test_index: indices selecting the train and test rows.

    Returns:
        tuple: ``(mse, acc, evals)`` -- mean squared error between the
        binarized true labels and the predicted probabilities, accuracy of the
        arg-max prediction against ``y_test``, and ``clf.get_num_evals()``.
    """
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(x_train, y_train)
    y_pred = clf.predict_proba(x_test)

    # `classes` is passed by keyword: newer scikit-learn releases reject it as
    # a positional argument, and the keyword form also works on older ones.
    y_true_bin = label_binarize(y_test, classes=clf.classes_)
    if len(clf.classes_) <= 2:
        # NOTE(review): with at most two classes the binarized labels are run
        # through MultiLabelBinarizer too -- presumably to get one column per
        # class so the shape lines up with predict_proba; confirm.
        y_true_bin = MultiLabelBinarizer().fit_transform(y_true_bin)
    mse = mean_squared_error(y_true_bin, y_pred)

    acc = accuracy_score(y_test, y_pred.argmax(axis=1))
    evals = clf.get_num_evals()
    return mse, acc, evals

开发者ID:shehzadqureshi，项目名称:NeuralNetDynamicOSI，代码行数:13，代码来源:test_dynamic_all_cv.py

示例8: gensim_classifier

def gensim_classifier():
  """Train and evaluate a Word2Vec + one-vs-rest LinearSVC tweet classifier.

  Labelled tweets are tokenised on whitespace, a Word2Vec model is trained on
  them, averaged feature vectors are built for the train/test parts, and a
  one-vs-rest LinearSVC is fitted. Predictions go to
  ``data/w2v_linsvc.csv``, the fitted classifier to ``model/w2v_linsvc.pkl``,
  and the binarized results are passed to ``evaluate``.
  """
  logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
  label_list = get_labels()
  tweet_list = get_labelled_tweets()

  # split all sentences to list of words
  sentences = [tweet.split() for tweet in tweet_list]

  # parameters for model
  num_features = 100
  min_word_count = 1
  num_workers = 4
  context = 2
  downsampling = 1e-3

  # Initialize and train the model
  w2v_model = Word2Vec(sentences, workers=num_workers,
                       size=num_features, min_count=min_word_count,
                       window=context, sample=downsampling, seed=1)

  index_value, train_set, test_set = train_test_split(0.80, sentences)
  train_vector = getAvgFeatureVecs(train_set, w2v_model, num_features)
  test_vector = getAvgFeatureVecs(test_set, w2v_model, num_features)
  # NOTE(review): the imputer is fitted separately on the test vectors, so
  # test statistics are used to fill test gaps -- confirm this is intended.
  train_vector = Imputer().fit_transform(train_vector)
  test_vector = Imputer().fit_transform(test_vector)

  # train model and predict
  model = LinearSVC()
  classifier_fitted = OneVsRestClassifier(model).fit(train_vector, label_list[:index_value])
  result = classifier_fitted.predict(test_vector)

  # output result to csv
  create_directory('data')
  result.tofile("data/w2v_linsvc.csv", sep=',')

  # store the model to mmap-able files
  # Persist the fitted one-vs-rest classifier: OneVsRestClassifier fits clones
  # of `model`, so `model` itself is never trained and would be saved unfitted.
  create_directory('model')
  joblib.dump(classifier_fitted, 'model/%s.pkl' % 'w2v_linsvc')

  # evaluation
  label_score = classifier_fitted.decision_function(test_vector)
  binarise_result = label_binarize(result, classes=class_list)
  binarise_labels = label_binarize(label_list, classes=class_list)

  evaluate(binarise_result, binarise_labels[index_value:], label_score, 'w2v_linsvc')

开发者ID:Andyccs，项目名称:sport-news-retrieval，代码行数:48，代码来源:gensim_classifier.py

示例9: PR_multi_class

def PR_multi_class(data_train, data_test, data_test_vectors):
    """Fit a one-vs-rest linear SVC and plot a precision-recall curve per class.

    The classifier is trained on half of the module-level
    ``data_train_vectors`` (with targets from ``data_train``) and scored on
    ``data_test_vectors`` against the targets in ``data_test``. Targets are
    binarized over the classes 0, 1, 2.

    Returns:
        int: always 0.
    """
    class_ids = [0, 1, 2]

    # Binarize the training targets: one indicator column per class
    y_train_label = label_binarize(data_train.target, classes=class_ids)
    n_classes = y_train_label.shape[1]

    rng = np.random.RandomState(0)

    # Shuffle and split; only the training half is used for fitting
    X_train, _X_unused, y_train, _y_unused = train_test_split(
        data_train_vectors, y_train_label, test_size=.5, random_state=rng)

    # One binary linear SVC per class
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=rng))
    classifier.fit(X_train, y_train)
    y_pred_score = classifier.decision_function(data_test_vectors)

    y_test_label = label_binarize(data_test.target, classes=class_ids)

    # Precision-recall curve and average precision for every class
    precision, recall, average_precision = {}, {}, {}
    for cls in range(n_classes):
        truth_col = y_test_label[:, cls]
        score_col = y_pred_score[:, cls]
        precision[cls], recall[cls], _ = precision_recall_curve(truth_col, score_col)
        average_precision[cls] = average_precision_score(truth_col, score_col)

    # Micro-averaged precision-recall curve (computed, but not plotted below)
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        y_test_label.ravel(), y_pred_score.ravel())
    average_precision["micro"] = average_precision_score(
        y_test_label, y_pred_score, average="micro")

    # Plot the precision-recall curve of each class
    plt.clf()
    for cls in range(n_classes):
        curve_label = 'PR curve of class {0} (area = {1:0.2f})'.format(cls, average_precision[cls])
        plt.plot(recall[cls], precision[cls], label=curve_label)

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall curve of multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return 0

开发者ID:RaoUmer，项目名称:docs_classification，代码行数:48，代码来源:ml_docs_classification_2.py

示例10: multiclass_AUC

def multiclass_AUC(clf, X, Y):
    """Print the micro-averaged ROC AUC of ``clf`` used one-vs-rest on (X, Y).

    The labels are binarized, the data is split 50/50 with a fixed seed, a
    ``OneVsRestClassifier`` around ``clf`` is fitted on the first half, and
    ROC curves are computed from its hard predictions on the second half.

    Args:
        clf: base estimator to wrap in ``OneVsRestClassifier``.
        X: features (converted with ``np.array``).
        Y: labels (converted with ``np.array``).
    """
    # Binarize the output
    X, Y = np.array(X), np.array(Y)
    # Sort the distinct labels so the column order does not depend on set
    # iteration order.
    Y = label_binarize(Y, classes=sorted(set(Y)))
    n_classes = Y.shape[1]

    # shuffle and split training and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5,
                                                        random_state=0)
    # Learn to predict each class against the other
    classifier = OneVsRestClassifier(clf)
    Y_score = classifier.fit(X_train, Y_train).predict(X_test)

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(Y_test[:, i], Y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(Y_test.ravel(), Y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    # Single-argument call form: prints the same text on Python 2 and 3.
    print("AUC for multiclass {}: {}".format(clf.__class__.__name__, roc_auc["micro"]))

开发者ID:darylchang，项目名称:ParkinsonsGait，代码行数:25，代码来源:classify.py

示例11: transform

 def transform(self, X, y=None):
     """Replace every label in X and return the result as a single column.

     ``self._replace_label`` is applied element-wise and the result reshaped
     to ``(len(X), 1)``. When ``self.binarize`` is set, that column is
     additionally binarized over ``self.labels``. ``y`` is ignored.
     """
     replaced = np.vectorize(self._replace_label)(X)
     column = replaced.reshape(len(X), 1)
     if not self.binarize:
         return column
     return label_binarize(column, classes=self.labels)

开发者ID:Sandy4321，项目名称:featkit，代码行数:7，代码来源:categorical.py

示例12: trainModel

def trainModel(data):
    """Build, compile and train a two-hidden-layer softmax classifier.

    Column 0 of ``data`` holds the class label (binarized over 39 classes)
    and the remaining columns are the input features.

    Args:
        data: 2-D array; ``data[:, 0]`` are labels, ``data[:, 1:]`` features.

    Returns:
        The trained ``Sequential`` model.
    """
    n_classes = 39  # size of the softmax layer and of the one-hot targets

    model = Sequential()
    model.add(Dense(400, input_dim=(data.shape[1] - 1), init="uniform"))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))
    model.add(Dense(500, init="uniform"))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, init="uniform"))
    model.add(Activation("softmax"))

    cb = EarlyStopping(monitor="val_loss", patience=3, verbose=0, mode="auto")

    # `classes` is passed by keyword: newer scikit-learn releases reject it
    # as a positional argument.
    output = label_binarize(data[0:, 0], classes=list(range(n_classes)))
    print (output.shape)

    # Earlier runs (previously left here as commented-out code) tried
    # lr=0.1 and lr=0.01 with different beta/validation_split settings.
    # The first-moment decay parameter is `beta_1` (digit one); the original
    # call spelled it `beta_l` (letter l), which is not an Adam argument.
    optim = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
    model.compile(loss="categorical_crossentropy", optimizer=optim)
    model.fit(
        data[0:, 1:].astype(np.float64),
        output,
        nb_epoch=30,
        batch_size=16,
        show_accuracy=True,
        validation_split=0.1,
        callbacks=[cb],
    )
    return model

开发者ID:LucMioulet，项目名称:MachineLearning，代码行数:33，代码来源:ProtoML-keras.py

示例13: compute_rocauc

    def compute_rocauc(self):
        """Compute per-class and micro-averaged ROC data and store it in the report.

        ``self.y_test`` is binarized over ``range(self.n_classes)`` and
        compared with ``self.clf.predict_proba(self.X_test)``. The resulting
        fpr/tpr/auc values are converted with ``.tolist()`` and written to
        ``self.report["roc_auc"]`` with string keys.

        :return: None
        """
        class_ids = list(range(self.n_classes))

        # Binarize the true labels and fetch the class probabilities
        y_test = label_binarize(self.y_test, classes=class_ids)
        y_score = self.clf.predict_proba(self.X_test)

        # One ROC curve and area per class
        fpr, tpr, roc_auc = {}, {}, {}
        for cls in class_ids:
            fpr[cls], tpr[cls], _ = roc_curve(y_test[:, cls], y_score[:, cls])
            roc_auc[cls] = auc(fpr[cls], tpr[cls])

        # Micro-average over all (sample, class) pairs
        fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        def as_lists(values_by_key):
            # String keys and plain Python values instead of numpy objects.
            return {str(key): value.tolist() for key, value in values_by_key.items()}

        self.report["roc_auc"] = dict(
            fpr=as_lists(fpr),
            tpr=as_lists(tpr),
            roc_auc=as_lists(roc_auc),
        )

开发者ID:jxnl，项目名称:nyu-twipsy，代码行数:26，代码来源:reporting.py

示例14: evaluateOneEpoch

def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    """Run the model over every full test batch and average loss and accuracy.

    Args:
        inputCoor, inputGraph, inputLabel: parallel sequences; element ``i``
            holds the coordinates, the graph (an object with ``tocsr()``) and
            the labels of one test chunk.
        para: settings object providing ``outputClassN`` and ``testBatchSize``.
        sess: session whose ``run`` evaluates the requested operations.
        trainOperaion: dict of placeholders/operations, indexed by the keys
            used below.

    Returns:
        tuple: ``(mean_loss, mean_accuracy, predictions)`` where
        ``predictions`` is the list of per-batch prediction outputs.
    """
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=list(range(para.outputClassN)))
        test_batch_size = para.testBatchSize
        # Floor division keeps the batch count an int on Python 3 as well;
        # samples beyond the last full batch are not evaluated.
        for testBatchID in range(len(labelTest) // test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            # Both keep_prob inputs are fed 1.0 for evaluation.
            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0, trainOperaion['keep_prob_2']: 1.0}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']], feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)

    return test_average_loss, test_average_acc, test_predict

开发者ID:joosm，项目名称:Graph-CNN-in-3D-Point-Cloud-Classification，代码行数:30，代码来源:model.py

示例15: set_shared_variables

    def set_shared_variables(self, dataset, index, enable_time):
        """Fill the shared variables with batch number ``index`` of ``dataset``.

        Builds the context (``c``), question (``q``) and answer (``y``) arrays
        plus the per-word weighting arrays ``c_pe`` / ``q_pe`` for one batch
        and pushes them into the corresponding ``*_shared`` variables.

        Args:
            dataset: mapping with the keys ``'C'``, ``'Q'`` and ``'Y'``.
            index: batch number; rows ``index*batch_size`` up to
                ``(index+1)*batch_size`` are used.
            enable_time: not used by this method.
        """
        c = np.zeros((self.batch_size, self.max_seqlen), dtype=np.int32)
        q = np.zeros((self.batch_size, ), dtype=np.int32)
        y = np.zeros((self.batch_size, self.num_classes), dtype=np.int32)
        c_pe = np.zeros((self.batch_size, self.max_seqlen, self.max_sentlen, self.embedding_size), dtype=theano.config.floatX)
        q_pe = np.zeros((self.batch_size, 1, self.max_sentlen, self.embedding_size), dtype=theano.config.floatX)

        indices = range(index*self.batch_size, (index+1)*self.batch_size)
        for i, row in enumerate(dataset['C'][indices]):
            # truncate each context to at most max_seqlen entries
            row = row[:self.max_seqlen]
            c[i, :len(row)] = row

        q[:len(indices)] = dataset['Q'][indices]  # list of the questions' line numbers

        # The loop below builds the adjustment matrices that belong to one batch.
        # Loop-invariant factor over the embedding dimension, computed once.
        # NOTE(review): the `/` here and below relies on true division -- confirm
        # the module enables it if this runs on Python 2.
        embed_frac = (np.arange(self.embedding_size)+1)/self.embedding_size
        for key, mask in [('C', c_pe), ('Q', q_pe)]:
            for i, row in enumerate(dataset[key][indices]):
                # turn each sentence index into its actual word ids, zero-padded
                sentences = self.S[row].reshape((-1, self.max_sentlen))
                for ii, word_idxs in enumerate(sentences):
                    J = np.count_nonzero(word_idxs)
                    for j in np.arange(J):
                        mask[i, ii, j, :] = (1 - (j+1)/J) - embed_frac*(1 - 2*(j+1)/J)

        # y becomes binarized one-hot vectors over the vocabulary; column 0 of
        # y is left at zero. `classes` is passed by keyword because newer
        # scikit-learn releases reject it as a positional argument.
        y[:len(indices), 1:self.num_classes] = label_binarize(dataset['Y'][indices], classes=self.vocab)
        self.c_shared.set_value(c)
        self.q_shared.set_value(q)
        self.a_shared.set_value(y)
        self.c_pe_shared.set_value(c_pe)
        self.q_pe_shared.set_value(q_pe)

开发者ID:shincling，项目名称:MemNN_and_Varieties，代码行数:35，代码来源:main.py

注：本文中的sklearn.preprocessing.label_binarize函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。