本文整理汇总了 Python 中 sklearn.preprocessing.label_binarize 函数的典型用法代码示例。如果您正苦于以下问题:Python label_binarize 函数的具体用法?Python label_binarize 怎么用?Python label_binarize 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 label_binarize 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: PersonWorker
def PersonWorker(person):
    """Fit a LinearSVC for one person and report its train/test AUC.

    The person's data split is loaded through ``DL.loadPersonEpochDimRedu``
    (``featureFunc`` is read from the enclosing scope).  A ``LinearSVC`` is
    fitted on the training part; its hard predictions on each part are
    binarized over the classes 0-3 and scored with ``UT.auc``.

    Returns:
        A two-element list ``[train_auc, test_auc]``.
    """
    print('starting on person: ', str(person))

    # data = 40 videos x 32 alpha (csp channel)
    splits = DL.loadPersonEpochDimRedu(person=person, featureFunc=featureFunc)
    X_train, y_train, X_test, y_test = splits

    # http://stackoverflow.com/questions/26963454/lda-ignoring-n-components => only 1 feature :(
    print(np.shape(X_train))

    classifier = LinearSVC()
    classifier.fit(X_train, y_train)

    train_pred = label_binarize(classifier.predict(X_train), classes=[0, 1, 2, 3])
    train_auc = UT.auc(train_pred, y_train)

    test_pred = label_binarize(classifier.predict(X_test), classes=[0, 1, 2, 3])
    test_auc = UT.auc(test_pred, y_test)

    print('person: ', person,
          ' - train auc: ', str(train_auc),
          ' - test auc: ', str(test_auc))
    return [train_auc, test_auc]
示例2: calculate_roc
def calculate_roc(truth, predictions):
    """Compute per-class, micro- and macro-averaged ROC curves and AUCs.

    The last column of ``truth`` and of ``predictions`` (both accessed through
    ``.iloc``) is cast to ``int`` and one-hot encoded over
    ``np.arange(n_classes)``.  ``n_classes`` is read from the enclosing scope.

    Returns:
        A tuple ``(fpr, tpr, roc_auc)`` of dicts keyed by the class index
        ``0 .. n_classes - 1`` plus the strings ``"micro"`` and ``"macro"``.
    """
    class_ids = np.arange(n_classes)
    # ``classes`` is passed by keyword: recent scikit-learn releases no longer
    # accept it positionally.
    lb_truth = label_binarize(truth.iloc[:, -1].astype(int), classes=class_ids)
    lb_prediction = label_binarize(predictions.iloc[:, -1].astype(int), classes=class_ids)

    # Compute ROC curve and ROC area for each class.  The loop runs over
    # n_classes so that it agrees with the binarization above and with the
    # macro-average below, which both index 0 .. n_classes - 1.
    fpr = {}
    tpr = {}
    roc_auc = {}
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(lb_truth[:, i], lb_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(lb_truth.ravel(), lb_prediction.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Macro-average: first aggregate all false positive rates ...
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # ... then interpolate every per-class curve at those points ...
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        # np.interp is the function the removed ``scipy.interp`` aliased.
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

    # ... and finally average and compute the AUC.
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    return fpr, tpr, roc_auc
示例3: fit
def fit(self, X, y):
    """Fit the network weights with the swarm-based search.

    After ``init_params`` and ``construct_paths`` set up the model, the data
    is split into a training and a validation part
    (``test_size=self.validation_size``) and the targets are binarized over
    ``self.classes_``.  Every generation evaluates and updates each particle
    of each swarm on the training part; a reconstructed weight set replaces
    the current one whenever it lowers ``self.cost`` on the validation part.
    Training stops once the slope of a linear fit over the last
    ``self.window`` best scores is non-negative or the best score drops below
    1e-3.

    Sets ``self.paths``, ``self.W_swarms``, ``self.num_evals``, ``self.W`` and
    ``self.num_generations``.

    Returns:
        ``self``.

    NOTE(review): the nesting of the generation loop was reconstructed from a
    listing that had lost its indentation -- confirm against the upstream
    source.
    """
    self.init_params(X, y)
    self.paths = self.construct_paths()
    num = len(self.paths[0])
    swarm_paths = [sorted(set(s[i] for s in self.paths if s[i] is not None))
                   for i in range(num)]
    W = self.init_network()
    self.W_swarms = [[[s for s in self.swarms if s.path[j] == i]
                      for i in swarm_paths[j]]
                     for j in range(num)]

    X_train, X_valid, y_train, y_valid = cv.train_test_split(
        X, y, test_size=self.validation_size, random_state=self.random_state)

    # Binarize true values.  ``classes`` is passed by keyword because recent
    # scikit-learn releases no longer accept it positionally.
    if len(self.classes_) > 2:
        y_train = label_binarize(y_train, classes=self.classes_)
        y_valid = label_binarize(y_valid, classes=self.classes_)
    else:
        # With at most two classes the binarized output is passed through
        # self.mlb as well (presumably to obtain one column per class -- TODO
        # confirm).
        y_train = self.mlb.fit_transform(label_binarize(y_train, classes=self.classes_))
        y_valid = self.mlb.fit_transform(label_binarize(y_valid, classes=self.classes_))

    j = 0
    # Seed the score history with a strictly decreasing ramp so the first
    # regression below has a negative slope.
    tmp = [1e3 - float(x * 1e3) / self.window for x in range(self.window)]
    window = deque(tmp, maxlen=(self.window * 5))
    self.num_evals = 0
    best_score = np.inf

    if self.verbose:
        print("Fitting network {0}-{1}-{2} with {3} paths".format(
            self.n_in, self.n_hidden, self.n_out, len(self.swarms)))

    while True:
        j += 1
        for s in self.swarms:
            for p_index in range(self.num_particles):
                self.num_evals += 1
                # Evaluate each swarm; the returned score is not used here.
                s.evaluate(W, X_train, y_train, p_index)
                # reconstruct gvn
                Wn = self.reconstruct_gvn(W)
                # update
                s.update(self.w, self.c1, self.c2, p_index)
                # evaluate gvn
                y_pred = self.forward(Wn, X_valid)
                score = self.cost(y_valid, y_pred)
                if score < best_score:
                    W = Wn[:]
                    best_score = score

        window.append(best_score)
        r = linregress(range(self.window), list(window)[-self.window:])
        if self.verbose:
            print("{0} {1}".format(j, best_score))

        # r[0] is the first element of the linregress result, i.e. the slope.
        if r[0] >= 0 or best_score < 1e-3:
            self.W = W
            self.num_generations = j
            return self
示例4: test_sensitivity_specificity_error_multilabels
def test_sensitivity_specificity_error_multilabels():
    """``sensitivity_score`` must raise ValueError for binarized label matrices."""
    label_space = np.arange(5)
    truth_bin = label_binarize([1, 3, 3, 2], classes=label_space)
    pred_bin = label_binarize([1, 1, 3, 2], classes=label_space)

    with pytest.raises(ValueError):
        sensitivity_score(truth_bin, pred_bin)
示例5: __init__
def __init__(self, file_path, number_features):
    """Load a dataset and keep a 60 % train split plus the remaining test split.

    Columns ``0 .. number_features`` of the loaded array are used as inputs
    and column ``number_features + 1`` as the label.  Labels are binarized
    over the classes 0, 1, 2 and stored transposed.

    Sets ``xs``, ``xs_test``, ``ys``, ``ys_test``, ``m`` (number of training
    rows) and ``test_set_size`` (number of test rows).
    """
    dataset = self.load_dataset(file_path, number_features)

    label_column = number_features + 1
    inputs = dataset[:, :label_column]
    labels = dataset[:, label_column]

    split = train_test_split(inputs, labels, train_size=0.6)
    self.xs, self.xs_test, train_labels, test_labels = split

    self.ys = np.transpose(label_binarize(train_labels, classes=[0, 1, 2]))
    self.ys_test = np.transpose(label_binarize(test_labels, classes=[0, 1, 2]))

    self.m = self.xs.shape[0]
    self.test_set_size = self.xs_test.shape[0]
示例6: getROCScore
def getROCScore(X_train, y_train, X_test, y_test, classifierName, depth=None, Cvalue=1, alphaValue=0.0):
    """Train a one-vs-rest classifier and return its micro/macro ROC AUC.

    Args:
        X_train, y_train: training inputs and labels.
        X_test, y_test: test inputs and labels.
        classifierName: one of ``'DecisionTree'``, ``'LogisticRegression'``,
            ``'LinearSVC'``, ``'NaiveBayes'`` or ``'Bagging'``.
        depth: ``max_depth`` for the decision tree.
        Cvalue: ``C`` for logistic regression and the linear SVC.
        alphaValue: ``alpha`` for multinomial naive Bayes.

    Returns:
        A tuple ``(micro_auc, macro_auc, classifier)`` where ``classifier`` is
        the fitted ``OneVsRestClassifier``.

    Raises:
        ValueError: if ``classifierName`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    # The label set is fixed; keep it in one place so train and test are
    # guaranteed to be binarized identically.
    label_space = [3, 4, 5, 6, 7, 8, 9, 12, 14, 15, 18, 19, 20, 21, 22, 23,
                   24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
                   38, 39, 40, 41, 42, 43, 44, 999]

    # Binarize the output
    y_train = label_binarize(y_train, classes=label_space)
    n_classes = y_train.shape[1]
    y_test = label_binarize(y_test, classes=label_space)

    # Learn to predict each class against the other
    if classifierName == 'DecisionTree':
        classifier = OneVsRestClassifier(tree.DecisionTreeClassifier(max_depth=depth))
    elif classifierName == 'LogisticRegression':
        classifier = OneVsRestClassifier(linear_model.LogisticRegression(C=Cvalue))
    elif classifierName == 'LinearSVC':
        classifier = OneVsRestClassifier(LinearSVC(C=Cvalue))
    elif classifierName == 'NaiveBayes':
        classifier = OneVsRestClassifier(MultinomialNB(alpha=alphaValue))
    elif classifierName == 'Bagging':
        estimator = tree.DecisionTreeClassifier()
        # Passed positionally: the keyword was renamed from ``base_estimator``
        # to ``estimator`` in newer scikit-learn, but it is the first
        # parameter in both.
        classifier = OneVsRestClassifier(BaggingClassifier(estimator))
    else:
        raise ValueError("Unknown classifierName: {0!r}".format(classifierName))

    # The curves below are built from the output of predict(), not from
    # decision scores.
    y_score = classifier.fit(X_train, y_train).predict(X_test)

    fpr = {}
    tpr = {}
    roc_auc = {}
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Compute macro-average ROC curve and ROC area.
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        # np.interp is the function the removed ``scipy.interp`` aliased.
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    return (roc_auc["micro"], roc_auc["macro"], classifier)
示例7: xval
def xval(clf, x, y, train_index, test_index):
    """Fit ``clf`` on one fold and score it on the held-out part.

    Args:
        clf: estimator providing ``fit``, ``predict_proba``, ``classes_`` and
            ``get_num_evals``.
        x, y: full inputs and labels, indexable by the index arguments.
        train_index, test_index: indices selecting the fold's two parts.

    Returns:
        A tuple ``(mse, acc, evals)``: mean squared error between the
        binarized true labels and the predicted probabilities, accuracy of
        the arg-max prediction, and ``clf.get_num_evals()``.
    """
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    clf.fit(x_train, y_train)
    y_pred = clf.predict_proba(x_test)

    # ``classes`` is passed by keyword: recent scikit-learn releases no longer
    # accept it positionally.
    y_true_bin = label_binarize(y_test, classes=clf.classes_)
    if len(clf.classes_) <= 2:
        # With at most two classes the binarized labels are additionally run
        # through a MultiLabelBinarizer (presumably to match the column count
        # of predict_proba -- TODO confirm).
        y_true_bin = MultiLabelBinarizer().fit_transform(y_true_bin)
    mse = mean_squared_error(y_true_bin, y_pred)

    # NOTE(review): arg-max column indices are compared directly with the
    # labels, which looks correct only if the labels are 0 .. k-1 -- confirm.
    acc = accuracy_score(y_test, y_pred.argmax(axis=1))
    evals = clf.get_num_evals()
    return mse, acc, evals
示例8: gensim_classifier
def gensim_classifier():
    """Train a Word2Vec + one-vs-rest LinearSVC tweet classifier and evaluate it.

    Labelled tweets are split into word lists, a Word2Vec model is trained on
    them, averaged feature vectors are built for the train and test parts, and
    a one-vs-rest ``LinearSVC`` is fitted.  Predictions are written to
    ``data/w2v_linsvc.csv``, the fitted classifier to
    ``model/w2v_linsvc.pkl``, and ``evaluate`` is called on the binarized
    results.  ``class_list`` is read from the enclosing scope.
    """
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

    label_list = get_labels()
    tweet_list = get_labelled_tweets()

    # split all sentences to list of words
    sentences = [tweet.split() for tweet in tweet_list]

    # parameters for model
    num_features = 100
    min_word_count = 1
    num_workers = 4
    context = 2
    downsampling = 1e-3

    # Initialize and train the model
    w2v_model = Word2Vec(sentences, workers=num_workers,
                         size=num_features, min_count=min_word_count,
                         window=context, sample=downsampling, seed=1)

    index_value, train_set, test_set = train_test_split(0.80, sentences)
    train_vector = getAvgFeatureVecs(train_set, w2v_model, num_features)
    test_vector = getAvgFeatureVecs(test_set, w2v_model, num_features)

    # Fit the imputer on the training vectors only and reuse it for the test
    # vectors, so both parts are filled with the same statistics.
    imputer = Imputer()
    train_vector = imputer.fit_transform(train_vector)
    test_vector = imputer.transform(test_vector)

    # train model and predict
    model = LinearSVC()
    classifier_fitted = OneVsRestClassifier(model).fit(train_vector, label_list[:index_value])
    result = classifier_fitted.predict(test_vector)

    # output result to csv
    create_directory('data')
    result.tofile("data/w2v_linsvc.csv", sep=',')

    # Store the model to mmap-able files.  The fitted one-vs-rest wrapper is
    # dumped: ``model`` itself is only a template that the wrapper clones, so
    # it is never fitted.
    create_directory('model')
    joblib.dump(classifier_fitted, 'model/%s.pkl' % 'w2v_linsvc')

    # evaluation
    label_score = classifier_fitted.decision_function(test_vector)
    binarise_result = label_binarize(result, classes=class_list)
    binarise_labels = label_binarize(label_list, classes=class_list)

    evaluate(binarise_result, binarise_labels[index_value:], label_score, 'w2v_linsvc')
示例9: PR_multi_class
def PR_multi_class(data_train, data_test, data_test_vectors):
    """Plot one precision-recall curve per class for a one-vs-rest linear SVC.

    Targets are binarized over the classes 0, 1, 2.  Half of the training
    vectors are used to fit the classifier; its decision scores on
    ``data_test_vectors`` are compared with ``data_test.target``.

    Returns:
        Always ``0``.
    """
    # Binarize the output
    y_train_label = label_binarize(data_train.target, classes=[0, 1, 2])
    n_classes = y_train_label.shape[1]

    rng = np.random.RandomState(0)

    # Shuffle and split training and test sets.
    # NOTE(review): data_train_vectors is not a parameter of this function; it
    # is resolved from the enclosing scope -- confirm that is intended.
    split = train_test_split(data_train_vectors, y_train_label,
                             test_size=.5, random_state=rng)
    X_train, X_test, y_train, y_test = split

    # Learn to predict each class against the other
    base_svc = svm.SVC(kernel='linear', probability=True, random_state=rng)
    classifier = OneVsRestClassifier(base_svc)
    classifier.fit(X_train, y_train)

    y_pred_score = classifier.decision_function(data_test_vectors)
    y_test_label = label_binarize(data_test.target, classes=[0, 1, 2])

    # Compute Precision-Recall per class
    precision = {}
    recall = {}
    average_precision = {}
    for cls in range(n_classes):
        truth_col = y_test_label[:, cls]
        score_col = y_pred_score[:, cls]
        precision[cls], recall[cls], _ = precision_recall_curve(truth_col, score_col)
        average_precision[cls] = average_precision_score(truth_col, score_col)

    # Micro-average over all classes (computed but not plotted below)
    micro_curve = precision_recall_curve(y_test_label.ravel(), y_pred_score.ravel())
    precision["micro"], recall["micro"], _ = micro_curve
    average_precision["micro"] = average_precision_score(y_test_label, y_pred_score, average="micro")

    # Plot Precision-Recall curve for each class
    plt.clf()
    for cls in range(n_classes):
        legend_text = 'PR curve of class {0} (area = {1:0.2f})'.format(cls, average_precision[cls])
        plt.plot(recall[cls], precision[cls], label=legend_text)

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall curve of multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return 0
示例10: multiclass_AUC
def multiclass_AUC(clf, X, Y):
    """Print the micro-averaged ROC AUC of ``clf`` wrapped as one-vs-rest.

    ``Y`` is binarized over its distinct values, the data is split 50/50 with
    ``random_state=0``, and the wrapped classifier is fitted on the first
    half.  The AUC is computed from the output of ``predict`` on the second
    half.

    Args:
        clf: base estimator handed to ``OneVsRestClassifier``.
        X: samples (converted with ``np.array``).
        Y: labels (converted with ``np.array``).

    Returns:
        None; the result is printed.
    """
    # Binarize the output.  The distinct labels are sorted so the column
    # order does not depend on set iteration order.
    X, Y = np.array(X), np.array(Y)
    Y = label_binarize(Y, classes=sorted(set(Y)))

    # shuffle and split training and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5,
                                                        random_state=0)

    # Learn to predict each class against the other
    classifier = OneVsRestClassifier(clf)
    Y_score = classifier.fit(X_train, Y_train).predict(X_test)

    # Only the micro-average is reported, so the per-class curves the
    # original also built (and never used) are not computed.
    micro_fpr, micro_tpr, _ = roc_curve(Y_test.ravel(), Y_score.ravel())
    micro_auc = auc(micro_fpr, micro_tpr)

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("AUC for multiclass {}: {}".format(clf.__class__.__name__, micro_auc))
示例11: transform
def transform(self, X, y=None):
    """Map every element of ``X`` through ``self._replace_label``.

    The mapped values are returned as a column of shape ``(len(X), 1)``.  When
    ``self.binarize`` is truthy the column is additionally one-hot encoded
    over ``self.labels``.  ``y`` is accepted but not used.
    """
    replace_all = np.vectorize(self._replace_label)
    column = replace_all(X).reshape((len(X), 1))

    if not self.binarize:
        return column
    return label_binarize(column, classes=self.labels)
示例12: trainModel
def trainModel(data):
    """Build and train a two-hidden-layer softmax classifier with Keras.

    Column 0 of ``data`` holds the class label and the remaining columns the
    features.  Labels are one-hot encoded over the classes ``0 .. 38``.

    Args:
        data: 2-D array; ``data.shape[1] - 1`` is the network input size.

    Returns:
        The fitted ``Sequential`` model.
    """
    n_classes = 39  # size of the output layer and of the one-hot labels

    model = Sequential()
    model.add(Dense(400, input_dim=(data.shape[1] - 1), init="uniform"))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))
    model.add(Dense(500, init="uniform"))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, init="uniform"))
    model.add(Activation("softmax"))

    cb = EarlyStopping(monitor="val_loss", patience=3, verbose=0, mode="auto")

    # ``classes`` is passed by keyword: recent scikit-learn releases no longer
    # accept it positionally.
    output = label_binarize(data[0:, 0], classes=list(range(n_classes)))
    print(output.shape)

    # Earlier experiments (removed here) used lr=0.1 and lr=0.01 with other
    # beta values and larger validation splits.
    # ``beta_1`` was misspelled ``beta_l`` (letter l) in the original call.
    optim = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
    model.compile(loss="categorical_crossentropy", optimizer=optim)
    model.fit(
        data[0:, 1:].astype(np.float64),
        output,
        nb_epoch=30,
        batch_size=16,
        show_accuracy=True,
        validation_split=0.1,
        callbacks=[cb],
    )
    return model
示例13: compute_rocauc
def compute_rocauc(self):
    """Store per-class and micro-averaged ROC data in ``self.report``.

    ``self.y_test`` is binarized over ``range(self.n_classes)`` and compared
    with ``self.clf.predict_proba(self.X_test)``.  The resulting curves and
    AUC values are written to ``self.report["roc_auc"]`` as a dict with the
    keys ``fpr``, ``tpr`` and ``roc_auc``; each maps the stringified class
    index (or ``"micro"``) to the value converted with ``.tolist()``.

    :return: None
    """
    class_ids = list(range(self.n_classes))

    # Binarize the output
    y_true = label_binarize(self.y_test, classes=class_ids)
    y_score = self.clf.predict_proba(self.X_test)

    # ROC curve and ROC area for each class
    fpr, tpr, roc_auc = {}, {}, {}
    for cls in class_ids:
        fpr[cls], tpr[cls], _ = roc_curve(y_true[:, cls], y_score[:, cls])
        roc_auc[cls] = auc(fpr[cls], tpr[cls])

    # Micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    def _as_lists(mapping):
        # Stringify keys and convert values so the report holds plain types.
        return {str(key): value.tolist() for key, value in mapping.items()}

    self.report["roc_auc"] = dict(
        fpr=_as_lists(fpr),
        tpr=_as_lists(tpr),
        roc_auc=_as_lists(roc_auc),
    )
示例14: evaluateOneEpoch
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    """Run the test pass over every input group in fixed-size batches.

    For each index ``i`` the group's labels are one-hot encoded over
    ``range(para.outputClassN)`` and the group is processed in batches of
    ``para.testBatchSize``.  Samples beyond the last full batch are skipped.
    Keep-probabilities are fed as 1.0.

    Args:
        inputCoor, inputGraph, inputLabel: per-group inputs, indexed by
            ``0 .. len(inputCoor) - 1``; each graph must provide ``tocsr()``.
        para: object providing ``outputClassN`` and ``testBatchSize``.
        sess: session whose ``run`` evaluates the fetches.
        trainOperaion: dict holding the placeholders and fetches used below.

    Returns:
        A tuple ``(mean_loss, mean_acc, predictions)`` where ``predictions``
        is the list of per-batch prediction outputs.
    """
    test_loss = []
    test_acc = []
    test_predict = []

    class_ids = list(range(para.outputClassN))
    test_batch_size = para.testBatchSize
    fetches = [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']]

    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=class_ids)

        # Floor division: range() needs an int, and ``/`` yields a float on
        # Python 3.
        for testBatchID in range(len(labelTest) // test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            feed_dict = {trainOperaion['inputPC']: batchCoor,
                         trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0,
                         trainOperaion['keep_prob_2']: 1.0}

            predict, loss_test, acc_test = sess.run(fetches, feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)
    return test_average_loss, test_average_acc, test_predict
示例15: set_shared_variables
def set_shared_variables(self, dataset, index,enable_time):
    """Fill the shared variables with batch number ``index`` of ``dataset``.

    Builds, for the rows ``index * batch_size .. (index + 1) * batch_size - 1``:

    * ``c`` -- ``dataset['C']`` rows truncated to ``max_seqlen``,
    * ``q`` -- ``dataset['Q']`` entries,
    * ``y`` -- ``dataset['Y']`` binarized over ``self.vocab`` and written to
      columns ``1 .. num_classes - 1``,
    * ``c_pe`` / ``q_pe`` -- per-word weighting masks for ``C`` and ``Q``,

    and pushes them into ``c_shared``, ``q_shared``, ``a_shared``,
    ``c_pe_shared`` and ``q_pe_shared``.

    Args:
        dataset: mapping with the keys ``'C'``, ``'Q'`` and ``'Y'``.
        index: batch number.
        enable_time: accepted but not used in this method.
    """
    c = np.zeros((self.batch_size, self.max_seqlen), dtype=np.int32)
    q = np.zeros((self.batch_size, ), dtype=np.int32)
    y = np.zeros((self.batch_size, self.num_classes), dtype=np.int32)
    c_pe = np.zeros((self.batch_size, self.max_seqlen, self.max_sentlen, self.embedding_size), dtype=theano.config.floatX)
    q_pe = np.zeros((self.batch_size, 1, self.max_sentlen, self.embedding_size), dtype=theano.config.floatX)

    # An explicit list keeps the fancy indexing below independent of whether
    # range() returns a list or a lazy range object.
    indices = list(range(index * self.batch_size, (index + 1) * self.batch_size))

    for i, row in enumerate(dataset['C'][indices]):
        row = row[:self.max_seqlen]
        c[i, :len(row)] = row

    # presumably the sentence (line) indices of the questions -- TODO confirm
    q[:len(indices)] = dataset['Q'][indices]

    # The loop below builds the weighting mask that belongs to this batch.
    # The per-dimension fraction (k + 1) / d does not depend on the word, so
    # it is computed once.  Float division is spelled out so the fractions
    # are not truncated under Python 2 integer division.
    dim_frac = (np.arange(self.embedding_size) + 1) / float(self.embedding_size)
    for key, mask in [('C', c_pe), ('Q', q_pe)]:
        for i, row in enumerate(dataset[key][indices]):
            # Look the row's indices up in self.S and lay the result out as
            # rows of max_sentlen entries (presumably zero-padded word ids --
            # TODO confirm).
            sentences = self.S[row].reshape((-1, self.max_sentlen))
            for ii, word_idxs in enumerate(sentences):
                J = np.count_nonzero(word_idxs)
                for j in np.arange(J):
                    pos_frac = (j + 1) / float(J)
                    mask[i, ii, j, :] = (1 - pos_frac) - dim_frac * (1 - 2 * pos_frac)

    # Turn the answers into binarized one-hot rows over the vocabulary.
    # ``classes`` is passed by keyword: recent scikit-learn releases no longer
    # accept it positionally.
    y[:len(indices), 1:self.num_classes] = label_binarize(dataset['Y'][indices], classes=self.vocab)

    self.c_shared.set_value(c)
    self.q_shared.set_value(q)
    self.a_shared.set_value(y)
    self.c_pe_shared.set_value(c_pe)
    self.q_pe_shared.set_value(q_pe)