This article collects typical usage examples of the sklearn.metrics.matthews_corrcoef function in Python. If you have been wondering what matthews_corrcoef does, how to call it, or what real-world uses look like, the hand-picked examples below should help.
Fifteen code examples of matthews_corrcoef are shown, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
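Before the project-level examples, here is a minimal reference call (the values match the scikit-learn documentation). MCC ranges from -1 (total disagreement) through 0 (chance level) to +1 (perfect prediction):

from sklearn.metrics import matthews_corrcoef

y_true = [+1, +1, +1, -1]
y_pred = [+1, -1, +1, +1]
print(matthews_corrcoef(y_true, y_pred))  # -0.33...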
Example 1: plot_conf_matrix

def plot_conf_matrix(y_true, y_pred, normed=True, heatmap_color='Blues', **kwargs):
    ## check to make sure that y_pred is an array of integers if y_true is a bunch of integers
    true_int_check = all(isinstance(a, int) for a in y_true)
    pred_int_check = all(isinstance(a, int) for a in y_pred)
    if true_int_check and not pred_int_check:  # convert the y_pred values to integers
        if isinstance(y_pred, pd.Series):
            y_pred = y_pred.astype(int)
    my_c = metrics.confusion_matrix(y_true, y_pred)
    print(metrics.matthews_corrcoef(y_true, y_pred))
    if normed:
        cm_normalized = my_c.astype('float') / my_c.sum(axis=1)[:, np.newaxis]
        my_c = cm_normalized
        plt.title('Normalized RF Classifier Confusion Matrix')
    else:
        plt.title('Random Forest Classifier Confusion Matrix')
    sns.heatmap(my_c, annot=True, fmt='', cmap=heatmap_color, **kwargs)
    plt.ylabel('True')
    plt.xlabel('Assigned')
    plt.show()
    return
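A quick way to exercise the helper above (an illustrative sketch; it assumes pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt and sklearn metrics are imported as in the snippet):

y_true = [0, 1, 1, 0, 1, 0]
y_pred = pd.Series([0, 1, 0, 0, 1, 1])
plot_conf_matrix(y_true, y_pred, normed=True)  # prints the MCC, then shows the heatmap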
Example 2: melodiness_metrics

def melodiness_metrics(m_train, m_test, y_train, y_test):
    """Compute metrics on the melodiness score.

    Parameters
    ----------
    m_train : np.array [n_samples]
        Melodiness scores for the training set.
    m_test : np.array [n_samples]
        Melodiness scores for the testing set.
    y_train : np.array [n_samples]
        Training labels.
    y_test : np.array [n_samples]
        Testing labels.

    Returns
    -------
    melodiness_scores : dict
        Melodiness metrics for the training and testing sets.
    """
    m_bin_train = 1 * (m_train >= 1)
    m_bin_test = 1 * (m_test >= 1)
    train_scores = {}
    test_scores = {}
    train_scores['accuracy'] = metrics.accuracy_score(y_train, m_bin_train)
    test_scores['accuracy'] = metrics.accuracy_score(y_test, m_bin_test)
    train_scores['mcc'] = metrics.matthews_corrcoef(y_train, m_bin_train)
    test_scores['mcc'] = metrics.matthews_corrcoef(y_test, m_bin_test)
    (p, r, f, s) = metrics.precision_recall_fscore_support(y_train, m_bin_train)
    train_scores['precision'] = p
    train_scores['recall'] = r
    train_scores['f1'] = f
    train_scores['support'] = s
    (p, r, f, s) = metrics.precision_recall_fscore_support(y_test, m_bin_test)
    test_scores['precision'] = p
    test_scores['recall'] = r
    test_scores['f1'] = f
    test_scores['support'] = s
    train_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_train, m_bin_train, labels=[0, 1])
    test_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_test, m_bin_test, labels=[0, 1])
    train_scores['auc score'] = \
        metrics.roc_auc_score(y_train, m_train + 1, average='weighted')
    test_scores['auc score'] = \
        metrics.roc_auc_score(y_test, m_test + 1, average='weighted')
    melodiness_scores = {'train': train_scores, 'test': test_scores}
    return melodiness_scores
Example 3: clf_metrics

def clf_metrics(p_train, p_test, y_train, y_test):
    """Compute metrics on classifier predictions.

    Parameters
    ----------
    p_train : np.array [n_samples]
        Predicted probabilities for the training set.
    p_test : np.array [n_samples]
        Predicted probabilities for the testing set.
    y_train : np.array [n_samples]
        Training labels.
    y_test : np.array [n_samples]
        Testing labels.

    Returns
    -------
    clf_scores : dict
        Classifier metrics for the training and testing sets.
    """
    y_pred_train = 1 * (p_train >= 0.5)
    y_pred_test = 1 * (p_test >= 0.5)
    train_scores = {}
    test_scores = {}
    train_scores['accuracy'] = metrics.accuracy_score(y_train, y_pred_train)
    test_scores['accuracy'] = metrics.accuracy_score(y_test, y_pred_test)
    train_scores['mcc'] = metrics.matthews_corrcoef(y_train, y_pred_train)
    test_scores['mcc'] = metrics.matthews_corrcoef(y_test, y_pred_test)
    (p, r, f, s) = metrics.precision_recall_fscore_support(y_train, y_pred_train)
    train_scores['precision'] = p
    train_scores['recall'] = r
    train_scores['f1'] = f
    train_scores['support'] = s
    (p, r, f, s) = metrics.precision_recall_fscore_support(y_test, y_pred_test)
    test_scores['precision'] = p
    test_scores['recall'] = r
    test_scores['f1'] = f
    test_scores['support'] = s
    train_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_train, y_pred_train, labels=[0, 1])
    test_scores['confusion matrix'] = \
        metrics.confusion_matrix(y_test, y_pred_test, labels=[0, 1])
    train_scores['auc score'] = \
        metrics.roc_auc_score(y_train, p_train + 1, average='weighted')
    test_scores['auc score'] = \
        metrics.roc_auc_score(y_test, p_test + 1, average='weighted')
    clf_scores = {'train': train_scores, 'test': test_scores}
    return clf_scores
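Both metric helpers above take continuous scores plus binary labels. A hedged sketch of calling clf_metrics with a scikit-learn classifier's predicted probabilities (the fitted model and the train/test arrays are placeholders, not part of the original project):

p_train = model.predict_proba(X_train)[:, 1]
p_test = model.predict_proba(X_test)[:, 1]
scores = clf_metrics(p_train, p_test, y_train, y_test)
print(scores['test']['mcc'], scores['test']['accuracy'])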
Example 4: printAnalysis

def printAnalysis(self, true_pred, y_pred1):
    print("########## Analysing the Model result ##########################")
    math_corr = matthews_corrcoef(true_pred, y_pred1)
    roc_auc = roc_auc_score(true_pred, y_pred1)
    print(classification_report(true_pred, y_pred1))
    print("Matthews correlation: " + str(math_corr))
    print("ROC AUC score: " + str(roc_auc))
Example 5: _show_classification_results

def _show_classification_results(y_test, y_pred):
    """Print performance metrics for a classifier."""
    print(metrics.classification_report(y_test, y_pred))
    print()
    print('Confusion matrix:')
    print(metrics.confusion_matrix(y_test, y_pred))
    print()
    print("Matthew's correlation coefficient:",
          metrics.matthews_corrcoef(y_test, y_pred))
    print('F1 score:', metrics.f1_score(y_test, y_pred))
    print()
Example 6: score_MCC

def score_MCC(ground_truth, scores):
    '''
    Assuming the model output is the probability of default, that probability
    can be used for ranking; the fraction of defaults in the validation data
    is then used to set the threshold applied to the predictions.
    '''
    if isinstance(scores, pd.Series):
        scores = scores.values
    if isinstance(ground_truth, pd.Series):
        ground_truth = ground_truth.values
    tmp_ground_truth = np.copy(ground_truth)
    fault_frac = tmp_ground_truth.mean()
    # print('score shape:', scores.shape)
    print('mean of ground truth:', fault_frac)
    thres_value = np.percentile(scores, 100. * (1 - fault_frac), axis=0)
    print('threshold for preds:', thres_value)
    binary_scores = scores > thres_value
    binary_scores = binary_scores.astype(int)
    ## convert {0, 1} labels to the {-1, 1} convention used downstream
    np.place(binary_scores, binary_scores == 0, -1)
    np.place(tmp_ground_truth, tmp_ground_truth == 0, -1)
    return matthews_corrcoef(tmp_ground_truth, binary_scores)
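To make the threshold-by-class-fraction idea above concrete, a minimal sketch with made-up data (assuming score_MCC and its imports are available as in the snippet):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
ground_truth = pd.Series((rng.rand(1000) < 0.1).astype(int))            # ~10% "defaults"
scores = pd.Series(rng.rand(1000) * 0.5 + 0.5 * ground_truth.values)    # noisy probabilities
print(score_MCC(ground_truth, scores))  # MCC at the fraction-matched threshold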
Example 7: calcualte_threshold

def calcualte_threshold(positives, negatives, measure="SPC", measure_threshold=0.95, thresholds=None, attempt=0):
    """Compute TPR, FPR and related measures over threshold values and select a cutoff.

    Input:
        positives - list of scores of positive runs
        negatives - list of scores of negative runs
        measure - choose the cutoff by 95% specificity ("SPC") or by matthews_corrcoef ("MCC")
    """
    assert measure in ["TPR", "FPR", "SPC", "MCC", "PPV", "FDR", "ACC"]
    y_true = [1] * len(positives) + [0] * len(negatives)
    values = {name: [] for name in ["TPR", "FPR", "SPC", "MCC", "PPV", "FDR", "ACC"]}
    saveThreshold = None
    saveValue = 1.0
    thresholds = list(sorted(thresholds or [i / 10. for i in range(1, 10000)]))
    for threshold in thresholds:
        TN = sum([1 for score in negatives if score < threshold])
        FP = sum([1 for score in negatives if score >= threshold])
        TP = sum([1 for score in positives if score >= threshold])
        FN = sum([1 for score in positives if score < threshold])
        values["FPR"].append(float(FP) / (FP + TN))
        values["TPR"].append(float(TP) / (TP + FN))
        values["SPC"].append(float(TN) / (FP + TN))
        y_pred = [int(score >= threshold) for scores in (positives, negatives) for score in scores]
        values["MCC"].append(matthews_corrcoef(y_true, y_pred))
        values["PPV"].append(float(TP) / (TP + FP) if TP + FP > 0 else 0.0)
        values["FDR"].append(float(FP) / (TP + FP) if TP + FP > 0 else 0.0)
        values["ACC"].append(float(TP + TN) / (len(positives) + len(negatives)))
    specificity_curve_inverse = interp1d(values[measure], thresholds)
    saveThreshold = specificity_curve_inverse(measure_threshold)  # modified by Alexey: 0.95 => measure_threshold
    return saveThreshold, thresholds, values
Example 8: KFold_method

def KFold_method(self):
    kf = KFold(n_splits=10)
    for train_index, test_index in kf.split(self.FeatureSet):
        X_train = []
        X_test = []
        y_train = []
        y_test = []
        for trainid in train_index.tolist():
            X_train.append(self.FeatureSet[trainid])
            y_train.append(self.Label[trainid])
        for testid in test_index.tolist():
            X_test.append(self.FeatureSet[testid])
            y_test.append(self.Label[testid])
        #clf = tree.DecisionTreeClassifier()
        #clf = clf.fit(X_train, y_train)
        #pre_labels = clf.predict(X_test)
        clf = AdaBoostClassifier(n_estimators=100)
        clf = clf.fit(X_train, y_train)
        pre_labels = clf.predict(X_test)
        # Model evaluation
        ACC = metrics.accuracy_score(y_test, pre_labels)
        MCC = metrics.matthews_corrcoef(y_test, pre_labels)
        SN = self.performance(y_test, pre_labels)
        print(ACC, SN)
Example 9: evalmetric

def evalmetric(pred, truth):
    # Alternative: score by plain AUC instead of the MCC threshold search below
    # return 'auc_mine', metrics.roc_auc_score(truth.get_label(), pred)
    thresholds = np.arange(99.6, 99.9, 0.025)
    bestScore = 0
    bestT = 0
    bestAuc = 0
    bestCf = np.zeros((2, 2))
    thresholds = [0.10]
    for t in thresholds:
        temp = np.copy(pred)
        temp[np.where(pred > np.percentile(pred, t))] = 1
        temp[np.where(pred <= np.percentile(pred, t))] = 0
        score = metrics.matthews_corrcoef(truth.get_label(), temp)
        if score > bestScore:
            bestScore = score
            bestT = np.percentile(pred, t)
            bestAuc = metrics.roc_auc_score(truth.get_label(), temp)
            bestCf = metrics.confusion_matrix(truth.get_label(), temp)
    print('threshold {} mcc {} auc {} TN {} FP {} FN {} TP {}\n'.format(
        bestT, bestScore, bestAuc, bestCf[0][0], bestCf[0][1], bestCf[1][0], bestCf[1][1]))
    return 'mcc', -1 * bestScore
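Functions with this (preds, dtrain) -> (name, value) signature are typically plugged into XGBoost as a custom evaluation metric. A hedged sketch, assuming hypothetical X_train / y_train / X_valid / y_valid arrays (feval is the classic parameter name; newer XGBoost releases prefer custom_metric):

import xgboost as xgb

dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_valid, label=y_valid)
booster = xgb.train({'objective': 'binary:logistic'}, dtrain,
                    num_boost_round=50,
                    evals=[(dvalid, 'valid')],
                    feval=evalmetric)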
Example 10: compute_MCC

def compute_MCC(y_true, y_score, threshold_num=500):
    """Compute the Matthews Correlation Coefficient.

    :param y_true: true binary labels in range {0, 1}
    :type y_true: numpy array
    :param y_score: the probability estimates of the positive class
    :type y_score: numpy array
    :param threshold_num: the number of thresholds
    :type threshold_num: int
    :return: the maximum Matthews Correlation Coefficient
    :rtype: float
    """
    # Get the ranks
    ranks = get_ranks(y_score)
    # Generate the array which contains the threshold values
    threshold_array = np.linspace(np.min(ranks) - 1, np.max(ranks) + 1, num=threshold_num)
    # Generate MCC values
    MCC_list = []
    for threshold in threshold_array:
        MCC_list.append(matthews_corrcoef(y_true, ranks > threshold))
    MCC_array = np.array(MCC_list)
    # Illustrate threshold and MCC values
    # pylab.figure()
    # pylab.plot(threshold_array / np.max(ranks), MCC_array)
    # pylab.show()
    return np.max(MCC_array)
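compute_MCC relies on a get_ranks helper that is not part of the snippet. A minimal stand-in, an assumption rather than the original implementation, simply ranks the scores so the threshold sweep runs on a uniform scale:

from scipy.stats import rankdata

def get_ranks(y_score):
    # Average ranks of the scores in ascending order (ties share a rank).
    return rankdata(y_score)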
Example 11: Bootstrap_method

def Bootstrap_method(self):
    # Note: sklearn.cross_validation is the pre-0.20 API; the model_selection
    # equivalent is ShuffleSplit(n_splits=10, test_size=0.25, random_state=0).
    rs = cross_validation.ShuffleSplit(
        len(self.FeatureSet), 10, 0.25, random_state=0)
    clf = tree.DecisionTreeClassifier()
    for train_index, test_index in rs:
        X_train = []
        X_test = []
        y_train = []
        y_test = []
        for trainid in train_index.tolist():
            X_train.append(self.FeatureSet[trainid])
            y_train.append(self.Label[trainid])
        for testid in test_index.tolist():
            X_test.append(self.FeatureSet[testid])
            y_test.append(self.Label[testid])
        #clf = clf.fit(X_train, y_train)
        #pre_labels = clf.predict(X_test)
        clf = AdaBoostClassifier(n_estimators=100)
        clf = clf.fit(X_train, y_train)
        pre_labels = clf.predict(X_test)
        # Model evaluation
        ACC = metrics.accuracy_score(y_test, pre_labels)
        MCC = metrics.matthews_corrcoef(y_test, pre_labels)
        SN = self.performance(y_test, pre_labels)
        print(ACC, SN)
Example 12: fit_knn

def fit_knn(config):
    ### Prepare result holders ###
    values = {}
    results = {}
    monitors = {}
    E = {"config": config, "results": results, "monitors": monitors, "values": values}

    ### Print experiment header ###
    print_exp_name(config)

    ### Train ###
    monitors["acc_fold"] = []
    monitors["mcc_fold"] = []
    monitors["wac_fold"] = []
    monitors["cm"] = []  # confusion matrices
    results["mean_acc"] = 0
    results["mean_mcc"] = 0
    values["mean_cls"] = Y.mean()
    values["transformers"] = []
    for fold in D["folds"]:
        if config["use_embedding"] == 0:
            tr_id, ts_id = fold["train_id"], fold["test_id"]
            X_train, Y_train, X_test, Y_test = X[tr_id].todense(), Y[tr_id], X[ts_id].todense(), Y[ts_id]
        else:
            X_train, Y_train, X_test, Y_test = fold["X_train"], fold["Y_train"], fold["X_test"], fold["Y_test"]
        if config["use_embedding"] == 0:
            clf = KNeighborsClassifier(n_neighbors=config["KNN_K"], metric="jaccard")
            clf.fit(X_train, Y_train)
            Y_pred = clf.predict(X_test)
        else:  # Compare the similarity of the closest example, effectively K=1 for arbitrary K
            Y_pred = []
            for x in X_test:
                Y_pred.append(1 if x[-4] > x[-2] else -1)
            Y_pred = np.array(Y_pred)
        acc_fold, mcc_fold = accuracy_score(Y_test, Y_pred), matthews_corrcoef(Y_test, Y_pred)
        cm = confusion_matrix(Y_test, Y_pred)
        tp, fn, fp, tn = cm[1, 1], cm[1, 0], cm[0, 1], cm[0, 0]
        monitors["cm"].append(cm)
        monitors["wac_fold"].append(0.5 * tp / float(tp + fn) + 0.5 * tn / float(tn + fp))
        monitors["acc_fold"].append(acc_fold)
        monitors["mcc_fold"].append(mcc_fold)
    monitors["acc_fold"] = np.array(monitors["acc_fold"])
    monitors["mcc_fold"] = np.array(monitors["mcc_fold"])
    monitors["wac_fold"] = np.array(monitors["wac_fold"])
    results["mean_acc"] = monitors["acc_fold"].mean()
    results["mean_mcc"] = monitors["mcc_fold"].mean()
    results["mean_wac"] = monitors["wac_fold"].mean()
    logger.info(results)
    return E
Example 13: eval_mcc

def eval_mcc(y_true, y_prob, show=False):
    idx = np.argsort(y_prob)
    y_true = np.array(y_true, dtype=int)
    y_true_sort = y_true[idx]
    n = y_true.shape[0]
    nump = 1.0 * np.sum(y_true)  # number of positives
    numn = n - nump              # number of negatives
    # Start with every sample predicted positive, then raise the threshold one
    # sorted sample at a time, updating the confusion-matrix counts incrementally.
    tp = nump
    tn = 0.0
    fp = numn
    fn = 0.0
    best_mcc = 0.0
    best_id = -1
    mccs = np.zeros(n)
    for i in range(n):
        if y_true_sort[i] == 1:
            tp -= 1.0
            fn += 1.0
        else:
            fp -= 1.0
            tn += 1.0
        new_mcc = mcc(tp, tn, fp, fn)
        mccs[i] = new_mcc
        if new_mcc >= best_mcc:
            best_mcc = new_mcc
            best_id = i
    best_proba = y_prob[idx[best_id]]
    y_pred = (y_prob > best_proba).astype(int)
    final_mcc = matthews_corrcoef(y_true, y_pred)
    if show:
        return best_proba, final_mcc, y_pred
    else:
        return final_mcc
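eval_mcc calls an mcc(tp, tn, fp, fn) helper that is not shown in the snippet. A plausible stand-in, assuming the standard MCC definition with a zero-denominator guard:

import numpy as np

def mcc(tp, tn, fp, fn):
    # MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
    denom = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return 0.0 if denom == 0 else (tp * tn - fp * fn) / denom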
Example 14: run

def run(self):
    with LpcApocResultTask(self.tname,
                           self.qname,
                           self.subset).output().open('r') as f:
        apoc_parser = ApocResultParer(f.read())

    f = self.output().open('w')

    data = {}
    data['tname'] = self.tname
    data['qname'] = self.qname
    data['t pocket'] = LpcPocketPathTask(self.tname).output().path
    data['q pocket'] = LpcPocketPathTask(self.qname).output().path
    data['Apoc result'] = LpcApocResultTask(self.tname, self.qname, self.subset).output().path
    data['Kcombu result'] = LpcKcombuResult(self.tname, self.qname, self.subset).output().path

    kcombu_data = self._kcombu_results().data
    data['Kcombu tanimoto'] = kcombu_data.tanimoto

    t_coords, q_coords = self._select_ligand_atom_coords()

    global_alignment = apoc_parser.queryGlobal(self.tname, self.qname)
    data['seq identity'] = global_alignment.seq_identity

    pocket_alignment = apoc_parser.queryPocket(self.tname, self.qname)
    if pocket_alignment.has_pocket_alignment:
        t_prt_coords, t_prt_names = self._select_residues(self.tname,
                                                          pocket_alignment.template_chainid,
                                                          pocket_alignment.template_res)
        q_prt_coords, q_prt_names = self._select_residues(self.qname,
                                                          pocket_alignment.query_chainid,
                                                          pocket_alignment.query_res)
        try:
            assert(t_prt_names == q_prt_names)
        except AssertionError:
            raise AssertionError("%s and %s protein residues do not match" % (self.tname, self.qname))
        t_contact = buildArrayOfContact(t_prt_coords, t_coords)
        q_contact = buildArrayOfContact(q_prt_coords, q_coords)
        cms = matthews_corrcoef(t_contact, q_contact)
        data['# residues'] = len(pocket_alignment.template_res)
        data['# ligand atoms'] = len(t_coords)
        data['Apoc ps-score'] = pocket_alignment.ps_score
        data['Apoc p-value'] = pocket_alignment.p_value
        data['Apoc z-score'] = pocket_alignment.z_score
        data['# residue atoms'] = len(t_prt_coords)
        data['t contact'] = t_contact
        data['q contact'] = q_contact
        data['xcms'] = cms

    to_write = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
    f.write(to_write)
    f.close()
    print("xcms output %s" % (self.output().path))
Example 15: svc_amino

def svc_amino(X, y, score_type):
    """
    :param X: feature matrix
    :param y: labels
    :param score_type: "split" for a train/test split, "cross" for cross-validation
    """
    C = 70  # SVM regularization parameter
    rbf_svc = svm.SVC(kernel='rbf', gamma=0.07, C=C)
    if score_type == "split":
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
        rbf_svc.fit(X_train, y_train)
        y_score = np.array(rbf_svc.predict(X_test))
        y_test = np.array(y_test)
        tn = 0.0
        fp = 0.0
        for i in range(y_score.shape[0]):
            if y_test[i] == -1:
                if y_score[i] == -1:
                    tn = tn + 1
                else:
                    fp = fp + 1
        spec = tn / (tn + fp)
        print("sensitivity:")
        print(recall_score(y_test, y_score))
        print("specificity:")
        print(spec)
        print("accuracy:")
        print(accuracy_score(y_test, y_score))
        print("MCC:")
        print(matthews_corrcoef(y_test, y_score))
        return "ciao"
        # with binary encoding this reaches 0.78, with PSSM 0.80
    elif score_type == "cross":
        # sklearn.cross_validation is the pre-0.20 API (now sklearn.model_selection)
        scores = cross_validation.cross_val_score(rbf_svc, X, np.array(y), cv=5)
        return scores
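As an aside (not part of the original code), the manual tn/fp loop in svc_amino can be replaced by sklearn's confusion_matrix, which yields specificity directly for the -1/+1 labels used here:

from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(y_test, y_score, labels=[-1, 1]).ravel()
specificity = tn / (tn + fp)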