This article collects typical usage examples of the Python function sklearn.metrics.cluster.normalized_mutual_info_score. If you have been wondering what normalized_mutual_info_score does, how to call it, and what real usage looks like, the curated examples below should help.
The following 15 code examples of normalized_mutual_info_score are shown, sorted by popularity by default.
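Before the examples, here is a minimal self-contained sketch of the function itself (the label vectors below are made up for illustration): normalized_mutual_info_score takes two flat label assignments, is symmetric in its arguments, and ignores the actual label values; only the induced partitions matter.

from sklearn.metrics.cluster import normalized_mutual_info_score

labels_true = [0, 0, 1, 1, 2, 2]
# Same partition under different label names: NMI is permutation-invariant.
print(normalized_mutual_info_score(labels_true, [5, 5, 3, 3, 7, 7]))  # 1.0
# A partition that mixes the true clusters scores below 1.0.
print(normalized_mutual_info_score(labels_true, [0, 1, 1, 2, 2, 0]))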
Example 1: load
def load():
    data = genfromtxt('../dataset/' + dataFile + '.csv', delimiter=',')
    label = genfromtxt('../dataset/' + labelFile + '.csv', delimiter=',')
    res = pickle.load(open(FN, 'rb'))
    AE = res['autoencoder']
    encodedX = AE.encoder(AE.X)
    X = encodedX.data.numpy()
    #X = preprocessing.scale(encodedX.data.numpy())
    d_matrix = sklearn.metrics.pairwise.pairwise_distances(X, Y=None, metric='euclidean')
    s = np.median(d_matrix)
    Vgamma = 1 / (2 * s * s)
    spAlloc = SpectralClustering(2, gamma=Vgamma).fit_predict(X)
    nmi_sp = np.around(normalized_mutual_info_score(label, spAlloc), 3)
    kmAlloc = KMeans(2).fit_predict(X)
    nmi_km = np.around(normalized_mutual_info_score(label, kmAlloc), 3)
    print(X)
    print(nmi_sp)
    print(nmi_km)
    print(res['loss'])
    #print(res['autoencoder'])
    txt = (dataFile + ' nmiSP : ' + str(nmi_sp) + ' , nmiKM : ' + str(nmi_km)
           + ' , num_of_layers:' + str(num_of_layers)
           + ' , num_of_output:' + str(num_of_output) + '\n')
    # append this run's scores to a log file
    fout = open('auto_out.txt', 'a')
    fout.write(txt)
    fout.close()
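Example 1 picks the RBF width for SpectralClustering with the common median-distance heuristic (Vgamma = 1/(2*s^2), with s the median pairwise Euclidean distance). A self-contained sketch of just that heuristic on synthetic data (the dataset and sizes below are made up for illustration):

import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances
from sklearn.metrics.cluster import normalized_mutual_info_score

X, y = make_blobs(n_samples=100, centers=2, random_state=0)
# Median heuristic: set the RBF bandwidth from the median pairwise distance.
s = np.median(pairwise_distances(X))
gamma = 1 / (2 * s * s)
pred = SpectralClustering(n_clusters=2, gamma=gamma).fit_predict(X)
print(normalized_mutual_info_score(y, pred))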
Example 2: test_exactly_zero_info_score
def test_exactly_zero_info_score():
    """Check numerical stability when information is exactly zero"""
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = np.ones(i, dtype=int), np.arange(i, dtype=int)
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
Example 3: test_single_linkage_clustering
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     moon_labels), 1)

    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     circle_labels), 1)
Example 4: test_exactly_zero_info_score
def test_exactly_zero_info_score():
    # Check numerical stability when information is exactly zero
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int),
                              np.arange(i, dtype=int))
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        for method in ["min", "geometric", "arithmetic", "max"]:
            assert adjusted_mutual_info_score(labels_a, labels_b,
                                              average_method=method) == 0.0
            assert normalized_mutual_info_score(labels_a, labels_b,
                                                average_method=method) == 0.0
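Examples 2 and 4 are two generations of the same scikit-learn test; the newer one also sweeps average_method, the parameter (added in scikit-learn 0.20, default 'arithmetic' since 0.22) that chooses which mean of the two label entropies normalizes the mutual information. A small sketch on non-degenerate labels (values made up for illustration):

from sklearn.metrics.cluster import normalized_mutual_info_score

labels_a = [0, 0, 1, 1, 2, 2]
labels_b = [0, 1, 1, 2, 2, 2]
for method in ["min", "geometric", "arithmetic", "max"]:
    # Each method normalizes MI by a different mean of H(labels_a) and H(labels_b).
    score = normalized_mutual_info_score(labels_a, labels_b, average_method=method)
    print(method, round(score, 3))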
Example 5: __eval_lda_clustering
def __eval_lda_clustering(lda_model, mm_corpus, gold_labels):
    # lda_model = gensim.models.ldamodel.LdaModel.load(model_file)
    sys_labels = list()
    for i, doc in enumerate(mm_corpus):
        topic_dist = lda_model[doc]
        # print(topic_dist)
        # assign each document to its highest-probability topic
        cluster_idx = 0
        max_dist = 0
        for tup in topic_dist:
            if tup[1] > max_dist:
                cluster_idx = tup[0]
                max_dist = tup[1]
        sys_labels.append(cluster_idx)
        if len(sys_labels) % 5000 == 0:
            print(len(sys_labels))
        # if i > 10:
        #     break
    # print(len(sys_labels))
    # print(len(gold_labels))
    nmi_score = normalized_mutual_info_score(gold_labels, sys_labels)
    purity_score = purity(gold_labels, sys_labels)
    ri_score = rand_index(gold_labels, sys_labels)
    # print('NMI: %f' % normalized_mutual_info_score(gold_labels, sys_labels))
    # print('Purity: %f' % purity(gold_labels, sys_labels))
    # print('Accuracy: %f' % cluster_accuracy(gold_labels, sys_labels))
    print('NMI: %f Purity: %f Rand index: %f' % (nmi_score, purity_score, ri_score))
    return nmi_score, purity_score, ri_score
Example 6: evaluation
def evaluation(X_selected, n_clusters, y):
    """
    This function calculates ACC and NMI of clustering results.

    Input
    -----
    X_selected: {numpy array}, shape (n_samples, n_selected_features)
        input data on the selected features
    n_clusters: {int}
        number of clusters
    y: {numpy array}, shape (n_samples,)
        true labels

    Output
    ------
    nmi: {float}
        Normalized Mutual Information
    acc: {float}
        Accuracy
    """
    k_means = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10,
                     max_iter=300, tol=0.0001, random_state=None, copy_x=True)
    k_means.fit(X_selected)
    y_predict = k_means.labels_
    # calculate NMI
    nmi = normalized_mutual_info_score(y, y_predict)
    # calculate ACC
    y_permuted_predict = best_map(y, y_predict)
    acc = accuracy_score(y, y_permuted_predict)
    return nmi, acc
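Example 6 relies on a best_map helper that is not shown; presumably it permutes the predicted cluster labels to best match the ground truth before accuracy is computed. A minimal sketch of such a helper using SciPy's Hungarian solver — an illustrative stand-in, not the example's actual implementation:

import numpy as np
from scipy.optimize import linear_sum_assignment

def best_map(y_true, y_pred):
    """Relabel y_pred to maximize agreement with y_true (Hungarian algorithm).
    Assumes the numbers of predicted clusters and true classes match."""
    true_ids = np.unique(y_true)
    pred_ids = np.unique(y_pred)
    # overlap[i, j] = samples shared by predicted cluster i and true class j
    overlap = np.zeros((len(pred_ids), len(true_ids)), dtype=int)
    for i, p in enumerate(pred_ids):
        for j, t in enumerate(true_ids):
            overlap[i, j] = np.sum((y_pred == p) & (y_true == t))
    row_ind, col_ind = linear_sum_assignment(-overlap)  # maximize total overlap
    mapping = {pred_ids[r]: true_ids[c] for r, c in zip(row_ind, col_ind)}
    return np.array([mapping[p] for p in y_pred])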
Example 7: pairwise_MI
def pairwise_MI(data):
    columns = data.columns
    MI_df = pd.DataFrame(index=columns, columns=columns)
    for c1, c2 in combinations(columns, 2):
        cleaned = data[[c1, c2]].dropna()
        MI = normalized_mutual_info_score(cleaned[c1], cleaned[c2])
        MI_df.loc[c1, c2] = MI
        MI_df.loc[c2, c1] = MI
    return MI_df.astype(float)
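A quick usage sketch for pairwise_MI (the DataFrame is made up for illustration). Because each column is passed to normalized_mutual_info_score as a discrete labeling, the function is meant for categorical columns:

import pandas as pd

df = pd.DataFrame({
    'a': [0, 0, 1, 1, 2, 2],
    'b': [1, 1, 0, 0, 2, 2],  # a relabeling of 'a', so NMI with 'a' is 1.0
    'c': [0, 1, 0, 1, 0, 1],
})
print(pairwise_MI(df))  # symmetric matrix of NMI values, NaN on the diagonal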
Example 8: calcNMI
def calcNMI():
    dataset = readARFF()
    subSet = dataset[['class', 'cluster']]
    #print(subSet)
    NMI = normalized_mutual_info_score(subSet['class'], subSet['cluster'])
    print(NMI)
Example 9: main
def main():
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    c_true = {}
    c_pred = {}
    # read data from file
    with open(file1) as fd1, open(file2) as fd2:
        c_true = eval(fd1.readline())
        c_pred = eval(fd2.readline())
    # order the data in dictionary data structure
    c_true_order = collections.OrderedDict(sorted(c_true.items()))
    c_pred_order = collections.OrderedDict(sorted(c_pred.items()))
    c_true_label = []
    c_pred_label = []
    print(c_true_order)
    # make list with community label
    for k, v in c_true_order.items():
        c_true_label.append(v)
    for k, v in c_pred_order.items():
        c_pred_label.append(v)
    simi = normalized_mutual_info_score(c_true_label, c_pred_label)
    DATA_FILE = sys.argv[3].split("/")
    FILE_LOG_NAME = "LOG_File_" + (DATA_FILE[-1]) + ".xlsx"
    Kcore_Value = int(sys.argv[4])
    if not os.path.exists(FILE_LOG_NAME):
        wb = openpyxl.Workbook()
        sheet = wb.active
        sheet.title = "Sheet1"
        sheet['A1'] = 'K/R Value'
        sheet['B1'] = 'NMI Similarity'
        sheet['A2'] = 'v=10%'
        sheet['A3'] = 'v=20%'
        sheet['A4'] = 'v=30%'
        sheet['A5'] = 'v=40%'
        sheet['A6'] = 'v=50%'
        sheet['A7'] = 'v=60%'
        sheet['A8'] = 'v=70%'
        sheet['A9'] = 'v=80%'
        sheet['A10'] = 'v=90%'
        sheet['A11'] = 'v=100%'
    else:
        wb = openpyxl.load_workbook(FILE_LOG_NAME)
        sheet = wb['Sheet1']
    sheet['B' + str(Kcore_Value + 1)] = simi
    wb.save(FILE_LOG_NAME)
Example 10: get_loss
def get_loss(ckernel_net, data_loader):
    # Compute final average loss
    for idx, (data, target) in enumerate(data_loader):
        data = Variable(data.type(db['dataType']))
        loss = ckernel_net.CAE_compute_loss(data)
        dataOut = ckernel_net(data)
        dataOut = dataOut.cpu().data.numpy()
        allocation = KMeans(10).fit_predict(dataOut)
        nmi = normalized_mutual_info_score(allocation, target.numpy())
        return [loss.cpu().data.numpy()[0], nmi]
Example 11: __eval_lda_clustering_20ng
def __eval_lda_clustering_20ng():
    text_doc_file = 'e:/dc/20ng_bydate/twe/docs-nl.txt'
    dict_file = 'e:/dc/20ng_bydate/lda/all-docs.dict'
    mm_file = 'e:/dc/20ng_bydate/lda/all-docs.mm'
    lda_model_file = 'e:/dc/20ng_bydate/lda/lda-model'
    dataset_label_file = 'e:/dc/20ng_bydate/doc_split_labels.bin'
    test_label_file = 'e:/dc/20ng_bydate/test_labels.bin'

    __text_file_to_mm_corpus(text_doc_file, dict_file, mm_file)
    __train_lda_model(dict_file, mm_file, lda_model_file)

    dataset_labels = ioutils.load_labels_file(dataset_label_file)
    lda_model = gensim.models.ldamodel.LdaModel.load(lda_model_file)
    mm_corpus = gensim.corpora.MmCorpus(mm_file)
    sys_labels = list()
    for i, doc in enumerate(mm_corpus):
        if dataset_labels[i] == 0:
            continue
        topic_dist = lda_model[doc]
        # print(topic_dist)
        # assign each document to its highest-probability topic
        cluster_idx = 0
        max_dist = 0
        for tup in topic_dist:
            if tup[1] > max_dist:
                cluster_idx = tup[0]
                max_dist = tup[1]
        sys_labels.append(cluster_idx)
        if len(sys_labels) % 1000 == 0:
            print(len(sys_labels))
        # if i > 10:
        #     break
    print(len(sys_labels))

    gold_labels = ioutils.load_labels_file(test_label_file)
    print(len(gold_labels))
    print(normalized_mutual_info_score(gold_labels, sys_labels))
    print(cluster_accuracy(gold_labels, sys_labels))
Example 12: main
def main():
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    c_true = {}
    c_pred = {}
    # read data from file
    with open(file1) as fd1, open(file2) as fd2:
        c_true = eval(fd1.readline())
        c_pred = eval(fd2.readline())
    # order the data in dictionary data structure
    c_true_order = collections.OrderedDict(sorted(c_true.items()))
    c_pred_order = collections.OrderedDict(sorted(c_pred.items()))
    c_true_label = []
    c_pred_label = []
    # make list with community label
    for k, v in c_true_order.items():
        c_true_label.append(v)
    for k, v in c_pred_order.items():
        c_pred_label.append(v)
    print(normalized_mutual_info_score(c_true_label, c_pred_label))
Example 13: test_v_measure_and_mutual_information
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
        avg = 'arithmetic'
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            normalized_mutual_info_score(labels_a, labels_b,
                                                         average_method=avg)
                            )
Example 14: evaluate
def evaluate(self, partition, clustered_ids):
    # no class info?
    if not self.has_class_info():
        return {}
    # get two clusterings that we can compare
    n = len(clustered_ids)
    classes_subset = np.zeros(n)
    for row in range(n):
        classes_subset[row] = self.class_map[clustered_ids[row]]
    scores = {}
    scores["external-nmi"] = normalized_mutual_info_score(classes_subset, partition)
    scores["external-ami"] = adjusted_mutual_info_score(classes_subset, partition)
    scores["external-ari"] = adjusted_rand_score(classes_subset, partition)
    return scores
Example 15: sklearn_measures
def sklearn_measures(U, V):
    # http://scikit-learn.org/stable/modules/classes.html#clustering-metrics
    import sklearn.metrics.cluster as sym
    U_labels = np.nonzero(U)[1]
    V_labels = np.nonzero(V)[1]
    print(U_labels, V_labels)
    # V2_labels = np.nonzero(V2)[1]
    print('entro(U)=', sym.entropy(U_labels), 'entro(V)=', sym.entropy(V_labels),
          'entro(U,V)=', sym.mutual_info_score(U_labels, V_labels))
    res = [['ari', 'nmi', 'ami', 'vm'],
           [sym.adjusted_rand_score(U_labels, V_labels),
            sym.normalized_mutual_info_score(U_labels, V_labels),
            sym.adjusted_mutual_info_score(U_labels, V_labels),
            sym.v_measure_score(U_labels, V_labels)]]
    print(res)
    return res
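Example 15 expects U and V as one-hot membership matrices: np.nonzero(U)[1] reads off the column index, i.e. the cluster id, of each row. A usage sketch with made-up matrices:

import numpy as np

# Three samples, two clusters; row i has a 1 in the column of its cluster.
U = np.array([[1, 0],
              [1, 0],
              [0, 1]])
V = np.array([[0, 1],
              [0, 1],
              [1, 0]])  # the same partition with cluster ids swapped
sklearn_measures(U, V)  # ari, nmi, ami and vm are all 1.0 here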