当前位置: 首页>>代码示例>>Python>>正文


Python cluster.normalized_mutual_info_score函数代码示例

本文整理汇总了Python中sklearn.metrics.cluster.normalized_mutual_info_score函数的典型用法代码示例。如果您正苦于以下问题:Python normalized_mutual_info_score函数的具体用法?Python normalized_mutual_info_score怎么用?Python normalized_mutual_info_score使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了normalized_mutual_info_score函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load

def load():
	"""Load a pickled autoencoder, cluster its encoded output, and log NMI scores.

	Relies on module-level globals set elsewhere in this script: dataFile,
	labelFile, FN, num_of_layers, num_of_output -- TODO confirm they are
	defined before this is called. Appends a one-line summary to 'auto_out.txt'.
	"""
	data = genfromtxt('../dataset/' + dataFile + '.csv', delimiter=',')
	label = genfromtxt('../dataset/' + labelFile + '.csv', delimiter=',')

	# Close the pickle file deterministically instead of leaking the handle.
	with open(FN, 'rb') as pkl_file:
		res = pickle.load(pkl_file)
	AE = res['autoencoder']
	encodedX = AE.encoder(AE.X)

	X = encodedX.data.numpy()
	#X = preprocessing.scale(encodedX.data.numpy())

	# Median heuristic for the RBF bandwidth used by spectral clustering.
	d_matrix = sklearn.metrics.pairwise.pairwise_distances(X, Y=None, metric='euclidean')
	s = np.median(d_matrix)
	Vgamma = 1 / (2 * s * s)
	spAlloc = SpectralClustering(2, gamma=Vgamma).fit_predict(X)
	nmi_sp = np.around(normalized_mutual_info_score(label, spAlloc), 3)

	kmAlloc = KMeans(2).fit_predict(X)
	nmi_km = np.around(normalized_mutual_info_score(label, kmAlloc), 3)

	# Parenthesized single-argument prints behave identically in Python 2 and 3.
	print(X)
	print(nmi_sp)
	print(nmi_km)
	print(res['loss'])
	#print res['autoencoder']

	txt = (dataFile + ' nmiSP : ' + str(nmi_sp) + ' , nmiKM : ' + str(nmi_km)
	       + ' , num_of_layers:' + str(num_of_layers)
	       + ' , num_of_output:' + str(num_of_output) + '\n')

	# 'with' guarantees the log file is flushed and closed even on error.
	with open('auto_out.txt', 'a') as log_file:
		log_file.write(txt)
开发者ID:juliaprocess,项目名称:ml_examples,代码行数:34,代码来源:autoencoder.py

示例2: test_exactly_zero_info_score

def test_exactly_zero_info_score():
    """Check numerical stability when information is exactly zero."""
    # Builtin int replaces the np.int alias, which was removed in NumPy 1.24
    # and crashes with AttributeError on modern NumPy.
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = np.ones(i, dtype=int), np.arange(i, dtype=int)
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
开发者ID:JinguoGao,项目名称:scikit-learn,代码行数:8,代码来源:test_supervised.py

示例3: test_single_linkage_clustering

def test_single_linkage_clustering():
    """Single-linkage agglomerative clustering recovers two emblematic datasets."""
    emblematic_cases = [
        make_moons(noise=0.05, random_state=42),
        make_circles(factor=0.5, noise=0.025, random_state=42),
    ]
    for features, true_labels in emblematic_cases:
        model = AgglomerativeClustering(n_clusters=2, linkage='single')
        model.fit(features)
        # Perfect agreement with the ground truth corresponds to NMI == 1.
        assert_almost_equal(
            normalized_mutual_info_score(model.labels_, true_labels), 1)
开发者ID:kevin-coder,项目名称:scikit-learn-fork,代码行数:14,代码来源:test_hierarchical.py

示例4: test_exactly_zero_info_score

def test_exactly_zero_info_score():
    # Check numerical stability when information is exactly zero
    # Builtin int replaces the np.int alias removed in NumPy 1.24.
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int),
                              np.arange(i, dtype=int))
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        # Pass the averaging scheme by keyword: in current scikit-learn the
        # third parameter is keyword-only and named average_method, so the
        # original positional call raises a TypeError.
        for method in ["min", "geometric", "arithmetic", "max"]:
            assert adjusted_mutual_info_score(labels_a, labels_b,
                                              average_method=method) == 0.0
            assert normalized_mutual_info_score(labels_a, labels_b,
                                                average_method=method) == 0.0
开发者ID:MartinThoma,项目名称:scikit-learn,代码行数:14,代码来源:test_supervised.py

示例5: __eval_lda_clustering

def __eval_lda_clustering(lda_model, mm_corpus, gold_labels):
    """Assign each document to its most probable LDA topic and score vs gold labels.

    Returns (nmi, purity, rand_index); `purity` and `rand_index` are helpers
    defined elsewhere in this module.
    """
    sys_labels = list()
    for i, doc in enumerate(mm_corpus):
        topic_dist = lda_model[doc]
        # Most probable topic; cluster 0 is kept when the distribution is
        # empty or all weights are zero (original behaviour).
        cluster_idx = 0
        max_dist = 0
        for topic_id, weight in topic_dist:
            if weight > max_dist:
                cluster_idx = topic_id
                max_dist = weight
        sys_labels.append(cluster_idx)
        # Progress indicator. Parenthesized print replaces the Python-2-only
        # print statement, which is a syntax error under Python 3.
        if len(sys_labels) % 5000 == 0:
            print(len(sys_labels))

    nmi_score = normalized_mutual_info_score(gold_labels, sys_labels)
    purity_score = purity(gold_labels, sys_labels)
    ri_score = rand_index(gold_labels, sys_labels)

    print('NMI: %f Purity: %f Rand index: %f' % (nmi_score, purity_score, ri_score))
    return nmi_score, purity_score, ri_score

示例6: evaluation

def evaluation(X_selected, n_clusters, y):
    """
    This function calculates ACC and NMI of clustering results

    Input
    -----
    X_selected: {numpy array}, shape (n_samples, n_selected_features}
            input data on the selected features
    n_clusters: {int}
            number of clusters
    y: {numpy array}, shape (n_samples,)
            true labels

    Output
    ------
    nmi: {float}
        Normalized Mutual Information
    acc: {float}
        Accuracy
    """
    # precompute_distances and n_jobs were removed from KMeans (scikit-learn
    # 1.0/1.2), so the original call raises TypeError on modern versions; the
    # remaining arguments reproduce the original configuration.
    k_means = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10,
                     max_iter=300, tol=0.0001, verbose=0,
                     random_state=None, copy_x=True)

    k_means.fit(X_selected)
    y_predict = k_means.labels_

    # calculate NMI
    nmi = normalized_mutual_info_score(y, y_predict)

    # calculate ACC: best_map (project helper) permutes predicted labels to
    # best match the ground truth before computing accuracy
    y_permuted_predict = best_map(y, y_predict)
    acc = accuracy_score(y, y_permuted_predict)

    return nmi, acc
开发者ID:Bekterra,项目名称:scikit-feature,代码行数:35,代码来源:unsupervised_evaluation.py

示例7: pairwise_MI

def pairwise_MI(data):
    """Build a symmetric matrix of pairwise normalized mutual information.

    For every pair of columns, rows with a NaN in either column are dropped
    before scoring. Returns a float DataFrame indexed by the columns.
    """
    cols = data.columns
    result = pd.DataFrame(index=cols, columns=cols)
    for left, right in combinations(cols, 2):
        pair = data[[left, right]].dropna()
        score = normalized_mutual_info_score(pair[left], pair[right])
        result.loc[left, right] = score
        result.loc[right, left] = score
    return result.astype(float)
开发者ID:IanEisenberg,项目名称:Self_Regulation_Ontology,代码行数:9,代码来源:graph_utils.py

示例8: calcNMI

def calcNMI():
	"""Read the clustered ARFF dataset and print NMI between class and cluster labels."""
	dataset = readARFF()  # project helper defined elsewhere in this file

	subSet = dataset[['class', 'cluster']]
	#print subSet

	NMI = normalized_mutual_info_score(subSet['class'], subSet['cluster'])
	# Parenthesized print replaces the Python-2-only print statement,
	# which is a syntax error under Python 3.
	print(NMI)
开发者ID:av-7,项目名称:Decision-Tree-Classifier-and-Clustering,代码行数:9,代码来源:external_evaluation_measures.py

示例9: main

def main():
	"""Compare two community assignments by NMI and log the score to a workbook.

	argv[1]/argv[2]: files whose first line is a dict literal {node: community}.
	argv[3]: data file path; its basename names the Excel log workbook.
	argv[4]: k-core value selecting the row written in the sheet.
	"""
	file1 = sys.argv[1]
	file2 = sys.argv[2]

	#read data from file
	# SECURITY: eval() executes arbitrary code from the input files; prefer
	# ast.literal_eval for data that is not fully trusted.
	with open(file1) as fd1, open(file2) as fd2:
		c_true = eval(fd1.readline())
		c_pred = eval(fd2.readline())

	#order the data in dictionary data structure so both label
	#lists line up element-wise by node id
	c_true_order = collections.OrderedDict(sorted(c_true.items()))
	c_pred_order = collections.OrderedDict(sorted(c_pred.items()))
	c_true_label = []
	c_pred_label = []
	print(c_true_order)
	#make list with community label
	for k, v in c_true_order.items():
		c_true_label.append(v)
	for k, v in c_pred_order.items():
		c_pred_label.append(v)

	simi = normalized_mutual_info_score(c_true_label, c_pred_label)

	DATA_FILE = sys.argv[3].split("/")
	FILE_LOG_NAME = "LOG_File_" + (DATA_FILE[-1]) + ".xlsx"
	Kcore_Value = int(sys.argv[4])

	if not os.path.exists(FILE_LOG_NAME):
		wb = openpyxl.Workbook()
		sheet = wb.active
		sheet.title = "Sheet1"
		sheet['A1'] = 'K/R Value'
		sheet['B1'] = 'NMI Similarity'
		# One pre-labelled row per 10% sampling step (A2..A11 = v=10%..v=100%).
		for step in range(1, 11):
			sheet['A%d' % (step + 1)] = 'v=%d%%' % (step * 10)
	else:
		wb = openpyxl.load_workbook(FILE_LOG_NAME)

	# wb[name] replaces the deprecated Workbook.get_sheet_by_name().
	sheet = wb['Sheet1']
	sheet['B' + str(Kcore_Value + 1)] = simi
	wb.save(FILE_LOG_NAME)
开发者ID:hoduan,项目名称:SU-Community-Detection,代码行数:52,代码来源:similarity.py

示例10: get_loss

def get_loss(ckernel_net, data_loader):
	#	Compute final average loss
	# NOTE(review): `data`, `target` and `loss` below are the leftovers of the
	# LAST loop iteration -- only the final batch is embedded and scored.
	# Looks intentional for a single-batch loader; confirm batch sizing.
	for idx, (data, target) in enumerate(data_loader):
		data = Variable(data.type(db['dataType']))  # db: module-level config dict (defined elsewhere)
		loss = ckernel_net.CAE_compute_loss(data)

	# Embed the last batch with the network, move to CPU numpy for KMeans.
	dataOut = ckernel_net(data)
	dataOut = dataOut.cpu().data.numpy()

	# Cluster the embedding into 10 groups and compare against true labels.
	allocation = KMeans(10).fit_predict(dataOut)
	nmi = normalized_mutual_info_score(allocation, target.numpy())
	# loss tensor -> Python scalar via [0] (pre-0.4 PyTorch Variable idiom).
	return [loss.cpu().data.numpy()[0], nmi]
开发者ID:juliaprocess,项目名称:ml_examples,代码行数:13,代码来源:mnist.py

示例11: __eval_lda_clustering_20ng

def __eval_lda_clustering_20ng():
    """End-to-end LDA clustering evaluation on the 20 newsgroups (bydate) split.

    Builds the MM corpus, trains the LDA model, assigns each test document to
    its most probable topic, then prints NMI and cluster accuracy against gold
    labels. Paths are hard-coded for the original author's machine.
    """
    text_doc_file = 'e:/dc/20ng_bydate/twe/docs-nl.txt'
    dict_file = 'e:/dc/20ng_bydate/lda/all-docs.dict'
    mm_file = 'e:/dc/20ng_bydate/lda/all-docs.mm'
    lda_model_file = 'e:/dc/20ng_bydate/lda/lda-model'

    dataset_label_file = 'e:/dc/20ng_bydate/doc_split_labels.bin'
    test_label_file = 'e:/dc/20ng_bydate/test_labels.bin'

    __text_file_to_mm_corpus(text_doc_file, dict_file, mm_file)

    __train_lda_model(dict_file, mm_file, lda_model_file)

    dataset_labels = ioutils.load_labels_file(dataset_label_file)
    lda_model = gensim.models.ldamodel.LdaModel.load(lda_model_file)
    mm_corpus = gensim.corpora.MmCorpus(mm_file)
    sys_labels = list()
    for i, doc in enumerate(mm_corpus):
        # presumably label 0 marks training documents -- evaluate the rest only
        if dataset_labels[i] == 0:
            continue

        topic_dist = lda_model[doc]
        # most probable topic; cluster 0 is kept for an empty distribution
        cluster_idx = 0
        max_dist = 0
        for topic_id, weight in topic_dist:
            if weight > max_dist:
                cluster_idx = topic_id
                max_dist = weight
        sys_labels.append(cluster_idx)
        # progress indicator; parenthesized print replaces the Python-2-only
        # print statement, which is a syntax error under Python 3
        if len(sys_labels) % 1000 == 0:
            print(len(sys_labels))
    print(len(sys_labels))
    gold_labels = ioutils.load_labels_file(test_label_file)
    print(len(gold_labels))
    print(normalized_mutual_info_score(gold_labels, sys_labels))
    print(cluster_accuracy(gold_labels, sys_labels))
开发者ID:hldai,项目名称:emadr-exp,代码行数:39,代码来源:lda.py

示例12: main

def main():
	"""Print the NMI between two community assignments read from files.

	argv[1]/argv[2]: files whose first line is a dict literal {node: label}.
	"""
	file1 = sys.argv[1]
	file2 = sys.argv[2]

	#read data from file
	# SECURITY: eval() executes arbitrary code from the input files; prefer
	# ast.literal_eval for data that is not fully trusted.
	with open(file1) as fd1, open(file2) as fd2:
		c_true = eval(fd1.readline())
		c_pred = eval(fd2.readline())

	#order the data in dictionary data structure so both label
	#lists line up element-wise by node id
	c_true_order = collections.OrderedDict(sorted(c_true.items()))
	c_pred_order = collections.OrderedDict(sorted(c_pred.items()))

	#make list with community label (values iterate in key-sorted order)
	c_true_label = list(c_true_order.values())
	c_pred_label = list(c_pred_order.values())

	# parenthesized print replaces the Python-2-only print statement,
	# which is a syntax error under Python 3
	print(normalized_mutual_info_score(c_true_label, c_pred_label))
开发者ID:AltmerX,项目名称:SUCD,代码行数:23,代码来源:similarity.py

示例13: test_v_measure_and_mutual_information

def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    # Builtin int replaces the np.int alias removed in NumPy 1.24.
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        # V-measure equals NMI with arithmetic averaging:
        # 2 * MI / (H(a) + H(b))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
        avg = 'arithmetic'
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            normalized_mutual_info_score(labels_a, labels_b,
                                                         average_method=avg)
                            )
开发者ID:MartinThoma,项目名称:scikit-learn,代码行数:14,代码来源:test_supervised.py

示例14: evaluate

	def evaluate( self, partition, clustered_ids ):
		"""Score a partition against the known class labels.

		Returns a dict with external NMI / AMI / ARI scores, or an empty
		dict when no class information is available.
		"""
		# no class info?
		if not self.has_class_info():
			return {}
		# Map every clustered document id to its gold class label; float
		# array mirrors the np.zeros(n) buffer the labels were written into.
		classes_subset = np.array(
			[self.class_map[doc_id] for doc_id in clustered_ids], dtype=float )
		return {
			"external-nmi" : normalized_mutual_info_score( classes_subset, partition ),
			"external-ami" : adjusted_mutual_info_score( classes_subset, partition ),
			"external-ari" : adjusted_rand_score( classes_subset, partition ),
		}
开发者ID:duongtrung,项目名称:topic-stability,代码行数:14,代码来源:validation.py

示例15: sklearn_measures

def sklearn_measures(U, V):
    """Compare two one-hot membership matrices with sklearn clustering metrics.

    U, V: arrays whose nonzero column index per row encodes the cluster label.
    Returns [['ari', 'nmi', 'ami', 'vm'], [score, score, score, score]].
    http://scikit-learn.org/stable/modules/classes.html#clustering-metrics
    """
    import sklearn.metrics.cluster as sym
    # Row-wise cluster label = column index of the nonzero entry.
    U_labels = np.nonzero(U)[1]
    V_labels = np.nonzero(V)[1]
    # print() with multiple arguments emits the same space-separated output
    # as the Python-2-only comma print statement, which is a syntax error
    # under Python 3.
    print(U_labels, V_labels)
#     V2_labels = np.nonzero(V2)[1]
    print('entro(U)=', sym.entropy(U_labels), 'entro(V)=', sym.entropy(V_labels),
          'entro(U,V)=', sym.mutual_info_score(U_labels, V_labels))
    res = [['ari', 'nmi', 'ami', 'vm'],
           [sym.adjusted_rand_score(U_labels, V_labels),
            sym.normalized_mutual_info_score(U_labels, V_labels),
            sym.adjusted_mutual_info_score(U_labels, V_labels),
            sym.v_measure_score(U_labels, V_labels)]]
    print(res)
    return res


注:本文中的sklearn.metrics.cluster.normalized_mutual_info_score函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。