

Python metrics.v_measure_score Function Code Examples

This article collects typical usage examples of the Python function sklearn.metrics.v_measure_score. If you are wondering what exactly v_measure_score does, how to call it, or where to find real-world examples of it in use, the curated code examples below should help.


The 15 code examples of v_measure_score below are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
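Before turning to the project code, here is a minimal, self-contained sketch of how v_measure_score is called. It is not taken from any project below; the label arrays are invented purely for illustration:

from sklearn import metrics

# Ground-truth class labels and the labels assigned by some clustering run.
labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [1, 1, 0, 0, 2, 2]

# v_measure_score(labels_true, labels_pred) returns a value in [0, 1].
# It is symmetric and invariant to permutations of the cluster ids, so the
# renamed-but-identical partition above still scores a perfect 1.0.
print('V-measure: %.3f' % metrics.v_measure_score(labels_true, labels_pred))

The same call pattern, metrics.v_measure_score(true_labels, predicted_labels), recurs throughout the examples below.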

Example 1: kmeans

def kmeans(input_file, n_clusters, Output):
    # Assumes module-level imports: numpy as np, matplotlib.pyplot as plt,
    # from sklearn import cluster, metrics, plus the project's own tools and
    # lvltrace helper modules.
    lvltrace.lvltrace("LVLEntree dans kmeans unsupervised")
    ncol = tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:, 1:]
    y = data[:, 0]
    sample_size, n_features = X.shape
    k_means = cluster.KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    k_means.fit(X)
    reduced_data = k_means.transform(X)  # distances to each center (unused below)
    labels = k_means.labels_
    k_means_cluster_centers = k_means.cluster_centers_
    print("#########################################################################################################\n")
    print("K-MEANS\n")
    print('homogeneity_score: %f' % metrics.homogeneity_score(y, labels))
    print('completeness_score: %f' % metrics.completeness_score(y, labels))
    print('v_measure_score: %f' % metrics.v_measure_score(y, labels))
    print('adjusted_rand_score: %f' % metrics.adjusted_rand_score(y, labels))
    print('adjusted_mutual_info_score: %f' % metrics.adjusted_mutual_info_score(y, labels))
    print('silhouette_score: %f' % metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    print('\n')
    print("#########################################################################################################\n")
    results = Output + "kmeans_scores.txt"
    with open(results, "w") as f:
        f.write("K-Means Scores\n")
        f.write("Homogeneity Score: %f\n" % metrics.homogeneity_score(y, labels))
        f.write("Completeness Score: %f\n" % metrics.completeness_score(y, labels))
        f.write("V-Measure: %f\n" % metrics.v_measure_score(y, labels))
        f.write("The adjusted Rand index: %f\n" % metrics.adjusted_rand_score(y, labels))
        f.write("Adjusted Mutual Information: %f\n" % metrics.adjusted_mutual_info_score(y, labels))
        f.write("Silhouette Score: %f\n" % metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
        f.write("\n")
        f.write("True Value, Cluster numbers, Iteration\n")
        for n in range(len(y)):
            f.write("%f, %f, %i\n" % (y[n], labels[n], n + 1))
    # Plot the points colored by cluster label, with the cluster centers marked.
    fig, ax = plt.subplots()
    im = ax.scatter(X[:, 0], X[:, 1], c=labels, marker='.')
    for k in range(n_clusters):
        cluster_center = k_means_cluster_centers[k]
        ax.plot(cluster_center[0], cluster_center[1], color='b',
                marker='x', markersize=6)
    fig.colorbar(im)
    plt.title("Number of clusters: %i" % n_clusters)
    save = Output + "kmeans.png"
    plt.savefig(save)
    lvltrace.lvltrace("LVLsortie dans kmeans unsupervised")
Author: xaviervasques, Project: Neuron_Morpho_Classification_ML, Lines of code: 55, Source: unsupervised.py

Example 2: main

def main():
    ''' docstring for main '''

    args = parse_args()

    setup_logging(verbose = args.verbose)

    records = consume_fasta(args.fasta_file)

    # setup Hasher, Vectorizer and Classifier

    hasher = HashingVectorizer(analyzer='char',
                               n_features = 2 ** 18,
                               ngram_range=(args.ngram_min, args.ngram_max),
                               )

    logging.info(hasher)

    encoder, classes = get_classes(records, args.tax_level)
    n_clusters = len(classes)

    logging.info('using taxonomic level %s' % args.tax_level)
    logging.info('Using %s clusters' % n_clusters)

    classifier = MiniBatchKMeans(n_clusters = n_clusters)

    records = records[0:args.n_iters]

    chunk_generator = iter_chunk(records, args.chunk_size, args.tax_level)

    logging.info('ngram range: [%s-%s]' % (args.ngram_min, args.ngram_max))

    for labels, features in chunk_generator:

        logging.info('transforming training chunk')
        labels = encoder.transform(labels)
        vectors = hasher.transform(features)

        logging.info('fitting training chunk')
        classifier.partial_fit(vectors)

        pred_labels = classifier.predict(vectors)

        score = v_measure_score(labels, pred_labels)
        # Baseline: shuffle the predicted labels (random.sample) to see how
        # much of the score survives under a random assignment.
        shuffled_score = v_measure_score(labels, sample(pred_labels, len(pred_labels)))

        logging.info('score: %.2f' % (score))
        logging.info('shuffled score: %.2f' % (shuffled_score))
Author: audy, Project: bfc, Lines of code: 48, Source: bfc.py

Example 3: bench_k_means

def bench_k_means(estimator, name, data, target_labels, sample_size):
  """For benchmarking K-Means estimators. Prints different clustering metrics and train accuracy
  ARGS
    estimator: K-Means clustering algorithm <sklearn.cluster.KMeans>
    name: estimator name <str>
    data: array-like or sparse matrix, shape=(n_samples, n_features)
    target_labels: labels of data points <number array>
    sample_size: size of the sample to use when computing the Silhouette Coefficient <int>
  """ 
  t0 = time()
  estimator.fit(data)

  _, _, train_accuracy = compute_residuals_and_rsquared(estimator.labels_, target_labels)

  print('% 9s\t%.2fs\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
        % (name, (time() - t0), estimator.inertia_,
           metrics.homogeneity_score(target_labels, estimator.labels_),
           metrics.completeness_score(target_labels, estimator.labels_),
           metrics.v_measure_score(target_labels, estimator.labels_),
           metrics.adjusted_rand_score(target_labels, estimator.labels_),
           metrics.adjusted_mutual_info_score(target_labels,  estimator.labels_),
           metrics.silhouette_score(data, estimator.labels_,metric='euclidean',sample_size=sample_size),
           train_accuracy
          )
        )
Author: rafaelvalle, Project: machine_listening, Lines of code: 25, Source: knn_engine.py

Example 4: main

def main(argv):
    file_vectors, clust_type, clusters, distance, cluster_param, std = get_arguments(argv)
    fname = '.'.join(map(str, [file_vectors.split('/')[-1], clust_type, clusters, distance, cluster_param, std]))
    writer = open(fname, 'w')  # better to put in EX1, EX2, .. folders
    print('clustering:', clust_type)
    print('clusters:', clusters)
    print('cluster_param:', cluster_param)
    print('std:', std)

    X, words, truth = load_data(file_vectors, True)
    X = np.array(X)

    if clust_type == 'affin':
        labels = affin_sclustering(X, n_clust=int(clusters), distance=distance,
                                   gamma=float(cluster_param), std=bool(std))
    else:
        labels = knn_sclustering(X, n_clust=int(clusters), k=int(cluster_param))

    writer.write('\nVMeas:' + str(v_measure_score(truth, labels)))
    writer.write('\nRand:' + str(adjusted_rand_score(truth, labels)))
    writer.write('\nHomogen:' + str(homogeneity_score(truth, labels)) + '\n')

    for i, word in enumerate(words):
        writer.write(word + ' : ' + str(labels[i]) + '\n')
    writer.close()
Author: ivri, Project: DiffVec, Lines of code: 26, Source: cluster.py

Example 5: my_clustering

def my_clustering(X, y, n_clusters, pca):
    # =======================================
    # Complete the code here.
    # return scores like this: return [score, score, score, score]
    # =======================================
    from sklearn.cluster import KMeans
    clf = KMeans(n_clusters)
    clf.fit(X)

    from sklearn import metrics
    ari = metrics.adjusted_rand_score(y, clf.labels_)
    ami = metrics.adjusted_mutual_info_score(y, clf.labels_)
    v_measure = metrics.v_measure_score(y, clf.labels_)
    # silhouette_score can also be estimated on a subsample, e.g.
    # metrics.silhouette_score(X, clf.labels_, metric='euclidean', sample_size=300)
    silhouette_coeff = metrics.silhouette_score(X, clf.labels_)

    show_images(n_clusters, clf, pca)

    return [ari, ami, v_measure, silhouette_coeff]
Author: kevguy, Project: CSCI3320Asg3, Lines of code: 26, Source: ex2.py

Example 6: bench_k_means

def bench_k_means(estimator, data, labels):
    t0 = time()
    estimator.fit(data)
    print("time to fit: {:.5}".format(time() - t0))
    homogeneity = metrics.homogeneity_score(labels, estimator.labels_)
    completeness = metrics.completeness_score(labels, estimator.labels_)
    v_measure = metrics.v_measure_score(labels, estimator.labels_)
    print("homogenity {:.5}, completeness {:.5}, v_measure_score {:.5}".format(
        homogenity, completeness, v_measure)
    )

    adj_rand_score = metrics.adjusted_rand_score(
        labels, estimator.labels_
    )
    print("adjusted_rand_score {:.5}".format(adj_rand_score))

    adj_mutual_info_score = metrics.adjusted_mutual_info_score(
        labels,  estimator.labels_
    )
    print("adjusted_mutual_info_score {:.5}".format(
        adj_mutual_info_score)
    )

    silhouette_score = metrics.silhouette_score(
        data, estimator.labels_, metric='euclidean'
    )
    print("silhouette_score {:.5}".format(silhouette_score))

    return [
        homogeneity, completeness, v_measure, adj_rand_score,
        adj_mutual_info_score, silhouette_score
    ]
Author: all3fox, Project: term_paper, Lines of code: 35, Source: rules_cluster.py

Example 7: cluster

def cluster(Z, K=4, algo='kmeans'):
	descr = Z.columns
	X = Imputer().fit_transform(Z)

	##############################################################################
	if algo == 'dbscan':
		# Compute DBSCAN. Note: labels_true (the ground-truth labels) is not
		# defined in this snippet; it must be available in the enclosing scope
		# for the supervised metrics printed below.
		db = DBSCAN(eps=0.3, min_samples=10).fit(X)
		core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
		core_samples_mask[db.core_sample_indices_] = True
		labels = db.labels_
        
		# Number of clusters in labels, ignoring noise if present.
		n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        
		print('Estimated number of clusters: %d' % n_clusters_)
		print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
		print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
		print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
		print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
		print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
		print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))
	
	elif algo == 'kmeans':
		km = KMeans(n_clusters=K)
		km.fit(X)
		print(km.labels_)
		return km
Author: orichardson, Project: mcm2016, Lines of code: 28, Source: cluster_vi.py

Example 8: bench_k_means

def bench_k_means(estimator, name, data, sample_size, labels,postIds):
    data = sparse.csr_matrix(data)
    print("Performing dimensionality reduction using LSA")
    t0 = time()
    lsa = TruncatedSVD(500)

    data = lsa.fit_transform(data)
    data = Normalizer(copy=False).fit_transform(data)

    print("done in %fs" % (time() - t0))
    print()

    estimator.fit(data)
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f '
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels,  estimator.labels_)))

    print("Parsing USer File:")
    parseUserFile()
    print("extracting User File:")
    clusterDict=extractCluster(postIds,estimator.labels_)
    print("writing Cluster Data to File")
    writeCluterToFile(clusterDict)
Author: ashwini-mnnit, Project: UnstackExperts, Lines of code: 29, Source: tag_body_clustring.py

Example 9: clustering_by_kmeans

def clustering_by_kmeans(vectorizer, X, true_k):
    print("Clustering in " + str(true_k) + " groups by K-means...")
    km = KMeans(n_clusters=true_k, init='k-means++', max_iter=500, n_init=1)
    km.fit_predict(X)

    print("Measuring...")

    print("Homogeneity: %0.3f" % metrics.homogeneity_score(documents, km.labels_))
    print("Completeness: %0.3f" % metrics.completeness_score(documents, km.labels_))
    # V-measure is an entropy-based measure which explicitly measures how
    # successfully the criteria of homogeneity and completeness have been satisfied.
    print("V-measure: %0.3f" % metrics.v_measure_score(documents, km.labels_))
    print("Adjusted Rand-Index: %.3f" % metrics.adjusted_rand_score(documents, km.labels_))
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, km.labels_, sample_size=1000))

    clusters = km.labels_.tolist()  # cluster id per term: 0 if the term is in cluster 0, 1 if in cluster 1, ...
    print("Total of " + str(len(km.labels_)) + " documents")

    # Example of how to get all documents in cluster 0:
    # cluster_0 = np.where(clusters == 0)  # don't forget import numpy as np
    # cluster_0 then contains all indices of the documents in that cluster;
    # to get the actual documents you'd do: X_cluster_0 = documents[cluster_0]
    terms = vectorizer.get_feature_names()

    measuring_kmeans(true_k, clusters)
Author: eubr-bigsea, Project: Tweets-cluster, Lines of code: 27, Source: tf_idf.py

Example 10: bestClassify

def bestClassify(X,Y):
	"Best classifier function"
	tfidf = True

	if tfidf:
		vec = TfidfVectorizer(preprocessor = identity,
							tokenizer = identity, sublinear_tf = True)
	else:
		vec = CountVectorizer(preprocessor = identity,
							tokenizer = identity)

	km = KMeans(n_clusters=2, n_init=100, verbose=1)
	clusterer = Pipeline( [('vec', vec),
								('cls', km)] )

	prediction = clusterer.fit_predict(X,Y)

	checker = defaultdict(list)
	for pred,truth in zip(prediction,Y):
		checker[pred].append(truth)

	labeldict = {}
	for pred, label in checker.items():
		labeldict[pred] = Counter(label).most_common(1)[0][0]
		#print(pred, Counter(label).most_common(1)[0][0])

	prediction = [labeldict[p] for p in prediction]
	labels = list(labeldict.values())
	print(labels)
	print(confusion_matrix(Y, prediction, labels=labels))

	print("Homogeneity:", homogeneity_score(Y,prediction))
	print("Completeness:", completeness_score(Y,prediction))
	print("V-measure:", v_measure_score(Y,prediction))
	print("Rand-Index:", adjusted_rand_score(Y,prediction))
Author: Martbov, Project: LearningFromData, Lines of code: 35, Source: LFDassignment5_KMBinary_Mart.py

Example 11: compute_metrics

def compute_metrics(answers, predictions):
    aris = []
    vscores = []
    fscores = []
    weights = []
    for k in answers.keys():
        idx = np.argsort(np.array(answers[k][0]))
        true = np.array(answers[k][1])[idx]
        pred = np.array(predictions[k][1])
        weights.append(pred.shape[0])
        if len(np.unique(true)) > 1:
            aris.append(adjusted_rand_score(true, pred))
        vscores.append(v_measure_score(true, pred))
        fscores.append(compute_fscore(true, pred))
#        print('%s: ari=%f, vscore=%f, fscore=%f' % (k, aris[-1], vscores[-1], fscores[-1]))
    aris = np.array(aris)
    vscores = np.array(vscores)
    fscores = np.array(fscores)
    weights = np.array(weights)
    print('number of one-sense words: %d' % (len(vscores) - len(aris)))
    print('mean ari: %f' % np.mean(aris))
    print('mean vscore: %f' % np.mean(vscores))
    print('weighted vscore: %f' % np.sum(vscores * (weights / float(np.sum(weights)))))
    print('mean fscore: %f' % np.mean(fscores))
    print('weighted fscore: %f' % np.sum(fscores * (weights / float(np.sum(weights)))))
    return np.mean(aris), np.mean(vscores)
Author: FedericoV, Project: AdaGram.jl, Lines of code: 26, Source: test-all.py

Example 12: cluster

def cluster(model, uids):
    ##############################################################################
    # Generate sample data
    X = []
    for uid in uids:
        X.append(model.docvecs[uid])
    labels_true = uids

    ##############################################################################
    # Compute Affinity Propagation
    af = AffinityPropagation(preference=-50).fit(X)
    with open('data/af.pick', 'wb') as fh:  # pickle requires a binary-mode file
        pickle.dump(af, fh)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_

    n_clusters_ = len(cluster_centers_indices)

    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f"
          % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f"
          % metrics.adjusted_mutual_info_score(labels_true, labels))
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, labels, metric='sqeuclidean'))
Author: wtgme, Project: ohsn, Lines of code: 27, Source: profile_cluster.py

Example 13: predictAffinityPropagation

def predictAffinityPropagation(X, labels_true):
	#ranX, ranY = shuffle(X, y, random_state=0)
	af = AffinityPropagation(preference=-50).fit(X)
	cluster_centers_indices = af.cluster_centers_indices_
	labels = af.labels_

	n_clusters_ = len(cluster_centers_indices)

	print('Estimated number of clusters: %d' % n_clusters_)
	print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
	print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
	print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
	print("Adjusted Rand Index: %0.3f"
      % metrics.adjusted_rand_score(labels_true, labels))
	print("Adjusted Mutual Information: %0.3f"
      % metrics.adjusted_mutual_info_score(labels_true, labels))
	print("Silhouette Coefficient: %0.3f"
      % metrics.silhouette_score(X, labels, metric='sqeuclidean'))

	plt.close('all')
	plt.figure(1)
	plt.clf()

	colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
	for k, col in zip(range(n_clusters_), colors):
		class_members = labels == k
		cluster_center = X[cluster_centers_indices[k]]
		plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
		plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
		         markeredgecolor='k', markersize=14)
		for x in X[class_members]:
			plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)

	plt.title('Estimated number of clusters: %d' % n_clusters_)
	plt.show()
Author: BIDS-collaborative, Project: EDAM, Lines of code: 35, Source: meanshift.py

Example 14: test_KMeans_scores

    def test_KMeans_scores(self):
        digits = datasets.load_digits()
        df = pdml.ModelFrame(digits)

        scaled = pp.scale(digits.data)
        df.data = df.data.pp.scale()
        self.assert_numpy_array_almost_equal(df.data.values, scaled)

        clf1 = cluster.KMeans(init='k-means++', n_clusters=10,
                              n_init=10, random_state=self.random_state)
        clf2 = df.cluster.KMeans(init='k-means++', n_clusters=10,
                                 n_init=10, random_state=self.random_state)
        clf1.fit(scaled)
        df.fit_predict(clf2)

        expected = m.homogeneity_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.homogeneity_score(), expected)

        expected = m.completeness_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.completeness_score(), expected)

        expected = m.v_measure_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.v_measure_score(), expected)

        expected = m.adjusted_rand_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.adjusted_rand_score(), expected)

        expected = m.homogeneity_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.homogeneity_score(), expected)

        expected = m.silhouette_score(scaled, clf1.labels_, metric='euclidean',
                                      sample_size=300, random_state=self.random_state)
        result = df.metrics.silhouette_score(metric='euclidean', sample_size=300,
                                             random_state=self.random_state)
        self.assertAlmostEqual(result, expected)
Author: Sandy4321, Project: pandas-ml, Lines of code: 35, Source: test_cluster.py

Example 15: cluster

def cluster(algorithm, data, topics, make_silhouette=False):
  print(str(algorithm))
  clusters = algorithm.fit_predict(data)
  labels = algorithm.labels_
  print('Homogeneity: %0.3f' % metrics.homogeneity_score(topics, labels))
  print('Completeness: %0.3f' % metrics.completeness_score(topics, labels))
  print('V-measure: %0.3f' % metrics.v_measure_score(topics, labels))
  print('Adjusted Rand index: %0.3f' % metrics.adjusted_rand_score(topics, labels))
  print('Silhouette test: %0.3f' % metrics.silhouette_score(data, labels))
  print(' ***************** ')

  silhouettes = metrics.silhouette_samples(data, labels)
  num_clusters = len(set(clusters))
  print('num clusters: %d' % num_clusters)
  print('num fitted: %d' % len(clusters))

  # Make a silhouette plot if the flag is set
  if make_silhouette:
    order = numpy.lexsort((-silhouettes, clusters))
    indices = [numpy.flatnonzero(clusters[order] == k) for k in range(num_clusters)]
    ytick = [(numpy.max(ind)+numpy.min(ind))/2 for ind in indices]
    ytickLabels = ["%d" % x for x in range(num_clusters)]
    cmap = cm.jet( numpy.linspace(0,1,num_clusters) ).tolist()
    clr = [cmap[i] for i in clusters[order]]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.barh(range(data.shape[0]), silhouettes[order], height=1.0,   
            edgecolor='none', color=clr)
    ax.set_ylim(ax.get_ylim()[::-1])
    plt.yticks(ytick, ytickLabels)
    plt.xlabel('Silhouette Value')
    plt.ylabel('Cluster')
    plt.savefig('cluster.png')
Author: RuthRainbow, Project: DataMining, Lines of code: 34, Source: scilearn.py
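
A pattern shared by many of the examples above is reporting homogeneity, completeness, and V-measure side by side. As a closing sketch (again with invented labels, not taken from any project above), the snippet below checks the textbook relation that v_measure_score is the harmonic mean of the other two scores:

from sklearn import metrics

# Invented labels: an imperfect clustering of two true classes into three clusters.
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]

h = metrics.homogeneity_score(labels_true, labels_pred)
c = metrics.completeness_score(labels_true, labels_pred)
v = metrics.v_measure_score(labels_true, labels_pred)

# V-measure is the harmonic mean of homogeneity and completeness,
# so this assertion holds up to floating-point error.
assert abs(v - 2 * h * c / (h + c)) < 1e-12
print('homogeneity=%.3f completeness=%.3f v_measure=%.3f' % (h, c, v))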


Note: The sklearn.metrics.v_measure_score examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by various programmers; copyright of the source code belongs to the original authors. For distribution and use, please refer to the corresponding project's License. Do not reproduce without permission.