This article collects typical usage examples of the Python function sklearn.metrics.v_measure_score. If you have been wondering what exactly v_measure_score does and how to use it, the hand-picked examples below may help.
15 code examples of v_measure_score are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
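Before the examples, here is a minimal, self-contained sketch of the call itself (the toy labels are illustrative, not taken from any example below): v_measure_score(labels_true, labels_pred) compares a ground-truth labeling against a clustering assignment and returns a score in [0, 1]. It is symmetric and invariant to permutations of the cluster ids.

from sklearn import metrics

# Two labelings of six points: the partitions are identical,
# only the cluster ids differ, so the score is a perfect 1.0.
labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [1, 1, 0, 0, 2, 2]

print('v_measure_score: %f' % metrics.v_measure_score(labels_true, labels_pred))
# v_measure_score: 1.000000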
Example 1: kmeans
def kmeans(input_file, n_clusters, Output):
    lvltrace.lvltrace("LVLEntree dans kmeans unsupervised")
    ncol = tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:, 1:]
    y = data[:, 0]
    sample_size, n_features = X.shape
    k_means = cluster.KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    k_means.fit(X)
    reduced_data = k_means.transform(X)  # distances of each sample to each cluster center
    values = k_means.cluster_centers_.squeeze()
    labels = k_means.labels_
    k_means_cluster_centers = k_means.cluster_centers_
    print("#########################################################################################################\n")
    #print(y)
    #print(labels)
    print("K-MEANS\n")
    print('homogeneity_score: %f' % metrics.homogeneity_score(y, labels))
    print('completeness_score: %f' % metrics.completeness_score(y, labels))
    print('v_measure_score: %f' % metrics.v_measure_score(y, labels))
    print('adjusted_rand_score: %f' % metrics.adjusted_rand_score(y, labels))
    print('adjusted_mutual_info_score: %f' % metrics.adjusted_mutual_info_score(y, labels))
    print('silhouette_score: %f' % metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    print('\n')
    print("#########################################################################################################\n")
    results = Output + "kmeans_scores.txt"
    file = open(results, "w")
    file.write("K-Means Scores\n")
    file.write("Homogeneity Score: %f\n" % metrics.homogeneity_score(y, labels))
    file.write("Completeness Score: %f\n" % metrics.completeness_score(y, labels))
    file.write("V-Measure: %f\n" % metrics.v_measure_score(y, labels))
    file.write("The adjusted Rand index: %f\n" % metrics.adjusted_rand_score(y, labels))
    file.write("Adjusted Mutual Information: %f\n" % metrics.adjusted_mutual_info_score(y, labels))
    file.write("Silhouette Score: %f\n" % metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    file.write("\n")
    file.write("True Value, Cluster numbers, Iteration\n")
    for n in range(len(y)):
        file.write("%f, %f, %i\n" % (y[n], labels[n], (n + 1)))
    file.close()
    import matplotlib.pyplot as plt  # the plotting below uses plt, not pylab
    # plot the results along with the labels
    k_means_cluster_centers = k_means.cluster_centers_
    fig, ax = plt.subplots()
    im = ax.scatter(X[:, 0], X[:, 1], c=labels, marker='.')
    for k in range(n_clusters):
        my_members = labels == k
        cluster_center = k_means_cluster_centers[k]
        ax.plot(cluster_center[0], cluster_center[1],
                color='b', marker='x', markersize=6)
    fig.colorbar(im)
    plt.title("Number of clusters: %i" % n_clusters)
    save = Output + "kmeans.png"
    plt.savefig(save)
    lvltrace.lvltrace("LVLsortie dans kmeans unsupervised")
Example 2: main
def main():
    '''docstring for main'''
    args = parse_args()
    setup_logging(verbose=args.verbose)
    records = consume_fasta(args.fasta_file)
    # set up Hasher, Vectorizer and Classifier
    hasher = HashingVectorizer(analyzer='char',
                               n_features=2 ** 18,
                               ngram_range=(args.ngram_min, args.ngram_max),
                               )
    logging.info(hasher)
    encoder, classes = get_classes(records, args.tax_level)
    n_clusters = len(classes)
    logging.info('using taxonomic level %s' % args.tax_level)
    logging.info('Using %s clusters' % n_clusters)
    classifier = MiniBatchKMeans(n_clusters=n_clusters)
    records = records[0:args.n_iters]
    chunk_generator = iter_chunk(records, args.chunk_size, args.tax_level)
    logging.info('ngram range: [%s-%s]' % (args.ngram_min, args.ngram_max))
    for labels, features in chunk_generator:
        logging.info('transforming training chunk')
        labels = encoder.transform(labels)
        vectors = hasher.transform(features)
        logging.info('fitting training chunk')
        classifier.partial_fit(vectors)
        pred_labels = classifier.predict(vectors)
        score = v_measure_score(labels, pred_labels)
        # random.sample expects a sequence, so convert the prediction array to a list
        shuffled_score = v_measure_score(labels, sample(list(pred_labels), len(pred_labels)))
        logging.info('score: %.2f' % score)
        logging.info('shuffled score: %.2f' % shuffled_score)
Example 3: bench_k_means
def bench_k_means(estimator, name, data, target_labels, sample_size):
    """For benchmarking K-Means estimators. Prints different clustering metrics and train accuracy.

    ARGS
        estimator: K-Means clustering algorithm <sklearn.cluster.KMeans>
        name: estimator name <str>
        data: array-like or sparse matrix, shape=(n_samples, n_features)
        target_labels: labels of data points <number array>
        sample_size: size of the sample to use when computing the Silhouette Coefficient <int>
    """
    t0 = time()
    estimator.fit(data)
    _, _, train_accuracy = compute_residuals_and_rsquared(estimator.labels_, target_labels)
    print('% 9s\t%.2fs\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(target_labels, estimator.labels_),
             metrics.completeness_score(target_labels, estimator.labels_),
             metrics.v_measure_score(target_labels, estimator.labels_),
             metrics.adjusted_rand_score(target_labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(target_labels, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_, metric='euclidean', sample_size=sample_size),
             train_accuracy))
Example 4: main
def main(argv):
    file_vectors, clust_type, clusters, distance, cluster_param, std = get_arguments(argv)
    fname = '.'.join(map(str, [file_vectors.split('/')[-1], clust_type, clusters, distance, cluster_param, std]))
    writer = open(fname, 'w')  # better to put in EX1, EX2, .. folders
    print('clustering:', clust_type)
    print('clusters:', clusters)
    print('cluster_param:', cluster_param)
    print('std:', std)
    X, words, truth = load_data(file_vectors, True)
    X = np.array(X)
    if clust_type == 'affin':
        labels = affin_sclustering(X, n_clust=int(clusters), distance=distance, gamma=float(cluster_param), std=bool(std))
    else:
        labels = knn_sclustering(X, n_clust=int(clusters), k=int(cluster_param))
    writer.write('\nVMeas:' + str(v_measure_score(truth, labels)))
    writer.write('\nRand:' + str(adjusted_rand_score(truth, labels)))
    writer.write('\nHomogen:' + str(homogeneity_score(truth, labels)) + '\n')
    for i, word in enumerate(words):
        writer.write(word + ' : ' + str(labels[i]) + '\n')
    writer.close()
Example 5: my_clustering
def my_clustering(X, y, n_clusters, pca):
    # =======================================
    # Complete the code here.
    # return scores like this: return [score, score, score, score]
    # =======================================
    from sklearn.cluster import KMeans
    clf = KMeans(n_clusters=n_clusters)
    clf.fit(X)
    from sklearn import metrics
    ari = metrics.adjusted_rand_score(y, clf.labels_)
    ami = metrics.adjusted_mutual_info_score(y, clf.labels_)
    v_measure = metrics.v_measure_score(y, clf.labels_)
    '''
    silhouette_coeff = metrics.silhouette_score(X, clf.labels_,
                                                metric='euclidean',
                                                sample_size=300)
    '''
    silhouette_coeff = metrics.silhouette_score(X, clf.labels_)
    show_images(n_clusters, clf, pca)
    return [ari, ami, v_measure, silhouette_coeff]
Example 6: bench_k_means
def bench_k_means(estimator, data, labels):
    t0 = time()
    estimator.fit(data)
    print("time to fit: {:.5}".format(time() - t0))
    homogeneity = metrics.homogeneity_score(labels, estimator.labels_)
    completeness = metrics.completeness_score(labels, estimator.labels_)
    v_measure = metrics.v_measure_score(labels, estimator.labels_)
    print("homogeneity {:.5}, completeness {:.5}, v_measure_score {:.5}".format(
        homogeneity, completeness, v_measure))
    adj_rand_score = metrics.adjusted_rand_score(
        labels, estimator.labels_
    )
    print("adjusted_rand_score {:.5}".format(adj_rand_score))
    adj_mutual_info_score = metrics.adjusted_mutual_info_score(
        labels, estimator.labels_
    )
    print("adjusted_mutual_info_score {:.5}".format(
        adj_mutual_info_score)
    )
    silhouette_score = metrics.silhouette_score(
        data, estimator.labels_, metric='euclidean'
    )
    # reuse the value computed above instead of recomputing it
    print("silhouette_score {:.5}".format(silhouette_score))
    return [
        homogeneity, completeness, v_measure, adj_rand_score,
        adj_mutual_info_score, silhouette_score
    ]
Example 7: cluster
def cluster(Z, K=4, algo='kmeans'):
    descr = Z.columns
    X = Imputer().fit_transform(Z)  # Imputer is SimpleImputer in modern sklearn
    ##############################################################################
    if algo == 'dbscan':
        # Compute DBSCAN
        db = DBSCAN(eps=0.3, min_samples=10).fit(X)
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        labels = db.labels_
        # Number of clusters in labels, ignoring noise if present.
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        print('Estimated number of clusters: %d' % n_clusters_)
        # NOTE: labels_true is not defined inside this function; it must come
        # from the enclosing scope for the supervised metrics below to work.
        print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
        print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
        print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
        print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
        print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
        print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))
    elif algo == 'kmeans':
        km = KMeans(n_clusters=K)
        km.fit(X)
        print(km.labels_)
        return km
Example 8: bench_k_means
def bench_k_means(estimator, name, data, sample_size, labels, postIds):
    data = sparse.csr_matrix(data)
    print("Performing dimensionality reduction using LSA")
    t0 = time()
    lsa = TruncatedSVD(500)
    data = lsa.fit_transform(data)
    data = Normalizer(copy=False).fit_transform(data)
    print("done in %fs" % (time() - t0))
    print()
    #sData = sparse.csr_matrix(data)
    val = estimator.fit(data)
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f '
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels, estimator.labels_)))
    print("Parsing user file:")
    parseUserFile()
    print("Extracting user file:")
    clusterDict = extractCluster(postIds, estimator.labels_)
    print("Writing cluster data to file")
    writeCluterToFile(clusterDict)  # sic: helper name as defined elsewhere in the project
Example 9: clustering_by_kmeans
def clustering_by_kmeans(vectorizer, X, true_k):
    print("Clustering in " + str(true_k) + " groups by K-means...")
    km = KMeans(n_clusters=true_k, init='k-means++', max_iter=500, n_init=1)
    km.fit_predict(X)
    print("Measuring...")
    # NOTE: `documents` holds the ground-truth labels and comes from the enclosing scope.
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(documents, km.labels_))
    print("Completeness: %0.3f" % metrics.completeness_score(documents, km.labels_))
    # V-measure is an entropy-based measure which explicitly measures how successfully
    # the criteria of homogeneity and completeness have been satisfied.
    print("V-measure: %0.3f" % metrics.v_measure_score(documents, km.labels_))
    print("Adjusted Rand-Index: %.3f" % metrics.adjusted_rand_score(documents, km.labels_))
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, km.labels_, sample_size=1000))
    # print top terms per cluster
    clusters = km.labels_.tolist()  # 0 if the term is in cluster 0, 1 if in cluster 1, ... (list of terms)
    #print("List of terms belonging to the clusters " + str(clusters))
    print("Total of " + str(len(km.labels_)) + " documents")
    # Example to get all documents in cluster 0:
    #cluster_0 = np.where(clusters == 0)  # don't forget import numpy as np
    #print(cluster_0)
    # cluster_0 now contains all indices of the documents in this cluster; to get the actual documents:
    #X_cluster_0 = documents[cluster_0]
    terms = vectorizer.get_feature_names()
    #print(terms)
    measuring_kmeans(true_k, clusters)
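The comment in Example 9 describes V-measure as an entropy-based measure of how well homogeneity and completeness are jointly satisfied; concretely, with the default beta=1, v_measure_score is the harmonic mean of homogeneity_score and completeness_score. A small illustrative check (toy labels, not taken from Example 9):

from sklearn import metrics

labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]

h = metrics.homogeneity_score(labels_true, labels_pred)
c = metrics.completeness_score(labels_true, labels_pred)
v = metrics.v_measure_score(labels_true, labels_pred)
print(abs(v - 2 * h * c / (h + c)) < 1e-12)  # True: V is the harmonic mean of h and c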
Example 10: bestClassify
def bestClassify(X, Y):
    "Best classifier function"
    tfidf = True
    if tfidf:
        vec = TfidfVectorizer(preprocessor=identity,
                              tokenizer=identity, sublinear_tf=True)
    else:
        vec = CountVectorizer(preprocessor=identity,
                              tokenizer=identity)
    km = KMeans(n_clusters=2, n_init=100, verbose=1)
    clusterer = Pipeline([('vec', vec),
                          ('cls', km)])
    prediction = clusterer.fit_predict(X, Y)
    checker = defaultdict(list)
    for pred, truth in zip(prediction, Y):
        checker[pred].append(truth)
    labeldict = {}
    for pred, label in checker.items():
        labeldict[pred] = Counter(label).most_common(1)[0][0]
        #print(pred, Counter(label).most_common(1)[0][0])
    prediction = [labeldict[p] for p in prediction]
    labels = list(labeldict.values())
    print(labels)
    print(confusion_matrix(Y, prediction, labels=labels))
    print("Homogeneity:", homogeneity_score(Y, prediction))
    print("Completeness:", completeness_score(Y, prediction))
    print("V-measure:", v_measure_score(Y, prediction))
    print("Rand-Index:", adjusted_rand_score(Y, prediction))
Example 11: compute_metrics
def compute_metrics(answers, predictions):
    aris = []
    vscores = []
    fscores = []
    weights = []
    for k in answers.keys():
        idx = np.argsort(np.array(answers[k][0]))
        true = np.array(answers[k][1])[idx]
        pred = np.array(predictions[k][1])
        weights.append(pred.shape[0])
        if len(np.unique(true)) > 1:
            aris.append(adjusted_rand_score(true, pred))
        vscores.append(v_measure_score(true, pred))
        fscores.append(compute_fscore(true, pred))
        # print('%s: ari=%f, vscore=%f, fscore=%f' % (k, aris[-1], vscores[-1], fscores[-1]))
    aris = np.array(aris)
    vscores = np.array(vscores)
    fscores = np.array(fscores)
    weights = np.array(weights)
    print('number of one-sense words: %d' % (len(vscores) - len(aris)))
    print('mean ari: %f' % np.mean(aris))
    print('mean vscore: %f' % np.mean(vscores))
    print('weighted vscore: %f' % np.sum(vscores * (weights / float(np.sum(weights)))))
    print('mean fscore: %f' % np.mean(fscores))
    print('weighted fscore: %f' % np.sum(fscores * (weights / float(np.sum(weights)))))
    return np.mean(aris), np.mean(vscores)
Example 12: cluster
def cluster(model, uids):
    ##############################################################################
    # Generate sample data
    X = []
    for uid in uids:
        X.append(model.docvecs[uid])
    labels_true = uids
    ##############################################################################
    # Compute Affinity Propagation
    af = AffinityPropagation(preference=-50).fit(X)
    pickle.dump(af, open('data/af.pick', 'wb'))  # pickle files must be opened in binary mode
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f"
          % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f"
          % metrics.adjusted_mutual_info_score(labels_true, labels))
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, labels, metric='sqeuclidean'))
Example 13: predictAffinityPropagation
def predictAffinityPropagation(X, labels_true):
    #ranX, ranY = shuffle(X, y, random_state=0)
    af = AffinityPropagation(preference=-50).fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f"
          % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f"
          % metrics.adjusted_mutual_info_score(labels_true, labels))
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, labels, metric='sqeuclidean'))
    plt.close('all')
    plt.figure(1)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()
Example 14: test_KMeans_scores
def test_KMeans_scores(self):
    digits = datasets.load_digits()
    df = pdml.ModelFrame(digits)
    scaled = pp.scale(digits.data)
    df.data = df.data.pp.scale()
    self.assert_numpy_array_almost_equal(df.data.values, scaled)
    clf1 = cluster.KMeans(init='k-means++', n_clusters=10,
                          n_init=10, random_state=self.random_state)
    clf2 = df.cluster.KMeans(init='k-means++', n_clusters=10,
                             n_init=10, random_state=self.random_state)
    clf1.fit(scaled)
    df.fit_predict(clf2)
    expected = m.homogeneity_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.homogeneity_score(), expected)
    expected = m.completeness_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.completeness_score(), expected)
    expected = m.v_measure_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.v_measure_score(), expected)
    expected = m.adjusted_rand_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.adjusted_rand_score(), expected)
    expected = m.homogeneity_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.homogeneity_score(), expected)
    expected = m.silhouette_score(scaled, clf1.labels_, metric='euclidean',
                                  sample_size=300, random_state=self.random_state)
    result = df.metrics.silhouette_score(metric='euclidean', sample_size=300,
                                         random_state=self.random_state)
    self.assertAlmostEqual(result, expected)
Example 15: cluster
def cluster(algorithm, data, topics, make_silhouette=False):
    print(str(algorithm))
    clusters = algorithm.fit_predict(data)
    labels = algorithm.labels_
    print('Homogeneity: %0.3f' % metrics.homogeneity_score(topics, labels))
    print('Completeness: %0.3f' % metrics.completeness_score(topics, labels))
    print('V-measure: %0.3f' % metrics.v_measure_score(topics, labels))
    print('Adjusted Rand index: %0.3f' % metrics.adjusted_rand_score(topics, labels))
    print('Silhouette test: %0.3f' % metrics.silhouette_score(data, labels))
    print(' ***************** ')
    silhouettes = metrics.silhouette_samples(data, labels)
    num_clusters = len(set(clusters))
    print('num clusters: %d' % num_clusters)
    print('num fitted: %d' % len(clusters))
    # Make a silhouette plot if the flag is set
    if make_silhouette:
        order = numpy.lexsort((-silhouettes, clusters))
        # fixed: compare against the loop variable k, not num_clusters
        indices = [numpy.flatnonzero(clusters[order] == k) for k in range(num_clusters)]
        ytick = [(numpy.max(ind) + numpy.min(ind)) / 2 for ind in indices]
        ytickLabels = ["%d" % x for x in range(num_clusters)]
        cmap = cm.jet(numpy.linspace(0, 1, num_clusters)).tolist()
        clr = [cmap[i] for i in clusters[order]]
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.barh(range(data.shape[0]), silhouettes[order], height=1.0,
                edgecolor='none', color=clr)
        ax.set_ylim(ax.get_ylim()[::-1])
        plt.yticks(ytick, ytickLabels)
        plt.xlabel('Silhouette Value')
        plt.ylabel('Cluster')
        plt.savefig('cluster.png')