

Python AgglomerativeClustering.fit_predict Method Code Examples

This article collects typical usage examples of the Python method sklearn.cluster.AgglomerativeClustering.fit_predict. If you are unsure what AgglomerativeClustering.fit_predict does, how to call it, or where to find working examples, the curated snippets below should help. You can also explore further usage examples of the enclosing class, sklearn.cluster.AgglomerativeClustering.


The following presents 15 code examples of AgglomerativeClustering.fit_predict, ordered by popularity by default.
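
Before looking at the collected examples, here is a minimal standalone sketch (not taken from any of the projects below; the synthetic data and parameter values are illustrative assumptions) showing the basic contract of fit_predict: it fits the model and returns one cluster label per sample.

import numpy as np
from sklearn.cluster import AgglomerativeClustering

# three well-separated blobs of synthetic 2-D points
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(loc, 0.3, size=(20, 2)) for loc in (0.0, 3.0, 6.0)])

# fit_predict fits the model and returns one cluster label per row of X
model = AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = model.fit_predict(X)

print(labels.shape)         # (60,)
print(np.bincount(labels))  # roughly 20 samples per cluster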

Example 1: hierarchical

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def hierarchical(similarity, concepts=2, euclid=False):
    if euclid:
        model = AgglomerativeClustering(n_clusters=concepts)
        return model.fit_predict(similarity)
    else:
        model = AgglomerativeClustering(n_clusters=concepts, affinity='precomputed', linkage='complete')
        return model.fit_predict(1 - similarity)
Author: thran, Project: experiments2.0, Source: clusterings.py
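
A possible way to call the hierarchical() function above (a hedged sketch, not part of the original project): pass a square, symmetric similarity matrix with values in [0, 1], so that 1 - similarity acts as a precomputed distance matrix. The matrix below is made up for illustration; also note that recent scikit-learn releases renamed the affinity argument to metric, so the precomputed branch may need adjusting there.

import numpy as np

# a made-up symmetric similarity matrix with values in [0, 1]
similarity = np.array([
    [1.0, 0.9, 0.2, 0.1],
    [0.9, 1.0, 0.3, 0.2],
    [0.2, 0.3, 1.0, 0.8],
    [0.1, 0.2, 0.8, 1.0],
])

# euclid=False: 1 - similarity is used as a precomputed distance matrix
labels = hierarchical(similarity, concepts=2, euclid=False)
print(labels)  # e.g. [0 0 1 1] -- the first two and last two items group together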

Example 2: HierarchicalTopics

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
class HierarchicalTopics(object):

    def __init__(self, corpus):
        """
        corpus is a corpus object, e.g. an HTMLCorpusReader()
        or an HTMLPickledCorpusReader() object
        """
        self.model = None
        self.vocab = list(
            set(normalize(corpus.words(categories=['news'])))
        )

    def vectorize(self, document):
        """
        Vectorizes a document consisting of a list of part of speech
        tagged tokens using the segmentation and tokenization methods.

        One-hot encode the set of documents
        """
        features = set(normalize(document))
        return np.array([
            token in features for token in self.vocab], np.short)

    def cluster(self, corpus):
        """
        Fits the AgglomerativeClustering model to the given data.
        """
        self.model = AgglomerativeClustering()

        self.model.fit_predict([
            self.vectorize(corpus.words(fileid))
            for fileid in corpus.fileids(categories=['news'])
        ])

        self.labels = self.model.labels_
        self.children = self.model.children_

    def plot_dendrogram(self, **kwargs):
        # Distances between each pair of children
        distance = np.arange(self.children.shape[0])
        position = np.arange(self.children.shape[0])

        # Create linkage matrix and then plot the dendrogram
        linkage_matrix = np.column_stack([
            self.children, distance, position]
        ).astype(float)

        # Plot the corresponding dendrogram
        fig, ax = plt.subplots(figsize=(15, 7))  # set size
        dendrogram(linkage_matrix, ax=ax, **kwargs)
        plt.tick_params(axis='x', bottom=False, top=False, labelbottom=False)
        plt.tight_layout()
        plt.show()
Author: yokeyong, Project: atap, Source: agglomerative.py

Example 3: buckshot

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def buckshot(k, mat):
    # sample sqrt(k * n) rows at random from the data matrix
    size = int((k * mat.shape[0]) ** .5)
    print(size)
    samp = np.zeros((size, mat.shape[1]))
    inds = np.random.randint(0, mat.shape[0], size)
    print(inds)

    for i in range(size):
        samp[i] = mat[inds[i]]

    # agglomerative clustering on the sample
    hier = AgglomerativeClustering(n_clusters=k, linkage='average', affinity='euclidean', compute_full_tree=True)
    flat = hier.fit_predict(samp)

    centroids = []
    # find the centroid of each cluster in the sample
    for j in range(k):
        i_s = [i for i, l in enumerate(flat) if l == j]
        print(len(i_s))
        points = np.array([samp[m] for m in i_s])
        cent = np.mean(points, axis=0)
        centroids.append(cent)

    return centroids
Author: meganbarnes, Project: HRC, Source: process.py
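
A hedged usage sketch for buckshot() (not from the original repository): the centroids it returns are typically used to seed k-means, which is the usual role of buckshot-style sampling; the data and cluster count below are placeholders.

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
mat = rng.rand(500, 10)                  # 500 samples, 10 features

# buckshot clusters a random sample of sqrt(k * n) rows and returns k centroids
centroids = np.array(buckshot(5, mat))   # shape (5, 10)

# the centroids can then seed k-means over the full data set
km = KMeans(n_clusters=5, init=centroids, n_init=1).fit(mat)
print(np.bincount(km.labels_))           # size of each cluster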

Example 4: sp_connectivity

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
    def sp_connectivity(self, X, connectivity, n_clusters, n):
        # Ward-linkage agglomerative clustering constrained by the given
        # connectivity graph (e.g. a k-nearest-neighbours graph over X)
        model = AgglomerativeClustering(linkage="ward",
                                        connectivity=connectivity,
                                        n_clusters=n_clusters)
        y = model.fit_predict(X)
        return y
Author: ShriyaGupta, Project: CompBio, Source: HeirarchichalClustering.py

Example 5: clustering_approach

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
    def clustering_approach(self):
        '''
        Cluster user data using various clustering algos
        IN: self.df_full and self.labels
        OUT: results to stdout
        '''
        print('Fitting clustering model')
        X = self.df_full.values
        y = self.labels

        # scale data
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

        # KMeans
        km_clf = KMeans(n_clusters=2, n_jobs=6)
        km_clf.fit(X)

        # swap labels as super-users are in cluster 0 (messy!!)
        temp = y.apply(lambda x: 0 if x == 1 else 1)
        print('\nKMeans clustering: ')
        self.analyse_preds(temp, km_clf.labels_)

        # Agglomerative clustering
        print('\nAgglomerative clustering approach: ')
        ac_clf = AgglomerativeClustering()
        ac_labels = ac_clf.fit_predict(X)
        self.analyse_preds(y, ac_labels)

        return None
Author: wvanamstel, Project: project, Source: gitproject.py

Example 6: calculateNumberOfIdealClusters

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def calculateNumberOfIdealClusters(maxAmount, corpus):
    print("Initializing silhouette analysis")
    range_n_clusters = range(2, maxAmount)  # max number of clusters equals the number of jobs

    silhouette_high = 0
    silhouette_high_n_clusters = 2

    for n_clusters in range_n_clusters:
        # Initialize the clusterer with the current n_clusters value
        cluster = AgglomerativeClustering(n_clusters=n_clusters, linkage="ward", affinity="euclidean")
        cluster_labels = cluster.fit_predict(corpus)

        # The silhouette_score gives the average value for all the samples.
        # This gives a perspective into the density and separation of the formed clusters
        silhouette_avg = silhouette_score(corpus, cluster_labels)

        print("For n_clusters = %d, the average silhouette_score is: %.5f" % (n_clusters, silhouette_avg))

        if silhouette_avg > silhouette_high:
            silhouette_high = silhouette_avg
            silhouette_high_n_clusters = n_clusters

        # Compute the silhouette scores for each sample
        sample_silhouette_values = silhouette_samples(corpus, cluster_labels)

    print("Highest score = %f for n_clusters = %d" % (silhouette_high, silhouette_high_n_clusters))
    return silhouette_high_n_clusters
Author: edwardmp, Project: clustering-job-offers-and-assessing-job-similarity, Source: clustering.py

Example 7: agglom

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def agglom(data, n_clusters):
    # connectivity graph: link each sample to its 30 nearest neighbours
    knn_graph = kneighbors_graph(data, 30, include_self=False)

    # use ward / average / complete for different results
    cluster = AgglomerativeClustering(n_clusters=n_clusters,
                                      connectivity=knn_graph, linkage='ward')
    return cluster.fit_predict(data)
Author: JWeel, Project: Sjoemel, Source: anomalyDetection.py
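
A hedged usage sketch for agglom() (synthetic data, chosen only for illustration): it assumes kneighbors_graph has been imported from sklearn.neighbors in the same module, and since 30 neighbours are requested, the input must contain more than 30 samples.

import numpy as np
from sklearn.neighbors import kneighbors_graph  # used inside agglom()

rng = np.random.RandomState(42)
data = rng.rand(200, 5)             # 200 samples, 5 features (must exceed the 30 neighbours)

labels = agglom(data, n_clusters=4)
print(np.bincount(labels))          # size of each of the 4 clusters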

Example 8: openfaceExp

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def openfaceExp(lfwAligned, net, cls):
    df = pd.DataFrame(columns=('nPpl', 'nImgs',
                               'trainTimeSecMean', 'trainTimeSecStd',
                               'predictTimeSecMean', 'predictTimeSecStd',
                               'accsMean', 'accsStd'))

    repCache = {}

    df_i = 0
    for nPpl in nPplVals:
        print(" + nPpl: {}".format(nPpl))
        cls = AgglomerativeClustering(n_clusters=nPpl)
        (X, y) = getData(lfwAligned, nPpl, nImgs, size=96, mode='rgb')
        nSampled = X.shape[0]
        ss = ShuffleSplit(nSampled, n_iter=10, test_size=0.1, random_state=0)

        allTrainTimeSec = []
        allPredictTimeSec = []
        accs = []

        for train, test in ss:
            X_train = []
            for img in X[train]:
                h = hash(str(img.data))
                if h in repCache:
                    rep = repCache[h]
                else:
                    rep = net.forward(img)
                    repCache[h] = rep
                X_train.append(rep)

            start = time.time()
            X_train = np.array(X_train)
            cls.fit(X_train, y[train])
            trainTimeSec = time.time() - start
            allTrainTimeSec.append(trainTimeSec)

            start = time.time()
            X_test = []
            for img in X[test]:
                X_test.append(net.forward(img))
            y_predict = cls.fit_predict(X_test)
            predictTimeSec = time.time() - start
            allPredictTimeSec.append(predictTimeSec / len(test))
            y_predict = np.array(y_predict)
            print(y[test], y_predict)
            acc = accuracy_score(y[test], y_predict)
            print(acc)
            accs.append(acc)

        df.loc[df_i] = [nPpl, nImgs,
                        np.mean(allTrainTimeSec), np.std(allTrainTimeSec),
                        np.mean(allPredictTimeSec), np.std(allPredictTimeSec),
                        np.mean(accs), np.std(accs)]
        df_i += 1

    return df
Author: billtiger, Project: CATANA, Source: lfw-classification-cluster.py

Example 9: get_topics

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def get_topics(X_lsi, text_names, nk=1):
    ag = AgglomerativeClustering(n_clusters=nk, affinity='cosine', linkage='average')
    topics = ag.fit_predict(X_lsi)
    paper_to_topic = defaultdict(int)
    topic_to_papers = defaultdict(list)
    for paper,topic in zip(text_names,topics):
        paper_to_topic[paper] = topic
        topic_to_papers[topic].append(paper)
    return (paper_to_topic, topic_to_papers)
Author: lpalova, Project: scientific_publications_search_engine, Source: gephi100.py

Example 10: hierarchicalCluster

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def hierarchicalCluster(corr_matrix_df, n_clusters):
    """Calculate a clustering from the correlation matrix using the hierarchical Ward method."""
    # set up the Ward model
    ward = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward', affinity='euclidean')

    result = ward.fit_predict(corr_matrix_df)
    cluster_df = pd.DataFrame(result, index=corr_matrix_df.index, columns=['Cluster'])

    return cluster_df
Author: gindeleo, Project: climate, Source: climate.py
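
A hedged usage sketch for hierarchicalCluster(): the DataFrame below is random data whose .corr() stands in for whatever correlation matrix the original project computes.

import numpy as np
import pandas as pd

# random data whose correlation matrix stands in for the real input
rng = np.random.RandomState(1)
df = pd.DataFrame(rng.rand(50, 6), columns=list('ABCDEF'))
corr_matrix_df = df.corr()          # 6 x 6 correlation matrix

clusters = hierarchicalCluster(corr_matrix_df, n_clusters=2)
print(clusters['Cluster'].value_counts())   # how many variables fall in each cluster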

Example 11: agglomerative_clustering

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def agglomerative_clustering(crime_rows, column_names, num_clusters):
    crime_xy = [crime[0:2] for crime in crime_rows]
    crime_info = [crime[2:] for crime in crime_rows]
    print("Running Agglomerative Clustering")
    agglo_clustering = AgglomerativeClustering(n_clusters=num_clusters, 
            connectivity=neighbors.kneighbors_graph(crime_xy, n_neighbors=2))
    agglomerative_clustering_labels = agglo_clustering.fit_predict(crime_xy)
    print("formatting....")
    return _format_clustering(agglomerative_clustering_labels, 
            crime_xy, crime_info, column_names)
Author: egaebel, Project: crime-on-the-move-back-end--Python, Source: clustering.py

Example 12: test_agglomerative_clustering_with_distance_threshold_edge_case

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def test_agglomerative_clustering_with_distance_threshold_edge_case(
        linkage, threshold, y_true):
    # test boundary case of distance_threshold matching the distance
    X = [[0], [1]]
    clusterer = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=threshold,
        linkage=linkage)
    y_pred = clusterer.fit_predict(X)
    assert adjusted_rand_score(y_true, y_pred) == 1
Author: kevin-coder, Project: scikit-learn-fork, Source: test_hierarchical.py

Example 13: agglomorative_clustering

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
    def agglomorative_clustering(df_in):
        # Set model input args
        n_clusters = 8
        linkage = 'ward'

        model = AgglomerativeClustering(linkage=linkage,
                                        n_clusters=n_clusters)

        # attach cluster-label to dataframe
        df_in['cluster'] = model.fit_predict(df_in)
Author: indrek-sepp, Project: nw-contribution-chart, Source: clustering.py

Example 14: clusterize

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def clusterize(matrices):
    #dbscan = DBSCAN(metric="precomputed", eps=25, min_samples=50)
    cluster = AgglomerativeClustering(n_clusters=2, affinity="precomputed", linkage="complete")
    distances = distance_matrix(matrices)
    print("mean of distances is {} and std of norms is {}".format(numpy.mean(distances), numpy.std([numpy.linalg.norm(m, numpy.inf) for m in matrices])))
    #pyplot.plot([numpy.linalg.norm(m, numpy.inf) for m in matrices], 'ro')
    #pyplot.show()
    #pyplot.hist(distances.flatten(), bins=20)
    #pyplot.show()
    return cluster.fit_predict(distances)
Author: messwith, Project: phazes, Source: prototype.py

Example 15: find_steady_coalition

# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit_predict [as alias]
def find_steady_coalition():

    working_directory = r"C:\Users\ORI\Documents\IDC-non-sync\ML_Course\Election\Data\\"
    file_name = os.path.join(working_directory, r'ElectionsData.csv')
    train, validation, test, feature_categorical_dictionary, train_idx, test_idx, number_to_party_dictionary = \
        prepare_the_data(file_name, working_directory)

    for n_clusters in [5, 4, 3]:
        print("---------------")
        linkage = 'ward'
        X = train.data
        clusters = AgglomerativeClustering(linkage=linkage, n_clusters=n_clusters)
        clusters.fit_predict(X)
        bin_count_of_clusters = np.bincount(clusters.labels_)
        normalized_bin_count_of_clusters = bin_count_of_clusters / np.sum(bin_count_of_clusters).astype('float32')

        # is there any cluster with more than 50% of the votes?
        coalition_exists = np.any(normalized_bin_count_of_clusters > 0.5)
        print("number_of_clusters {0}".format(n_clusters))
        print("coalition_exists: {0}".format(coalition_exists))

        # find all the parties belonging to the biggest cluster
        biggest_cluster = np.argmax(normalized_bin_count_of_clusters)
        biggest_cluster_voters = np.bincount(train.labels[clusters.labels_ == biggest_cluster].astype('int64'))

        # normalize the votes by the size of their parties
        votes_out_of_party = biggest_cluster_voters / np.bincount(train.labels.astype('int32')).astype('float32')
        # committed-to-coalition parties = parties with a majority of their votes in the cluster
        committed_to_coalition_parties = votes_out_of_party > 0.5

        percentage_of_voters_in_committed_coalition = np.sum(biggest_cluster_voters[committed_to_coalition_parties]) * 1.0 / len(train.labels) * 1.0

        print(percentage_of_voters_in_committed_coalition)
        if percentage_of_voters_in_committed_coalition > 0.5:
            print("coalition found")
            party_keys = np.array(list(number_to_party_dictionary.keys()))
            print("parties in coalition: {0}".format([number_to_party_dictionary[k] for k in party_keys[committed_to_coalition_parties]]))
            break
        print("---------------")
Author: Ori226, Project: MLEx, Source: ex4_final.py


Note: The sklearn.cluster.AgglomerativeClustering.fit_predict examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets were selected from open-source projects contributed by their respective developers. Copyright of the source code belongs to the original authors; consult each project's License before distributing or reusing it, and do not reproduce this article without permission.