

Python MiniBatchKMeans.fit Method Code Examples

This article collects and summarizes typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.fit. If you are struggling with questions such as what exactly MiniBatchKMeans.fit does or how to use it, the curated code examples here may help. You can also explore further usage examples of the class it belongs to, sklearn.cluster.MiniBatchKMeans.


The following presents 15 code examples of the MiniBatchKMeans.fit method, sorted by popularity by default.
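Before the individual examples, here is a minimal, self-contained sketch of the basic fit workflow. The synthetic data and the n_clusters, batch_size, and n_init values below are illustrative assumptions for demonstration only, not taken from any of the examples that follow:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

# Synthetic data: 1000 samples with 64 features each (arbitrary shape).
X = np.random.RandomState(0).rand(1000, 64)

# Fit mini-batch k-means on the data; parameter values are illustrative.
mbk = MiniBatchKMeans(n_clusters=10, batch_size=100, n_init=3, random_state=0)
mbk.fit(X)

print(mbk.cluster_centers_.shape)  # (10, 64): one centroid per cluster
print(mbk.labels_[:5])             # cluster assignments of the first 5 samples

After fitting, cluster_centers_ holds the learned centroids and labels_ holds the cluster assignment of each training sample; these two attributes are what most of the examples below read back.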

Example 1: train_data_pre

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def train_data_pre(train_x, train_y):
	topic_data = dict()
	print(type(train_x))
	print(train_x[0])
	print()
	train_x = np.array(train_x.todense())
	for index in range(len(train_y)):
		y = train_y[index]
		if y in topic_data:
			topic_data[y].append(train_x[index])
		else:
			print(train_x[index])
			print()
			topic_data[y] = [train_x[index]]

	min_size = 100000
	for key in topic_data:
		length = len(topic_data[key])
		if length < min_size:
			min_size = length

	for key in topic_data:
		length = len(topic_data[key])
		if length > min_size:
			k_means = MiniBatchKMeans(n_clusters=min_size, init_size=int(3 * min_size))
			# print(topic_data[key])
			k_means.fit(topic_data[key])
			topic_data[key] = k_means.cluster_centers_
Author: liguoyu1, Project: python, Lines: 31, Source: Feature_tfidf_TopicClassifer.py

Example 2: generateCodebook

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
    def generateCodebook(self, features):
        """ Generate codebook using extracted features """
    
        
        codebook = None
        
        if self._codebookGenerateMethod == 'k-means':
#             # Codebook generation using scipy k-means
#             while run:
#                 try:
#                     # Set missing = 'raise' to raise exception 
#                     # when one of the clusters is empty
#                     whitenedFeatures = whiten(features)
#                     codebook, _ = kmeans2(whitenedFeatures, 
#                                           self._codebookSize, 
#                                           missing = 'raise')
#                     
#                     # No empty clusters
#                     run = False
#                 except ClusterError:
#                     # If one of the clusters is empty, re-run k-means
#                     run = True
            
            # Codebook generation using sklearn k-means.
            # whiten() comes from scipy.cluster.vq and rescales each feature
            # dimension by its standard deviation.
            whitenedFeatures = whiten(features)
            kmeans = MiniBatchKMeans(n_clusters=config.codebookSize)
            kmeans.fit(whitenedFeatures)
            codebook = kmeans.cluster_centers_
        else:
            pass
        
        self._codebook = codebook
Author: 1987hasit, Project: BoVW_Image, Lines: 34, Source: bag_of_words.py

Example 3: extract_spatial_pyramid

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def extract_spatial_pyramid(images, dataset, vq=None, n_words=1000):
    descriptors, locations = sift_descriptors(images, dataset)
    if vq is None:
        vq = MiniBatchKMeans(n_clusters=n_words, verbose=1, init='random',
                             batch_size=2 * n_words, compute_labels=False,
                             reassignment_ratio=0.0, random_state=1, n_init=3)
        #vq = KMeans(n_clusters=n_words, verbose=10, init='random')
        vq.fit(shuffle(np.vstack(descriptors)))
    else:
        n_words = vq.n_clusters

    pyramids = []
    for descr, locs in zip(descriptors, locations):
        words = vq.predict(descr)
        global_ = np.bincount(words, minlength=n_words).astype(float)
        global_ /= max(global_.sum(), 1)
        third_of_image = locs[1].max() // 3 + 1
        stripe_indicator = locs[1] // third_of_image
        inds = np.vstack([stripe_indicator, words])
        stripe_hists = sparse.coo_matrix((np.ones(len(words)), inds),
                                         shape=(3, n_words)).toarray()

        stripe_hists = [x / max(x.sum(), 1) for x in stripe_hists]
        pyramids.append(np.hstack([np.hstack(stripe_hists), global_]))

    return vq, np.vstack(pyramids)
Author: amueller, Project: segmentation, Lines: 28, Source: bow.py

Example 4: train

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
    def train(self, featurefiles, k=100, subsampling=10):
        nbr_images = len(featurefiles)
        descr = []
        descr.append(sift.read_features_from_file(featurefiles[0])[1])
        descriptors = descr[0]
        print "begin loading image feature files..."
        for i in np.arange(1, nbr_images):
            descr.append(sift.read_features_from_file(featurefiles[i])[1])
#                descriptors = np.vstack((descriptors, descr[i]))
            descriptors = np.vstack((descriptors, descr[i][::subsampling,:]))
            if i % 100 == 0:
                print(i, "images have been loaded...")
        print("finished loading image feature files!")

#        self.voc, distortion = cluster.kmeans(descriptors[::subsampling,:], k, 1)
        print "begin MiniBatchKMeans cluster....patient"
        mbk = MiniBatchKMeans(k, init="k-means++", compute_labels=False, n_init=3, init_size=3*k)
#        mbk.fit(descriptors[::subsampling,:])
        mbk.fit(descriptors)
        self.voc = mbk.cluster_centers_
        print "cluster finish!"
        self.nbr_word = self.voc.shape[0]
        imwords = np.zeros((nbr_images, self.nbr_word))
        for i in range(nbr_images):
            imwords[i] = self.project(descr[i])

        nbr_occurences = np.sum((imwords > 0)*1, axis=0)
        self.idf = np.log( (1.0*nbr_images) / (1.0*nbr_occurences+1) )
        self.traindata = featurefiles
Author: yangxian10, Project: CBIR_py, Lines: 31, Source: visual_word.py

Example 5: cluster_function

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def cluster_function(user_np):
    ##############################################################################
    # Compute clustering with Means
    if len(user_np) < 10 :
        n_cl = 2
    elif len(user_np) <= 100 :
        n_cl = 10
    elif len(user_np) <= 500 :
        n_cl = 15
    elif len(user_np) <= 1000 :
        n_cl = 20
    else :
        n_cl = 30

    k_means = MiniBatchKMeans(n_clusters=n_cl, init='k-means++', max_iter=100, batch_size=100, verbose=0, compute_labels=True, 
                              random_state=None, tol=0.0, max_no_improvement=10, init_size=None, n_init=3, reassignment_ratio=0.01)

    t0 = time.time()
    k_means.fit(user_np)
    
    t_batch = time.time() - t0
    print "Batch running time : ", t_batch
    
    k_means_labels = k_means.labels_
    
    #prediction = k_means.predict(user_np)
    return k_means_labels
Author: jimdsouza89, Project: Entertainment-and-media-analytics, Lines: 29, Source: user_profiling.py

Example 6: cluster_tfidf

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def cluster_tfidf(tfidf):
    kmeans = MiniBatchKMeans(n_clusters=10, init='k-means++', n_init=1,
                         init_size=1000, batch_size=1000)

    kmeans.fit(tfidf)

    return kmeans.cluster_centers_
Author: sash-ko, Project: ml_playground, Lines: 9, Source: text_mining.py

Example 7: aggregate

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
   def aggregate(self, track_dataset):
       """
       An example implementation of the k-means algorithm from
       DSI Studio. This function is automatically applied to all
       TrackDatasets returned from a query.
 
       Parameters:
       -----------
       track_dataset:dsi2.streamlines.track_dataset.TrackDataset
       """
       # extract the streamline data
       tracks = track_dataset.tracks
       
       # Make a matrix of downsampled streamlines
       points = np.array([ downsample(trk, 3).flatten() \
                                   for trk in tracks])
 
       # Calculate the length of each streamline
       lengths = np.array([len(trk) for trk in tracks]).reshape(-1,1)
       
       # Concatenate the points and the track lengths
       features = np.hstack((points, lengths))
       
       # Initialize the k-means algorithm
       kmeans = MiniBatchKMeans(n_clusters=self.k, compute_labels=True)
       kmeans.fit(features)
 
       # Return the labels
       return kmeans.labels_      
Author: mattcieslak, Project: DSI2, Lines: 31, Source: aggregator_subclass.py

Example 8: main

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def main():

    p = Pool(8)

    #dog_images, cat_images, test_images = import_files('small_dog_img',
    #'small_cat_img', 'small_test_img')
    dog_images, cat_images, test_images = import_files('dog_img', 'cat_img',
    'test_img')
    n_dog = len(dog_images)
    n_cat = len(cat_images)
    n_train = n_dog + n_cat
    n_test = len(test_images)
    all_images = np.concatenate((dog_images, cat_images, test_images), axis = 0)
    n_all = all_images.shape[0]
    sift_start = time.time()
    sift_features = p.map(map_sift_desc, all_images)
    sift_end = time.time()
    print((sift_end - sift_start) * 1000)
    train_sift_features = reduce_sift_desc(sift_features[: n_train])
    test_sift_features = reduce_sift_desc(sift_features[n_train :])
    kmeans_start = time.time()
    kmeans = MiniBatchKMeans(n_clusters = 1000, batch_size = 1000, max_iter = 250)
    kmeans.fit(train_sift_features)
    train_predicted_labels = kmeans.predict(train_sift_features)
    test_predicted_labels = kmeans.predict(test_sift_features)
    kmeans_end = time.time()
    print((kmeans_end - kmeans_start) * 1000)
Author: redswallow, Project: image-understanding, Lines: 30, Source: dog_cat.py

Example 9: run_n_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def run_n_kmeans(data, num_cluster, n_runs, min_count):
    """
    Runs k-means `n_runs` times. Each song is assigned to the
    cluster it falls into in more than `min_count` of the `n_runs`.
    If `min_count` is not met, we consider the song as not belonging
    to any cluster.
    """

    # TODO: refactor and move this method to vod common libs
    n_rows = data.shape[0]
    results = np.zeros((n_rows, n_runs))
    kmeans = MiniBatchKMeans(num_cluster)

    for i in range(n_runs):
        kmeans.fit(data)
        labels_column = kmeans.labels_
        results[:, i] = labels_column

    return_val = np.zeros(n_rows)
    for i in range(n_rows):
        counter = Counter(results[i])
        most_common = counter.most_common(1)[0]
        clust = most_common[0]
        freq = most_common[1]

        if freq > min_count:
            return_val[i] = clust
        else:
            return_val[i] = -1

    return return_val
Author: flaviovdf, Project: data-mining, Lines: 33, Source: k_means.py

Example 10: exploitingKMeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def exploitingKMeans(data, ctrldata, length, ctrlStatus, k, who, genes, sirna,
                     iteration=10, colors='ctrlStatus', Td=True, show=False, plotcov=False):
    if iteration==False:
        model = MiniBatchKMeans(n_clusters=k, batch_size = 2000, init='k-means++',n_init=1000,max_iter=1000, max_no_improvement=100, compute_labels = True)
        zou=model.fit(data)
        labels=zou.labels_
        if ctrldata is not None:
            ctrl_labels=model.predict(ctrldata)
            labels=np.hstack((labels,ctrl_labels))
        
        if show==True:
            percentages = filmCharacterization(labels, length, ctrlStatus, genes,colors, Td, plotcov)
            return labels, percentages
        else:
            return labels
    else:
        first=True; countsTotal=[]; oldPerc=None
        for it in range(iteration):
            print "iteration ", it
            model = MiniBatchKMeans(n_clusters=k, batch_size = 1000, init='random',n_init=1000,max_iter=1000, max_no_improvement=100, compute_labels = True)
            zou=model.fit(data)
            
            counts, percentages = np.array(filmCount(zou.labels_, length))
            if not first:
                index=np.argmax(percentages, axis=0)
                corr={np.argmax(oldPerc[ind]):np.where(ind==index)[0][0] for ind in index}
                print(it, corr)
                for el in corr:
                    countsTotal[:,el]+=counts[:,corr[el]]
            else:
                countsTotal=counts
                first=False
                oldPerc=percentages
        return countsTotal
Author: PeterJackNaylor, Project: Xb_screen, Lines: 36, Source: exploiting_clustering.py

Example 11: fit

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
    def fit(self, X, Y):
        self.class_labels = np.array(sorted(set(Y)))
        # KMeans here appears to be MiniBatchKMeans imported under an alias
        # (init_size is a MiniBatchKMeans-specific parameter).
        D = KMeans(n_clusters=self.n_atoms, init_size=self.n_atoms*3)
        D.fit(np.vstack(X)[:200000])
        D = D.cluster_centers_
        D = normalize(D)
        self.D_mean = np.mean(D, axis=0)
        self.D = D - self.D_mean
        self.D_idxs = []
        self.clfs = []

        for i in range(self.n_nodes):
            idxs = np.random.permutation(self.n_atoms)[:self.n_features]
            # note: this second assignment overwrites the idxs computed above
            idxs = np.random.permutation(len(X[0][0]))[:self.n_features]
            enc_X = []
            for x in X:
                x = normalize(x)
                x = x - self.D_mean
                enc_X.append(bow(np.argmax(np.abs(np.dot(self.D[:, idxs], x.T[idxs, :])), axis=0), self.n_atoms))

            clf = LinearSVC()
            clf.fit(enc_X, Y)
            self.clfs.append(clf)
            self.D_idxs.append(idxs)
Author: psclib, Project: pscgen, Lines: 27, Source: weak_learn.py

Example 12: cluster_mbk

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
	def cluster_mbk(self):
		mbk = MiniBatchKMeans(init='k-means++', n_clusters=40, batch_size=100,
					  n_init=100, max_no_improvement=10, verbose=0,
					  random_state=0)
		mbk.fit(self.all_frames_xy)
		clusters = mbk.predict(self.all_frames_xy)
		return clusters
Author: wpotrzebowski, Project: StormClustering, Lines: 9, Source: storm_clustering.py

Example 13: initalized_mini_batch_em

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def initalized_mini_batch_em(X, nClusters, varianceScale, miniBatchSize=1000, nIter=20, nInit=10) :

    nFeatures = X.shape[1]

    print "nfeatures",nFeatures,"nClusters",nClusters


    assert X.shape[0]>X.shape[1]
    mbkm=MiniBatchKMeans(n_clusters=nClusters, batch_size=miniBatchSize, n_init=nInit)

    print "mini batch k means initialization"
    mbkm.fit(X)
    centers =  mbkm.cluster_centers_.swapaxes(0,1).astype(numpy.float64)

    print "swap axes"
    X = X.swapaxes(0,1).astype(numpy.float64)

    print "mini batch em"
    cAlg = MiniBatchEm(nFeatures=nFeatures,nClusters=nClusters,miniBatchSize=miniBatchSize,nIter=nIter,varianceScale=varianceScale)

    print "initialize"
    cAlg.initalizeCenters(centers)
    cAlg.run(X) 

    print "predict"
    probs = cAlg.predict(X)
    probs = probs.swapaxes(0,1)

    return probs
Author: timoMa, Project: skneuro, Lines: 31, Source: __init__.py

Example 14: train_codebook

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def train_codebook(params, X):

    # Init kmeans instance (the first positional argument is n_clusters)
    km = MiniBatchKMeans(params["descriptor_size"])

    # Training the model with our descriptors
    km.fit(X)

    # Save to disk
    pickle.dump(
        km,
        open(
            os.path.join(
                params["root"],
                params["root_save"],
                params["codebooks_dir"],
                "codebook_"
                + str(params["descriptor_size"])
                + "_"
                + params["descriptor_type"]
                + "_"
                + params["keypoint_type"]
                + ".cb",
            ),
            "wb",
        ),
    )

    return km
Author: gdsa-upc, Project: gdsa-suport, Lines: 31, Source: get_features.py

Example 15: main

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def main():
    if len(sys.argv) != 4:
        print(__doc__)
        return 1

    infiles = glob(sys.argv[1])
    outfile = sys.argv[2]
    K = int(sys.argv[3])

    print("Reading in", len(infiles), "files")
    fullarr = np.loadtxt(fileinput.input(infiles), delimiter = '\t')[:,:-7]

    summary_stats = None
    stats_file = '/n/fs/gcf/dchouren-repo/COS513-Finance/summary_stats/stats2'
    with open(stats_file, 'rb') as inf:
        summary_stats = np.loadtxt(inf)
    stds = summary_stats[:len(summary_stats) // 2]
    means = summary_stats[len(summary_stats) // 2:]

    fullarr = (fullarr - means) / stds

    print("Learning MiniBatchKMeans with K =", K)

    km = MiniBatchKMeans(n_clusters=K, verbose=True)  # TODO max_iter
    km.fit(fullarr)

    print("KMeans trained, saving")

    with open(outfile, 'wb') as out_model:
        pickle.dump(km, out_model)

    print("Score:", km.score(fullarr))
    
    return 0
Author: Alex223124, Project: COS513-Finance, Lines: 36, Source: clustering.py


Note: The sklearn.cluster.MiniBatchKMeans.fit method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors, and distribution and use should follow the corresponding project's license. Do not reproduce without permission.