

Python MiniBatchKMeans.partial_fit Method Code Examples

This article collects typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.partial_fit. If you have been wondering exactly what MiniBatchKMeans.partial_fit does and how to use it, the hand-picked code examples below should help. You can also read further about the enclosing class, sklearn.cluster.MiniBatchKMeans, and its other usage examples.


Fifteen code examples of MiniBatchKMeans.partial_fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
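All fifteen examples share the same basic pattern, sketched minimally below on synthetic data (the cluster count, batch size, and shapes here are illustrative only, not taken from any example): construct the estimator once, then feed successive mini-batches to partial_fit; each call refines the current centroids, and predict can be used at any point.

from sklearn.cluster import MiniBatchKMeans
import numpy as np

rng = np.random.RandomState(0)
km = MiniBatchKMeans(n_clusters=8, batch_size=256, random_state=0)

# Each partial_fit call updates the current centroids with one mini-batch.
for _ in range(10):
    chunk = rng.randn(256, 4)  # stand-in for one chunk of streamed samples
    km.partial_fit(chunk)

print(km.cluster_centers_.shape)  # -> (8, 4)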

Example 1: DocDescriptor

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
class DocDescriptor(object):

    def __init__(self, word_descriptor, n_clusters=1000):
        self._n_clusters = n_clusters
        self._cluster = MiniBatchKMeans(n_clusters=n_clusters, verbose=1,
                                        max_no_improvement=None, reassignment_ratio=1.0)
        self._word_descriptor = word_descriptor

    def get_word_descriptor(self, img):
        X = get_features_from_image(img)
        words = []
        for i in X:
            words.append(self._word_descriptor.transform(i))
        return words

    def partial_fit(self, img):
        X = self.get_word_descriptor(img)
        self._cluster.partial_fit(X)

    def transform(self, img):
        # Accumulate a fixed-unit histogram of visual-word assignments.
        X = self.get_word_descriptor(img)
        Y = self._cluster.predict(X)
        desc = [0] * self._n_clusters
        unit = 1.0 / self._n_clusters
        for y in Y:
            desc[y] += unit
        return desc
Author: caoym | Project: odr | Lines: 28 | Source: odr.py

Example 2: __init__

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
class MiniCluster:
    def __init__(self, nclusters=1000, psize=16):
        self.psize = psize
        self.patch_size = (self.psize, self.psize)
        self.nclusters = nclusters
        self.rng = np.random.RandomState(0)
        self.kmeans = MiniBatchKMeans(n_clusters=nclusters, random_state=self.rng, verbose=True)

    def fit(self, images):
        buffer = []
        index = 0
        t0 = time.time()

        # The online learning part: cycle over the whole dataset `passes` times
        passes = 10
        for _ in range(passes):
            for img in images:
                data = extract_patches_2d(img, self.patch_size, max_patches=15,
                                          random_state=self.rng)
                data = np.reshape(data, (len(data), -1))
                # This casting is only needed for RGB data:
                # buffer.append(data.astype(float))
                buffer.append(data)
                index += 1
                if index % (self.nclusters * 2) == 0:
                    data = np.concatenate(buffer, axis=0)
                    data = gcn(data)     # project-specific contrast normalization
                    data = whiten(data)  # project-specific whitening
                    self.kmeans.partial_fit(data)
                    buffer = []

        dt = time.time() - t0
        print('done in %.2fs.' % dt)
Author: seanv507 | Project: sklearn | Lines: 37 | Source: mini_batch_clusters.py

Example 3: _run_cluster

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def _run_cluster(origin_list, cluster_num=8, batch_size=100, resize=(64, 64)):
    clf = MiniBatchKMeans(n_clusters=cluster_num, batch_size=batch_size)

    def next_batch(allfiles, batch_size):
        imgs = []
        inds = []
        for ind, (path, label) in enumerate(allfiles):
            img = Image.open(path).convert("L")
            img = img.resize(resize, Image.ANTIALIAS)
            img = np.reshape(np.array(img), (1, -1)).astype(np.float32) / 255.0
            imgs.append(img)
            inds.append(ind)
            if len(imgs) >= batch_size:
                yield np.vstack(imgs), inds
                imgs = []
                inds = []
        if len(inds) > 0:
            # yield (not return) the final partial batch so it is not dropped
            yield np.vstack(imgs), inds

    # first pass: fit the clusterer batch by batch
    for batch, _ in next_batch(origin_list, batch_size):
        clf.partial_fit(batch)

    # second pass: assign every image to its cluster
    cluster_dict = defaultdict(list)
    for batch, inds in next_batch(origin_list, batch_size):
        Ys = clf.predict(batch)
        for y, ind in zip(Ys, inds):
            path, label = origin_list[ind]
            cluster_dict[y].append((path, label))
    return cluster_dict
Author: z01nl1o02 | Project: tests | Lines: 29 | Source: clusterOP.py

Example 4: partialFitMiniBatchKmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def partialFitMiniBatchKmeans(training_file, categorical_features, label, maxLines, ks):
    # Create the estimator once, so that successive partial_fit calls refine
    # the same centroids instead of restarting from scratch on every chunk.
    km = MiniBatchKMeans(init='k-means++', n_clusters=ks, batch_size=maxLines,
                         n_init=10, max_no_improvement=10, verbose=0)
    stop = False
    cont = 0
    with open(training_file, 'r') as f:
        while not stop:
            print('Training section - reading data from file ...')
            if cont == 0:
                header = f.readline().rstrip().split(',')
                idx = dSu.findMultiple(header, categorical_features)
                idx += dSu.listStrFind(header, label)
            cont += 1

            data = []
            stop = True
            print(cont)
            for line in f:
                stop = False
                temp = line.rstrip().split(',')
                if dSu.listStrFind(temp, 'NA') == []:
                    temp = [float(temp[i]) for i in range(len(temp)) if i not in idx]
                    data.append(temp)
                if len(data) == maxLines:
                    break
            if not stop:
                km.partial_fit(data)
    return km
Author: steven77723 | Project: FittsLawTest | Lines: 31 | Source: dataML_Big.py

Example 5: gen_codebook

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def gen_codebook(sift_features_dir, cluster_count, DUMP_TO_FILE, batch_size=BATCH_SIZE):
    if batch_size < cluster_count:
        raise Exception("batch_size must be at least cluster_count!")

    mbk = MiniBatchKMeans(
        init="k-means++",
        n_clusters=cluster_count,
        batch_size=batch_size,
        n_init=KM_N_INIT,
        max_no_improvement=KM_MAX_NO_IMPROVEMENT,
        verbose=0,
    )

    files = __get_sift_batches_from_dir(sift_features_dir)

    batch = []
    i = 0
    for sfile in files:
        with open(sfile, "rb") as f:
            siftbatch = cPickle.load(f)
        for imgfile in siftbatch:
            for feature in siftbatch[imgfile]:
                batch.append(feature)
                i += 1

                if (i % batch_size) == 0:
                    mbk.partial_fit(batch)
                    batch = []
    if batch:
        mbk.partial_fit(batch)

    with open(DUMP_TO_FILE, "wb") as f:
        cPickle.dump(mbk.cluster_centers_, f, protocol=cPickle.HIGHEST_PROTOCOL)

    return
Author: ratatatata | Project: ML-OSM | Lines: 37 | Source: algo.py

Example 6: test_minibatch_sensible_reassign_partial_fit

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def test_minibatch_sensible_reassign_partial_fit():
    zeroed_X, true_labels = make_blobs(n_samples=n_samples, centers=5, cluster_std=1.0, random_state=42)
    zeroed_X[::2, :] = 0
    mb_k_means = MiniBatchKMeans(n_clusters=20, random_state=42, init="random")
    for i in range(100):
        mb_k_means.partial_fit(zeroed_X)
    # there should not be too many exact zero cluster centers
    assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10)
Author: FedericaLionetto | Project: scikit-learn | Lines: 10 | Source: test_k_means.py

Example 7: cluster_words_all

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
    def cluster_words_all(self):
        '''
        Cluster all of the samples.
        '''

        print "start cluster_words_all ..."
        offset = 0
        limit = 300
        cluster = MiniBatchKMeans(n_clusters=100, verbose=1)
        while True:
            print ' %d partial_fit %d' % (time(), offset)

            query = DB.PcaModel.select(DB.PcaModel.feature, DB.PcaModel.pca)\
                .offset(offset).limit(limit).tuples().iterator()

            features = numpy.array(map(lambda x: [x[0]] + list(x[1]), query))
            if len(features) == 0:
                break
            offset += len(features)
            X = features[:, 1:]
            cluster.partial_fit(X)

        DB.db.connect()
        with DB.db.transaction():
            DB.Vocabulary.drop_table(fail_silently=True)
            DB.Vocabulary.create_table()
            DB.Words.drop_table(fail_silently=True)
            DB.Words.create_table()

            offset = 0
            while True:
                query = DB.PcaModel.select(DB.PcaModel.feature, DB.PcaModel.pca).offset(offset).limit(1000).tuples().iterator()
                features = numpy.array(map(lambda x: [x[0]] + list(x[1]), query))
                if len(features) == 0:
                    break
                offset += len(features)
                X = features[:, 1:]
                Y = features[:, 0]
                res = cluster.predict(X)

                for i in range(0, len(res)):
                    DB.Words.insert(id=res[i]).upsert().execute()
                    DB.Vocabulary.insert(word=res[i], feature=Y[i]).execute()

            # Persist the trained model once, after all batches have been labeled.
            DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__ + "_clf").execute()

            tr = DB.TrainingResult()
            tr.name = self.__class__.__name__ + "_clf"
            tr.data = cluster
            tr.save()
        #print "%d words, %d core samples, %d noise"%(len(types.keys()),len(res.core_sample_indices_), len(types[-1]) )

        print "done cluster_words_all"
        #self.display_words()
        return cluster
Author: caoym | Project: odr | Lines: 59 | Source: odr.py

Example 8: test_mini_batch_k_means_random_init_partial_fit

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def test_mini_batch_k_means_random_init_partial_fit():
    km = MiniBatchKMeans(n_clusters=n_clusters, init="random", random_state=42)

    # use the partial_fit API for online learning
    for X_minibatch in np.array_split(X, 10):
        km.partial_fit(X_minibatch)

    # compute the labeling on the complete dataset
    labels = km.predict(X)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
Author: Lavanya-Basavaraju | Project: scikit-learn | Lines: 12 | Source: test_k_means.py

Example 9: main

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def main():
    ''' docstring for main '''

    args = parse_args()

    setup_logging(verbose = args.verbose)

    records = consume_fasta(args.fasta_file)

    # setup Hasher, Vectorizer and Classifier

    hasher = HashingVectorizer(analyzer='char',
                               n_features = 2 ** 18,
                               ngram_range=(args.ngram_min, args.ngram_max),
                               )

    logging.info(hasher)

    encoder, classes = get_classes(records, args.tax_level)
    n_clusters = len(classes)

    logging.info('using taxonomic level %s' % args.tax_level)
    logging.info('Using %s clusters' % n_clusters)

    classifier = MiniBatchKMeans(n_clusters = n_clusters)

    records = records[0:args.n_iters]

    chunk_generator = iter_chunk(records, args.chunk_size, args.tax_level)

    logging.info('ngram range: [%s-%s]' % (args.ngram_min, args.ngram_max))

    for labels, features in chunk_generator:

        logging.info('transforming training chunk')
        labels = encoder.transform(labels)
        vectors = hasher.transform(features)

        logging.info('fitting training chunk')
        classifier.partial_fit(vectors)

        pred_labels = classifier.predict(vectors)

        score = v_measure_score(labels, pred_labels)
        shuffled_score = v_measure_score(labels, sample(pred_labels, len(pred_labels)))

        logging.info('score: %.2f' % (score))
        logging.info('shuffled score: %.2f' % (shuffled_score))
Author: audy | Project: bfc | Lines: 50 | Source: bfc.py

Example 10: fit

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
	def fit(self):
		""" Fits the k-means on the images and returns the centroids. """

		listing = os.listdir(self.images_directory)
		listing.sort()
		listing = [s for s in listing if s.endswith('.jpg')]

		km = MiniBatchKMeans(self.k, init='k-means++', compute_labels=False)
		images_per_batch = math.ceil(self.k / self.patches_per_image)
		X = np.zeros((self.patches_per_image*images_per_batch, self.patch_width**2*self.channels))
		j = 0

		for it in range(0, self.n_iterations):
			for im in range(0, len(listing)):
				image = misc.imread(self.images_directory + '/' + listing[im])
				image = self._crop_image(image)
				image = self._resize_image(image)

				for patch in range(0, self.patches_per_image):
					x = self._yield_random_patch(image)
					X[j*self.patches_per_image+patch, :] = x

				j += 1

				if j < images_per_batch:
					continue

				self.logger.debug("Training k-means: iteration = %i, done = %.2f%%", it, 100. * (im+1) / len(listing))

				j = 0

				if self.do_rotation_invariant_training:
					X90 = self._rotate_square_vector(X, 90)
					X180 = self._rotate_square_vector(X, 180)
					X270 = self._rotate_square_vector(X, 270)

				X = np.dot(X-self.M, self.P) # whitening

				if self.do_rotation_invariant_training:
					X90 = np.dot(X90-self.M, self.P)
					X180 = np.dot(X180-self.M, self.P)
					X270 = np.dot(X270-self.M, self.P)

					# NB: the stock MiniBatchKMeans.partial_fit accepts a single data
					# matrix; this call assumes a project-specific variant that also
					# takes the rotated copies.
					km.partial_fit(X, X90, X180, X270)
				else:
					km.partial_fit(X)

		return km.cluster_centers_
Author: StevenReitsma | Project: kaggle-galaxyzoo | Lines: 50 | Source: kmeans.py

Example 11: cluster_lv1

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
    def cluster_lv1(self):
        print "start cluster_lv1 ..."

        DB.db.connect()
        offset = 0
        limit = 3000
        cluster = MiniBatchKMeans(n_clusters=1000,verbose=1,max_no_improvement=None,reassignment_ratio=1.0)
        while True:
            print ' %d partial_fit %d'%(time(),offset)

            query = DB.DescriptorModel.select(DB.DescriptorModel.feature,DB.DescriptorModel.lv1).offset(offset).limit(limit).tuples().iterator()
            features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))

            if len(features) == 0:
                break
            offset += len(features)
            X = features[:,1:]
            cluster.partial_fit(X)


        with DB.db.transaction():
            DB.Vocabulary.drop_table(fail_silently=True)
            DB.Vocabulary.create_table()

            offset=0
            while True:
                print ' %d predict %d'%(time(),offset)
                query = DB.DescriptorModel.select(DB.DescriptorModel.feature,DB.DescriptorModel.lv1).offset(offset).limit(1000).tuples().iterator()
                features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
                if len(features) == 0:
                    break
                offset += len(features)
                X = features[:,1:]
                Y = features[:,0]
                res = cluster.predict(X)

                for i in range(0,len(res)):
                    DB.Vocabulary.insert(lv1 = res[i],lv2=0, feature = Y[i]).execute()

        #print "%d words, %d core samples, %d noise"%(len(types.keys()),len(res.core_sample_indices_), len(types[-1]) )
        self._lv1 = cluster
        print "done cluster_lv1"
        return cluster
Author: caoym | Project: odr | Lines: 45 | Source: odr.py

Example 12: compute_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def compute_kmeans(file_list):
    kmeans = MiniBatchKMeans(
        n_clusters=config.clustersize,
        batch_size=config.batchsize
    )

    imgs_done = 0
    print 'Computing k-means: {:5}/{:5} ({:.3f}%)'.format(
        imgs_done,
        len(file_list),
        imgs_done * 100.0 / len(file_list)
    ),

    descs_buffer = []
    total_descs = 0

    for img_path in file_list:
        descs = sift.get_descriptors(img_path)
        imgs_done += 1
        if descs is None:
            print 'Oopsie?', img_path, 'No descriptors returned!'
            continue
        if len(descs_buffer) > 0 or len(descs) < config.batchsize:
            descs_buffer.extend(descs)
        else:
            kmeans.partial_fit(descs)
            total_descs += len(descs)

        if len(descs_buffer) >= config.batchsize:
            kmeans.partial_fit(descs_buffer)
            total_descs += len(descs_buffer)
            descs_buffer = []

        print '\rComputing k-means: {:5}/{:5} ({:.3f}%)'.format(
            imgs_done,
            len(file_list),
            imgs_done * 100.0 / len(file_list)
        ),

    # flush any leftover descriptors that never filled a whole batch
    if descs_buffer:
        kmeans.partial_fit(descs_buffer)
        total_descs += len(descs_buffer)

    print '\nKmeans clustering complete, analysed {} samples'.format(total_descs)
    return kmeans
Author: hharchani | Project: smai-project | Lines: 43 | Source: kmeans.py

Example 13: __init__

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
class Kmeans:
  kmeans_batch_size = 45
  kmeans = None
  k = None
  def __init__(self, k=10, centers=None):
    self.k = k
    if centers is not None:
      init_centers = centers
    else:
      init_centers = 'k-means++'

    self.kmeans = MiniBatchKMeans(init=init_centers, n_clusters=self.k, batch_size=self.kmeans_batch_size,
                                  n_init=10, max_no_improvement=10, verbose=0)
  
  def fit(self, X):
    self.kmeans.fit(X)

  def partial_fit(self, X):
    self.kmeans.partial_fit(X)

  def predict(self, X):
    return self.kmeans.predict(X)

  def get_centers(self):
    return self.kmeans.cluster_centers_

  def set_centers(self, centers):
    self.kmeans.cluster_centers_ = centers    

  def predict_hist(self, X):
    labels = self.predict(X)
    # k+1 edges give one unit-width bin per cluster label 0 .. k-1
    bins = range(self.k + 1)
    histogram = np.histogram(labels, bins=bins, density=True)[0]
    return histogram

  def get_params(self):
    return self.kmeans.get_params(deep=True)

  def set_params(self, kmeans_params):
    self.kmeans.set_params(**kmeans_params)
Author: guidefreitas | Project: bag_of_visual_words | Lines: 43 | Source: kmeans.py
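
A brief usage sketch for the wrapper above; the random descriptor matrix is a stand-in for real feature data, and the sizes are illustrative only:

import numpy as np

X = np.random.RandomState(0).rand(500, 64)  # stand-in local descriptors
quantizer = Kmeans(k=10)
quantizer.partial_fit(X[:250])    # stream the data in two mini-batches
quantizer.partial_fit(X[250:])
hist = quantizer.predict_hist(X)  # 10-bin normalized visual-word histogram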

Example 14: online_mbk

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def online_mbk(sift_features):
    n = sift_features.shape[0]
    rng = np.random.RandomState(0)
    kmeans = MiniBatchKMeans(n_clusters = 400, batch_size = 400, max_iter = 100, random_state = rng, verbose = True)
    index = 0
    for _ in range(3):
        sift_features = shuffle(sift_features, n_samples = int(round(n*0.1)), random_state = rng)
        i = iter(sift_features)
        while True:
            index += 1
            print index*2500
            sublist = list(islice(i, 2500))
            if len(sublist) > 0:
                sublist = np.vstack(sublist)
                kmeans.partial_fit(sublist)
            else:
                break

    print "finished training"
    predicted_labels = kmeans.predict(sift_features)
    return predicted_labels
Author: redswallow | Project: image-understanding | Lines: 23 | Source: dog_cat_cv.py

Example 15: MiniBatchKMeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
cluster_names = [(8, 'cl0'), (20, 'cl1'), (40, 'cl2')]
ckeys = [c[1] for c in cluster_names]
kms = {}
for nclust, key in cluster_names:
    km = MiniBatchKMeans(n_clusters=nclust)
    X = None
    count = 0
    for e in event_info.find():
        if X is None:
            X = np.array(e['words'])
        else:
            X = np.vstack((X, e['words']))
        count += 1
        
        if count % 10000 == 0:
            km.partial_fit(X)
            X = None
            print count

    # fit whatever is left over after the last full batch of 10000
    if X is not None:
        km.partial_fit(X)

    kms[key] = km
    
event_clusters = {}
for e in event_info.find():
    # wrap e['words'] in a list so predict receives a 2D (1, n_features) input
    clusters = {key: int(km.predict([e['words']])[0]) for key, km in kms.iteritems()}
    event_info.update({'id': e['id']},
        {'$set': clusters})
    
    
# load database data, for fast access
event_clusters = {}
for e in event_info.find():
Author: HamedMP | Project: kaggle-event-recommendation | Lines: 33 | Source: cluster.py


Note: The sklearn.cluster.MiniBatchKMeans.partial_fit examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various authors; copyright in the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not reproduce without permission.