This article collects typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.partial_fit. If you have been asking yourself what MiniBatchKMeans.partial_fit does, how to call it, or where to find working examples, the curated code samples below should help. You can also read further about the containing class, sklearn.cluster.MiniBatchKMeans.
The following shows 15 code examples of MiniBatchKMeans.partial_fit, sorted by popularity by default. Upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
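Every example below follows the same basic pattern: construct a MiniBatchKMeans, feed it one chunk of data at a time via partial_fit, then call predict once training is done. Here is a minimal, self-contained sketch of that pattern; the synthetic make_blobs data, the chunk count, and all parameter values are illustrative stand-ins, not taken from any example below.

import numpy as np
from sklearn.cluster import MiniBatchKMeans
from sklearn.datasets import make_blobs

# Synthetic data stands in for a real stream; 10 chunks are arbitrary.
X, _ = make_blobs(n_samples=3000, centers=5, random_state=0)
km = MiniBatchKMeans(n_clusters=5, random_state=0)
for chunk in np.array_split(X, 10):
    km.partial_fit(chunk)   # one incremental update per chunk
labels = km.predict(X)      # label the full dataset afterwards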
Example 1: DocDescriptor
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
class DocDescriptor(object):
def __init__(self, word_descriptor, n_clusters = 1000):
self._n_clusters = n_clusters
self._cluster = MiniBatchKMeans(n_clusters=n_clusters,verbose=1,max_no_improvement=None,reassignment_ratio=1.0)
self._word_descriptor = word_descriptor
def get_word_descriptor(self, img):
X = get_features_from_image(img)
words = []
for i in X:
words.append(self._word_descriptor.transform(i))
return words
def partial_fit(self, img):
X = self.get_word_descriptor(img)
self._cluster.partial_fit(X)
def transform(self, img):
X = self.get_word_descriptor(img)
Y = self._cluster.predict(X)
desc = [0]*self._n_clusters
unit = 1.0/self._n_clusters
for i in range(0, len(Y)):
desc[Y[i]] += unit
return desc
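A hypothetical driver for the class above, to show the intended call order. The wd object (anything exposing .transform(feature)), the image variables, and the loop are assumed for illustration; get_features_from_image must come from the example's own module.

# Hypothetical usage sketch for DocDescriptor.
doc_desc = DocDescriptor(wd, n_clusters=1000)
for img in training_images:          # grow the visual vocabulary online
    doc_desc.partial_fit(img)
histogram = doc_desc.transform(query_img)   # length-1000 word histogram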
Example 2: __init__
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
class MiniCluster:
def __init__(self, nclusters=1000, psize=16):
        self.psize = psize  # honor the argument instead of hard-coding 16
self.patch_size = (self.psize, self.psize)
self.nclusters = nclusters
self.rng = np.random.RandomState(0)
self.kmeans = MiniBatchKMeans(n_clusters=nclusters, random_state=self.rng, verbose=True)
def fit(self, images):
buffer = []
        t0 = time.time()
        # The online learning part: cycle over the whole dataset `passes` times
        index = 0
        passes = 10
for _ in range(passes):
for img in images:
data = extract_patches_2d(img, self.patch_size, max_patches=15,
random_state=self.rng)
data = np.reshape(data, (len(data), -1))
#This casting is only needed for RGB data
#buffer.append(data.astype(float))
buffer.append(data)
index += 1
#if index % 1000 == 0:
if index % (self.nclusters * 2) == 0:
data = np.concatenate(buffer, axis=0)
data = gcn(data)
data = whiten(data)
self.kmeans.partial_fit(data)
buffer = []
dt = time.time() - t0
print('done in %.2fs.' % dt)
Example 3: _run_cluster
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def _run_cluster(origin_list, cluster_num = 8, batch_size=100,resize=(64,64)):
clf = MiniBatchKMeans(n_clusters=cluster_num,batch_size=batch_size)
def next_batch(allfiles,batch_size):
imgs = []
inds = []
for ind,(path,label) in enumerate(allfiles):
img = Image.open(path).convert("L")
            img = img.resize(resize, Image.ANTIALIAS)
img = np.reshape(np.array(img),(1,-1)).astype(np.float32) / 255.0
imgs.append(img)
inds.append(ind)
if len(imgs) >= batch_size:
yield np.vstack(imgs), inds
imgs = []
inds = []
        if len(imgs) > 0:
            # flush the final partial batch; a generator must yield, not return a value
            yield np.vstack(imgs), inds
    for batch, _ in next_batch(origin_list, batch_size):
clf.partial_fit(batch)
cluster_dict = defaultdict(list)
    for batch, inds in next_batch(origin_list, batch_size):
Ys = clf.predict(batch)
for y, ind in zip(Ys, inds):
path,label = origin_list[ind]
            cluster_dict[y].append((path, label))
return cluster_dict
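A hedged usage sketch for _run_cluster; the (path, label) tuple format is inferred from how origin_list is indexed above, and the image paths are placeholders.

# Hypothetical call with placeholder image paths and labels.
origin_list = [('imgs/a.png', 0), ('imgs/b.png', 1), ('imgs/c.png', 0)]
groups = _run_cluster(origin_list, cluster_num=2, batch_size=2)
for cluster_id, members in groups.items():
    print cluster_id, [p for p, _ in members]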
Example 4: partialFitMiniBatchKmeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def partialFitMiniBatchKmeans(training_file,categorical_features,label,maxLines,ks):
stop=False
cont=0
    # Construct the estimator once so successive partial_fit calls accumulate;
    # batch_size uses the nominal chunk size (maxLines) rather than len(data).
    km = MiniBatchKMeans(init='k-means++', n_clusters=ks, batch_size=maxLines,
                         n_init=10, max_no_improvement=10, verbose=0)
with open(training_file,'r') as f:
while stop==False:
print('Training section - reading data from file ...')
if cont==0:
header=f.readline().rstrip().split(',')
idx=dSu.findMultiple(header, categorical_features)
idx+=dSu.listStrFind(header,label)
cont+=1
data=[]
stop=True
print(cont)
for line in f:
stop=False
temp=line.rstrip().split(',')
if dSu.listStrFind(temp,'NA')==[]:
temp=[float(temp[i]) for i in range(len(temp)) if not i in idx]
data.append(temp)
if len(data)==maxLines:
break
            if len(data) > 0:
                km.partial_fit(data)
return km
Example 5: gen_codebook
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def gen_codebook(sift_features_dir, cluster_count, DUMP_TO_FILE, batch_size=BATCH_SIZE):
mbk = MiniBatchKMeans(
init="k-means++",
n_clusters=cluster_count,
        batch_size=batch_size,  # use the parameter, not the module-level default
n_init=KM_N_INIT,
max_no_improvement=KM_MAX_NO_IMPROVEMENT,
verbose=0,
)
    if batch_size < cluster_count:
        raise ValueError("batch_size must be at least cluster_count!")
files = __get_sift_batches_from_dir(sift_features_dir)
batch = []
i = 0
for sfile in files:
with open(sfile, "rb") as f:
siftbatch = cPickle.load(f)
for imgfile in siftbatch:
for feature in siftbatch[imgfile]:
batch.append(feature)
i += 1
if (i % batch_size) == 0:
mbk.partial_fit(batch)
batch = []
if batch:
mbk.partial_fit(batch)
with open(DUMP_TO_FILE, "wb") as f:
cPickle.dump(mbk.cluster_centers_, f, protocol=cPickle.HIGHEST_PROTOCOL)
return
Example 6: test_minibatch_sensible_reassign_partial_fit
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def test_minibatch_sensible_reassign_partial_fit():
zeroed_X, true_labels = make_blobs(n_samples=n_samples, centers=5, cluster_std=1.0, random_state=42)
zeroed_X[::2, :] = 0
mb_k_means = MiniBatchKMeans(n_clusters=20, random_state=42, init="random")
for i in range(100):
mb_k_means.partial_fit(zeroed_X)
# there should not be too many exact zero cluster centers
assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10)
Example 7: cluster_words_all
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def cluster_words_all(self):
        '''
        Cluster all of the samples.
        '''
print "start cluster_words_all ..."
offset = 0
limit = 300
cluster = MiniBatchKMeans(n_clusters=100,verbose=1)
while True:
print ' %d partial_fit %d'%(time(),offset)
query = DB.PcaModel.select(DB.PcaModel.feature,DB.PcaModel.pca)\
.offset(offset).limit(limit).tuples().iterator()
features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
if len(features) == 0:
break
offset += len(features)
X = features[:,1:]
cluster.partial_fit(X)
DB.db.connect()
with DB.db.transaction():
DB.Vocabulary.drop_table(fail_silently=True)
DB.Vocabulary.create_table()
DB.Words.drop_table(fail_silently=True)
DB.Words.create_table()
offset=0
while True:
query = DB.PcaModel.select(DB.PcaModel.feature,DB.PcaModel.pca).offset(offset).limit(1000).tuples().iterator()
features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
if len(features) == 0:
break
offset += len(features)
X = features[:,1:]
Y = features[:,0]
res = cluster.predict(X)
for i in range(0,len(res)):
DB.Words.insert(id = res[i]).upsert().execute()
DB.Vocabulary.insert(word = res[i], feature = Y[i]).execute()
DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__+"_clf").execute()
tr = DB.TrainingResult()
tr.name = self.__class__.__name__+"_clf"
tr.data = cluster
tr.save()
#print "%d words, %d core samples, %d noise"%(len(types.keys()),len(res.core_sample_indices_), len(types[-1]) )
print "done cluster_words_all"
#self.display_words()
return cluster
Example 8: test_mini_batch_k_means_random_init_partial_fit
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def test_mini_batch_k_means_random_init_partial_fit():
km = MiniBatchKMeans(n_clusters=n_clusters, init="random", random_state=42)
# use the partial_fit API for online learning
for X_minibatch in np.array_split(X, 10):
km.partial_fit(X_minibatch)
# compute the labeling on the complete dataset
labels = km.predict(X)
assert_equal(v_measure_score(true_labels, labels), 1.0)
Example 9: main
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def main():
    ''' docstring for main '''
args = parse_args()
setup_logging(verbose = args.verbose)
records = consume_fasta(args.fasta_file)
# setup Hasher, Vectorizer and Classifier
hasher = HashingVectorizer(analyzer='char',
n_features = 2 ** 18,
ngram_range=(args.ngram_min, args.ngram_max),
)
logging.info(hasher)
encoder, classes = get_classes(records, args.tax_level)
n_clusters = len(classes)
logging.info('using taxonomic level %s' % args.tax_level)
logging.info('Using %s clusters' % n_clusters)
classifier = MiniBatchKMeans(n_clusters = n_clusters)
records = records[0:args.n_iters]
chunk_generator = iter_chunk(records, args.chunk_size, args.tax_level)
logging.info('ngram range: [%s-%s]' % (args.ngram_min, args.ngram_max))
for labels, features in chunk_generator:
logging.info('transforming training chunk')
labels = encoder.transform(labels)
vectors = hasher.transform(features)
logging.info('fitting training chunk')
classifier.partial_fit(vectors)
pred_labels = classifier.predict(vectors)
score = v_measure_score(labels, pred_labels)
shuffled_score = v_measure_score(labels, sample(pred_labels, len(pred_labels)))
logging.info('score: %.2f' % (score))
logging.info('shuffled score: %.2f' % (shuffled_score))
Example 10: fit
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def fit(self):
""" Fits the k-means on the images and returns the centroids. """
listing = os.listdir(self.images_directory)
listing.sort()
listing = [s for s in listing if s.endswith('.jpg')]
km = MiniBatchKMeans(self.k, init='k-means++', compute_labels=False)
        images_per_batch = int(math.ceil(float(self.k) / self.patches_per_image))  # true ceiling under Python 2 integer division
X = np.zeros((self.patches_per_image*images_per_batch, self.patch_width**2*self.channels))
j = 0
for it in range(0, self.n_iterations):
for im in range(0, len(listing)):
image = misc.imread(self.images_directory + '/' + listing[im])
image = self._crop_image(image)
image = self._resize_image(image)
for patch in range(0, self.patches_per_image):
x = self._yield_random_patch(image)
X[j*self.patches_per_image+patch, :] = x
j += 1
if j < images_per_batch:
continue
self.logger.debug("Training k-means: iteration = %i, done = %.2f%%", it, 100. * (im+1) / len(listing))
j = 0
if self.do_rotation_invariant_training:
X90 = self._rotate_square_vector(X, 90)
X180 = self._rotate_square_vector(X, 180)
X270 = self._rotate_square_vector(X, 270)
X = np.dot(X-self.M, self.P) # whitening
if self.do_rotation_invariant_training:
                    X90 = np.dot(X90-self.M, self.P)
                    X180 = np.dot(X180-self.M, self.P)
                    X270 = np.dot(X270-self.M, self.P)
                    # partial_fit accepts a single sample matrix, so the rotated
                    # copies are stacked into one batch (assumed intent here)
                    km.partial_fit(np.vstack((X, X90, X180, X270)))
else:
km.partial_fit(X)
return km.cluster_centers_
Example 11: cluster_lv1
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def cluster_lv1(self):
print "start cluster_lv1 ..."
DB.db.connect()
offset = 0
limit = 3000
cluster = MiniBatchKMeans(n_clusters=1000,verbose=1,max_no_improvement=None,reassignment_ratio=1.0)
while True:
print ' %d partial_fit %d'%(time(),offset)
query = DB.DescriptorModel.select(DB.DescriptorModel.feature,DB.DescriptorModel.lv1).offset(offset).limit(limit).tuples().iterator()
features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
if len(features) == 0:
break
offset += len(features)
X = features[:,1:]
cluster.partial_fit(X)
with DB.db.transaction():
DB.Vocabulary.drop_table(fail_silently=True)
DB.Vocabulary.create_table()
offset=0
while True:
print ' %d predict %d'%(time(),offset)
query = DB.DescriptorModel.select(DB.DescriptorModel.feature,DB.DescriptorModel.lv1).offset(offset).limit(1000).tuples().iterator()
features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
if len(features) == 0:
break
offset += len(features)
X = features[:,1:]
Y = features[:,0]
res = cluster.predict(X)
for i in range(0,len(res)):
DB.Vocabulary.insert(lv1 = res[i],lv2=0, feature = Y[i]).execute()
#print "%d words, %d core samples, %d noise"%(len(types.keys()),len(res.core_sample_indices_), len(types[-1]) )
        self._lv1 = cluster
print "done cluster_lv1"
return cluster
Example 12: compute_kmeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def compute_kmeans(file_list):
kmeans = MiniBatchKMeans(
n_clusters=config.clustersize,
batch_size=config.batchsize
)
imgs_done = 0
print 'Computing k-means: {:5}/{:5} ({:.3f}%)'.format(
imgs_done,
len(file_list),
imgs_done * 100.0 / len(file_list)
),
descs_buffer = []
total_descs = 0
for img_path in file_list:
descs = sift.get_descriptors(img_path)
imgs_done += 1
if descs is None:
print 'Oopsie?', img_path, 'No descriptors returned!'
continue
if len(descs_buffer) > 0 or len(descs) < config.batchsize:
descs_buffer.extend(descs)
else:
kmeans.partial_fit(descs)
total_descs += len(descs)
if len(descs_buffer) >= config.batchsize:
kmeans.partial_fit(descs_buffer)
total_descs += len(descs_buffer)
descs_buffer = []
print '\rComputing k-means: {:5}/{:5} ({:.3f}%)'.format(
imgs_done,
len(file_list),
imgs_done * 100.0 / len(file_list)
),
    if descs_buffer:
        # flush descriptors that never filled a whole batch
        kmeans.partial_fit(descs_buffer)
        total_descs += len(descs_buffer)
    print '\nKmeans clustering complete, analysed {} samples'.format(total_descs)
return kmeans
Example 13: __init__
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
class Kmeans:
kmeans_batch_size = 45
kmeans = None
k = None
def __init__(self, k=10, centers=None):
self.k = k
        if centers is not None:
            # "!= None" misbehaves on numpy arrays; an identity check is safe
            init_centers = centers
        else:
            init_centers = 'k-means++'
self.kmeans = MiniBatchKMeans(init=init_centers, n_clusters=self.k, batch_size=self.kmeans_batch_size,
n_init=10, max_no_improvement=10, verbose=0)
def fit(self, X):
self.kmeans.fit(X)
def partial_fit(self, X):
self.kmeans.partial_fit(X)
def predict(self, X):
return self.kmeans.predict(X)
def get_centers(self):
return self.kmeans.cluster_centers_
def set_centers(self, centers):
self.kmeans.cluster_centers_ = centers
def predict_hist(self, X):
labels = self.predict(X)
        # k+1 bin edges give exactly k bins, one per cluster label
        bins = range(self.k + 1)
        histogram = np.histogram(labels, bins=bins, density=True)[0]
#histogram = histogram/X.shape[0]
return histogram
def get_params(self):
return self.kmeans.get_params(deep=True)
def set_params(self, kmeans_params):
self.kmeans.set_params(**kmeans_params)
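A quick hypothetical smoke test for the wrapper above; the random feature matrix is a stand-in for real descriptors, and the sizes are arbitrary.

# Hypothetical smoke test for the Kmeans wrapper.
import numpy as np
X = np.random.RandomState(0).rand(200, 8)
km = Kmeans(k=10)
km.fit(X)
hist = km.predict_hist(X)   # 10 bins, one per cluster label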
Example 14: online_mbk
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
def online_mbk(sift_features):
n = sift_features.shape[0]
rng = np.random.RandomState(0)
kmeans = MiniBatchKMeans(n_clusters = 400, batch_size = 400, max_iter = 100, random_state = rng, verbose = True)
index = 0
for _ in range(3):
sift_features = shuffle(sift_features, n_samples = int(round(n*0.1)), random_state = rng)
i = iter(sift_features)
while True:
index += 1
print index*2500
sublist = list(islice(i, 2500))
if len(sublist) > 0:
sublist = np.vstack(sublist)
kmeans.partial_fit(sublist)
else:
break
print "finished training"
predicted_labels = kmeans.predict(sift_features)
return predicted_labels
Example 15: MiniBatchKMeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import partial_fit [as alias]
cluster_names = [(8, 'cl0'), (20, 'cl1'), (40, 'cl2')]
ckeys = [c[1] for c in cluster_names]
kms = {}
for nclust, key in cluster_names:
km = MiniBatchKMeans(n_clusters=nclust)
X = None
count = 0
for e in event_info.find():
if X is None:
X = np.array(e['words'])
else:
X = np.vstack((X, e['words']))
count += 1
if count % 10000 == 0:
km.partial_fit(X)
X = None
print count
    if X is not None:
        # fit whatever is left over after the last full chunk of 10000
        km.partial_fit(X)
    kms[key] = km
event_clusters = {}
for e in event_info.find():
    clusters = {key: int(km.predict(np.atleast_2d(e['words']))[0]) for key, km in kms.iteritems()}
event_info.update({'id': e['id']},
{'$set': clusters})
# load database data, for fast access
event_clusters = {}
for e in event_info.find():