This article collects typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.fit. If you have been wondering what MiniBatchKMeans.fit does, how to use it, and what real code that calls it looks like, the curated examples below may help. You can also read further about the class it belongs to, sklearn.cluster.MiniBatchKMeans.
Below are 15 code examples of MiniBatchKMeans.fit, sorted by popularity by default.
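Before the individual examples, here is a minimal, self-contained sketch of the fit pattern that all of them share. The data and parameter values are purely illustrative and do not come from any example below.

import numpy as np
from sklearn.cluster import MiniBatchKMeans

# Illustrative data: 1000 samples with 64 features each
X = np.random.RandomState(0).rand(1000, 64)

# fit() learns the cluster centers from mini-batches of X
mbk = MiniBatchKMeans(n_clusters=8, batch_size=256, random_state=0)
mbk.fit(X)

print(mbk.cluster_centers_.shape)  # (8, 64)
print(mbk.labels_[:10])            # cluster index of the first 10 samples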
Example 1: train_data_pre
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def train_data_pre(train_x, train_y):
    topic_data = dict()
    print(type(train_x))
    print(train_x[0])
    print()
    train_x = np.array(train_x.todense())
    # Group the training vectors by topic label
    for index in range(len(train_y)):
        y = train_y[index]
        if y in topic_data:
            topic_data[y].append(train_x[index])
        else:
            print(train_x[index])
            print()
            topic_data[y] = [train_x[index]]
    # Find the size of the smallest topic
    min_size = 100000
    for key in topic_data:
        length = len(topic_data[key])
        if length < min_size:
            min_size = length
    # Reduce every larger topic to min_size cluster centers, balancing the classes
    for key in topic_data:
        length = len(topic_data[key])
        if length > min_size:
            k_means = MiniBatchKMeans(n_clusters=min_size, init_size=int(3 * min_size))
            # print(topic_data[key])
            print(topic_data[key])
            k_means.fit(topic_data[key])
            topic_data[key] = k_means.cluster_centers_
Example 2: generateCodebook
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def generateCodebook(self, features):
    """ Generate codebook using extracted features """
    codebook = None
    if self._codebookGenerateMethod == 'k-means':
        # # Codebook generation using scipy k-means
        # while run:
        #     try:
        #         # Set missing = 'raise' to raise exception
        #         # when one of the clusters is empty
        #         whitenedFeatures = whiten(features)
        #         codebook, _ = kmeans2(whitenedFeatures,
        #                               self._codebookSize,
        #                               missing='raise')
        #
        #         # No empty clusters
        #         run = False
        #     except ClusterError:
        #         # If one of the clusters is empty, re-run k-means
        #         run = True

        # Codebook generation using sklearn k-means
        whitenedFeatures = whiten(features)
        kmeans = MiniBatchKMeans(n_clusters=config.codebookSize)
        kmeans.fit(whitenedFeatures)
        codebook = kmeans.cluster_centers_
    else:
        pass
    self._codebook = codebook
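generateCodebook depends on instance state (self._codebookGenerateMethod, config.codebookSize). A self-contained sketch of the same whiten-then-cluster idea, with a made-up feature matrix standing in for the real extracted features:

import numpy as np
from scipy.cluster.vq import whiten
from sklearn.cluster import MiniBatchKMeans

features = np.random.RandomState(0).rand(5000, 128)  # stand-in for extracted descriptors
codebook_size = 256                                  # stand-in for config.codebookSize

# whiten() rescales each feature column to unit variance before clustering
whitenedFeatures = whiten(features)
kmeans = MiniBatchKMeans(n_clusters=codebook_size)
kmeans.fit(whitenedFeatures)
codebook = kmeans.cluster_centers_                   # (256, 128) array of visual words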
Example 3: extract_spatial_pyramid
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def extract_spatial_pyramid(images, dataset, vq=None, n_words=1000):
    descriptors, locations = sift_descriptors(images, dataset)
    if vq is None:
        vq = MiniBatchKMeans(n_clusters=n_words, verbose=1, init='random',
                             batch_size=2 * n_words, compute_labels=False,
                             reassignment_ratio=0.0, random_state=1, n_init=3)
        # vq = KMeans(n_clusters=n_words, verbose=10, init='random')
        vq.fit(shuffle(np.vstack(descriptors)))
    else:
        n_words = vq.n_clusters

    pyramids = []
    for descr, locs in zip(descriptors, locations):
        words = vq.predict(descr)
        # Global histogram of visual words, L1-normalized
        global_ = np.bincount(words, minlength=n_words).astype(float)
        global_ /= max(global_.sum(), 1)
        # Split the image into three horizontal stripes and histogram each
        third_of_image = locs[1].max() // 3 + 1
        stripe_indicator = locs[1] // third_of_image
        inds = np.vstack([stripe_indicator, words])
        stripe_hists = sparse.coo_matrix((np.ones(len(words)), inds),
                                         shape=(3, n_words)).toarray()
        stripe_hists = [x / max(x.sum(), 1) for x in stripe_hists]
        pyramids.append(np.hstack([np.hstack(stripe_hists), global_]))
    return vq, np.vstack(pyramids)
Example 4: train
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def train(self, featurefiles, k=100, subsampling=10):
    nbr_images = len(featurefiles)
    descr = []
    descr.append(sift.read_features_from_file(featurefiles[0])[1])
    descriptors = descr[0]
    print("begin loading image feature files...")
    for i in np.arange(1, nbr_images):
        descr.append(sift.read_features_from_file(featurefiles[i])[1])
        # descriptors = np.vstack((descriptors, descr[i]))
        descriptors = np.vstack((descriptors, descr[i][::subsampling, :]))
        if i % 100 == 0:
            print(i, "images have been loaded...")
    print("finished loading image feature files!")
    # self.voc, distortion = cluster.kmeans(descriptors[::subsampling, :], k, 1)
    print("begin MiniBatchKMeans clustering... be patient")
    mbk = MiniBatchKMeans(k, init="k-means++", compute_labels=False, n_init=3, init_size=3 * k)
    # mbk.fit(descriptors[::subsampling, :])
    mbk.fit(descriptors)
    self.voc = mbk.cluster_centers_
    print("clustering finished!")
    self.nbr_word = self.voc.shape[0]
    # Project every image's descriptors onto the vocabulary and compute idf weights
    imwords = np.zeros((nbr_images, self.nbr_word))
    for i in range(nbr_images):
        imwords[i] = self.project(descr[i])
    nbr_occurences = np.sum((imwords > 0) * 1, axis=0)
    self.idf = np.log((1.0 * nbr_images) / (1.0 * nbr_occurences + 1))
    self.traindata = featurefiles
Example 5: cluster_function
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def cluster_function(user_np):
    # Compute clustering with MiniBatchKMeans; choose the number of
    # clusters according to the size of the data set
    if len(user_np) < 10:
        n_cl = 2
    elif len(user_np) <= 100:
        n_cl = 10
    elif len(user_np) <= 500:
        n_cl = 15
    elif len(user_np) <= 1000:
        n_cl = 20
    else:
        n_cl = 30
    k_means = MiniBatchKMeans(n_clusters=n_cl, init='k-means++', max_iter=100, batch_size=100,
                              verbose=0, compute_labels=True, random_state=None, tol=0.0,
                              max_no_improvement=10, init_size=None, n_init=3,
                              reassignment_ratio=0.01)
    t0 = time.time()
    k_means.fit(user_np)
    t_batch = time.time() - t0
    print("Batch running time:", t_batch)
    k_means_labels = k_means.labels_
    # prediction = k_means.predict(user_np)
    return k_means_labels
Example 6: cluster_tfidf
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def cluster_tfidf(tfidf):
    kmeans = MiniBatchKMeans(n_clusters=10, init='k-means++', n_init=1,
                             init_size=1000, batch_size=1000)
    kmeans.fit(tfidf)
    return kmeans.cluster_centers_
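MiniBatchKMeans.fit accepts the sparse matrix that sklearn's TfidfVectorizer produces, so cluster_tfidf can be driven directly. A usage sketch (the corpus is illustrative; it must contain at least 10 documents because the helper hard-codes n_clusters=10):

from sklearn.feature_extraction.text import TfidfVectorizer

# Made-up corpus, only to exercise the helper above
docs = [f"document {i} about topic {i % 3}" for i in range(50)]

tfidf = TfidfVectorizer().fit_transform(docs)  # sparse (n_docs, n_terms) matrix
centers = cluster_tfidf(tfidf)                 # (10, n_terms) array of centroids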
Example 7: aggregate
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def aggregate(self, track_dataset):
    """
    An example implementation of the k-means algorithm implemented in
    DSI Studio. This function is automatically applied to all
    TrackDatasets returned from a query.

    Parameters:
    -----------
    track_dataset : dsi2.streamlines.track_dataset.TrackDataset
    """
    # extract the streamline data
    tracks = track_dataset.tracks
    # Make a matrix of downsampled streamlines
    points = np.array([downsample(trk, 3).flatten()
                       for trk in tracks])
    # Calculate the length of each streamline
    lengths = np.array([len(trk) for trk in tracks]).reshape(-1, 1)
    # Concatenate the points and the track lengths
    features = np.hstack((points, lengths))
    # Initialize the k-means algorithm
    kmeans = MiniBatchKMeans(n_clusters=self.k, compute_labels=True)
    kmeans.fit(features)
    # Return the labels
    return kmeans.labels_
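aggregate relies on a TrackDataset object and on downsample (historically dipy's streamline resampler). A rough standalone equivalent with a hypothetical three-point resampler and synthetic streamlines, only to show the feature construction:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

def downsample3(track):
    # Hypothetical stand-in for downsample(trk, 3): first, middle, and last point
    return track[[0, len(track) // 2, len(track) - 1]]

rng = np.random.RandomState(0)
tracks = [rng.rand(rng.randint(10, 50), 3) for _ in range(200)]  # fake streamlines

points = np.array([downsample3(trk).flatten() for trk in tracks])  # (200, 9)
lengths = np.array([len(trk) for trk in tracks]).reshape(-1, 1)    # (200, 1)
features = np.hstack((points, lengths))
labels = MiniBatchKMeans(n_clusters=5, compute_labels=True).fit(features).labels_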
Example 8: main
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def main():
    p = Pool(8)
    # dog_images, cat_images, test_images = import_files('small_dog_img',
    #                                                    'small_cat_img', 'small_test_img')
    dog_images, cat_images, test_images = import_files('dog_img', 'cat_img',
                                                       'test_img')
    n_dog = len(dog_images)
    n_cat = len(cat_images)
    n_train = n_dog + n_cat
    n_test = len(test_images)
    all_images = np.concatenate((dog_images, cat_images, test_images), axis=0)
    n_all = all_images.shape[0]
    # Extract SIFT descriptors in parallel and time the extraction
    sift_start = time.time()
    sift_features = p.map(map_sift_desc, all_images)
    sift_end = time.time()
    print((sift_end - sift_start) * 1000)
    train_sift_features = reduce_sift_desc(sift_features[:n_train])
    test_sift_features = reduce_sift_desc(sift_features[n_train:])
    # Quantize the descriptors into 1000 visual words
    kmeans_start = time.time()
    kmeans = MiniBatchKMeans(n_clusters=1000, batch_size=1000, max_iter=250)
    kmeans.fit(train_sift_features)
    train_predicted_labels = kmeans.predict(train_sift_features)
    test_predicted_labels = kmeans.predict(test_sift_features)
    kmeans_end = time.time()
    print((kmeans_end - kmeans_start) * 1000)
Example 9: run_n_kmeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def run_n_kmeans(data, num_cluster, n_runs, min_count):
    """
    Runs k-means `n_runs` times. Each song is assigned to the
    cluster it falls into in at least `min_count` of the `n_runs`.
    If `min_count` is not met, we consider the song as not belonging
    to any cluster.
    """
    # TODO: refactor and move this method to vod common libs
    n_rows = data.shape[0]
    results = np.zeros((n_rows, n_runs))
    kmeans = MiniBatchKMeans(num_cluster)
    for i in range(n_runs):
        kmeans.fit(data)
        labels_column = kmeans.labels_
        results[:, i] = labels_column
    # For each row, keep the most frequent label if it is frequent enough
    return_val = np.zeros(n_rows)
    for i in range(n_rows):
        counter = Counter(results[i])
        most_common = counter.most_common(1)[0]
        clust = most_common[0]
        freq = most_common[1]
        if freq > min_count:
            return_val[i] = clust
        else:
            return_val[i] = -1
    return return_val
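A usage sketch with synthetic data. Note a caveat the function leaves implicit: k-means cluster ids are arbitrary from run to run, so counting raw label values across runs only yields a meaningful consensus when the runs converge to compatible labelings.

import numpy as np
from collections import Counter            # imports needed by run_n_kmeans above
from sklearn.cluster import MiniBatchKMeans
from sklearn.datasets import make_blobs

data, _ = make_blobs(n_samples=500, centers=4, random_state=0)
labels = run_n_kmeans(data, num_cluster=4, n_runs=10, min_count=6)
print((labels == -1).sum(), "rows assigned to no cluster")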
Example 10: exploitingKMeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def exploitingKMeans(data, ctrldata, length, ctrlStatus, k, who, genes, sirna, iteration=10,
                     colors='ctrlStatus', Td=True, show=False, plotcov=False):
    if iteration == False:
        # Single clustering pass
        model = MiniBatchKMeans(n_clusters=k, batch_size=2000, init='k-means++', n_init=1000,
                                max_iter=1000, max_no_improvement=100, compute_labels=True)
        zou = model.fit(data)
        labels = zou.labels_
        if ctrldata is not None:
            ctrl_labels = model.predict(ctrldata)
            labels = np.hstack((labels, ctrl_labels))
        if show == True:
            percentages = filmCharacterization(labels, length, ctrlStatus, genes, colors, Td, plotcov)
            return labels, percentages
        else:
            return labels
    else:
        # Repeat the clustering and accumulate counts, matching clusters
        # between iterations by their percentage profiles
        first = True
        countsTotal = []
        oldPerc = None
        for it in range(iteration):
            print("iteration", it)
            model = MiniBatchKMeans(n_clusters=k, batch_size=1000, init='random', n_init=1000,
                                    max_iter=1000, max_no_improvement=100, compute_labels=True)
            zou = model.fit(data)
            counts, percentages = np.array(filmCount(zou.labels_, length))
            if not first:
                index = np.argmax(percentages, axis=0)
                corr = {np.argmax(oldPerc[ind]): np.where(ind == index)[0][0] for ind in index}
                print(it, corr)
                for el in corr:
                    countsTotal[:, el] += counts[:, corr[el]]
            else:
                countsTotal = counts
                first = False
            oldPerc = percentages
        return countsTotal
Example 11: fit
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def fit(self, X, Y):
    self.class_labels = np.array(sorted(set(Y)))
    # Learn a dictionary of n_atoms cluster centers from (a subsample of) the data
    D = KMeans(n_clusters=self.n_atoms, init_size=self.n_atoms * 3)
    D.fit(np.vstack(X)[:200000])
    D = D.cluster_centers_
    D = normalize(D)
    self.D_mean = np.mean(D, axis=0)
    self.D = D - self.D_mean
    self.D_idxs = []
    self.clfs = []
    for i in range(self.n_nodes):
        idxs = np.random.permutation(self.n_atoms)[:self.n_features]
        # note: the assignment above is immediately overwritten below
        idxs = np.random.permutation(len(X[0][0]))[:self.n_features]
        enc_X = []
        for x in X:
            x = normalize(x)
            x = x - self.D_mean
            enc_X.append(bow(np.argmax(np.abs(np.dot(self.D[:, idxs], x.T[idxs, :])),
                                       axis=0), self.n_atoms))
        clf = LinearSVC()
        clf.fit(enc_X, Y)
        self.clfs.append(clf)
        self.D_idxs.append(idxs)
Example 12: cluster_mbk
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def cluster_mbk(self):
    mbk = MiniBatchKMeans(init='k-means++', n_clusters=40, batch_size=100,
                          n_init=100, max_no_improvement=10, verbose=0,
                          random_state=0)
    mbk.fit(self.all_frames_xy)
    clusters = mbk.predict(self.all_frames_xy)
    return clusters
Example 13: initalized_mini_batch_em
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def initalized_mini_batch_em(X, nClusters, varianceScale, miniBatchSize=1000, nIter=20, nInit=10):
    nFeatures = X.shape[1]
    print("nFeatures", nFeatures, "nClusters", nClusters)
    assert X.shape[0] > X.shape[1]
    # Initialize the cluster centers with mini-batch k-means
    mbkm = MiniBatchKMeans(n_clusters=nClusters, batch_size=miniBatchSize, n_init=nInit)
    print("mini batch k-means initialization")
    mbkm.fit(X)
    centers = mbkm.cluster_centers_.swapaxes(0, 1).astype(numpy.float64)
    print("swap axes")
    X = X.swapaxes(0, 1).astype(numpy.float64)
    print("mini batch em")
    cAlg = MiniBatchEm(nFeatures=nFeatures, nClusters=nClusters, miniBatchSize=miniBatchSize,
                       nIter=nIter, varianceScale=varianceScale)
    print("initialize")
    cAlg.initalizeCenters(centers)
    cAlg.run(X)
    print("predict")
    probs = cAlg.predict(X)
    probs = probs.swapaxes(0, 1)
    return probs
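MiniBatchEm here is a project-specific class, not part of sklearn. The same k-means-initialized EM pattern can be sketched with sklearn's own GaussianMixture, whose means_init parameter accepts precomputed centers (an analogous sketch, not the author's code):

import numpy as np
from sklearn.cluster import MiniBatchKMeans
from sklearn.mixture import GaussianMixture

X = np.random.RandomState(0).rand(10000, 16)

# Cheap mini-batch k-means pass to seed the EM means
mbkm = MiniBatchKMeans(n_clusters=32, batch_size=1000).fit(X)
gmm = GaussianMixture(n_components=32, means_init=mbkm.cluster_centers_, max_iter=20)
gmm.fit(X)
probs = gmm.predict_proba(X)  # (n_samples, 32) soft cluster assignments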
Example 14: train_codebook
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def train_codebook(params, X):
    # Init kmeans instance
    km = MiniBatchKMeans(params["descriptor_size"])
    # Training the model with our descriptors
    km.fit(X)
    # Save to disk (use a context manager so the file handle is closed)
    codebook_path = os.path.join(
        params["root"],
        params["root_save"],
        params["codebooks_dir"],
        "codebook_" + str(params["descriptor_size"]) + "_"
        + params["descriptor_type"] + "_"
        + params["keypoint_type"] + ".cb",
    )
    with open(codebook_path, "wb") as f:
        pickle.dump(km, f)
    return km
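For illustration, a hypothetical params dict with the keys train_codebook expects (all values, including the paths, are invented):

params = {
    "descriptor_size": 512,        # number of visual words / clusters
    "descriptor_type": "SIFT",
    "keypoint_type": "dense",
    "root": "/data/project",       # hypothetical directory layout
    "root_save": "models",
    "codebooks_dir": "codebooks",
}
km = train_codebook(params, X)     # X: (n_descriptors, n_dims) array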
Example 15: main
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit [as alias]
def main():
    if len(sys.argv) != 4:
        print(__doc__)
        return 1
    infiles = glob(sys.argv[1])
    outfile = sys.argv[2]
    K = int(sys.argv[3])
    print("Reading in", len(infiles), "files")
    fullarr = np.loadtxt(fileinput.input(infiles), delimiter='\t')[:, :-7]
    summary_stats = None
    stats_file = '/n/fs/gcf/dchouren-repo/COS513-Finance/summary_stats/stats2'
    with open(stats_file, 'rb') as inf:
        summary_stats = np.loadtxt(inf)
    # The first half of the stats file holds per-column stds, the second half means
    stds = summary_stats[:len(summary_stats) // 2]
    means = summary_stats[len(summary_stats) // 2:]
    fullarr = (fullarr - means) / stds
    print("Learning MiniBatchKMeans with K =", K)
    km = MiniBatchKMeans(n_clusters=K, verbose=True)  # TODO max_iter
    km.fit(fullarr)
    print("KMeans trained, saving")
    with open(outfile, 'wb') as out_model:
        pickle.dump(km, out_model)
    print("Score:", km.score(fullarr))
    return 0
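The manual (fullarr - means) / stds step above is ordinary feature standardization; when the statistics don't have to come from a precomputed stats file, sklearn's StandardScaler is the idiomatic equivalent (a sketch reusing the names from the example):

from sklearn.preprocessing import StandardScaler

fullarr = StandardScaler().fit_transform(fullarr)  # zero mean, unit variance per column
km = MiniBatchKMeans(n_clusters=K, verbose=True)
km.fit(fullarr)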