This article collects and summarizes typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.fit_predict. If you are unsure what exactly MiniBatchKMeans.fit_predict does or how to use it, the curated code examples below may help. You can also read further about the containing class, sklearn.cluster.MiniBatchKMeans.
The following presents 14 code examples of MiniBatchKMeans.fit_predict, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: cluster_colors_into_groups
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def cluster_colors_into_groups(image, clusters):
    # Performs k-means clustering on the colors in the image
    clt = MiniBatchKMeans(n_clusters=clusters)
    clt.fit_predict(image)
    # Returns the centers of the found clusters; these centers give the color
    # each cluster represents, as coordinates in RGB space
    return np.array([clt.cluster_centers_.astype("uint8")])
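A minimal usage sketch, assuming the image has already been flattened to an (n_pixels, 3) array of RGB values and the imports from the header comment are in place (the data below is synthetic, made up for illustration):

import numpy as np

rng = np.random.default_rng(0)
pixels = rng.integers(0, 256, size=(1000, 3), dtype=np.uint8)  # fake RGB pixels
centers = cluster_colors_into_groups(pixels, clusters=3)
print(centers.shape)  # (1, 3, 3): the np.array([...]) wrapper adds a leading axis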
Example 2: showMiniBatchKMeans
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def showMiniBatchKMeans(X, N):
    scores = []
    for number in range(N // 6, N // 2):
        clustering = MiniBatchKMeans(n_clusters=number, max_iter=MAX_ITER)
        clustering.fit_predict(X)
        scores.append(clustering.score(X))
    plt.plot(scores)
    plt.xlabel(XLABEL)
    plt.ylabel(YLABEL)
    plt.show()
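The snippet relies on module-level constants that are not shown; a plausible stand-in follows (the values and label texts are assumptions, not the originals). Since score(X) returns the negative of the k-means inertia, the plotted curve rises toward zero and can be read like an elbow plot:

import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans

MAX_ITER = 100                                   # hypothetical iteration cap
XLABEL = "trial index over the tested k range"   # hypothetical axis label
YLABEL = "score (negative inertia)"              # hypothetical axis label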
Example 3: main
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def main():
    for filename in sys.argv[1:]:
        train, target = readArff(filename)
        # Set up the learner we want
        samples = 15
        start = time.time()
        fscore = 0
        for x in range(samples):
            n_clusters = 10
            learner = MiniBatchKMeans(n_clusters=n_clusters, max_no_improvement=10, reassignment_ratio=0.02)
            # Form the clusters
            cluster_assignments = learner.fit_predict(train)
            # Get the bug prediction: count clean/buggy points per cluster
            counts = [[0, 0] for _ in range(n_clusters)]
            for point, assignment in enumerate(cluster_assignments):
                counts[assignment][int(target[point])] += 1
            cluster_bug_prediction = [0 if x[0] > x[1] else 1 for x in counts]
            bug_prediction = [str(cluster_bug_prediction[assignment]) for assignment in cluster_assignments]
            tp, tn, fp, fn = 0, 0, 0, 0
            for actual, predicted in zip(target, bug_prediction):
                if actual == "1" and predicted == "1":
                    tp += 1
                elif actual == "0" and predicted == "1":
                    fp += 1
                elif actual == "1" and predicted == "0":
                    fn += 1
            fscore += 2.0 * tp / (2.0 * tp + fp + fn)
        print("Time taken:", time.time() - start)
        print("F-Score:", fscore / samples)
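The hand-rolled F-score above is exactly what sklearn.metrics.f1_score computes; assuming target and bug_prediction are lists of "0"/"1" strings as in the snippet, the equivalent call is:

from sklearn.metrics import f1_score

# equivalent to 2*tp / (2*tp + fp + fn) with "1" as the positive class
score = f1_score(target, bug_prediction, pos_label="1")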
Example 4: testClusters
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def testClusters(image):
    bestSilhouette = -1
    bestClusters = 0
    bestLabels = None
    bestCenters = None
    for clusters in range(2, 10):
        # Cluster colours
        clt = MiniBatchKMeans(n_clusters=clusters)
        labs = clt.fit_predict(image)
        try:
            silhouette = metrics.silhouette_score(image, labs, metric='euclidean', sample_size=500)
            if silhouette > bestSilhouette:
                bestSilhouette = silhouette
                bestClusters = clusters
                bestLabels = clt.labels_
                bestCenters = clt.cluster_centers_
        except ValueError as ve:
            # silhouette_score raises ValueError when fewer than two distinct labels are present
            print(ve, clusters)
            if len(labs) == 1:
                bestLabels = labs
                bestCenters = clt.cluster_centers_
        del clt
    return bestClusters, bestLabels, bestCenters
Example 5: predictNumClusters
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def predictNumClusters(x, y):
    '''
    Choose a cluster count for the rescaled data by iteratively
    re-clustering with MiniBatchKMeans and scoring each candidate k
    with the silhouette.
    Params: x - an array of x values for point locations
            y - an array of y values for point locations
    '''
    X = np.array(list(zip(x, y)))
    range_n_clusters = range(5, len(x) // 3, 3)
    bestFit = 0
    numClusters = 0
    for n_clusters in range_n_clusters:
        clusterer = MiniBatchKMeans(n_clusters=n_clusters, random_state=10)
        cluster_labels = clusterer.fit_predict(X)
        silhouette_avg = silhouette_score(X, cluster_labels)
        # Keep track of the best fit, and return early once the silhouette reaches 0.9
        if silhouette_avg > bestFit:
            print(silhouette_avg, "with k =", n_clusters)
            numClusters = n_clusters
            bestFit = silhouette_avg
        if silhouette_avg >= .9:
            return numClusters
    return numClusters
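A quick usage sketch on synthetic blobs (the make_blobs parameters are arbitrary); the function only needs parallel arrays of x and y coordinates:

from sklearn.datasets import make_blobs

points, _ = make_blobs(n_samples=300, centers=6, random_state=0)
k = predictNumClusters(points[:, 0], points[:, 1])
print("chosen number of clusters:", k)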
Example 6: train_validate
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """
    For each class, cluster its samples with MiniBatchKMeans and keep only
    the points assigned to clusters 0-6, then train an XGBoost model on the
    filtered data and return validation predictions.
    """
    nc = 10
    X = []
    y = []
    clt = MiniBatchKMeans(n_clusters=nc, batch_size=100)
    for i in range(9):
        XX = X_train[y_train == i]
        yy = y_train[y_train == i]
        lbs = clt.fit_predict(XX)
        ids = lbs < 7
        X.append(XX[ids])
        y.append(yy[ids])
    X = np.vstack(X)
    y = np.hstack(y)
    print(X.shape)
    w_train = np.zeros(len(y))
    for i in range(len(w_train)):
        w_train[i] = self.w[int(y[i])]
    xg_train = DMatrix(X, label=y, weight=w_train)
    xg_valid = DMatrix(X_valid, label=y_valid)
    watchlist = [(xg_train, 'train'), (xg_valid, 'validation')]
    bst = my_train_xgboost(self.param, xg_train, self.num_round, watchlist)
    y_pred = bst.predict(xg_valid).reshape(X_valid.shape[0], 9)
    return y_pred
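This example also leans on project-specific pieces (self.w, self.param, self.num_round) and a helper my_train_xgboost that is not shown; a plausible thin wrapper, purely an assumption about its shape:

from xgboost import DMatrix
import xgboost as xgb

def my_train_xgboost(param, dtrain, num_round, watchlist):
    # hypothetical stand-in: delegate to xgboost's standard training loop
    return xgb.train(param, dtrain, num_round, evals=watchlist)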
Example 7: FindMostProminentColors
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def FindMostProminentColors(image):
    image = cv2.imread(image)
    (h, w) = image.shape[:2]
    # convert the image from the BGR color space (OpenCV's default) to the
    # L*a*b* color space -- since we will be clustering using k-means, which
    # is based on the euclidean distance, we'll use the L*a*b* color space,
    # where euclidean distance carries perceptual meaning
    image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    # reshape the image into a feature vector so that k-means can be applied
    image = image.reshape((image.shape[0] * image.shape[1], 3))
    # apply k-means using the specified number of clusters and then create
    # the quantized image based on the predictions
    clt = MiniBatchKMeans(n_clusters=10)
    labels = clt.fit_predict(image)
    quant = clt.cluster_centers_.astype("uint8")[labels]
    # Count how many times each label occurs and sort in descending order
    occurrencesOfLabels = Counter(labels)
    mostProminentLabels = sorted(occurrencesOfLabels, key=occurrencesOfLabels.get, reverse=True)
    # Match the labels to their respective L*a*b* color
    mostProminentColorList = []
    for i in mostProminentLabels:
        itemIndex = np.where(labels == i)[0][0]
        mostProminentColorList.append(quant[itemIndex])
    return mostProminentColorList
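Hypothetical usage ("photo.jpg" is a placeholder path): the returned colors are in L*a*b* space, so the dominant one is converted back to BGR for display:

import numpy as np
import cv2

colors = FindMostProminentColors("photo.jpg")  # placeholder file name
dominant_lab = np.uint8([[colors[0]]])         # shape (1, 1, 3), as cvtColor expects
dominant_bgr = cv2.cvtColor(dominant_lab, cv2.COLOR_LAB2BGR)[0, 0]
print(dominant_bgr)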
Example 8: define_clusters
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def define_clusters(projections):
    """
    Creates several different clusterings of the data in projections.
    :param projections: dict(string, (2 x Num_Samples) numpy.ndarray)
        dictionary mapping the projection type (e.g. "tSNE") to an array containing
        the two-dimensional coordinates for each sample in the projection.
    :return: dict of string (projection name) =>
        (dict of string (cluster technique) => np.ndarray of size N_Samples (cluster assignments))
    """
    pbar = ProgressBar(4 * len(projections))
    out_clusters = dict()
    for key in projections:
        proj_data = projections[key]
        proj_clusters = dict()
        # K-means for k = 2-5
        for k in range(2, 6):
            clust_name = "K-Means, k=" + str(k)
            kmeans = MiniBatchKMeans(n_clusters=k)
            clust_assignments = kmeans.fit_predict(proj_data.T)
            proj_clusters.update({clust_name: clust_assignments})
            pbar.update()
        out_clusters.update({key: proj_clusters})
    pbar.complete()
    return out_clusters
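ProgressBar is a helper from the original project; with a minimal stand-in (an assumption, not the original class), the function can be exercised on made-up projections:

import numpy as np

class ProgressBar:
    # minimal stand-in for the project's progress bar (assumption)
    def __init__(self, n):
        self.n = n
    def update(self):
        pass
    def complete(self):
        pass

projections = {"tSNE": np.random.rand(2, 100), "PCA": np.random.rand(2, 100)}
clusters = define_clusters(projections)
print(clusters["tSNE"]["K-Means, k=3"][:10])  # first ten cluster assignments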
Example 9: color_quantization_sk
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def color_quantization_sk(image, clusters):
    # grab the width and height of the image
    (h, w) = image.shape[:2]
    # convert the image from the BGR color space (OpenCV's default) to the
    # L*a*b* color space -- since we will be clustering using k-means, which
    # is based on the euclidean distance, we'll use the L*a*b* color space,
    # where euclidean distance carries perceptual meaning
    image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    # reshape the image into a feature vector so that k-means can be applied
    image = image.reshape((image.shape[0] * image.shape[1], 3))
    # apply k-means using the specified number of clusters and then create
    # the quantized image based on the predictions
    clt = MiniBatchKMeans(n_clusters=clusters)
    labels = clt.fit_predict(image)
    quant = clt.cluster_centers_.astype("uint8")[labels]
    # reshape the feature vectors back to an image
    quant = quant.reshape((h, w, 3))
    # convert from L*a*b* back to BGR
    quant = cv2.cvtColor(quant, cv2.COLOR_LAB2BGR)
    return quant
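A minimal usage sketch on a synthetic BGR image (the array below is made up): since every pixel is replaced by its cluster center, the quantized image contains at most `clusters` distinct colors.

import numpy as np

rng = np.random.default_rng(0)
img = rng.integers(0, 256, size=(64, 64, 3), dtype=np.uint8)  # fake BGR image
quant = color_quantization_sk(img, clusters=8)
print(len(np.unique(quant.reshape(-1, 3), axis=0)))  # <= 8 distinct colors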
Example 10: big_kmeans
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def big_kmeans(docs, k, batch_size=1000, n_features=(2 ** 20),
               single_pass=True):
    """k-means for very large sets of documents.

    See kmeans for documentation. Differs from that function in that it does
    not compute tf-idf or LSA, and fetches the documents in a streaming
    fashion, so they don't need to be held in memory. It does not do random
    restarts.

    If the option single_pass is set to False, the documents are visited
    twice: once to fit a k-means model, once to determine their label in
    this model.
    """
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.feature_extraction.text import HashingVectorizer

    v = HashingVectorizer(input="content", n_features=n_features, norm="l2")
    km = MiniBatchKMeans(n_clusters=k)

    labels = []
    for batch in batches(docs, batch_size):
        batch = list(map(fetch, batch))  # fetch only the documents in this batch
        batch = v.transform(batch)
        y = km.fit_predict(batch)
        if single_pass:
            labels.extend(y.tolist())
    if not single_pass:
        for batch in batches(docs, batch_size):
            batch = list(map(fetch, batch))
            batch = v.transform(batch)
            labels.extend(km.predict(batch).tolist())
    return labels
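The function assumes two helpers, batches and fetch, that are not shown. A plausible minimal version (an assumption about their shape, not the original code):

from itertools import islice

def batches(iterable, size):
    # yield successive lists of `size` items from `iterable` (assumed helper)
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk

def fetch(doc):
    # return a document's raw text; a no-op when docs are already strings (assumed helper)
    return doc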
Example 11: VideoFrameReaders
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def VideoFrameReaders(VideoDirectory):
    cap = cv2.VideoCapture(VideoDirectory)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    fgbg = cv2.createBackgroundSubtractorMOG2()
    timestamp = []
    count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            time = cap.get(cv2.CAP_PROP_POS_MSEC)  # current position in milliseconds
            timestamp.append(time)
            print(timestamp)
            if frame is None:
                break
            # frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            image = frame.reshape((frame.shape[0] * frame.shape[1], 3))
            K = 4
            clf = MiniBatchKMeans(K)
            # predict cluster labels and quantize each color based on the labels
            cls_labels = clf.fit_predict(image)
            print(cls_labels)
            cls_quant = clf.cluster_centers_.astype("uint8")[cls_labels]
    except EOFError:
        pass
Example 12: cluster_kmeans
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def cluster_kmeans(args):
    """
    Clustering with mini-batch K-Means.
    """
    # load data
    g_it = node_link_data.node_link_data_to_eden(input=args.input_file, input_type="file")
    vec = graph.Vectorizer(r=args.radius, d=args.distance, nbits=args.nbits)
    X = vec.transform(g_it, n_jobs=args.n_jobs)
    # log statistics on the data
    logger.info('Instances: %d Features: %d with an avg of %d features per instance' % (X.shape[0], X.shape[1], X.getnnz() / X.shape[0]))
    # clustering
    clustering_algo = MiniBatchKMeans(n_clusters=args.n_clusters, n_init=args.n_init)
    y = clustering_algo.fit_predict(X)
    msg = 'Predictions statistics: '
    msg += util.report_base_statistics(y)
    logger.info(msg)
    # save the vectorizer model
    out_file_name = "vectorizer"
    eden_io.dump(vec, output_dir_path=args.output_dir_path, out_file_name=out_file_name)
    logger.info("Written file: %s/%s", args.output_dir_path, out_file_name)
    # save the cluster labels
    out_file_name = "labels"
    eden_io.store_matrix(matrix=y, output_dir_path=args.output_dir_path, out_file_name=out_file_name, output_format="text")
    logger.info("Written file: %s/%s", args.output_dir_path, out_file_name)
Example 13: ClusteringEnsemble
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
class ClusteringEnsemble(BaseEstimator):

    def __init__(self, estimator_const=LinearRegression, n_clusters=2):
        self.estimator_const_ = estimator_const
        self.n_clusters_ = n_clusters
        self.clustering = MiniBatchKMeans(n_clusters=self.n_clusters_)

    def get_params(self, deep=True):
        return {"n_clusters": self.n_clusters_}

    def fit(self, X, y):
        print("Training KMeans")
        colors = self.clustering.fit_predict(X).reshape(X.shape[0])
        print("Training Estimators")
        # each estimator is assigned to one cluster
        self.estimators = [self.estimator_const_() for i in range(self.n_clusters_)]
        for i in range(self.n_clusters_):
            rows = colors == i
            self.estimators[i].fit(X[rows], y[rows])

    def predict(self, X):
        y = np.zeros(X.shape[0])
        print("Predicting clusters")
        colors = self.clustering.predict(X)
        print("Estimating results")
        for i in range(self.n_clusters_):
            rows = colors == i
            y[rows] = self.estimators[i].predict(X[rows])
        return y
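A quick usage sketch on synthetic data (the make_regression parameters are arbitrary); the ensemble fits one LinearRegression per k-means cluster:

import numpy as np
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=0)
model = ClusteringEnsemble(n_clusters=2)
model.fit(X, y)
print(np.mean((model.predict(X) - y) ** 2))  # training MSE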
Example 14: _create_node
# Required module import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def _create_node(self, data, original_idx, current_depth):
    """Create an HKMNode and recursively ask to create nodes if the maximum
    depth has not been reached"""
    # end-of-recursion condition
    if current_depth >= self.max_depth or data.shape[0] <= self.leaf_size:
        if self.verbose:
            self._n_points += data.shape[0]
        nn_model = NearestNeighbors(n_neighbors=min(data.shape[0], self.leaf_size),
                                    metric='cosine', algorithm='brute').fit(data)
        node = HKMNode(nn_model=nn_model,
                       original_idx=original_idx)
    else:
        # it is possible to create a new branch in the data:
        # cluster with mini-batch K-means, then recurse into each child
        clustering = MiniBatchKMeans(n_clusters=self.branching_factor,
                                     batch_size=self.batch_size)
        # assign each point to one of the branching_factor partitions
        labels = clustering.fit_predict(data)
        node = HKMNode(clustering=clustering)
        node.children = []
        for children_id in range(self.branching_factor):
            idx = np.where(labels == children_id)[0]
            node.children.append(self._create_node(
                data[idx],
                original_idx[idx],
                current_depth + 1)
            )
    return node
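_create_node belongs to a hierarchical k-means class (with max_depth, leaf_size, branching_factor, batch_size, and verbose attributes) and uses an HKMNode container that is not shown; a plausible minimal container, purely an assumption:

class HKMNode:
    # Hypothetical node container: a leaf keeps an exact NN model over its
    # points, an internal node keeps the clustering used to route queries
    def __init__(self, clustering=None, nn_model=None, original_idx=None):
        self.clustering = clustering
        self.nn_model = nn_model
        self.original_idx = original_idx
        self.children = None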