

Python MiniBatchKMeans.fit_predict Method Code Examples

This article collects typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.fit_predict. If you are unsure how MiniBatchKMeans.fit_predict is used in practice, the curated examples below should help; you can also read further about the class it belongs to, sklearn.cluster.MiniBatchKMeans.


Below are 15 code examples of MiniBatchKMeans.fit_predict, sorted by popularity by default.
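
Before the project examples, here is a minimal, self-contained sketch of the method on synthetic data (the data and parameter values are illustrative only and not taken from any of the projects below):

import numpy as np
from sklearn.cluster import MiniBatchKMeans

# Two synthetic 2-D blobs, 500 points each
X = np.vstack([
    np.random.randn(500, 2),
    np.random.randn(500, 2) + [5, 5],
])

# fit_predict fits the model and returns the cluster index of each sample
clt = MiniBatchKMeans(n_clusters=2, batch_size=100)
labels = clt.fit_predict(X)       # shape (1000,), values in {0, 1}
centers = clt.cluster_centers_    # shape (2, 2), one row per cluster center
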

Example 1: cluster_colors_into_groups

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
import numpy as np
from sklearn.cluster import MiniBatchKMeans

def cluster_colors_into_groups(image, clusters):
    # Performs k-means clustering on the colors in the image
    # (image is expected as an (n_pixels, 3) array of color values)
    clt = MiniBatchKMeans(n_clusters=clusters)
    clt.fit_predict(image)

    # Returns the centers of the found clusters
    # These centers will give the color that the cluster is representing
    # as coordinates in RGB space
    return np.array([clt.cluster_centers_.astype("uint8")])
Developer: ut-ras, Project: primavera, Lines: 11, Source: quantize.py

Example 2: showMiniBatchKMeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def showMiniBatchKMeans(X, N):
    # MAX_ITER, XLABEL, YLABEL and plt (matplotlib.pyplot) are module-level
    # names defined elsewhere in the original file
    scores = []
    # sweep the number of clusters from N//6 to N//2 and record the model score
    for number in range(N // 6, N // 2):
        clustering = MiniBatchKMeans(n_clusters=number, max_iter=MAX_ITER)
        clustering.fit_predict(X)
        scores.append(clustering.score(X))
    plt.plot(scores)
    plt.xlabel(XLABEL)
    plt.ylabel(YLABEL)
    plt.show()
Developer: jeka3230, Project: Pattern-recognition, Lines: 12, Source: Clustering.py

Example 3: main

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def main():
	# readArff is a project helper; sys and time are imported at module level
	for filename in sys.argv[1:]:
		train, target = readArff(filename)

		# Set up the learner we want
		samples = 15
		start = time.time()
		fscore = 0
		for x in range(samples):
			n_clusters = 10
			learner = MiniBatchKMeans(n_clusters=n_clusters, max_no_improvement=10, reassignment_ratio=0.02)
			
			# Form the clusters
			cluster_assignments = learner.fit_predict(train)
			
			# Get the bug prediction
			counts = [[0, 0] for _ in range(n_clusters)]
			for point, assignment in enumerate(cluster_assignments):
				counts[assignment][int(target[point])] += 1
			cluster_bug_prediction = [0 if x[0] > x[1] else 1 for x in counts]
			bug_prediction = [str(cluster_bug_prediction[assignment]) for assignment in cluster_assignments]
			tp, tn, fp, fn = 0,0,0,0
			for actual, predicted in zip(target, bug_prediction):
				if actual == "1" and predicted == "1":
					tp += 1
				elif actual == "0" and predicted == "1":
					fp += 1
				elif actual == "1" and predicted == "0":
					fn += 1
			fscore += 2.0*tp/(2.0*tp+fp+fn)
		print("Time taken:", time.time() - start)
		print("F-Score:", fscore / samples)
Developer: tdgoodrich, Project: graphmodels, Lines: 34, Source: cluster.py

Example 4: testClusters

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def testClusters(image):
    # metrics here refers to sklearn.metrics, imported at module level
    bestSilhouette = -1
    bestClusters = 0
    bestLabels = None
    bestCenters = None

    for clusters in range(2, 10):
        # Cluster colours
        clt = MiniBatchKMeans(n_clusters = clusters)
        labs = clt.fit_predict(image)
        try:
            silhouette = metrics.silhouette_score(image, labs, metric='euclidean',sample_size=500)
            if silhouette > bestSilhouette:
                bestSilhouette = silhouette
                bestClusters = clusters
                bestLabels = clt.labels_
                bestCenters = clt.cluster_centers_

        except ValueError as ve:
            print(ve,clusters)
            if len(labs) == 1:
                bestLabels = labs
                bestCenters = clt.cluster_centers_
        del clt

    return bestClusters,bestLabels,bestCenters
Developer: N0taN3rd, Project: CompareHistograms, Lines: 28, Source: dominateColor.py

Example 5: predictNumClusters

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def predictNumClusters(x, y):
	'''
	Estimate the number of clusters for the rescaled data by iteratively
	re-clustering with KMeans and tracking the silhouette score
	Param: x - an array of x values for point locations
	       y - an array of y values for point locations
	'''
	X = np.array(list(zip(x,y)))
	range_n_clusters = range(5, len(x) // 3, 3)
	bestFit = 0
	numClusters = 0

	for n_clusters in range_n_clusters:
		clusterer = MiniBatchKMeans(n_clusters=n_clusters, random_state=10)
		cluster_labels = clusterer.fit_predict(X)
		silhouette_avg = silhouette_score(X, cluster_labels)

		# Keep track of the best silhouette score and return early once it reaches 0.9
		if silhouette_avg > bestFit:
			print(silhouette_avg, "with k =", n_clusters)
			numClusters = n_clusters
			bestFit = silhouette_avg
			if silhouette_avg >= .9:
				return numClusters
	return numClusters
Developer: Bboatman, Project: tumblrTaggingSemanticRelatedness, Lines: 27, Source: reScale.py

Example 6: train_validate

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
 def train_validate(self, X_train, y_train, X_valid, y_valid):
     """Cluster each class with MiniBatchKMeans, keep only the samples assigned
     to clusters 0-6, then train XGBoost and return validation predictions.
     """
     nc = 10
     X = []
     y = []
     clt = MiniBatchKMeans(n_clusters=nc, batch_size=100)
     for i in range(9):
         XX = X_train[y_train==i]
         yy = y_train[y_train==i]
         lbs = clt.fit_predict(XX)
         ids = lbs < 7
         X.append(XX[ids])
         y.append(yy[ids])
     X = np.vstack(X)
     y = np.hstack(y)
     print(X.shape)
     
     w_train = np.zeros(len(y))
     for i in range(len(w_train)):
         w_train[i] = self.w[int(y[i])]
     xg_train = DMatrix(X, label=y, weight=w_train)  
     xg_valid = DMatrix(X_valid, label=y_valid)    
     watchlist = [(xg_train,'train'), (xg_valid, 'validation')]
     bst = my_train_xgboost(self.param, xg_train, self.num_round, watchlist)
     y_pred = bst.predict(xg_valid).reshape(X_valid.shape[0], 9)
     return y_pred
Developer: chrinide, Project: kaggle_otto_group, Lines: 29, Source: clf_clust_simple.py

Example 7: FindMostProminentColors

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def FindMostProminentColors(image):
    image = cv2.imread(image)
    (h, w) = image.shape[:2]
     
    # convert the image from the BGR color space (OpenCV's default) to the
    # L*a*b* color space -- since we will be clustering using k-means,
    # which is based on Euclidean distance, we use the L*a*b* color space,
    # where Euclidean distance carries perceptual meaning
    image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)

    # reshape the image into a feature vector so that k-means
    # can be applied
    image = image.reshape((image.shape[0] * image.shape[1], 3))
     
    # apply k-means using the specified number of clusters and
    # then create the quantized image based on the predictions
    clt = MiniBatchKMeans(n_clusters = 10)
    labels = clt.fit_predict(image)
    quant = clt.cluster_centers_.astype("uint8")[labels]

    #Counting how many times a label occurs and sorting them in descending order
    occurancesOfLabels = Counter(labels)
    mostProminentLabels = sorted(occurancesOfLabels, key=occurancesOfLabels.get, reverse=True)

    #Matching the labels to their respective Lab color
    mostProminentColorList = []
    for i in mostProminentLabels:
        itemIndex = np.where(labels==i)[0][0]
        mostProminentColorList.append(quant[itemIndex])

    return mostProminentColorList
Developer: nicfro, Project: Img_Anno, Lines: 34, Source: ProminentColors.py

Example 8: define_clusters

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def define_clusters(projections):
    """
    Creates several different clusterings of the data in projections.

    :param projections: dict(string, (2 x Num_Samples) numpy.ndarray)
        dictionary mapping the projection type (e.g. "tSNE") to an array containing
        the two-dimensional coordinates for each sample in the projection.

    :return: dict of string (projection name) =>
        (dict of string (cluster technique) => np.ndarray of size N_Samples (cluster assignments))
    """

    pbar = ProgressBar(4 * len(projections));

    out_clusters = dict();

    for key in projections:

        proj_data = projections[key];
        proj_clusters = dict();

        # K-means for k = 2-5
        for k in range(2, 6):
            clust_name = "K-Means, k=" + str(k);
            kmeans = MiniBatchKMeans(n_clusters=k);
            clust_assignments = kmeans.fit_predict(proj_data.T);
            proj_clusters.update({clust_name: clust_assignments});
            pbar.update();

        out_clusters.update({key: proj_clusters});

    pbar.complete();

    return out_clusters;
Developer: shakea02, Project: FastProject, Lines: 36, Source: Projections.py
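
A hypothetical usage sketch for the function above (the projections dictionary and its sizes are made up, and define_clusters together with its FastProject dependencies, such as ProgressBar, is assumed to be importable from the module):

import numpy as np

projections = {
    "tSNE": np.random.rand(2, 500),   # 2 x N_Samples coordinates
    "PCA": np.random.rand(2, 500),
}
clusters = define_clusters(projections)
# e.g. clusters["tSNE"]["K-Means, k=3"] is an array of 500 cluster assignments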

Example 9: color_quantization_sk

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def color_quantization_sk(image, clusters):
    # grab the image's width and height
    (h, w) = image.shape[:2]
     
    # convert the image from the BGR color space (OpenCV's default) to the
    # L*a*b* color space -- since we will be clustering using k-means,
    # which is based on Euclidean distance, we use the L*a*b* color space,
    # where Euclidean distance carries perceptual meaning
    image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
     
    # reshape the image into a feature vector so that k-means
    # can be applied
    image = image.reshape((image.shape[0] * image.shape[1], 3))
     
    # apply k-means using the specified number of clusters and
    # then create the quantized image based on the predictions
    clt = MiniBatchKMeans(n_clusters = clusters)
    labels = clt.fit_predict(image)
    quant = clt.cluster_centers_.astype("uint8")[labels]
     
    # reshape the feature vectors to images
    quant = quant.reshape((h, w, 3))
     
    # convert from L*a*b* back to BGR
    quant = cv2.cvtColor(quant, cv2.COLOR_LAB2BGR)
    return quant
Developer: simama, Project: RealSense, Lines: 29, Source: mono_avoid.py

Example 10: big_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def big_kmeans(docs, k, batch_size=1000, n_features=(2 ** 20),
               single_pass=True):
    """k-means for very large sets of documents.

    See kmeans for documentation. Differs from that function in that it does
    not compute tf-idf or LSA, and fetches the documents in a streaming
    fashion, so they don't need to be held in memory. It does not do random
    restarts.

    If the option single_pass is set to False, the documents are visited
    twice: once to fit a k-means model, once to determine their label in
    this model.
    """
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.feature_extraction.text import HashingVectorizer

    v = HashingVectorizer(input="content", n_features=n_features, norm="l2")
    km = MiniBatchKMeans(n_clusters=k)

    labels = []
    for batch in batches(docs, batch_size):
        batch = map(fetch, batch)  # fetch only the documents in this batch
        batch = v.transform(batch)
        y = km.fit_predict(batch)
        if single_pass:
            labels.extend(y.tolist())

    if not single_pass:
        for batch in batches(docs, batch_size):
            batch = map(fetch, batch)
            batch = v.transform(batch)
            labels.extend(km.predict(batch).tolist())

    return labels
Developer: aolieman, Project: xtas, Lines: 36, Source: cluster.py
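
A side note on the streaming pattern above: km.fit_predict(batch) refits the model from scratch on every batch, so each batch's labels come from a model that has only seen that batch. If genuinely incremental training is wanted, scikit-learn's MiniBatchKMeans also provides partial_fit; a rough sketch under the same assumptions (the batches, fetch and v helpers come from the example above):

km = MiniBatchKMeans(n_clusters=k)

# first pass: incrementally update the centroids, batch by batch
for batch in batches(docs, batch_size):
    Xb = v.transform(map(fetch, batch))
    km.partial_fit(Xb)

# second pass: label every document with the final model
labels = []
for batch in batches(docs, batch_size):
    Xb = v.transform(map(fetch, batch))
    labels.extend(km.predict(Xb).tolist())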

Example 11: VideoFrameReaders

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def VideoFrameReaders(VideoDirectory):
    cap = cv2.VideoCapture(VideoDirectory)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    fgbg = cv2.createBackgroundSubtractorMOG2()
    timestamp = []
    count = 0
    try:
        while cap.isOpened():
            ret,frame = cap.read()
            time = cap.get(0)  # current position of the video in milliseconds (CAP_PROP_POS_MSEC)
            timestamp.append(time)

            print(timestamp)

            if frame is None:
                break
           # frame = cv2.cvtColor(frame,cv2.COLOR_RGB2GRAY)
            image = frame.reshape((frame.shape[0]*frame.shape[1],3))
            K = 4
            clf = MiniBatchKMeans(K)

            # predict cluster labels and quantize each color based on the labels

            cls_labels = clf.fit_predict(image)
            print(cls_labels)
            cls_quant = clf.cluster_centers_.astype("uint8")[cls_labels]


    except EOFError:
        pass
Developer: saminaji, Project: CellMigration, Lines: 32, Source: kmeans_seg.py

Example 12: big_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def big_kmeans(docs, k, batch_size=1000, n_features=(2 ** 20),
               single_pass=True):
    """k-means for very large sets of documents.

    """
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.feature_extraction.text import HashingVectorizer

    v = HashingVectorizer(input="content", n_features=n_features, norm="l2")
    km = MiniBatchKMeans(n_clusters=k)

    labels = []
    for batch in batches(docs, batch_size):
        batch = map(fetch, batch)  # fetch only the documents in this batch
        batch = v.transform(batch)
        y = km.fit_predict(batch)
        if single_pass:
            labels.extend(y.tolist())

    if not single_pass:
        for batch in batches(docs, batch_size):
            batch = map(fetch, batch)
            batch = v.transform(batch)
            labels.extend(km.predict(batch).tolist())

    return labels
Developer: mariahendrike, Project: xtas, Lines: 28, Source: cluster.py

Example 13: cluster_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
def cluster_kmeans(args):
	"""
	Clustering with mini-batch K-Means.
	"""
	#load data
	g_it = node_link_data.node_link_data_to_eden(input = args.input_file, input_type = "file")
	vec = graph.Vectorizer(r = args.radius,d = args.distance, nbits = args.nbits)
	X = vec.transform(g_it, n_jobs = args.n_jobs)
	
	#log statistics on data
	logger.info('Instances: %d Features: %d with an avg of %d features per instance' % (X.shape[0], X.shape[1], X.getnnz() / X.shape[0]))

	#clustering
	clustering_algo = MiniBatchKMeans(n_clusters = args.n_clusters, n_init = args.n_init)
	y = clustering_algo.fit_predict(X) 
	msg = 'Predictions statistics: '
	msg += util.report_base_statistics(y)
	logger.info(msg)

	#save model for vectorizer
	out_file_name = "vectorizer"
	eden_io.dump(vec, output_dir_path = args.output_dir_path, out_file_name = out_file_name)
	logger.info("Written file: %s/%s",args.output_dir_path, out_file_name)

	out_file_name = "labels"
	eden_io.store_matrix(matrix = y, output_dir_path = args.output_dir_path, out_file_name = out_file_name, output_format = "text")
	logger.info("Written file: %s/%s",args.output_dir_path, out_file_name)
Developer: nickgentoo, Project: pyEDeN, Lines: 29, Source: cluster_kmeans.py

Example 14: ClusteringEnsemble

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
class ClusteringEnsemble(BaseEstimator):

    def __init__(self, estimator_const=LinearRegression, n_clusters=2):
        self.estimator_const_ = estimator_const
        self.n_clusters_ = n_clusters
        self.clustering = MiniBatchKMeans(n_clusters=self.n_clusters_)

    def get_params(self, deep=True):
        return { "n_clusters": self.n_clusters_}

    def fit(self, X, y):
        print("Training KMeans")
        colors = self.clustering.fit_predict(X).reshape(X.shape[0])

        print("Training Estimators")
        # each estimator is assigned to one cluster
        self.estimators = [self.estimator_const_() for i in range(self.n_clusters_)]
        for i in range(self.n_clusters_):
            rows = colors == i
            self.estimators[i].fit(X[rows], y[rows])

    def predict(self, X):
        y = np.zeros(X.shape[0])
        print("Predicting clusters")
        colors = self.clustering.predict(X)

        print("Estimating results")
        for i in range(self.n_clusters_):
            rows = colors == i
            y[rows] = self.estimators[i].predict(X[rows])

        return y
Developer: Patechoc, Project: labs-untested, Lines: 34, Source: estimators.py

Example 15: _create_node

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_predict [as alias]
    def _create_node(self, data, original_idx, current_depth):
        """Create an HKMNode and recursively create child nodes if the maximum
        depth has not been reached."""

        # end of recursion condition
        if current_depth >= self.max_depth or data.shape[0] <= self.leaf_size:
            if self.verbose:
                self._n_points += data.shape[0]

            nn_model = NearestNeighbors(n_neighbors=min(data.shape[0], self.leaf_size),
                                        metric='cosine', algorithm='brute').fit(data)
            node = HKMNode(nn_model=nn_model,
                           original_idx=original_idx)
        else:
            # it is possible to create a new branch in the data
            # go through each children and cluster them
            # cluster with mini-batch K-means
            clustering = MiniBatchKMeans(n_clusters=self.branching_factor,
                                         batch_size=self.batch_size)
            # partition the data into branching_factor clusters
            labels = clustering.fit_predict(data)

            node = HKMNode(clustering=clustering)
            node.children = []

            for children_id in range(self.branching_factor):
                idx = np.where(labels == children_id)[0]
                node.children.append(self._create_node(
                    data[idx],
                    original_idx[idx],
                    current_depth + 1)
                )
        return node
Developer: KarimJedda, Project: science_concierge, Lines: 35, Source: hkm_nn.py


Note: The sklearn.cluster.MiniBatchKMeans.fit_predict examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by their respective authors; copyright in the source code remains with the original authors, and distribution and use should follow each project's License. Do not reproduce without permission.