

Python MiniBatchKMeans.predict Method Code Examples

This article collects typical usage examples of the sklearn.cluster.MiniBatchKMeans.predict method in Python. If you are wondering what MiniBatchKMeans.predict does, how to call it, or where to find working examples, the curated snippets below should help. You can also explore further usage examples of the containing class, sklearn.cluster.MiniBatchKMeans.


Fifteen code examples of MiniBatchKMeans.predict are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
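Before turning to the collected examples, here is a minimal, self-contained sketch of the typical fit/predict workflow; the synthetic data and parameter values are purely illustrative:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

# illustrative data: 1,000 random points in 2 dimensions
rng = np.random.RandomState(0)
X = rng.rand(1000, 2)

# fit learns the cluster centers; predict assigns each sample to its nearest center
kmeans = MiniBatchKMeans(n_clusters=8, batch_size=100, random_state=0)
kmeans.fit(X)
labels = kmeans.predict(X)   # array of shape (1000,) with values in 0..7
print(labels[:10])

Once fitted, the same estimator can label data it has never seen, which is the pattern most of the examples below rely on.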

Example 1: main

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def main():

    p = Pool(8)

    #dog_images, cat_images, test_images = import_files('small_dog_img',
    #'small_cat_img', 'small_test_img')
    dog_images, cat_images, test_images = import_files('dog_img', 'cat_img',
    'test_img')
    n_dog = len(dog_images)
    n_cat = len(cat_images)
    n_train = n_dog + n_cat
    n_test = len(test_images)
    all_images = np.concatenate((dog_images, cat_images, test_images), axis = 0)
    n_all = all_images.shape[0]
    sift_start = time.time()
    sift_features = p.map(map_sift_desc, all_images)
    sift_end = time.time()
    print (sift_end - sift_start)*1000
    train_sift_features = reduce_sift_desc(sift_features[: n_train])
    test_sift_features = reduce_sift_desc(sift_features[n_train :])
    kmeans_start = time.time()
    kmeans = MiniBatchKMeans(n_clusters = 1000, batch_size = 1000, max_iter = 250)
    kmeans.fit(train_sift_features)
    train_predicted_labels = kmeans.predict(train_sift_features)
    test_predicted_labels = kmeans.predict(test_sift_features)
    kmeans_end = time.time()
    print (kmeans_end - kmeans_start)*1000
Developer: redswallow, Project: image-understanding, Lines: 30, Source: dog_cat.py

Example 2: clustering3

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def clustering3(results):
   my_data = np.delete(np.genfromtxt('/dragon/Text-clustering-basing-on-string-metrics/Data/RObjects/sparse_matrix.csv', delimiter=','), 0, 0)
   my_sparse_data = coo_matrix((my_data[:,0], (my_data[:, 1]-1, my_data[:, 2]-1)))
   my_sparse_data = my_sparse_data.tocsr()
   true_k = 67969
   km = MiniBatchKMeans(n_clusters=true_k, init=results, n_init=1, batch_size=1000, init_size = 2*true_k)
   km.fit(my_sparse_data)  # fit must come before predict; labels_ is only set during fitting
   km.predict(my_sparse_data)
   np.savetxt("/dragon/Text-clustering-basing-on-string-metrics/Data/pyObjects/km_labels2.txt", km.labels_, delimiter = ', ')
Developer: potockan, Project: Text-clustering-basing-on-string-metrics, Lines: 10, Source: 16_mini_batch_kmeans_parallel.py

Example 3: lat_long_manipulation

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def lat_long_manipulation(train, test):

    esti = 125
    km = MiniBatchKMeans(n_clusters=esti, random_state=1377, init_size=esti*100)
    km.fit(train.loc[:, ['latitude', 'longitude']])
    train['loc_clust_125'] = km.predict(train.loc[:, ['latitude', 'longitude']])
    test['loc_clust_125'] = km.predict(test.loc[:, ['latitude', 'longitude']])
    return train, test
Developer: RashmiDS, Project: YelpKaggle, Lines: 10, Source: data_cleaning.py

Example 4: catsAnddogs

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def catsAnddogs():
  import numpy as np 
  import mahotas as mh
  from mahotas.features import surf 
  from sklearn.linear_model import LogisticRegression
  from sklearn.metrics import classification_report, precision_score, recall_score, accuracy_score
  import glob
  from sklearn.cluster import MiniBatchKMeans


  all_instance_filenames = []
  all_instance_targets = []

  for f in glob.glob('./data/train/*.jpg'):
    target = 1 if 'cat' in f else 0
    all_instance_filenames.append(f)
    all_instance_targets.append(target)

  surf_features = []
  counter = 0
  for f in all_instance_filenames:
    print 'reading image:',f
    image = mh.imread(f,as_grey=True)
    surf_features.append(surf.surf(image)[:,5:])

  train_len = int(len(all_instance_filenames)*.6)
  X_train_surf_features = np.concatenate(surf_features[:train_len])
  X_test_surf_features = np.concatenate(surf_features[train_len:])
  y_train = all_instance_targets[:train_len]
  y_test = all_instance_targets[train_len:]

  n_clusters = 300
  print 'Clustering', len(X_train_surf_features), 'features'
  estimator = MiniBatchKMeans(n_clusters=n_clusters)
  estimator.fit_transform(X_train_surf_features)

  X_train = []
  for instance in surf_features[:train_len]:
    clusters = estimator.predict(instance)
    features = np.bincount(clusters)
    if len(features) < n_clusters:
      features = np.append(features,np.zeros((1,n_clusters-len(features))))
    X_train.append(features)

  X_test = []
  for instance in surf_features[train_len:]:
    clusters = estimator.predict(instance)
    features = np.bincount(clusters)
    if len(features) < n_clusters:
      features = np.append(features,np.zeros((1,n_clusters-len(features))))
    X_test.append(features)    

  clf = LogisticRegression(C=0.001,penalty='l2')
  clf.fit(X_train, y_train)
  predictions = clf.predict(X_test)
  print classification_report(y_test,predictions)
  print 'precision:', precision_score(y_test,predictions)
  print 'recall:', recall_score(y_test,predictions)
  print 'accuracy:', accuracy_score(y_test,predictions)
Developer: marcinwal, Project: ThoughtfulMachineLearning, Lines: 60, Source: clusteringKmeans.py

Example 5: test_predict_minibatch_dense_input

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def test_predict_minibatch_dense_input():
    mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, random_state=40).fit(X)

    # sanity check: predict centroid labels
    pred = mb_k_means.predict(mb_k_means.cluster_centers_)
    assert_array_equal(pred, np.arange(n_clusters))

    # sanity check: re-predict labeling for training set samples
    pred = mb_k_means.predict(X)
    assert_array_equal(mb_k_means.predict(X), mb_k_means.labels_)
Developer: Lavanya-Basavaraju, Project: scikit-learn, Lines: 12, Source: test_k_means.py

Example 6: test_predict_minibatch

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def test_predict_minibatch(data, init):
    mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init=init,
                                 n_init=10, random_state=0).fit(data)

    # sanity check: re-predict labeling for training set samples
    assert_array_equal(mb_k_means.predict(data), mb_k_means.labels_)

    # sanity check: predict centroid labels
    pred = mb_k_means.predict(mb_k_means.cluster_centers_)
    assert_array_equal(pred, np.arange(n_clusters))
Developer: arthurmensch, Project: scikit-learn, Lines: 12, Source: test_k_means.py

Example 7: __init__

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
class Cluster:

    def __init__(self):
        self.train_file = os.path.join('data', 'sample')

    def run_main(self):
        self.load_data()
        self.vectorize()

        #KMeans - K++
        print "KMeans - K++"
        self.kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10000)
        self.train()
        self.get_metrics()

        #MiniBatchKMeans - K++
        print "MiniBatchKMeans - K++"
        self.kmeans = MiniBatchKMeans(n_clusters=3, init='k-means++', n_init=10000)       
        self.train()
        self.get_metrics()
 
        #KMeans - Random
        print "KMeans - Random"
        self.kmeans = KMeans(n_clusters=3, init='random', n_init=10000)
        self.train()
        self.get_metrics()

        #MiniBatchKMeans - Random
        print "MiniBatchKMeans - Random"
        self.kmeans = MiniBatchKMeans(n_clusters=3, init='random', n_init=10000)       
        self.train()
        self.get_metrics()


    def load_data(self):
        self.training_data = []
        with open(self.train_file, 'r') as fd:
            for line in fd.readlines():
                self.training_data.append(line)

    def vectorize(self):
        self.vect = TfidfVectorizer(stop_words='english')  
        self.X = self.vect.fit_transform(self.training_data)

    def train(self):
        self.kmeans.fit(self.X)        

    def get_metrics(self):
        print self.kmeans.labels_ 

    def test(self):
        self.test_data = ["I know both Ashok and Harini"]
        self.Y = self.vect.transform(self.test_data)  # reuse the vocabulary fitted on the training data
        print self.kmeans.predict(self.Y)
Developer: karthik-chandrasekar, Project: UnsupervisedLearning, Lines: 56, Source: Cluster.py

Example 8: test_predict_minibatch_random_init_sparse_input

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def test_predict_minibatch_random_init_sparse_input():
    mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init="random", n_init=10).fit(X_csr)

    # sanity check: re-predict labeling for training set samples
    assert_array_equal(mb_k_means.predict(X_csr), mb_k_means.labels_)

    # sanity check: predict centroid labels
    pred = mb_k_means.predict(mb_k_means.cluster_centers_)
    assert_array_equal(pred, np.arange(n_clusters))

    # check that models trained on sparse input also works for dense input at
    # predict time
    assert_array_equal(mb_k_means.predict(X), mb_k_means.labels_)
Developer: FedericaLionetto, Project: scikit-learn, Lines: 15, Source: test_k_means.py

Example 9: cluster_mbk

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
	def cluster_mbk(self):
		mbk = MiniBatchKMeans(init='k-means++', n_clusters=40, batch_size=100,
					  n_init=100, max_no_improvement=10, verbose=0,
					  random_state=0)
		mbk.fit(self.all_frames_xy)
		clusters = mbk.predict(self.all_frames_xy)
		return clusters
Developer: wpotrzebowski, Project: StormClustering, Lines: 9, Source: storm_clustering.py

Example 10: DocDescriptor

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
class DocDescriptor(object):

    def __init__(self, word_descriptor, n_clusters = 1000):
        self._n_clusters = n_clusters
        self._cluster = MiniBatchKMeans(n_clusters=n_clusters,verbose=1,max_no_improvement=None,reassignment_ratio=1.0)
        self._word_descriptor = word_descriptor

    def get_word_descriptor(self, img):
        X = get_features_from_image(img)
        words = []
        for i in X:
            words.append(self._word_descriptor.transform(i))
        return words

    def partial_fit(self, img):
        X = self.get_word_descriptor(img)
        self._cluster.partial_fit(X)

    def transform(self, img):
        X = self.get_word_descriptor(img)
        Y = self._cluster.predict(X)
        desc = [0]*self._n_clusters
        unit = 1.0/self._n_clusters
        for i in range(0, len(Y)):
            desc[Y[i]] += unit
        return desc
Developer: caoym, Project: odr, Lines: 28, Source: odr.py

Example 11: KMeansFeatureTransformer

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
class KMeansFeatureTransformer(object):
    def __init__(self, patches, k=1500, model_path=None):
        self.mean_distances = None
        if model_path is None:
            self.k_means = MiniBatchKMeans(n_clusters=k, compute_labels=False,
                                           reassignment_ratio=0, max_no_improvement=10, batch_size=10000,
                                           verbose=2)
            self.k_means.fit(patches)
            # update mean distances
            self.compute_mean_distances(patches)
        else:
            self.load(model_path)

    def transform(self, patches):
        return self.k_means.transform(patches)

    def predict(self, patches):
        return self.k_means.predict(patches)

    def compute_mean_distances(self, patches):
        self.mean_distances = np.mean(self.k_means.transform(patches), axis=0)

    def save(self, file_path='model/k_means_model'):
        joblib.dump(self.k_means, file_path)

    def load(self, file_path):
        self.k_means = joblib.load(file_path)
Developer: EDFward, Project: 10601-playground, Lines: 29, Source: feats_repr.py

Example 12: processAttributes_surf

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def processAttributes_surf(filePattern):
	targets_data = []
	surf_features = []
	counter = 0
	for f in glob.glob(filePattern):
		counter+=1
		print 'Reading image: ', counter, f

		target = 1 if 'cat' in f else 0
		targets_data.append(target)
		
		image = mh.imread(f, as_grey=True)
		surf_features.append(surf.surf(image)[:, 5:])

	X_train_surf_features = np.concatenate(surf_features)
	
	# Clusters
	n_clusters = 300
	print 'Clustering', len(X_train_surf_features), 'features'
	estimator = MiniBatchKMeans(n_clusters=n_clusters)
	estimator.fit_transform(X_train_surf_features)

	x_data = []
	for instance in surf_features:
		clusters = estimator.predict(instance)
		features = np.bincount(clusters)
		if len(features) < n_clusters:
			features = np.append(features, np.zeros((1, n_clusters-len(features))))

		x_data.append(features)

	return x_data, targets_data
Developer: mbonaventura, Project: aa2015, Lines: 34, Source: attribute_extraction.py

Example 13: train

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def train(X,vectorizer,true_k,minibatch = False,showLable = False):
    # train k-means either on minibatch-sampled data or on the full original data
    if minibatch:
        km = MiniBatchKMeans(n_clusters=true_k, init='k-means++', n_init=1,
                             init_size=1000, batch_size=1000, verbose=False)
    else:
        km = KMeans(n_clusters=true_k, init='k-means++', max_iter=300, n_init=2,n_jobs=-2,
                    verbose=False)
    km.fit(X)    
    if showLable:
        print("Top terms per cluster:")
        order_centroids = km.cluster_centers_.argsort()[:, ::-1]
        terms = vectorizer.get_feature_names()
#         print (vectorizer.get_stop_words())
        for i in range(true_k):
            print("Cluster %d:" % i, end='')
            o.write("Cluster %d:" % i,)
            for ind in order_centroids[i, :3]:
                print(' %s' % terms[ind], end='')
                o.write(' %s' % terms[ind].encode('utf-8'))
            print()
            o.write('\n')
    result = list(km.predict(X))
#     print ('Cluster distribution:')
#     print (dict([(i, result.count(i)) for i in result]))
    return -km.score(X)
Developer: haomingchan0811, Project: iPIN, Lines: 28, Source: K-means.py

Example 14: big_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def big_kmeans(docs, k, batch_size=1000, n_features=(2 ** 20),
               single_pass=True):
    """k-means for very large sets of documents.

    See kmeans for documentation. Differs from that function in that it does
    not compute tf-idf or LSA, and fetches the documents in a streaming
    fashion, so they don't need to be held in memory. It does not do random
    restarts.

    If the option single_pass is set to False, the documents are visited
    twice: once to fit a k-means model, once to determine their label in
    this model.
    """
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.feature_extraction.text import HashingVectorizer

    v = HashingVectorizer(input="content", n_features=n_features, norm="l2")
    km = MiniBatchKMeans(n_clusters=k)

    labels = []
    for batch in batches(docs, batch_size):
        batch = map(fetch, batch)  # fetch only the documents in this batch
        batch = v.transform(batch)
        y = km.fit_predict(batch)
        if single_pass:
            labels.extend(y.tolist())

    if not single_pass:
        for batch in batches(docs, batch_size):
            batch = map(fetch, batch)
            batch = v.transform(batch)
            labels.extend(km.predict(batch).tolist())

    return labels
Developer: aolieman, Project: xtas, Lines: 36, Source: cluster.py
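
As a usage illustration only: the helpers batches and fetch are defined elsewhere in the xtas module and are not shown above, so the toy stand-ins below are assumptions rather than the project's actual implementation:

def batches(iterable, size):
    # toy stand-in: yield successive chunks of `size` items
    buf = []
    for item in iterable:
        buf.append(item)
        if len(buf) == size:
            yield buf
            buf = []
    if buf:
        yield buf

def fetch(doc):
    # toy stand-in: each "document" is already its own text
    return doc

docs = ["cats purr", "dogs bark", "kittens meow", "puppies play"] * 100
labels = big_kmeans(docs, k=2, batch_size=100, single_pass=False)
print(len(labels), "documents labelled")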

Example 15: big_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def big_kmeans(docs, k, batch_size=1000, n_features=(2 ** 20),
               single_pass=True):
    """k-means for very large sets of documents.

    """
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.feature_extraction.text import HashingVectorizer

    v = HashingVectorizer(input="content", n_features=n_features, norm="l2")
    km = MiniBatchKMeans(n_clusters=k)

    labels = []
    for batch in batches(docs, batch_size):
        batch = map(fetch, batch)  # fetch only the documents in this batch
        batch = v.transform(batch)
        y = km.fit_predict(batch)
        if single_pass:
            labels.extend(y.tolist())

    if not single_pass:
        for batch in batches(docs, batch_size):
            batch = map(fetch, batch)
            batch = v.transform(batch)
            labels.extend(km.predict(batch).tolist())

    return labels
Developer: mariahendrike, Project: xtas, Lines: 28, Source: cluster.py


Note: The sklearn.cluster.MiniBatchKMeans.predict examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code; do not reproduce this article without permission.