This article collects typical usage examples of the MiniBatchKMeans.predict method from Python's sklearn.cluster module. If you have been wondering what MiniBatchKMeans.predict does, how to use it, or what calling it looks like in practice, the curated code examples below may help. You can also explore further usage examples of its containing class, sklearn.cluster.MiniBatchKMeans.
Below are 14 code examples of MiniBatchKMeans.predict, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
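All of the examples share the same fit-then-predict pattern. As a warm-up, here is a minimal, self-contained sketch of that pattern; the synthetic data and parameter values are illustrative only and are not taken from any of the collected examples.

import numpy as np
from sklearn.cluster import MiniBatchKMeans

# Illustrative data: 1,000 random points in 2-D.
rng = np.random.RandomState(0)
X = rng.rand(1000, 2)

# fit() learns the centroids from mini-batches of the data;
# predict() then returns the index of the nearest centroid per sample.
kmeans = MiniBatchKMeans(n_clusters=8, batch_size=100, random_state=0)
kmeans.fit(X)
labels = kmeans.predict(X)
print(labels[:10])
print(kmeans.cluster_centers_.shape)  # (8, 2)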
Example 1: main
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
import time
import numpy as np
from multiprocessing import Pool
from sklearn.cluster import MiniBatchKMeans

def main():
    p = Pool(8)
    # import_files, map_sift_desc and reduce_sift_desc are helpers defined
    # elsewhere in the source project.
    # dog_images, cat_images, test_images = import_files('small_dog_img',
    #                                                    'small_cat_img', 'small_test_img')
    dog_images, cat_images, test_images = import_files('dog_img', 'cat_img',
                                                       'test_img')
    n_dog = len(dog_images)
    n_cat = len(cat_images)
    n_train = n_dog + n_cat
    n_test = len(test_images)
    all_images = np.concatenate((dog_images, cat_images, test_images), axis=0)
    n_all = all_images.shape[0]

    # Extract SIFT descriptors in parallel; report elapsed time in ms.
    sift_start = time.time()
    sift_features = p.map(map_sift_desc, all_images)
    sift_end = time.time()
    print((sift_end - sift_start) * 1000)

    train_sift_features = reduce_sift_desc(sift_features[:n_train])
    test_sift_features = reduce_sift_desc(sift_features[n_train:])

    # Build a 1000-word visual vocabulary with mini-batch k-means, then map
    # every descriptor to its nearest visual word.
    kmeans_start = time.time()
    kmeans = MiniBatchKMeans(n_clusters=1000, batch_size=1000, max_iter=250)
    kmeans.fit(train_sift_features)
    train_predicted_labels = kmeans.predict(train_sift_features)
    test_predicted_labels = kmeans.predict(test_sift_features)
    kmeans_end = time.time()
    print((kmeans_end - kmeans_start) * 1000)
Example 2: clustering3
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.cluster import MiniBatchKMeans

def clustering3(results):
    # Load a (value, row, col) triplet file and rebuild the sparse matrix;
    # row/col indices are 1-based in the CSV, hence the -1.
    my_data = np.delete(np.genfromtxt('/dragon/Text-clustering-basing-on-string-metrics/Data/RObjects/sparse_matrix.csv', delimiter=','), 0, 0)
    my_sparse_data = coo_matrix((my_data[:, 0], (my_data[:, 1] - 1, my_data[:, 2] - 1)))
    my_sparse_data = my_sparse_data.tocsr()
    true_k = 67969
    # `results` holds precomputed initial centroids used to seed k-means.
    km = MiniBatchKMeans(n_clusters=true_k, init=results, n_init=1,
                         batch_size=1000, init_size=2 * true_k)
    # The model must be fitted before labels_ exists; fit_predict both fits
    # the centroids and assigns a label to every row.
    km.fit_predict(my_sparse_data)
    np.savetxt("/dragon/Text-clustering-basing-on-string-metrics/Data/pyObjects/km_labels2.txt", km.labels_, delimiter=', ')
Developer: potockan, Project: Text-clustering-basing-on-string-metrics, Lines: 10, Source: 16_mini_batch_kmeans_parallel.py
Example 3: lat_long_manipulation
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
from sklearn.cluster import MiniBatchKMeans

def lat_long_manipulation(train, test):
    # Bucket rows into 125 geographic clusters by latitude/longitude.
    esti = 125
    km = MiniBatchKMeans(n_clusters=esti, random_state=1377, init_size=esti * 100)
    # .loc replaces the long-removed pandas .ix indexer from the original code.
    km.fit(train.loc[:, ['latitude', 'longitude']])
    train['loc_clust_125'] = km.predict(train.loc[:, ['latitude', 'longitude']])
    test['loc_clust_125'] = km.predict(test.loc[:, ['latitude', 'longitude']])
    return train, test
Example 4: catsAnddogs
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def catsAnddogs():
    import glob
    import numpy as np
    import mahotas as mh
    from mahotas.features import surf
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import (classification_report, precision_score,
                                 recall_score, accuracy_score)

    all_instance_filenames = []
    all_instance_targets = []
    for f in glob.glob('./data/train/*.jpg'):
        target = 1 if 'cat' in f else 0
        all_instance_filenames.append(f)
        all_instance_targets.append(target)
    # Note: glob order is arbitrary; shuffle the file list if a random
    # train/test split is required.

    surf_features = []
    for f in all_instance_filenames:
        print('reading image:', f)
        image = mh.imread(f, as_grey=True)
        # Keep only the 64-D descriptors, dropping the 5 interest-point columns.
        surf_features.append(surf.surf(image)[:, 5:])

    train_len = int(len(all_instance_filenames) * .6)
    X_train_surf_features = np.concatenate(surf_features[:train_len])
    X_test_surf_features = np.concatenate(surf_features[train_len:])
    y_train = all_instance_targets[:train_len]
    y_test = all_instance_targets[train_len:]

    n_clusters = 300
    print('Clustering', len(X_train_surf_features), 'features')
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit(X_train_surf_features)

    # Bag-of-visual-words: per-image histogram of cluster assignments,
    # zero-padded so every histogram has n_clusters bins.
    X_train = []
    for instance in surf_features[:train_len]:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        X_train.append(features)

    X_test = []
    for instance in surf_features[train_len:]:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        X_test.append(features)

    clf = LogisticRegression(C=0.001, penalty='l2')
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    print(classification_report(y_test, predictions))
    print('precision:', precision_score(y_test, predictions))
    print('recall:', recall_score(y_test, predictions))
    print('accuracy:', accuracy_score(y_test, predictions))
Example 5: test_predict_minibatch_dense_input
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def test_predict_minibatch_dense_input():
    mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, random_state=40).fit(X)
    # sanity check: predict centroid labels
    pred = mb_k_means.predict(mb_k_means.cluster_centers_)
    assert_array_equal(pred, np.arange(n_clusters))
    # sanity check: re-predict labeling for training set samples
    pred = mb_k_means.predict(X)
    assert_array_equal(pred, mb_k_means.labels_)
Example 6: test_predict_minibatch
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def test_predict_minibatch(data, init):
    mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init=init,
                                 n_init=10, random_state=0).fit(data)
    # sanity check: re-predict labeling for training set samples
    assert_array_equal(mb_k_means.predict(data), mb_k_means.labels_)
    # sanity check: predict centroid labels
    pred = mb_k_means.predict(mb_k_means.cluster_centers_)
    assert_array_equal(pred, np.arange(n_clusters))
Example 7: __init__
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
import os
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.feature_extraction.text import TfidfVectorizer

class Cluster:
    def __init__(self):
        self.train_file = os.path.join('data', 'sample')

    def run_main(self):
        self.load_data()
        self.vectorize()
        # KMeans - K++
        print("KMeans - K++")
        self.kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10000)
        self.train()
        self.get_metrics()
        # MiniBatchKMeans - K++
        print("MiniBatchKMeans - K++")
        self.kmeans = MiniBatchKMeans(n_clusters=3, init='k-means++', n_init=10000)
        self.train()
        self.get_metrics()
        # KMeans - Random
        print("KMeans - Random")
        self.kmeans = KMeans(n_clusters=3, init='random', n_init=10000)
        self.train()
        self.get_metrics()
        # MiniBatchKMeans - Random
        print("MiniBatchKMeans - Random")
        self.kmeans = MiniBatchKMeans(n_clusters=3, init='random', n_init=10000)
        self.train()
        self.get_metrics()

    def load_data(self):
        self.training_data = []
        with open(self.train_file, 'r') as fd:
            for line in fd.readlines():
                self.training_data.append(line)

    def vectorize(self):
        self.vect = TfidfVectorizer(stop_words='english')
        self.X = self.vect.fit_transform(self.training_data)

    def train(self):
        self.kmeans.fit(self.X)

    def get_metrics(self):
        print(self.kmeans.labels_)

    def test(self):
        self.test_data = ["I know both Ashok and Harini"]
        # Reuse the fitted vectorizer's vocabulary; refitting it on the test
        # sentence would produce features the k-means model has never seen.
        self.Y = self.vect.transform(self.test_data)
        print(self.kmeans.predict(self.Y))
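A hypothetical driver for this class, assuming a plain-text corpus with one document per line at data/sample:

if __name__ == '__main__':
    c = Cluster()
    c.run_main()  # fit each estimator variant and print its labels
    c.test()      # assign a cluster to an unseen sentence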
Example 8: test_predict_minibatch_random_init_sparse_input
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def test_predict_minibatch_random_init_sparse_input():
    mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, init="random",
                                 n_init=10).fit(X_csr)
    # sanity check: re-predict labeling for training set samples
    assert_array_equal(mb_k_means.predict(X_csr), mb_k_means.labels_)
    # sanity check: predict centroid labels
    pred = mb_k_means.predict(mb_k_means.cluster_centers_)
    assert_array_equal(pred, np.arange(n_clusters))
    # check that models trained on sparse input also work for dense input at
    # predict time
    assert_array_equal(mb_k_means.predict(X), mb_k_means.labels_)
Example 9: cluster_mbk
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def cluster_mbk(self):
    # Quantize all (x, y) frame coordinates into 40 clusters.
    mbk = MiniBatchKMeans(init='k-means++', n_clusters=40, batch_size=100,
                          n_init=100, max_no_improvement=10, verbose=0,
                          random_state=0)
    mbk.fit(self.all_frames_xy)
    clusters = mbk.predict(self.all_frames_xy)
    return clusters
Example 10: DocDescriptor
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
class DocDescriptor(object):
    def __init__(self, word_descriptor, n_clusters=1000):
        self._n_clusters = n_clusters
        self._cluster = MiniBatchKMeans(n_clusters=n_clusters, verbose=1,
                                        max_no_improvement=None,
                                        reassignment_ratio=1.0)
        self._word_descriptor = word_descriptor

    def get_word_descriptor(self, img):
        # get_features_from_image is a helper defined elsewhere in the source project.
        X = get_features_from_image(img)
        words = []
        for i in X:
            words.append(self._word_descriptor.transform(i))
        return words

    def partial_fit(self, img):
        X = self.get_word_descriptor(img)
        self._cluster.partial_fit(X)

    def transform(self, img):
        # Normalized bag-of-visual-words histogram over the cluster vocabulary.
        X = self.get_word_descriptor(img)
        Y = self._cluster.predict(X)
        desc = [0] * self._n_clusters
        unit = 1.0 / self._n_clusters
        for i in range(len(Y)):
            desc[Y[i]] += unit
        return desc
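The histogram loop in transform can also be written without an explicit Python loop. A minimal vectorized equivalent, keeping this example's own normalization by n_clusters (the helper name is illustrative, not from the source project):

import numpy as np

def histogram(Y, n_clusters):
    # Count how often each visual word occurs, padded to n_clusters bins,
    # then scale by 1 / n_clusters exactly as the loop above does.
    counts = np.bincount(Y, minlength=n_clusters)
    return (counts / float(n_clusters)).tolist()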
Example 11: KMeansFeatureTransformer
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
class KMeansFeatureTransformer(object):
    def __init__(self, patches, k=1500, model_path=None):
        self.mean_distances = None
        if model_path is None:
            self.k_means = MiniBatchKMeans(n_clusters=k, compute_labels=False,
                                           reassignment_ratio=0,
                                           max_no_improvement=10,
                                           batch_size=10000, verbose=2)
            self.k_means.fit(patches)
            # update mean distances
            self.compute_mean_distances(patches)
        else:
            self.load(model_path)

    def transform(self, patches):
        return self.k_means.transform(patches)

    def predict(self, patches):
        return self.k_means.predict(patches)

    def compute_mean_distances(self, patches):
        self.mean_distances = np.mean(self.k_means.transform(patches), axis=0)

    def save(self, file_path='model/k_means_model'):
        joblib.dump(self.k_means, file_path)

    def load(self, file_path):
        self.k_means = joblib.load(file_path)
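A hypothetical usage round trip for this transformer; the random vectors stand in for real image patches and the file path is illustrative:

import numpy as np

rng = np.random.RandomState(0)
patches = rng.rand(5000, 64)  # e.g. flattened 8x8 grayscale patches

t = KMeansFeatureTransformer(patches, k=50)
t.save('k_means_model')       # persists the fitted model via joblib

t2 = KMeansFeatureTransformer(None, model_path='k_means_model')
codes = t2.predict(patches[:10])    # nearest-centroid index per patch
dists = t2.transform(patches[:10])  # distance to each of the 50 centroids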
Example 12: processAttributes_surf
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
import glob
import numpy as np
import mahotas as mh
from mahotas.features import surf
from sklearn.cluster import MiniBatchKMeans

def processAttributes_surf(filePattern):
    targets_data = []
    surf_features = []
    counter = 0
    for f in glob.glob(filePattern):
        counter += 1
        print('Reading image: ', counter, f)
        target = 1 if 'cat' in f else 0
        targets_data.append(target)
        image = mh.imread(f, as_grey=True)
        surf_features.append(surf.surf(image)[:, 5:])

    X_train_surf_features = np.concatenate(surf_features)

    # Clusters
    n_clusters = 300
    print('Clustering', len(X_train_surf_features), 'features')
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit(X_train_surf_features)

    # Per-image histogram of visual-word assignments, zero-padded to n_clusters bins.
    x_data = []
    for instance in surf_features:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        x_data.append(features)
    return x_data, targets_data
Example 13: train
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
from sklearn.cluster import KMeans, MiniBatchKMeans

def train(X, vectorizer, true_k, minibatch=False, showLable=False):
    # Train k-means either on mini-batch samples or on all of the raw data.
    if minibatch:
        km = MiniBatchKMeans(n_clusters=true_k, init='k-means++', n_init=1,
                             init_size=1000, batch_size=1000, verbose=False)
    else:
        km = KMeans(n_clusters=true_k, init='k-means++', max_iter=300,
                    n_init=2, n_jobs=-2, verbose=False)
    km.fit(X)
    if showLable:
        print("Top terms per cluster:")
        order_centroids = km.cluster_centers_.argsort()[:, ::-1]
        terms = vectorizer.get_feature_names()
        # print(vectorizer.get_stop_words())
        for i in range(true_k):
            print("Cluster %d:" % i, end='')
            o.write("Cluster %d:" % i)  # `o` is an output file opened elsewhere in the source module
            for ind in order_centroids[i, :3]:
                print(' %s' % terms[ind], end='')
                o.write(' %s' % terms[ind])
            print()
            o.write('\n')
    result = list(km.predict(X))
    # print('Cluster distribution:')
    # print(dict([(i, result.count(i)) for i in result]))
    return -km.score(X)
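A hypothetical call site for this function; the corpus contents are illustrative, and any list of raw document strings would do:

from sklearn.feature_extraction.text import TfidfVectorizer

corpus = ["the cat sat on the mat", "dogs chase cats",
          "k-means clusters documents"] * 10
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(corpus)
inertia = train(X, vectorizer, true_k=3, minibatch=True)  # -km.score(X) is the inertia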
Example 14: big_kmeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import predict [as alias]
def big_kmeans(docs, k, batch_size=1000, n_features=(2 ** 20),
               single_pass=True):
    """k-means for very large sets of documents.

    See kmeans for documentation. Differs from that function in that it does
    not compute tf-idf or LSA, and fetches the documents in a streaming
    fashion, so they don't need to be held in memory. It does not do random
    restarts.

    If the option single_pass is set to False, the documents are visited
    twice: once to fit a k-means model, once to determine their label in
    this model.
    """
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.feature_extraction.text import HashingVectorizer

    v = HashingVectorizer(input="content", n_features=n_features, norm="l2")
    km = MiniBatchKMeans(n_clusters=k)

    labels = []
    for batch in batches(docs, batch_size):
        batch = map(fetch, batch)  # fetch only this batch, not all of docs
        batch = v.transform(batch)
        # partial_fit updates the centroids incrementally; refitting on each
        # batch would discard everything learned from earlier batches.
        km.partial_fit(batch)
        if single_pass:
            labels.extend(km.predict(batch).tolist())
    if not single_pass:
        for batch in batches(docs, batch_size):
            batch = map(fetch, batch)
            batch = v.transform(batch)
            labels.extend(km.predict(batch).tolist())
    return labels
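batches and fetch are helpers from the source project and are not shown here. A minimal batches compatible with the loops above might look like this (hypothetical sketch):

from itertools import islice

def batches(iterable, size):
    """Yield consecutive lists of up to `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk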