This article collects typical usage examples of the Python method sklearn.cluster.KMeans.get_params. If you have been wondering how KMeans.get_params is used in practice, or are looking for concrete examples of it, the hand-picked code samples below may help. You can also browse further usage examples of the containing class, sklearn.cluster.KMeans.
Six code examples of KMeans.get_params are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
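Before the collected examples, here is a minimal orientation sketch (not taken from any of the examples below) showing what get_params returns: a plain dict of the estimator's constructor arguments, which can also be passed back through set_params. The parameter values used here are only illustrative.

from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=5, random_state=0)
params = kmeans.get_params()          # dict of constructor arguments
print(params["n_clusters"])           # 5
print(params["random_state"])         # 0
kmeans.set_params(**params)           # the same dict can be fed back in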
Example 1: range
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
# The surrounding script also imports cv2 and math, creates a SIFT detector (sift),
# and initializes points, dog_features, cat_features and max_records.
for idx in range(0, max_records):
    print "processing dog." + str(idx) + ".jpg\n"
    img = cv2.imread('train/train/dog.' + str(idx) + '.jpg')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kp = sift.detect(gray, None)
    tkp, td = sift.compute(gray, kp)
    temp_points = []
    for k in tkp:
        tuples = (int(math.ceil(k.pt[0])), int(math.ceil(k.pt[1])))
        points.append(tuples)
        temp_points.append(tuples)
    dog_features[idx] = temp_points

kmeans = KMeans()
kmeans = kmeans.fit(points)
params = kmeans.get_params()
n_clusters = params["n_clusters"]

overall_feats = []
count = 1
for cats in cat_features:
    print "Record-->" + str(count)
    count += 1
    clusters = kmeans.predict(cat_features[cats])
    print clusters
    feats = [0] * n_clusters  # one histogram bin per cluster
    for num in clusters:
        feats[num] = feats[num] + 1
    overall_feats.append(feats)
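A side note on the counting loop at the end of Example 1: with NumPy the per-record histogram over cluster assignments can be computed in a single call. This is a small sketch under the same assumptions (a fitted kmeans and one list of keypoint coordinates per record); it is not part of the original example.

import numpy as np

def cluster_histogram(kmeans, keypoints, n_clusters):
    # Assign each keypoint to its nearest cluster centre, then count how
    # many keypoints fall into each of the n_clusters bins.
    labels = kmeans.predict(keypoints)
    return np.bincount(labels, minlength=n_clusters)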
Example 2: get_x_y_data
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
import numpy as np
from util import get_x_y_data
from sklearn.cluster import KMeans

TEST_DATA_ROWS = 20

# class sklearn.cluster.KMeans
# (n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=1)
x_data, y_data, zone_cnt, zone_int_dict = get_x_y_data()
# values are unique, so the dictionary can be reversed
int_zone_dict = dict(zip(zone_int_dict.values(), zone_int_dict.keys()))

kmeans = KMeans(n_clusters=zone_cnt)  # a, b, c, d, e -> five cluster centres
kmeans.fit(x_data)
print kmeans.get_params()
# cluster centres
print kmeans.cluster_centers_
# cluster label assigned to every sample
print kmeans.labels_
# inertia: sum of squared distances to the closest centre; for a fixed number of clusters, smaller means tighter clusters
print kmeans.inertia_

indices = np.random.permutation(len(x_data))
x_test = x_data[indices[-TEST_DATA_ROWS:]]
x_distance = kmeans.transform(x_test)  # distance from each test sample to every centre
test_result = kmeans.predict(x_test)   # index of the nearest centre
for type, dis in zip(test_result, x_distance):
    pass  # loop body truncated in the original snippet
Example 3: editor_input_clustering
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
def editor_input_clustering(filtered_editor_log, code_template, user_info, ankors):
    def _unicode(c):
        if u'\u4e00' <= c <= u'\u9fff':
            return False
        try:
            c.decode('ascii')
        except UnicodeDecodeError:
            return False
        except UnicodeEncodeError:
            return False
        return True

    editor_cmd_data = filtered_editor_log.map(lambda x: x.filter_editor_log(['insert', 'remove', 'paste', 'copy', 'save', 'open'])).map(lambda x: x.combine_editor_input())
    insert_data = editor_cmd_data.flatmap(lambda x: x.cmd_list).filter_by(lambda x: x['action'] == u'insert').map(lambda x: x['lines'][0])
    template_filtered_data = editor_cmd_data.flatmap(lambda x: x.cmd_list).filter_by(lambda x: x['action'] == u'paste').map(lambda x: x['text'])
    template_filtered_data = template_filtered_data.map(lambda x: code_template.strip_template(x))
    total_input = data_reader.SList(insert_data + template_filtered_data.flatmap(lambda x: x.split(u"\n")))
    total_input = total_input.filter_by(lambda x: len(filter(lambda y: y not in [u"\n", u"\t", u"\r", u" "] and _unicode(y), x)) > 5)
    print len(total_input)

    feature_set, ankor_set = _generate_feature_set(total_input, ankors.splitter)
    print len(feature_set)

    # pca = PCA(n_components=2)
    # pca.fit(feature_set)
    # plot_data = pca.transform(feature_set)
    # fig, ax = report_tools.prepare_plot()
    # ax.scatter([item[0] for item in plot_data], [item[1] for item in plot_data])
    # plt.title('Scatter plot on editor input')
    # plt.savefig('scatter_editor_input.png')
    # fig = plt.figure()
    # ax = fig.add_subplot(111, projection='3d')
    # ax.scatter([item[0] for item in plot_data], [item[1] for item in plot_data], [item[2] for item in plot_data])
    # plt.title('Scatter plot on editor input')
    # plt.savefig('3d_scatter_editor_input.png')
    # db = Birch().fit(feature_set)
    # labels = db.labels_
    model = KMeans(n_clusters=300)
    labels = model.fit_predict(feature_set)
    result = zip(labels, total_input)

    size_list = []
    cluster_list = []
    print len(set(labels))
    for label in set(labels):
        tmp_result = filter(lambda x: x[0] == label, result)
        if len(tmp_result) > 100:
            size_list.append(len(tmp_result))
            cluster_list.append(label)
            with codecs.open("clustering_{}.txt".format(label), 'w', 'utf-8') as f_out:
                f_out.write(u"Size of cluster: {}\n".format(len(tmp_result)))
                for item in tmp_result:
                    f_out.write(u"{}\n".format(item[1]))

    fig, ax = report_tools.prepare_plot(figsize=(20, 5))
    ind = np.arange(len(size_list))
    width = 0.5
    ax.bar(ind, size_list, width)
    ax.set_xticks(ind + width)
    ax.set_xticklabels(['C{}'.format(i) for i in cluster_list], rotation='90')
    plt.title('Cluster size')
    plt.savefig('cluster_size.png')

    ankor_label = model.predict(ankor_set)
    with open('ankor_label.txt', 'w') as f_out:
        for item in zip(ankors.splitter, ankor_label):
            f_out.write("{}\n{}\n\n".format(item[0], item[1]))
    with open('model.json', 'w') as f_out:
        json.dump(model.get_params(), f_out)
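Note that model.get_params() serializes only the constructor arguments (n_clusters, init, max_iter, and so on), not the fitted cluster centres, so the JSON file written above does not preserve the trained model itself. If the fitted model needs to be reused later, a common approach is to pickle the estimator or save cluster_centers_ separately; a minimal hedged sketch:

import pickle

with open('model.pkl', 'wb') as f_out:
    pickle.dump(model, f_out)  # persists the full fitted estimator
# or keep just the centres for later nearest-centre lookups:
# np.save('centers.npy', model.cluster_centers_)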
Example 4: KMeans
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
print "KMeans: F1 score on test: {}".format(f1_score(labels_test, pred_labels_test))
### Task 5: Tune your classifier to achieve better than .3 precision and recall
### using our testing script.
# set up a classifier and grid parameters
base_clf = KMeans()
parameters = {'random_state' : [42],
'n_clusters' : [4, 8, 16],
'max_iter' : [300, 1000, 10000],
'init' : ['k-means++', 'random'] }
# do the grid search and print results
print "KMeans Grid search ..."
from sklearn import grid_search
gs_clf = grid_search.GridSearchCV(base_clf, parameters, scoring='f1')
gs_clf.fit(features_train, labels_train)
clf = gs_clf.best_estimator_
best_parameters = clf.get_params()
print "Best score: {:0.3f} with parameters:".format(gs_clf.best_score_)
for param_name in sorted(parameters.keys()):
print("\t%s: %r" % (param_name, best_parameters[param_name]))
test_classifier(clf, my_dataset, features_list)
### Dump your classifier, dataset, and features_list so
### anyone can run/check your results.
dump_classifier_and_data(clf, my_dataset, features_list)
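A note on Example 4: the sklearn.grid_search module it imports was deprecated and later removed; on current scikit-learn releases the equivalent search lives in sklearn.model_selection. A minimal hedged adaptation of the tuning step (features_train and labels_train are assumed to come from the surrounding script, as in the original):

from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV

parameters = {'random_state': [42],
              'n_clusters': [4, 8, 16],
              'max_iter': [300, 1000, 10000],
              'init': ['k-means++', 'random']}

gs_clf = GridSearchCV(KMeans(), parameters, scoring='f1')
gs_clf.fit(features_train, labels_train)
print(gs_clf.best_estimator_.get_params())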
Example 5: CodeBook
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
#......... part of the code is omitted here .........
        # we should refactor the input validation.
        #
        # X = self._check_fit_data(X)
        # return self.fit(X)._transform(X)
        raise NotImplementedError

    def transform(self, X, y=None):
        """Transform X to a cluster-distance space.

        In the new space, each dimension is the distance to the cluster
        centers. Note that even if X is sparse, the array returned by
        `transform` will typically be dense.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            New data to transform.

        Returns
        -------
        X_new : array, shape [n_samples, k]
            X transformed in the new space.
        """
        # check_is_fitted(self, 'cook_book_')
        # X = self._check_test_data(X)
        # return self._transform(X)
        raise NotImplementedError

    def _transform(self, X):
        """Guts of the transform method; no input validation."""
        # return euclidean_distances(X, self.cook_book_)
        raise NotImplementedError

    def predict(self, X):
        """Predict the index of the closest word within the code book.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            New data to predict.

        Returns
        -------
        labels : array, shape [n_samples,]
            Index of the closest word within the code book.
        """
        return self.cluster_core.predict(X)

    def get_dictionary(self):
        """Retrieve the words forming the code book.

        Returns
        -------
        dictionary : array, shape [n_words, n_features]
            Code book elements (words of the dictionary) represented
            in the feature space.
        """
        # TODO: check that the codebook is fitted
        return self.cluster_core.cluster_centers_

    def get_BoF_descriptor(self, X):
        # norm = lambda x: x.astype(float)/np.linalg.norm(x)
        # return norm(np.bincount(self.predict(X)))
        return np.histogram(self.predict(X),
                            bins=range(self.n_words + 1),
                            density=True)

    def get_BoF_pramide_descriptor(self, X):
        """Split the image (or volume) in a pyramid manner and get
        a descriptor for each level (and part). Concatenate the output.

        TODO: build proper documentation
        """
        def split_data_by2(X):
            # TODO: rewrite this in a nice manner that uses len(X.shape)
            # TODO: this can raise an ERROR if the length of X is odd
            parts = [np.split(x, 2, axis=2)
                     for x in [np.split(x, 2, axis=1)
                               for x in np.split(X, 2, axis=0)]]
            return parts

        def get_occurrences(X):
            return np.histogram(X, bins=range(self.n_words + 1))

        def build_piramide(X, level=2):
            if level == 0:
                return get_occurrences(X)
            else:
                return [get_occurrences(X)] + [build_piramide(Xpart, level - 1)
                                               for Xpart in split_data_by2(X)]

        return build_piramide(self.predict(X))

    def get_params(self, deep=True):
        return self.cluster_core.get_params()

    def set_params(self, **params):
        self.cluster_core.set_params(**params)
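The CodeBook class in Example 5 is shown only partially (its constructor and fit logic fall in the omitted part), but the pattern it illustrates is a wrapper that delegates get_params and set_params to an inner KMeans. A hypothetical minimal version of that pattern, with the constructor and class name invented here purely for illustration, might look like:

import numpy as np
from sklearn.cluster import KMeans

class MiniCodeBook(object):
    """Toy stand-in for CodeBook: an n_words visual dictionary backed by KMeans."""

    def __init__(self, n_words=8):
        self.n_words = n_words
        self.cluster_core = KMeans(n_clusters=n_words)

    def fit(self, X):
        self.cluster_core.fit(X)
        return self

    def predict(self, X):
        return self.cluster_core.predict(X)

    def get_BoF_descriptor(self, X):
        # normalized histogram of word occurrences
        return np.histogram(self.predict(X), bins=range(self.n_words + 1), density=True)

    # get_params/set_params simply forward to the wrapped estimator,
    # mirroring the CodeBook example above.
    def get_params(self, deep=True):
        return self.cluster_core.get_params()

    def set_params(self, **params):
        self.cluster_core.set_params(**params)
        return self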
Example 6: nab_and_format_bispec
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
def nab_and_format_bispec(fn, clfdict, expected_struct, clffull, get_cats=False, need_PCA=False, full_clf=True):
    df = pd.read_csv(fn, skiprows=8, delim_whitespace=True, low_memory=False)
    # get rid of the bogus first columns
    cols = df.columns[2::]
    df.drop(df.columns[-2::], 1, inplace=True)
    df.columns = cols
    tdata = df

    # Need the stats for each bispec component
    print "\nre-normalizing data..."
    for x in tdata.columns[5::]:
        m = pickle.load(open("./data_stats/" + str(x) + "_m.p", "rb"))
        s = pickle.load(open("./data_stats/" + str(x) + "_s.p", "rb"))
        tdata[x] = tdata[x].map(norm(s, m))

    if need_PCA == True:
        # now we transform it using the previously trained PCA
        print "\nloading in PCA..."
        pca = pickle.load(open("pca.p", "rb"))
        trans_values = pca.transform(tdata[tdata.columns[5::]].values)
    else:
        trans_values = tdata[tdata.columns[5::]].values

    if get_cats == True:
        KM = KMeans(n_clusters=2)
        print "\n Separating into " + str(KM.get_params()["n_clusters"]) + " parts, and removing surface atoms."
        nonsurf = remove_surface_atoms(df)
        atom_cats = np.zeros(len(trans_values))
        atom_cats[nonsurf] = KM.fit_predict(trans_values[nonsurf])
        return df, atom_cats
        # make_output(fn,df,atom_cats)

    classdict = pickle.load(open("classdict.p", "rb"))
    print "\n making prediction..."
    if full_clf == False:
        predictions = {}
        values = {}
        for k in classdict.keys():
            # values[k] = clfdict[k].predict(trans_values)
            predictions[k] = clfdict[k].predict_proba(trans_values)
        out = np.zeros(len(trans_values))
        # need the maximum-likelihood defect (could be bulk if all are small)
        for x in range(len(trans_values)):
            poss_def = np.zeros(len(classdict))
            for k in classdict.keys():
                if k > 9:
                    p = 1
                else:
                    p = 0
                if predictions[k][x][p] >= 0.60 and classdict[k].find(expected_struct) != -1:
                    poss_def[k] = predictions[k][x][p]
            if sum(poss_def) > 0:
                out[x] = np.array(poss_def).argmax()
            else:
                out[x] = -1
    else:  # default full clf structure
        probs = clffull.predict_proba(trans_values)
        out = []
        for p in probs:
            # if the difference is less than 5%, and one option is a defect, take the defect!
            mp = np.array(p).argmax()
            """
            if max(p) - second_largest(p) <= 0.05 and (np.array(p).argmax() == 9 or np.array(p).argmax() == 10):
                mp = (np.array(p)==second_largest(p)).argmax()
            else:
                mp = np.array(p).argmax()
            """
            out.append(mp)
        predictions = probs

    return df, out, trans_values, tdata, predictions, classdict
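A closing remark on the get_params call in Example 6: the value printed there can also be read straight from the estimator attribute, since get_params simply echoes the constructor arguments. A minimal sketch:

from sklearn.cluster import KMeans

KM = KMeans(n_clusters=2)
# Both expressions report the same value; get_params() just returns the
# constructor arguments as a dict.
print(KM.get_params()["n_clusters"])  # 2
print(KM.n_clusters)                  # 2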