

Python KMeans.get_params Method Code Examples

This article collects typical usage examples of the sklearn.cluster.KMeans.get_params method in Python. If you are wondering how KMeans.get_params works or are looking for concrete examples of it, the curated code samples below should help. You can also explore further usage examples of the containing class, sklearn.cluster.KMeans.


The sections below present 6 code examples of the KMeans.get_params method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
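Before the collected examples, here is a minimal, self-contained sketch of what get_params returns. The synthetic data and parameter values below are purely illustrative and are not taken from any of the projects listed:

import numpy as np
from sklearn.cluster import KMeans

# Two well-separated 2-D blobs of illustrative data.
rng = np.random.RandomState(0)
X = np.vstack([rng.randn(20, 2), rng.randn(20, 2) + 5])

kmeans = KMeans(n_clusters=2, random_state=0).fit(X)

# get_params() returns the estimator's constructor parameters as a dict,
# e.g. {'n_clusters': 2, 'init': 'k-means++', 'max_iter': 300, ...}
params = kmeans.get_params()
print(params["n_clusters"])  # -> 2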

Example 1: range

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
for idx in range(0,max_records):
	print "processing dog."+str(idx)+".jpg\n"
	img = cv2.imread('train/train/dog.'+str(idx)+'.jpg')
	gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
	kp = sift.detect(gray,None)
	tkp, td = sift.compute(gray, kp)
	temp_points = []
	for k in tkp:
		tuples = (int(math.ceil(k.pt[0])),int(math.ceil(k.pt[1])))
		points.append(tuples)
		temp_points.append(tuples)
	dog_features[idx] = temp_points

kmeans = KMeans()
kmeans = kmeans.fit(points)
params = kmeans.get_params()
n_clusters = params["n_clusters"]

overall_feats = []
count = 1
for cats in cat_features:
	print "Record-->"+str(count)
	clusters = kmeans.predict(cat_features[cats])
	print clusters
	feats = []
	for i in range(0,n_clusters):
		feats.append(0)
	feats.append(0)
	for num in clusters:
		feats[num] = feats[num]+1
	overall_feats.append(feats)
Developer ID: ram1988, Project: dog_cat, Lines of code: 33, Source file: extract_image_features.py

Example 2: get_x_y_data

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
import numpy as np
from util import get_x_y_data
from sklearn.cluster import KMeans

TEST_DATA_ROWS = 20

# class sklearn.cluster.KMeans
# (n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=1)

x_data, y_data, zone_cnt, zone_int_dict = get_x_y_data()
# no duplicate value, so reverse this dictionary
int_zone_dict = dict(zip(zone_int_dict.values(), zone_int_dict.keys()))

kmeans = KMeans(n_clusters=zone_cnt)  # a, b, c, d, e -> 5 centers
kmeans.fit(x_data)
print kmeans.get_params()
# centers
print kmeans.cluster_centers_

# cluster label for every sample
print kmeans.labels_

# inertia: sum of squared distances of samples to their closest cluster center; smaller means tighter clusters
print kmeans.inertia_

indices = np.random.permutation(len(x_data))
x_test = x_data[indices[-TEST_DATA_ROWS:]]
x_distance = kmeans.transform(x_test)
test_result = kmeans.predict(x_test)  # test

for type, dis in zip(test_result, x_distance):
Developer ID: AloneGu, Project: ml_algo_box, Lines of code: 33, Source file: kmeans.py

Example 3: editor_input_clustering

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
def editor_input_clustering(filtered_editor_log, code_template, user_info, ankors):
    def _unicode(c):
        if u'\u4e00' <= c <= u'\u9fff':
            return False
        try:
            c.decode('ascii')
        except UnicodeDecodeError:
            return False
        except UnicodeEncodeError:
            return False
        return True
    editor_cmd_data = filtered_editor_log.map(lambda x: x.filter_editor_log(['insert', 'remove', 'paste', 'copy', 'save', 'open'])).map(lambda x: x.combine_editor_input())
    insert_data = editor_cmd_data.flatmap(lambda x: x.cmd_list).filter_by(lambda x: x['action']==u'insert').map(lambda x: x['lines'][0])
    template_filtered_data = editor_cmd_data.flatmap(lambda x: x.cmd_list).filter_by(lambda x: x['action']==u'paste').map(lambda x: x['text'])
    template_filtered_data = template_filtered_data.map(lambda x: code_template.strip_template(x))
    total_input = data_reader.SList(insert_data + template_filtered_data.flatmap(lambda x: x.split(u"\n")))
    total_input = total_input.filter_by(lambda x: len(filter(lambda y: not y in [u"\n", u"\t", u"\r", u" "] and _unicode(y), x))>5)
    print len(total_input)
    feature_set, ankor_set = _generate_feature_set(total_input,ankors.splitter)
    print len(feature_set)
    # pca = PCA(n_components=2)
    # pca.fit(feature_set)
    # plot_data = pca.transform(feature_set)

    # fig, ax = report_tools.prepare_plot()
    # ax.scatter([item[0] for item in plot_data], [item[1] for item in plot_data])
    # plt.title('Scatter plot on editor input')
    # plt.savefig('scatter_editor_input.png')

    # fig = plt.figure()
    # ax = fig.add_subplot(111, projection='3d')
    # ax.scatter([item[0] for item in plot_data], [item[1] for item in plot_data], [item[2] for item in plot_data])
    # plt.title('Scatter plot on editor input')
    # plt.savefig('3d_scatter_editor_input.png')

    # db = Birch().fit(feature_set)
    # labels = db.labels_
    model = KMeans(n_clusters=300)
    labels = model.fit_predict(feature_set)
    result  = zip(labels, total_input)
    size_list = []
    cluster_list = []
    print len(set(labels))
    for label in set(labels):
        tmp_result = filter(lambda x: x[0]==label, result)
        if len(tmp_result) > 100:
            size_list.append(len(tmp_result))
            cluster_list.append(label)
            with codecs.open("clustering_{}.txt".format(label), 'w', 'utf-8') as f_out:
                f_out.write(u"Size of cluster: {}\n".format(len(tmp_result)))
                for item in tmp_result:
                    f_out.write(u"{}\n".format(item[1]))
    fig, ax = report_tools.prepare_plot(figsize=(20, 5))
    ind = np.arange(len(size_list))
    width = 0.5
    ax.bar(ind, size_list, width)
    ax.set_xticks(ind+width)
    ax.set_xticklabels(['C{}'.format(i) for i in cluster_list], rotation='90')
    plt.title('Cluster size')
    plt.savefig('cluster_size.png')

    ankor_label = model.predict(ankor_set)
    with open('ankor_label.txt', 'w') as f_out:
        for item in zip(ankors.splitter, ankor_label):
            f_out.write("{}\n{}\n\n".format(item[0], item[1]))

    with open('model.json', 'w') as f_out:
        json.dump(model.get_params(), f_out)
Developer ID: JoeJ11, Project: log_analyzer, Lines of code: 70, Source file: analyzer.py

Example 4: KMeans

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
print "KMeans: F1 score on test: {}".format(f1_score(labels_test, pred_labels_test))
 
### Task 5: Tune your classifier to achieve better than .3 precision and recall 
### using our testing script.

# set up a classifier and grid parameters
base_clf  = KMeans()
parameters = {'random_state' : [42], 
              'n_clusters'   : [4, 8, 16],
              'max_iter'     : [300, 1000, 10000],
              'init'         : ['k-means++', 'random'] }

# do the grid search and print results
print "KMeans Grid search ..."
from sklearn import grid_search
gs_clf = grid_search.GridSearchCV(base_clf, parameters, scoring='f1')
gs_clf.fit(features_train, labels_train)
clf =  gs_clf.best_estimator_
best_parameters = clf.get_params()
print "Best score: {:0.3f} with parameters:".format(gs_clf.best_score_)
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))  

test_classifier(clf, my_dataset, features_list)

### Dump your classifier, dataset, and features_list so 
### anyone can run/check your results.

dump_classifier_and_data(clf, my_dataset, features_list)

Developer ID: aglavers, Project: identifying-fraud-from-enron-email, Lines of code: 31, Source file: poi_id.py

Example 5: CodeBook

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]

#......... part of the code is omitted here .........
        # we should refactor the input validation.
        # 
        # X = self._check_fit_data(X)
        # return self.fit(X)._transform(X)
        raise NotImplementedError

    def transform(self, X, y=None):
        """Transform X to a cluster-distance space.

        In the new space, each dimension is the distance to the cluster
        centers.  Note that even if X is sparse, the array returned by
        `transform` will typically be dense.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            New data to transform.

        Returns
        -------
        X_new : array, shape [n_samples, k]
            X transformed in the new space.
        """
        # check_is_fitted(self, 'cook_book_')

        # X = self._check_test_data(X)
        # return self._transform(X)
        raise NotImplementedError

    def _transform(self, X):
        """guts of transform method; no input validation"""
        # return euclidean_distances(X, self.cook_book_)
        raise NotImplementedError


    def predict(self, X):
        """Predicts the index value of the closest word within the code book.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            New data to predict.

        Returns
        -------
        labels : array, shape [n_samples,]
            Index of the closest word within the code book.
        """
        return self.cluster_core.predict(X)

    def get_dictionary(self):
        """Retrieves the words forming the code book

        Returns
        -------
        dictionary : array, shape [n_words, n_features]
            Code book elements (words of the dictionary) represented
            in the feature space
        """
        #TODO: check that the codebook is fitted
        return self.cluster_core.cluster_centers_

    def get_BoF_descriptor(self, X):

        # norm = lambda x: x.astype(float)/np.linalg.norm(x)
        # return norm(np.bincount(self.predict(X)))
        return np.histogram(self.predict(X),
                            bins=range(self.n_words+1),
                            density=True)

    def get_BoF_pramide_descriptor(self, X):
        """ Split the image (or volume) in a piramide manner and get
        a descriptor for each level (and part). Concatenate the output.
        TODO: build proper documentaiton

        """
        def split_data_by2(X):
            # TODO: rewrite this in a nice manner that uses len(X.shape)
            # TODO: this can rise ERROR if length of X is odd
            parts = [np.split(x, 2, axis=2) for x in [np.split(x, 2, axis=1) for x in
                     np.split(X, 2, axis=0)]]
            return parts

        def get_occurrences(X):
            return np.histogram(X, bins=range(self.n_words+1))

        def build_piramide(X, level=2):
            if level == 0:
                return get_occurrences(X)
            else:
                return [get_occurrences(X)] + [build_piramide(Xpart, level-1)
                       for Xpart in split_data_by2(X)]

        return build_piramide(self.predict(X))

    def get_params(self, deep=True):
        return self.cluster_core.get_params()

    def set_params(self, **params):
        self.cluster_core.set_params(**params)
Developer ID: glemaitre, Project: protoclass, Lines of code: 104, Source file: codebook.py

Example 6: nab_and_format_bispec

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import get_params [as alias]
def nab_and_format_bispec(fn, clfdict, expected_struct, clffull, get_cats=False, need_PCA=False, full_clf=True):
    df = pd.read_csv(fn, skiprows=8, delim_whitespace=True, low_memory=False)
    # get rid of the bogus first columns
    cols = df.columns[2::]
    df.drop(df.columns[-2::], 1, inplace=True)
    df.columns = cols

    tdata = df

    # Need the stats for each bispec component
    print "\nre-normalizing data..."
    for x in tdata.columns[5::]:
        m = pickle.load(open("./data_stats/" + str(x) + "_m.p", "rb"))
        s = pickle.load(open("./data_stats/" + str(x) + "_s.p", "rb"))
        tdata[x] = tdata[x].map(norm(s, m))
    if need_PCA == True:
        # now we transform it using the previously trained PCA
        print "\nloading in PCA..."
        pca = pickle.load(open("pca.p", "rb"))
        trans_values = pca.transform(tdata[tdata.columns[5::]].values)
    else:
        trans_values = tdata[tdata.columns[5::]].values

    if get_cats == True:
        KM = KMeans(n_clusters=2)
        print "\n Separating into " + str(KM.get_params()["n_clusters"]) + " parts, and removing surface atoms."
        nonsurf = remove_surface_atoms(df)
        atom_cats = np.zeros(len(trans_values))
        atom_cats[nonsurf] = KM.fit_predict(trans_values[nonsurf])
        return df, atom_cats
        # make_output(fn,df,atom_cats)

    classdict = pickle.load(open("classdict.p", "rb"))

    print "\n making prediction..."
    if full_clf == False:
        predictions = {}
        values = {}
        for k in classdict.keys():
            # values[k]=clfdict[k].predict(trans_values)
            predictions[k] = clfdict[k].predict_proba(trans_values)

        out = np.zeros(len(trans_values))
        # need the maximum likelihood defect (could be bulk if all are small)
        for x in range(len(trans_values)):
            poss_def = np.zeros(len(classdict))
            for k in classdict.keys():
                if k > 9:
                    p = 1
                else:
                    p = 0
                if predictions[k][x][p] >= 0.60 and classdict[k].find(expected_struct) != -1:
                    poss_def[k] = predictions[k][x][p]
            if sum(poss_def) > 0:
                out[x] = np.array(poss_def).argmax()
            else:
                out[x] = -1
    else:  # default full clf structure
        probs = clffull.predict_proba(trans_values)
        out = []
        for p in probs:
            # if the difference is less than 5%, and one option is a defect, take the defect!
            mp = np.array(p).argmax()
            """			
			if max(p) - second_largest(p) <= 0.05 and (np.array(p).argmax() == 9 or np.array(p).argmax() == 10): 
				mp = (np.array(p)==second_largest(p)).argmax()
			else:
				mp = np.array(p).argmax()
			"""
            out.append(mp)

        predictions = probs
    return df, out, trans_values, tdata, predictions, classdict
Developer ID: Sandy4321, Project: BiDef, Lines of code: 75, Source file: identify.py


Note: The sklearn.cluster.KMeans.get_params examples in this article were compiled by 纯净天空 from open source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open source projects contributed by various developers, and copyright of the source code remains with the original authors. Please consult the corresponding project's license before distributing or using the code; do not reproduce without permission.