当前位置: 首页>>代码示例>>Python>>正文


Python SpectralCoclustering.fit方法代码示例

本文整理汇总了 Python 中 sklearn.cluster.bicluster.SpectralCoclustering.fit 方法的典型用法代码示例。如果您正苦于以下问题:Python SpectralCoclustering.fit 方法的具体用法是什么?SpectralCoclustering.fit 怎么用?有哪些 SpectralCoclustering.fit 的使用示例?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.cluster.bicluster.SpectralCoclustering 的用法示例。


下文共展示了 SpectralCoclustering.fit 方法的 15 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。

示例1: find_disjoint_biclusters

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
    def find_disjoint_biclusters(self, biclusters_number=50):
        """Co-cluster ``self.matrix`` and return the pruned, non-empty biclusters.

        The matrix is fitted with ``SpectralCoclustering`` using
        ``biclusters_number`` clusters. For every resulting cluster, columns
        and then rows whose density (as reported by
        ``_calculate_column_density`` / ``_calculate_row_density``) is below a
        quarter of the cluster's box density are discarded. Clusters that end
        up without rows or without columns are skipped.

        :param biclusters_number: number of co-clusters requested from the model.
        :return: a set of ``Bicluster`` objects built from the surviving
            rows, columns and the recomputed box density.
        """
        matrix = np.asarray_chkfinite(self.matrix)
        # Exact zeros are replaced by a tiny positive value before fitting.
        # NOTE(review): if self.matrix is already an ndarray this assignment
        # may write into self.matrix itself - confirm that is intended.
        matrix[matrix == 0] = 0.000001
        model = SpectralCoclustering(n_clusters=biclusters_number, random_state=0)
        model.fit(matrix)

        found = set()
        for cluster_id in range(biclusters_number):
            row_idx, col_idx = model.get_indices(cluster_id)
            kept_rows, kept_cols = set(row_idx), set(col_idx)
            if not kept_rows or not kept_cols:
                continue

            box_density = self._calculate_box_cluster_density(kept_rows, kept_cols)

            # Columns are pruned first, against the full row set.
            sparse_cols = {
                col for col in kept_cols
                if self._calculate_column_density(col, kept_rows) < box_density / 4
            }
            kept_cols -= sparse_cols
            if not kept_cols:
                continue

            # Rows are pruned second, against the already reduced column set.
            sparse_rows = {
                row for row in kept_rows
                if self._calculate_row_density(row, kept_cols) < box_density / 4
            }
            kept_rows -= sparse_rows
            if not kept_rows:
                continue

            box_density = self._calculate_box_cluster_density(kept_rows, kept_cols)
            found.add(Bicluster(kept_rows, kept_cols, box_density))

        return found
开发者ID:luntos,项目名称:bianalyzer,代码行数:36,代码来源:spectral_coclustering.py

示例2: main

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def _ip_to_int(dotted):
    """Pack the first four octets of a dotted IPv4 string into one integer."""
    octets = tuple(int(part) for part in dotted.split('.')[:4])
    return int("%02x%02x%02x%02x" % octets, 16)


def main():
    """Co-cluster the records of ``10k.txt`` and print each cluster's row numbers.

    Every non-blank, comma-separated line becomes one feature row made of:
    the hour of the timestamp in field 1, the two IPv4 addresses in fields 2
    and 3 packed into integers, and the raw fields 4-5 and 7-9. The rows are
    fitted with a 5-cluster ``SpectralCoclustering`` model; the row membership
    matrix is printed, followed by the member row indices of every cluster.
    """
    rows = []
    # The context manager closes the file even if a line fails to parse.
    with open('10k.txt', 'r') as origin:
        for line in origin:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields = line.split(',')
            hour = datetime.fromtimestamp(int(fields[1][0:11])).hour
            rows.append([hour, _ip_to_int(fields[2]), _ip_to_int(fields[3])]
                        + fields[4:6] + fields[7:10])

    # NOTE(review): float32 cannot represent every 32-bit packed address
    # exactly; the dtype is kept so clustering results stay comparable.
    data = np.array(rows, dtype='float32')

    model = SpectralCoclustering(n_clusters=5)
    model.fit(data)

    print(model.rows_)

    # Iterate over the fitted membership masks instead of assuming exactly
    # 10000 input rows, which raised IndexError on shorter files.
    for i, row_mask in enumerate(model.rows_):
        print("Cluster" + str(i) + ':')
        print(' '.join(str(j) for j in np.flatnonzero(row_mask)))

示例3: print_similarity_matrix

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def print_similarity_matrix(sphns, model, model2=None):
    """Print and return the pairwise similarity matrix of ``sphns``.

    For every ordered pair of entries the value is
    ``model.similarity(a, b)``, minus ``model2.similarity(a, b)`` when
    ``model2`` is given. The table is printed with a header row and one
    labelled row per entry (same layout as before, without trailing spaces).

    When the module-level flag ``BICLUSTER`` is true, the matrix is
    co-clustered into the module-level ``n_clusters`` clusters and both the
    returned labels and the matrix rows/columns are reordered by cluster.

    :param sphns: sequence of string labels.
    :param model: object exposing ``similarity(a, b)``.
    :param model2: optional second model whose similarity is subtracted.
    :return: ``(phn_order, m)`` - the row labels in output order and the
        float32 similarity matrix.
    """
    print("       " + "   ".join(sphns))
    m = np.empty((len(sphns), len(sphns)), dtype=np.float32)
    for i, phn1 in enumerate(sphns):
        sims = []
        for phn2 in sphns:
            sim = model.similarity(phn1, phn2)
            if model2 is not None:
                sim -= model2.similarity(phn1, phn2)
            sims.append(sim)
        m[i] = sims
        print(phn1.ljust(4) + ": " + " ".join("%0.2f" % s for s in sims))
    phn_order = list(sphns)

    if BICLUSTER:
        # A separate name keeps the `model` argument intact.
        coclustering = SpectralCoclustering(n_clusters=n_clusters,
                                            random_state=0)
        coclustering.fit(m)
        indices = [coclustering.get_indices(i) for i in range(n_clusters)]
        print("INDICES: " + str(indices))
        # Labels grouped by cluster, ascending row index inside each cluster.
        phn_order = [phn_order[row] for rows, _ in indices for row in rows]
        # A stable sort keeps rows of one cluster in ascending index order,
        # so the matrix rows line up with phn_order above.
        m = m[np.argsort(coclustering.row_labels_, kind='mergesort')]
        m = m[:, np.argsort(coclustering.column_labels_, kind='mergesort')]

    return phn_order, m

示例4: test_spectral_coclustering

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def test_spectral_coclustering():
    """Check SpectralCoclustering on a small synthetic problem.

    A 30x30 matrix with 3 planted biclusters is shifted to be non-negative
    and thresholded, then fitted both as a dense array and as a CSR matrix
    for every parameter combination in the grid. Each fit must assign every
    row and column to exactly one cluster and recover the planted biclusters.
    """
    seed = 0
    n_clusters = 3
    shape = (30, 30)
    grid = ParameterGrid({
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [10],
        'n_jobs': [1],
    })

    data, true_rows, true_cols = make_biclusters(shape, n_clusters, noise=0.5,
                                                 random_state=seed)
    data -= data.min()  # needs to be nonnegative before making it sparse
    data = np.where(data < 1, 0, data)  # threshold some values

    for matrix in (data, csr_matrix(data)):
        for params in grid:
            model = SpectralCoclustering(n_clusters=n_clusters,
                                         random_state=seed,
                                         **params)
            model.fit(matrix)

            assert_equal(model.rows_.shape, (n_clusters, shape[0]))
            # Every row / column belongs to exactly one cluster.
            assert_array_equal(model.rows_.sum(axis=0), np.ones(shape[0]))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(shape[1]))
            score = consensus_score(model.biclusters_, (true_rows, true_cols))
            assert_equal(score, 1)

示例5: biclustering

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(input, num_clusters):
	"""Co-cluster ``input`` and map each cluster id to its row indices.

	The result is also stored in the module-level ``agent1_dict``.

	:param input: 2-D array-like of numbers (a 1-D sequence is treated as a
		single row).
	:param num_clusters: number of co-clusters to fit.
	:return: dict ``{cluster_id: [row_index, ...]}``.
	"""
	global agent1_dict
	# A plain 2-D ndarray replaces np.matrix, which NumPy no longer
	# recommends and recent scikit-learn releases reject as input.
	data = np.array(input, ndmin=2)
	model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
	model.fit(data)
	# get_indices(c) returns (row indices, column indices); keep the rows.
	agent1_dict = {
		c: model.get_indices(c)[0].tolist() for c in range(num_clusters)
	}
	return agent1_dict

示例6: main

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def main():
    """Co-cluster ``kddcup.txt`` and plot clusters by their majority label.

    Each non-blank, comma-separated line contributes the feature fields
    ``[0:1] + [4:19] + [21:-1]`` and its last field as label. The features
    are fitted with a 5-cluster ``SpectralCoclustering`` model. For every
    row cluster the fraction of records labelled ``normal.`` is collected
    and printed, and feature columns 27 and 30 of the cluster's members are
    plotted in red when normal records are the majority, otherwise in green.
    """
    features = []
    labels = []
    # The context manager closes the file even if parsing fails.
    with open('kddcup.txt', 'r') as origin:
        for line in origin:
            line = line.rstrip('\r\n')
            if not line:
                continue  # tolerate blank / trailing empty lines
            fields = line.split(',')
            features.append(fields[0:1] + fields[4:19] + fields[21:-1])
            # Stripped above, so the label compares equal to 'normal.'
            # whether or not the line ended with a newline.
            labels.append(fields[-1])

    data = np.array(features, dtype='float32')

    model = SpectralCoclustering(n_clusters=5)
    model.fit(data)

    is_normal = np.array([lab == 'normal.' for lab in labels], dtype=bool)

    evaluation = []
    draw_n_x, draw_n_y = [], []
    draw_a_x, draw_a_y = [], []

    for cluster in model.rows_:
        members = np.flatnonzero(cluster)
        normal = float(np.count_nonzero(is_normal[members]))
        attack = float(len(members)) - normal
        # Take the two features of each member record; the previous code
        # appended the whole rows data[27] / data[30] for every member.
        graph_x = data[members, 27].tolist()
        graph_y = data[members, 30].tolist()
        # An empty cluster has no defined ratio; avoid ZeroDivisionError.
        if len(members):
            evaluation.append(normal / (normal + attack))
        else:
            evaluation.append(float('nan'))

        if normal > attack:
            draw_n_x += graph_x
            draw_n_y += graph_y
        else:
            draw_a_x += graph_x
            draw_a_y += graph_y

    pl.plot(draw_n_x, draw_n_y, 'ro')
    pl.plot(draw_a_x, draw_a_y, 'go')

    print(evaluation)
    pl.show()

示例7: biclustering

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(data, num_clusters):
	"""Co-cluster ``data`` and map each cluster id to its row indices.

	:param data: 2-D array-like of numbers (a 1-D sequence is treated as a
		single row).
	:param num_clusters: number of co-clusters to fit.
	:return: dict ``{cluster_id: [row_index, ...]}``.
	"""
	# A plain 2-D ndarray replaces np.asmatrix: np.matrix is no longer
	# recommended by NumPy and recent scikit-learn releases reject it.
	data = np.atleast_2d(np.asarray(data))
	model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
	model.fit(data)
	# get_indices(c) returns (row indices, column indices); keep the rows.
	return {c: model.get_indices(c)[0].tolist() for c in range(num_clusters)}

示例8: biclustering

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(input, num_clusters):
	"""Co-cluster ``input``, plot the reordered matrix and return row clusters.

	The matrix is fitted with ``SpectralCoclustering``; its rows and columns
	are then sorted by cluster label and passed to ``plot``. The mapping of
	cluster id to row indices is stored in the module-level ``agent1_dict``
	and returned.

	:param input: 2-D array-like of numbers (a 1-D sequence is treated as a
		single row).
	:param num_clusters: number of co-clusters to fit.
	:return: dict ``{cluster_id: [row_index, ...]}``.
	"""
	global agent1_dict
	# A plain 2-D ndarray replaces np.matrix, which NumPy no longer
	# recommends and recent scikit-learn releases reject as input.
	data = np.array(input, ndmin=2)
	model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
	model.fit(data)
	# get_indices(c) returns (row indices, column indices); keep the rows.
	agent1_dict = {
		c: model.get_indices(c)[0].tolist() for c in range(num_clusters)
	}
	# Group rows, then columns, of the same cluster next to each other.
	fit_data = data[np.argsort(model.row_labels_)]
	fit_data = fit_data[:, np.argsort(model.column_labels_)]
	plot(fit_data)
	return agent1_dict

示例9: cluster_data

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def cluster_data(flavors, whisky):
    """Co-cluster the row correlations of ``flavors`` and plot the result.

    The correlation matrix between the rows of ``flavors`` is fitted with a
    6-cluster ``SpectralCoclustering`` model. The row labels are written to
    a new ``'Group'`` column of ``whisky`` (the caller's frame is modified),
    a copy of ``whisky`` is reordered by group, and the correlation between
    its rows over columns 2..13 is passed to ``plot_correlations``.

    :param flavors: DataFrame with one row per item to cluster.
    :param whisky: DataFrame aligned with ``flavors``; gains a ``'Group'``
        column.
    :return: None.
    """
    corr_whisky = flavors.transpose().corr()
    model = SpectralCoclustering(n_clusters=6, random_state=0)
    model.fit(corr_whisky)
    whisky['Group'] = pd.Series(model.row_labels_, index=whisky.index)
    # argsort yields positions, so select positionally with .iloc; the
    # former .ix indexer was removed from pandas.
    whisky = whisky.iloc[np.argsort(model.row_labels_)]
    whisky = whisky.reset_index(drop=True)
    correlation = whisky.iloc[:, 2:14].transpose().corr()
    correlation = np.array(correlation)
    plot_correlations(correlation)

示例10: plot_coclusters_raw_data

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def plot_coclusters_raw_data(time_ms, t=False):
    """Co-cluster the z-scored slice of ``matrix`` and save it as an SVG.

    The module-level ``matrix`` is sliced with ``slice_matrix`` for
    ``time_ms``, optionally transposed, z-scored and fitted with a default
    ``SpectralCoclustering`` model. Rows and columns are reordered by
    cluster label and the result is drawn with ``matshow`` and written to
    ``z_score_raw_coclustering_all_ts_<time_ms>_T_<t>.svg``.

    :param time_ms: time window passed to ``slice_matrix`` and used in the
        title and file name.
    :param t: when true, cluster the transpose of the sliced matrix.
    :return: None.
    """
    channels_data = slice_matrix(matrix, time_ms)
    if t:
        # Both branches used to be identical, so `t` had no effect on the
        # data; take the transpose as the original comment described.
        channels_data = np.asarray(channels_data).T
    print("%d %d" % (len(channels_data), len(channels_data[1])))
    z_score = stats.zscore(channels_data)
    spectral_model = SpectralCoclustering()
    spectral_model.fit(z_score)
    fit_data = z_score[np.argsort(spectral_model.row_labels_)]
    fit_data = fit_data[:, np.argsort(spectral_model.column_labels_)]
    plt.matshow(fit_data, cmap=plt.cm.Blues)
    # matshow opens a new figure, so the title must be set afterwards to
    # land on the figure that is actually saved.
    plt.title('Z Score Biclustering Over %i ms' % time_ms)
    plt.savefig('z_score_raw_coclustering_all_ts_%i_T_%s.svg' % (time_ms, str(t)))

示例11: cocluster

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
 def cocluster(self, mx, blockdiag=False):
     """Cluster ``mx`` and return it with rows and columns grouped by label.

     ``SpectralCoclustering`` is used when ``blockdiag`` is true, otherwise
     ``SpectralBiclustering`` with a (4, 3) checkerboard. ``self.prev`` is
     reordered with the row permutation and ``self.case`` with the column
     permutation, so both stay aligned with the returned matrix.

     :param mx: 2-D array to cluster.
     :param blockdiag: choose the block-diagonal model instead of the
         checkerboard one.
     :return: ``mx`` with rows and columns sorted by cluster label.
     """
     logging.info('Co-clustering Tade..')
     if not blockdiag:
         logging.info('checkerboard')
         clusser = SpectralBiclustering(n_jobs=-1, n_clusters=(4,3))
     else:
         logging.info('blockdiag')
         clusser = SpectralCoclustering(n_jobs=-1)
     clusser.fit(mx)

     logging.info('Argsorting mx rows..')
     row_order = np.argsort(clusser.row_labels_)
     mx = mx[row_order]
     self.prev = self.prev[row_order]

     logging.info('Argsorting mx cases..')
     col_order = np.argsort(clusser.column_labels_)
     mx = mx[:, col_order]
     self.case = self.case[col_order]
     return mx

示例12: main

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def main():
    """Co-cluster every ``*.csv`` file in ``DATA_DIR`` and save a PDF heat map.

    Each file is loaded with ``get_data``, fitted with a 2-cluster
    ``SpectralCoclustering`` model, reordered by row and column label and
    drawn with ``matshow``. The figure is titled with the file name and
    written to ``<file name without extension>.pdf`` in the working
    directory.
    """
    csv_names = [name for name in os.listdir(DATA_DIR)
                 if fnmatch.fnmatch(name, '*.csv')]

    for name in csv_names:
        # os.path.join works whether or not DATA_DIR ends with a separator.
        path = os.path.join(DATA_DIR, name)
        print('processing', path, '...')
        table = get_data(path)
        cl = SpectralCoclustering(n_clusters=2, random_state=0)
        cl.fit(table)

        # using http://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html
        fit_data = table[np.argsort(cl.row_labels_)]
        fit_data = fit_data[:, np.argsort(cl.column_labels_)]

        plt.matshow(fit_data, cmap=plt.cm.Reds)
        plt.title(name)
        plt.savefig(os.path.splitext(name)[0] + '.pdf')
        # matshow opens a new figure per file; close it so figures do not
        # accumulate in memory over a large directory.
        plt.close()

示例13: main

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def main(model):
    """Render the thresholded transition matrix of a stored model as PDFs.

    Reads ``from_``, ``Psi_sz`` and ``count_z`` from the HDF store at
    ``model``, builds the column-normalised matrix ``ZtZ`` from them and
    binarises it (1 where an entry is at least ``1 / len(ZtZ)``, else 0).
    The binary matrix is rendered to ``tmat.pdf``; it is then co-clustered
    into 3 clusters, reordered by cluster label and rendered to
    ``tmat-cluster.pdf``.

    :param model: path of the HDF5 model file.
    :return: None.
    """
    store = pd.HDFStore(model)
    try:
        # Only the entries that are actually used are read; the store is
        # closed even when a key is missing.
        from_ = store['from_'][0][0]
        Psi_oz = store['Psi_sz'].values
        count_z = store['count_z'].values[:, 0]
    finally:
        store.close()
    assert from_ == 0

    Psi_oz = Psi_oz / Psi_oz.sum(axis=0)
    Psi_zo = (Psi_oz * count_z).T
    Psi_zo = Psi_zo / Psi_zo.sum(axis=0)

    ZtZ = Psi_zo.dot(Psi_oz)
    ZtZ = ZtZ / ZtZ.sum(axis=0)
    # Explicit binarisation; previously done by mutating ZtZ through an alias.
    L = (ZtZ >= 1.0 / len(ZtZ)).astype(ZtZ.dtype)

    colormap = toyplot.color.brewer.map("Purples", domain_min=0, domain_max=1, reverse=True)
    print(colormap)
    canvas = toyplot.matrix((L.T, colormap), label="P[z' | z]",
                            colorshow=False, tlabel="To z'", llabel="From")[0]
    toyplot.pdf.render(canvas, 'tmat.pdf')

    # A separate name keeps the `model` path argument intact.
    coclustering = SpectralCoclustering(n_clusters=3)
    coclustering.fit(L)
    fit_data = L[np.argsort(coclustering.row_labels_)]
    fit_data = fit_data[:, np.argsort(coclustering.column_labels_)]
    canvas = toyplot.matrix((fit_data, colormap), label="P[z' | z']",
                            colorshow=False)[0]
    toyplot.pdf.render(canvas, 'tmat-cluster.pdf')

示例14: biclustering

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(input_list, num_clusters):
	"""Co-cluster ``input_list``, plot the reordered matrix and return row clusters.

	The matrix is fitted with ``SpectralCoclustering`` (see
	http://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html);
	its rows and columns are then sorted by cluster label and passed to
	``plot``. The mapping of cluster id to row indices is stored in the
	module-level ``agent1_dict`` and returned.

	:param input_list: 2-D array-like of numbers (a 1-D sequence is treated
		as a single row).
	:param num_clusters: number of co-clusters to fit.
	:return: dict ``{cluster_id: [row_index, ...]}``.
	"""
	global agent1_dict
	# A plain 2-D ndarray replaces np.matrix, which NumPy no longer
	# recommends and recent scikit-learn releases reject as input.
	data = np.array(input_list, ndmin=2)

	model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
	model.fit(data)

	# get_indices(c) returns (row indices, column indices); keep the rows.
	agent1_dict = {
		c: model.get_indices(c)[0].tolist() for c in range(num_clusters)
	}
	# Group rows, then columns, of the same cluster next to each other.
	fit_data = data[np.argsort(model.row_labels_)]
	fit_data = fit_data[:, np.argsort(model.column_labels_)]
	plot(fit_data)
	return agent1_dict

示例15: TfidfVectorizer

# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
# Collect the merged abstract text of every paper that has an abstract.
listOfAbstracts = []
for paper in papers:
    article = paper['MedlineCitation']['Article']
    if 'Abstract' not in article.keys():
        continue
    listOfAbstracts.append(mergeAbstract(article['Abstract']['AbstractText']))

# Create TF-IDF matrix
# NOTE(review): an integer max_df is an absolute document count, so only
# terms found in at most one abstract are kept - confirm 1.0 was not meant.
vect = TfidfVectorizer(max_df=1)
tfidf = vect.fit_transform(listOfAbstracts)

# Non-negative Matrix Factorization
num_topics = 2
num_top_words = 5
nmf = decomposition.NMF(n_components=num_topics, random_state=1)
doctopic = nmf.fit_transform(tfidf)
vocab = np.array(vect.get_feature_names())

# For each topic keep the words with the largest component weights.
topic_words = []
for topic in nmf.components_:
    word_idx = np.argsort(topic)[::-1][:num_top_words]
    topic_words.append(list(vocab[word_idx]))

# Coclustering
cocluster = SpectralCoclustering(n_clusters=5, svd_method='arpack',
                                 random_state=0)
cocluster.fit(tfidf)
y_cocluster = cocluster.row_labels_     # cluster label per abstract (row)
x_cocluster = cocluster.column_labels_  # cluster label per term (column)

# print(np.array(vect.get_feature_names())[x_cocluster == 4])
开发者ID:valentina-s,项目名称:PubMedMining,代码行数:32,代码来源:pubmedplay.py


注:本文中的sklearn.cluster.bicluster.SpectralCoclustering.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。