本文整理汇总了Python中sklearn.cluster.bicluster.SpectralCoclustering.fit方法的典型用法代码示例。如果您正苦于以下问题：Python SpectralCoclustering.fit方法的具体用法？Python SpectralCoclustering.fit怎么用？Python SpectralCoclustering.fit使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.bicluster.SpectralCoclustering的用法示例。
在下文中一共展示了SpectralCoclustering.fit方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: find_disjoint_biclusters
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def find_disjoint_biclusters(self, biclusters_number=50):
    """Co-cluster ``self.matrix`` and return the pruned, non-empty biclusters.

    The matrix is split into ``biclusters_number`` co-clusters. For each
    non-empty co-cluster, columns whose density is below a quarter of the
    cluster's box density are dropped, then rows are pruned the same way
    against the remaining columns. Clusters that end up without rows or
    columns are discarded.

    :param biclusters_number: number of co-clusters requested from
        ``SpectralCoclustering``.
    :return: a set of ``Bicluster`` objects built from the surviving row
        set, column set and the recomputed box density.
    """
    matrix = np.asarray_chkfinite(self.matrix)
    # Exact zeros are replaced by a tiny positive value before fitting.
    # NOTE(review): if self.matrix is already an ndarray this assignment
    # presumably writes into self.matrix itself -- confirm that is intended.
    matrix[matrix == 0] = 0.000001
    model = SpectralCoclustering(n_clusters=biclusters_number, random_state=0)
    model.fit(matrix)

    found = set()
    for index in range(biclusters_number):
        rows, columns = model.get_indices(index)
        row_set, columns_set = set(rows), set(columns)
        if not row_set or not columns_set:
            continue

        density = self._calculate_box_cluster_density(row_set, columns_set)

        # Drop columns that are much sparser than the cluster as a whole.
        sparse_columns = {
            column for column in columns_set
            if self._calculate_column_density(column, row_set) < density / 4
        }
        columns_set -= sparse_columns
        if not columns_set:
            continue

        # Rows are judged against the already pruned column set.
        sparse_rows = {
            row for row in row_set
            if self._calculate_row_density(row, columns_set) < density / 4
        }
        row_set -= sparse_rows
        if not row_set:
            continue

        density = self._calculate_box_cluster_density(row_set, columns_set)
        found.add(Bicluster(row_set, columns_set, density))
    return found
示例2: main
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def main():
    """Co-cluster the connection records in ``10k.txt`` and print the row clusters.

    Each non-blank line is a comma-separated record. The feature vector is
    the hour of the timestamp in field 1, the two IPv4 addresses in fields
    2 and 3 packed into integers, and fields 4-5 and 7-9 as given. The
    records are co-clustered into five groups; the boolean row-membership
    matrix is printed, followed by the member row numbers of every cluster.
    """
    n_clusters = 5
    samples = []
    # The context manager guarantees the file is closed even if a record
    # fails to parse.
    with open('10k.txt', 'r') as origin:
        for line in origin:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            fields = line.split(',')
            hour = datetime.fromtimestamp(int(fields[1][0:11])).hour
            samples.append(
                [hour, _ipv4_to_int(fields[2]), _ipv4_to_int(fields[3])]
                + fields[4:6] + fields[7:10]
            )
    data = np.array(samples, dtype='float32')

    model = SpectralCoclustering(n_clusters=n_clusters)
    model.fit(data)
    print(model.rows_)
    for i in range(n_clusters):
        print("Cluster" + str(i) + ':')
        # Walk the actual membership row instead of assuming 10000 records.
        members = [str(j) for j, flag in enumerate(model.rows_[i]) if flag]
        print(' '.join(members))


def _ipv4_to_int(address):
    """Pack a dotted-quad IPv4 string into one integer (first octet most significant)."""
    first, second, third, fourth = (int(part) for part in address.split('.')[:4])
    return (first << 24) | (second << 16) | (third << 8) | fourth
示例3: print_similarity_matrix
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def print_similarity_matrix(sphns, model, model2=None):
    """Print and return the pairwise similarity matrix of ``sphns``.

    :param sphns: sequence of phone labels (strings).
    :param model: object exposing ``similarity(a, b)``.
    :param model2: optional second model; when given, its similarity is
        subtracted from ``model``'s, so the matrix holds the difference.
    :return: ``(phn_order, m)`` where ``m`` is the float32 matrix and
        ``phn_order`` lists the labels in the row order of ``m``. When the
        module-level ``BICLUSTER`` flag is set, rows and columns are
        regrouped by co-cluster (``n_clusters`` is also a module-level name).
    """
    size = len(sphns)
    # Header row with the phone labels.
    print(" ".join([" "] + ["%s  " % phn for phn in sphns]))

    m = np.empty((size, size), dtype=np.float32)
    for i, phn1 in enumerate(sphns):
        cells = []
        for j, phn2 in enumerate(sphns):
            sim = model.similarity(phn1, phn2)
            if model2 is not None:
                sim -= model2.similarity(phn1, phn2)
            cells.append("%0.2f" % sim)
            m[i, j] = sim
        print(" ".join([phn1.ljust(4) + ":"] + cells))

    phn_order = list(sphns)
    if BICLUSTER:
        coclusterer = SpectralCoclustering(n_clusters=n_clusters,
                                           random_state=0)
        coclusterer.fit(m)
        indices = [coclusterer.get_indices(i) for i in range(n_clusters)]
        print("INDICES: %s" % (indices,))
        # Derive the label order and the matrix permutation from the same
        # per-cluster index lists, so the returned labels always describe
        # the returned rows.
        row_order = [r for rows, _cols in indices for r in rows]
        col_order = [c for _rows, cols in indices for c in cols]
        phn_order = [phn_order[r] for r in row_order]
        m = m[row_order][:, col_order]
    return phn_order, m
示例4: test_spectral_coclustering
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def test_spectral_coclustering():
    """Spectral co-clustering must recover three planted biclusters exactly."""
    seed = 0
    n_clusters = 3
    side = 30
    grid = {'svd_method': ['randomized', 'arpack'],
            'n_svd_vecs': [None, 20],
            'mini_batch': [False, True],
            'init': ['k-means++'],
            'n_init': [10],
            'n_jobs': [1]}

    S, rows, cols = make_biclusters((side, side), n_clusters, noise=0.5,
                                    random_state=seed)
    # Shift to non-negative values so the data can be made sparse, then
    # zero out the small entries.
    S -= S.min()
    S = np.where(S < 1, 0, S)

    def check_fit(matrix, params):
        # Fit one configuration and verify shape, partition and score.
        model = SpectralCoclustering(n_clusters=n_clusters,
                                     random_state=seed,
                                     **params)
        model.fit(matrix)
        assert_equal(model.rows_.shape, (n_clusters, side))
        # Every row and every column belongs to exactly one bicluster.
        assert_array_equal(model.rows_.sum(axis=0), np.ones(side))
        assert_array_equal(model.columns_.sum(axis=0), np.ones(side))
        assert_equal(consensus_score(model.biclusters_, (rows, cols)), 1)

    # Exercise both the dense and the CSR representation of the same data.
    for matrix in (S, csr_matrix(S)):
        for params in ParameterGrid(grid):
            check_fit(matrix, params)
示例5: biclustering
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(input,num_clusters):
    """Co-cluster ``input`` and map each cluster id to its row indices.

    :param input: 2-D array-like of numeric values.
    :param num_clusters: number of co-clusters to fit.
    :return: dict ``{cluster_id: [row indices]}``; the same dict is also
        stored in the module-level ``agent1_dict``.
    """
    global agent1_dict
    # Fit on a plain ndarray: recent scikit-learn input validation rejects
    # np.matrix and asks for np.asarray instead.
    data = np.asarray(input)
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # get_indices returns (row indices, column indices); only rows are kept.
    agent1_dict = {
        c: model.get_indices(c)[0].tolist() for c in range(num_clusters)
    }
    return agent1_dict
示例6: main
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def main():
    """Co-cluster the records in ``kddcup.txt`` and plot the clusters.

    Each non-blank line is a comma-separated record whose last field is the
    class label. Fields 0, 4-18 and 21 up to (not including) the label form
    the feature vector. The records are co-clustered into five row clusters.
    For every cluster the fraction of ``normal.`` records is collected and
    printed. The members are plotted on feature columns 27 and 30, in red
    when the cluster is mostly normal and in green otherwise.
    """
    n_clusters = 5
    x_feature, y_feature = 27, 30  # feature columns used as plot axes

    samples = []
    labels = []
    # The context manager closes the file even if a record fails to parse.
    with open('kddcup.txt', 'r') as origin:
        for line in origin:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            fields = line.split(',')
            samples.append(fields[0:1] + fields[4:19] + fields[21:-1])
            # Strip the line ending so the final line compares equal as well.
            labels.append(fields[-1].strip())
    data = np.array(samples, dtype='float32')

    model = SpectralCoclustering(n_clusters=n_clusters)
    model.fit(data)

    evaluation = []
    draw_n_x, draw_n_y = [], []
    draw_a_x, draw_a_y = [], []
    for cluster in model.rows_:
        members = [idx for idx, flag in enumerate(cluster) if flag]
        normal = float(sum(1 for idx in members if labels[idx] == 'normal.'))
        attack = len(members) - normal
        # Plot each member's own feature values (indexing data[27] / data[30]
        # would append two whole records over and over instead).
        graph_x = [data[idx][x_feature] for idx in members]
        graph_y = [data[idx][y_feature] for idx in members]
        if members:
            evaluation.append(normal / (normal + attack))
        else:
            # An empty cluster has no defined normal ratio.
            evaluation.append(float('nan'))
        if normal > attack:
            draw_n_x += graph_x
            draw_n_y += graph_y
        else:
            draw_a_x += graph_x
            draw_a_y += graph_y

    pl.plot(draw_n_x, draw_n_y, 'ro')
    pl.plot(draw_a_x, draw_a_y, 'go')
    print(evaluation)
    pl.show()
示例7: biclustering
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(data,num_clusters):
    """Co-cluster ``data`` and map each cluster id to its row indices.

    :param data: 2-D array-like of numeric values.
    :param num_clusters: number of co-clusters to fit.
    :return: dict ``{cluster_id: [row indices]}``.
    """
    # Fit on a plain ndarray: recent scikit-learn input validation rejects
    # np.matrix and asks for np.asarray instead.
    data = np.asarray(data)
    # SpectralBiclustering(n_clusters=num_clusters) was tried here as an
    # alternative model.
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # get_indices returns (row indices, column indices); only rows are kept.
    clusters = {
        c: model.get_indices(c)[0].tolist() for c in range(num_clusters)
    }
    return clusters
示例8: biclustering
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(input,num_clusters):
    """Co-cluster ``input``, plot the reordered data and return the row clusters.

    :param input: 2-D array-like of numeric values.
    :param num_clusters: number of co-clusters to fit.
    :return: dict ``{cluster_id: [row indices]}``; the same dict is also
        stored in the module-level ``agent1_dict``.
    """
    global agent1_dict
    # Fit on a plain ndarray: recent scikit-learn input validation rejects
    # np.matrix and asks for np.asarray instead.
    data = np.asarray(input)
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # get_indices returns (row indices, column indices); only rows are kept.
    agent1_dict = {
        c: model.get_indices(c)[0].tolist() for c in range(num_clusters)
    }
    # Group rows and columns of the same co-cluster next to each other
    # before handing the matrix to the plotting helper.
    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plot(fit_data)
    return agent1_dict
示例9: cluster_data
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def cluster_data(flavors, whisky):
    """Co-cluster whiskies by flavor correlation and plot the regrouped correlations.

    :param flavors: DataFrame whose rows are whiskies and whose columns are
        flavor scores.
    :param whisky: DataFrame with one row per whisky. A ``Group`` column
        holding the cluster label is added to it in place.
    :return: None; the result is passed to ``plot_correlations``.
    """
    corr_whisky = flavors.transpose().corr()
    model = SpectralCoclustering(n_clusters=6, random_state=0)
    model.fit(corr_whisky)

    whisky['Group'] = pd.Series(model.row_labels_, index=whisky.index)
    # argsort yields row positions, so the positional indexer is required.
    # The old ``.ix`` indexer was label-based on integer indexes and has
    # been removed from pandas.
    whisky = whisky.iloc[np.argsort(model.row_labels_)]
    whisky = whisky.reset_index(drop=True)

    # Correlation between whiskies over columns 2..13, now in group order.
    correlation = np.array(whisky.iloc[:, 2:14].transpose().corr())
    plot_correlations(correlation)
示例10: plot_coclusters_raw_data
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def plot_coclusters_raw_data(time_ms, t=False):
    """Co-cluster the z-scored channel data of one time slice and save a plot.

    :param time_ms: slice length passed to ``slice_matrix`` and used in the
        title and the output file name.
    :param t: when true, the sliced matrix is transposed before clustering.
    :return: None; the figure is written to
        ``z_score_raw_coclustering_all_ts_<time_ms>_T_<t>.svg``.
    """
    channels_data = slice_matrix(matrix, time_ms)
    if t:
        # Previously both branches were identical, so the transpose that
        # the flag asks for was never applied.
        channels_data = np.transpose(channels_data)
    print("%d %d" % (len(channels_data), len(channels_data[0])))

    z_score = stats.zscore(channels_data)
    spectral_model = SpectralCoclustering()
    spectral_model.fit(z_score)

    # Group rows and columns of the same co-cluster next to each other.
    fit_data = z_score[np.argsort(spectral_model.row_labels_)]
    fit_data = fit_data[:, np.argsort(spectral_model.column_labels_)]

    # matshow opens a new figure, so the title has to be set afterwards to
    # end up on the figure that is saved.
    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.title('Z Score Biclustering Over %i ms' % time_ms)
    plt.savefig('z_score_raw_coclustering_all_ts_%i_T_%s.svg' % (time_ms, str(t)))
示例11: cocluster
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def cocluster(self, mx, blockdiag=False):
    """Reorder ``mx`` so that rows and columns of one cluster are adjacent.

    :param mx: 2-D matrix to cluster.
    :param blockdiag: if true, fit ``SpectralCoclustering``; otherwise fit
        ``SpectralBiclustering`` with a 4 x 3 cluster grid.
    :return: ``mx`` with rows and columns permuted by cluster label.
        ``self.prev`` is permuted like the rows and ``self.case`` like the
        columns.
    """
    logging.info('Co-clustering Tade..')
    if not blockdiag:  # checkerboard
        logging.info('checkerboard')
        clusterer = SpectralBiclustering(n_jobs=-1, n_clusters=(4,3))
    else:
        logging.info('blockdiag')
        clusterer = SpectralCoclustering(n_jobs=-1)
    clusterer.fit(mx)

    logging.info('Argsorting mx rows..')
    row_order = np.argsort(clusterer.row_labels_)
    mx = mx[row_order]
    self.prev = self.prev[row_order]

    logging.info('Argsorting mx cases..')
    column_order = np.argsort(clusterer.column_labels_)
    mx = mx[:, column_order]
    self.case = self.case[column_order]
    return mx
示例12: main
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def main():
    """Co-cluster every CSV table in ``DATA_DIR`` and save one PDF heat map each.

    For each ``*.csv`` file the table is loaded with ``get_data`` and split
    into two co-clusters. Rows and columns are regrouped by cluster label,
    and the result is saved as ``<file name without extension>.pdf`` in the
    current directory.
    """
    # Sorted so the processing order does not depend on the file system.
    names = sorted(
        name for name in os.listdir(DATA_DIR)
        if fnmatch.fnmatch(name, '*.csv')
    )
    for name in names:
        path = DATA_DIR + name
        print('processing', path, '...')
        table = get_data(path)
        cl = SpectralCoclustering(n_clusters=2, random_state=0)
        cl.fit(table)
        # Reordering follows the scikit-learn spectral co-clustering example.
        fit_data = table[np.argsort(cl.row_labels_)]
        fit_data = fit_data[:, np.argsort(cl.column_labels_)]
        plt.matshow(fit_data, cmap=plt.cm.Reds)
        plt.title(name)
        plt.savefig(name[:-4] + '.pdf')
        # Release the figure; otherwise one figure per file stays open.
        plt.close()
示例13: main
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def main(model):
    """Render the binarised transition matrix of a stored model, raw and co-clustered.

    :param model: path of the HDF5 store. The keys ``from_``, ``Psi_sz``
        and ``count_z`` are read from it.
    :return: None; writes ``tmat.pdf`` (binarised matrix) and
        ``tmat-cluster.pdf`` (same matrix with rows and columns grouped
        into three co-clusters).
    :raises AssertionError: if the stored ``from_`` value is not 0.
    """
    store = pd.HDFStore(model)
    try:
        from_ = store['from_'][0][0]
        assert from_ == 0
        Psi_oz = store['Psi_sz'].values
        count_z = store['count_z'].values[:, 0]
    finally:
        # Everything needed is in memory now; close the store even when a
        # key is missing or the assertion fails.
        store.close()

    Psi_oz = Psi_oz / Psi_oz.sum(axis=0)
    Psi_zo = (Psi_oz * count_z).T
    Psi_zo = Psi_zo / Psi_zo.sum(axis=0)

    ZtZ = Psi_zo.dot(Psi_oz)
    ZtZ = ZtZ / ZtZ.sum(axis=0)
    # Binarise: 1 where the value reaches 1 / len(ZtZ), 0 elsewhere.
    L = (ZtZ >= 1.0 / len(ZtZ)).astype(ZtZ.dtype)

    colormap = toyplot.color.brewer.map("Purples", domain_min=0, domain_max=1, reverse=True)
    print(colormap)
    canvas = toyplot.matrix((L.T, colormap), label="P[z' | z]",
                            colorshow=False, tlabel="To z'", llabel="From")[0]
    toyplot.pdf.render(canvas, 'tmat.pdf')

    # A separate name keeps the ``model`` path argument intact.
    coclusterer = SpectralCoclustering(n_clusters=3)
    coclusterer.fit(L)
    fit_data = L[np.argsort(coclusterer.row_labels_)]
    fit_data = fit_data[:, np.argsort(coclusterer.column_labels_)]
    canvas = toyplot.matrix((fit_data, colormap), label="P[z' | z']",
                            colorshow=False)[0]
    toyplot.pdf.render(canvas, 'tmat-cluster.pdf')
示例14: biclustering
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
def biclustering(input_list,num_clusters):
    """Co-cluster ``input_list``, plot the reordered data and return the row clusters.

    :param input_list: 2-D array-like of numeric values.
    :param num_clusters: number of co-clusters to fit.
    :return: dict ``{cluster_id: [row indices]}``; the same dict is also
        stored in the module-level ``agent1_dict``.
    """
    global agent1_dict
    # Fit on a plain ndarray: recent scikit-learn input validation rejects
    # np.matrix and asks for np.asarray instead.
    data = np.asarray(input_list)
    # Co-clustering follows the scikit-learn plot_spectral_coclustering
    # example; SpectralBiclustering was tried as an alternative.
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # get_indices returns (row indices, column indices); only rows are kept.
    agent1_dict = {
        c: model.get_indices(c)[0].tolist() for c in range(num_clusters)
    }
    # Group rows and columns of the same co-cluster next to each other
    # before handing the matrix to the plotting helper.
    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plot(fit_data)
    return agent1_dict
示例15: TfidfVectorizer
# 需要导入模块: from sklearn.cluster.bicluster import SpectralCoclustering [as 别名]
# 或者: from sklearn.cluster.bicluster.SpectralCoclustering import fit [as 别名]
# Collect the merged abstract text of every paper that has an abstract.
listOfAbstracts = []
for paper in papers:
    article = paper['MedlineCitation']['Article']
    if 'Abstract' not in article.keys():
        continue
    listOfAbstracts.append(mergeAbstract(article['Abstract']['AbstractText']))

# Build the TF-IDF document/term matrix.
# NOTE(review): max_df is given as the integer 1, not the float 1.0 --
# confirm which of the two was intended.
vect = TfidfVectorizer(max_df=1)
tfidf = vect.fit_transform(listOfAbstracts)

# Non-negative matrix factorization into topics.
num_topics = 2
num_top_words = 5
nmf = decomposition.NMF(n_components=num_topics, random_state=1)
doctopic = nmf.fit_transform(tfidf)

# For every topic keep the words with the largest component weights.
vocab = np.array(vect.get_feature_names())
topic_words = []
for topic in nmf.components_:
    word_idx = np.argsort(topic)[::-1][:num_top_words]
    topic_words.append([vocab[i] for i in word_idx])

# Co-cluster documents (rows) and terms (columns) of the TF-IDF matrix.
cocluster = SpectralCoclustering(n_clusters=5, svd_method='arpack', random_state=0)
cocluster.fit(tfidf)
y_cocluster = cocluster.row_labels_
x_cocluster = cocluster.column_labels_
# Debug aid: print(np.array(vect.get_feature_names())[x_cocluster == 4])