This article collects typical usage examples of the Python method sklearn.decomposition.NMF.fit_transform. If you have been wondering what exactly NMF.fit_transform does and how to use it, the curated code examples below should help. You can also read further about the class this method belongs to, sklearn.decomposition.NMF.
The following 15 code examples of NMF.fit_transform are shown, sorted by popularity by default.
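Before the examples, a minimal self-contained sketch of a typical fit_transform call may help for orientation; the matrix X and all parameter values here are illustrative assumptions, not taken from any example below.

import numpy as np
from sklearn.decomposition import NMF

# A small non-negative matrix standing in for real data (illustrative only).
X = np.abs(np.random.RandomState(0).randn(6, 5))

# fit_transform learns the factorization X ~= W @ H in a single call:
# the returned W holds the per-sample weights; H is stored in components_.
model = NMF(n_components=2, init='nndsvd', random_state=0)
W = model.fit_transform(X)   # shape (6, 2)
H = model.components_        # shape (2, 5)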
Example 1: nmf_new
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def nmf_new(mut_final, mut_diff, mut_mean_qn, mut_median_qn, n_components,
            init='nndsvdar', random_state=0):
    # Numerical solver: 'pg' is a Projected Gradient solver (deprecated);
    # 'cd' is a Coordinate Descent solver (recommended).
    model = NMF(n_components=n_components, init=init,
                random_state=random_state)
    # TODO: refactor the four repeated blocks below into a loop.
    # fit_transform is more efficient than calling fit followed by transform,
    # and avoids fitting each matrix twice.
    W = model.fit_transform(mut_final)
    gene_comp = model.components_.copy()
    patient_strat = np.argmax(W, axis=1)
    W = model.fit_transform(mut_diff)
    gene_comp_diff = model.components_.copy()
    patient_strat_diff = np.argmax(W, axis=1)
    W = model.fit_transform(mut_mean_qn)
    gene_comp_mean_qn = model.components_.copy()
    patient_strat_mean_qn = np.argmax(W, axis=1)
    W = model.fit_transform(mut_median_qn)
    gene_comp_median_qn = model.components_.copy()
    patient_strat_median_qn = np.argmax(W, axis=1)
    return (gene_comp, patient_strat,
            gene_comp_diff, patient_strat_diff,
            gene_comp_mean_qn, patient_strat_mean_qn,
            gene_comp_median_qn, patient_strat_median_qn)
Example 2: extractTemplate
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def extractTemplate(y, w=d_w, h=d_h, n_components=nc):
    model = NMF(n_components=n_components, max_iter=max_iter, beta=beta)
    S = librosa.core.stft(y, n_fft=w, hop_length=h)
    # fit on the magnitude spectrogram; only components_ is kept
    model.fit_transform(np.abs(S).T)
    components = model.components_.T
    # components, activation = librosa.decompose.decompose(np.abs(S), n_components=3)
    return components
Example 3: test_nmf_inverse_transform
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def test_nmf_inverse_transform():
    # Test that NMF.inverse_transform returns values close to the original data
    random_state = np.random.RandomState(0)
    A = np.abs(random_state.randn(6, 4))
    m = NMF(n_components=4, init="random", random_state=0)
    m.fit_transform(A)
    t = m.transform(A)
    A_new = m.inverse_transform(t)
    assert_array_almost_equal(A, A_new, decimal=2)
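A side note on Example 3: NMF.inverse_transform is simply the product of the transformed data with the learned components, so the reconstruction can also be computed by hand. A minimal sketch under that assumption (all names and values are illustrative):

import numpy as np
from sklearn.decomposition import NMF

A = np.abs(np.random.RandomState(0).randn(6, 4))
m = NMF(n_components=4, init="random", random_state=0)
W = m.fit_transform(A)
# inverse_transform(W) reduces to the matrix product W @ m.components_
assert np.allclose(m.inverse_transform(W), W @ m.components_)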
Example 4: TopicEmbeddingModel
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
class TopicEmbeddingModel():
    '''
    Wrapper class for different topic models
    '''

    def __init__(self, folder='model', modeltype='kpca', topics=10):
        # the classifier, which also contains the trained BoW transformer
        self.bow = Vectorizer(folder=folder, steps=['hashing', 'tfidf'])
        self.folder = folder
        self.modeltype = modeltype
        self.topics = topics
        if self.modeltype == 'kpca':
            from sklearn.decomposition import KernelPCA
            self.model = KernelPCA(kernel='rbf', gamma=1., n_components=topics)
        if self.modeltype == 'nmf':
            from sklearn.decomposition import NMF
            self.model = NMF(n_components=topics)

    def fit(self, X):
        '''
        fits a topic model

        INPUT
        X   list of strings
        '''
        # transform list of strings into sparse BoW matrix
        X = self.bow.transform(X)
        # X = self.bow['tfidf_transformer'].fit_transform(
        #     self.bow['count_vectorizer'].fit_transform(X))
        # both supported model types are fitted the same way
        self.model.fit_transform(X)

    def predict(self, X):
        '''
        predicts cluster assignment from list of strings

        INPUT
        X   list of strings
        '''
        if not isinstance(X, list):
            X = [X]
        X = self.bow.transform(X)
        # X = self.bow['tfidf_transformer'].transform(
        #     self.bow['count_vectorizer'].transform(X))
        return self.model.transform(X)
Example 5: test_nmf_transform_custom_init
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def test_nmf_transform_custom_init():
    # Smoke test that checks if NMF.transform works with custom initialization
    A = np.abs(random_state.randn(6, 5))
    n_components = 4
    avg = np.sqrt(A.mean() / n_components)
    H_init = np.abs(avg * random_state.randn(n_components, 5))
    W_init = np.abs(avg * random_state.randn(6, n_components))
    m = NMF(solver="cd", n_components=n_components, init="custom",
            random_state=0)
    m.fit_transform(A, W=W_init, H=H_init)
    m.transform(A)
Example 6: get_features
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def get_features(head_and_body):
    filename = "NMF_topics" + str(n_topics) + "topics"
    if include_holdout:
        filename += "_holdout"
    if include_unlbled_test:
        filename += "unlbled_test"

    if not os.path.exists(features_dir + "/" + filename + ".pkl"):
        X_all, vocab = get_all_data(head_and_body, filename)

        # Calculate the n most important topics of the bodies. Each topic ranks
        # all words by importance; the more high-ranking words of a topic a
        # body contains, the higher the body's value for that topic.
        nfm = NMF(n_components=n_topics, random_state=1, alpha=.1)

        print("NMF_topics: fit and transform body")
        t0 = time()
        nfm.fit_transform(X_all)
        print("done in %0.3fs." % (time() - t0))

        with open(features_dir + "/" + filename + ".pkl", 'wb') as handle:
            joblib.dump(nfm, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        vocab = get_vocab(head_and_body, filename)
        with open(features_dir + "/" + filename + ".pkl", 'rb') as handle:
            nfm = joblib.load(handle)

    vectorizer_head = TfidfVectorizer(vocabulary=vocab, norm='l2')
    X_train_head = vectorizer_head.fit_transform(headlines)

    vectorizer_body = TfidfVectorizer(vocabulary=vocab, norm='l2')
    X_train_body = vectorizer_body.fit_transform(bodies)

    print("NMF_topics: transform head and body")
    # Use the NMF model trained on body topics on the headlines as well: if the
    # headlines and bodies share topics, their topic vectors should be similar.
    nfm_head_matrix = nfm.transform(X_train_head)
    nfm_body_matrix = nfm.transform(X_train_body)

    if not cosinus_dist:
        return np.concatenate([nfm_head_matrix, nfm_body_matrix], axis=1)
    else:
        # calculate the cosine distance between body and head
        X = []
        for i in range(len(nfm_head_matrix)):
            X_head_vector = np.array(nfm_head_matrix[i]).reshape((1, -1))  # 1d array is deprecated
            X_body_vector = np.array(nfm_body_matrix[i]).reshape((1, -1))
            cos_dist = cosine_distances(X_head_vector, X_body_vector).flatten()
            X.append(cos_dist.tolist())
        return X
Example 7: test_nmf_transform
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def test_nmf_transform():
    # Test that transform returns values close to those of fit_transform
    A = np.abs(random_state.randn(6, 5))
    m = NMF(n_components=4, init="nndsvd", random_state=0)
    ft = m.fit_transform(A)
    t = m.transform(A)
    assert_array_almost_equal(ft, t, decimal=2)
Example 8: nmf
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def nmf(self, **kwargs):
    """Perform dimensionality reduction using NMF."""
    nmf = NMF(**kwargs)
    reduced_matrix = nmf.fit_transform(self.matrix)
    # TODO: it is incorrect to pass self.column_labels! There are no column labels.
    return Space(reduced_matrix, self.row_labels, self.column_labels)
Example 9: test_nmf_fit_nn_output
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def test_nmf_fit_nn_output():
    # Test that the decomposition does not contain negative values
    A = np.c_[5 * np.ones(5) - np.arange(1, 6),
              5 * np.ones(5) + np.arange(1, 6)]
    for init in (None, "nndsvd", "nndsvda", "nndsvdar"):
        model = NMF(n_components=2, init=init, random_state=0)
        transf = model.fit_transform(A)
        assert_false((model.components_ < 0).any() or (transf < 0).any())
Example 10: get_LDA
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def get_LDA(X, num_components=10, show_topics=True):
    """Latent Dirichlet Allocation by NMF.
    21 Nov 2015, Keunwoo Choi

    LDA for a song-tag matrix. The motivation is the same as for get_LSI.
    With NMF it is easier to explain what each topic represents - by inspecting
    the 'H' matrix, where X ~= X' = W*H as a result of NMF.
    It is also good to have non-negative elements, which is straightforward
    for both W and H.
    """
    from sklearn.decomposition import NMF
    if X is None:
        print('X is omitted, so just assume it is the mood tag mtx w audio.')
        X = np.load(PATH_DATA + FILE_DICT["mood_tags_matrix"])  # np matrix, 9320-by-100
    nmf = NMF(init='nndsvd', n_components=num_components, max_iter=400)  # 400 is too large, but it doesn't hurt.
    W = nmf.fit_transform(X)
    H = nmf.components_
    print('=' * 60)
    print("NMF done with k=%d, average error:%2.4f" % (num_components, nmf.reconstruction_err_ / (X.shape[0] * X.shape[1])))
    term_rankings = []
    moodnames = cP.load(open(PATH_DATA + FILE_DICT["moodnames"], 'rb'))  # list, 100
    for topic_index in range(H.shape[0]):
        top_indices = np.argsort(H[topic_index, :])[::-1][0:10]
        term_ranking = [moodnames[i] for i in top_indices]
        term_rankings.append(term_ranking)
        if show_topics:
            print("Topic %d: %s" % (topic_index, ", ".join(term_ranking)))
    print('=' * 60)
    cP.dump(term_rankings, open(PATH_DATA + (FILE_DICT["mood_topics_strings"] % num_components), 'wb'))
    return W / np.max(W)  # return normalised matrix, [0, 1]
Example 11: hog2hognmf
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def hog2hognmf(hog_feature):
    """Transform a HOG feature into a HOG-NMF feature.

    Parameters
    ----------
    hog_feature: np.ndarray
        HOG feature.
    """
    mat = np.zeros((500, 8), dtype=np.float32)
    NMFmodel = NMF(n_components=2, init="random", random_state=0)
    # Reshape the 3780-dimensional feature into 500 * 8
    for i in range(7):
        mat[:, i] = hog_feature[i * 500 : (i + 1) * 500]
    mat[:280, 7] = hog_feature[3500:]
    W = NMFmodel.fit_transform(mat)
    H = NMFmodel.components_
    hognmf_feature = np.array([], dtype=np.float32)
    # L1-normalize each column of H and each row of W, then concatenate all of
    # them into one flat feature vector.
    for i in range(8):
        _sum = np.sum(H[:, i])
        if _sum == 0:
            H[:, i] *= 0.0
        else:
            H[:, i] /= _sum
        hognmf_feature = np.append(hognmf_feature, H[:, i])
    for i in range(500):
        _sum = np.sum(W[i, :])
        if _sum == 0:
            W[i, :] *= 0.0
        else:
            W[i, :] /= _sum
        hognmf_feature = np.append(hognmf_feature, W[i, :])
    return hognmf_feature
Example 12: get_LDA
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def get_LDA(X, num_components=10, show_topics=True):
    '''Latent Dirichlet Allocation by NMF.
    21 Nov 2015, Keunwoo Choi

    LDA for a song-tag matrix. The motivation is the same as for get_LSI.
    With NMF it is easier to explain what each topic represents - by inspecting
    the 'H' matrix, where X ~= X' = W*H as a result of NMF.
    It is also good to have non-negative elements, which is straightforward
    for both W and H.
    '''
    from sklearn.decomposition import NMF
    nmf = NMF(init='nndsvd', n_components=num_components, max_iter=400)  # 400 is too large, but it doesn't hurt.
    W = nmf.fit_transform(X)
    H = nmf.components_
    print('=' * 60)
    print("NMF done with k=%d, average error:%2.4f" % (num_components, nmf.reconstruction_err_ / (X.shape[0] * X.shape[1])))
    term_rankings = []
    moodnames = cP.load(open(PATH_DATA + FILE_DICT['sorted_tags'], 'rb'))  # list, 100
    for topic_index in range(H.shape[0]):
        top_indices = np.argsort(H[topic_index, :])[::-1][0:10]
        term_ranking = [moodnames[i] for i in top_indices]
        term_rankings.append(term_ranking)
        if show_topics:
            print("Topic %d: %s" % (topic_index, ", ".join(term_ranking)))
    print('=' * 60)
    cP.dump(nmf, open(PATH_DATA + 'NMF_object.cP', 'wb'))
    cP.dump(term_rankings, open(PATH_DATA + ('topics_strings_%d_components.cP' % num_components), 'wb'))
    # normalise each row by its max; the result is already in [0, 1]
    for row_idx, row in enumerate(W):
        if np.max(row) != 0:
            W[row_idx] = row / np.max(row)
    return W
Example 13: infer_topics
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def infer_topics(self, num_topics=10):
    self.nb_topics = num_topics
    nmf = NMF(n_components=num_topics)
    topic_document = nmf.fit_transform(self.corpus.sklearn_vector_space)
    self.topic_word_matrix = []
    self.document_topic_matrix = []
    vocabulary_size = len(self.corpus.vocabulary)
    row = []
    col = []
    data = []
    for topic_idx, topic in enumerate(nmf.components_):
        for i in range(vocabulary_size):
            row.append(topic_idx)
            col.append(i)
            data.append(topic[i])
    self.topic_word_matrix = coo_matrix((data, (row, col)),
                                        shape=(self.nb_topics, len(self.corpus.vocabulary))).tocsr()
    row = []
    col = []
    data = []
    for doc_count, doc in enumerate(topic_document):
        for topic_count, topic_weight in enumerate(doc):
            row.append(doc_count)
            col.append(topic_count)
            data.append(topic_weight)
    self.document_topic_matrix = coo_matrix((data, (row, col)),
                                            shape=(self.corpus.size, self.nb_topics)).tocsr()
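A note on Example 13: since nmf.components_ and topic_document are plain dense arrays, the two loop-built COO matrices can equivalently be constructed in one call each via scipy.sparse.csr_matrix. A minimal sketch, with an illustrative stand-in for corpus.sklearn_vector_space:

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF

X = np.abs(np.random.RandomState(0).randn(8, 20))  # stand-in corpus matrix
nmf = NMF(n_components=3, init='nndsvd', random_state=0)
topic_document = nmf.fit_transform(X)

topic_word_matrix = csr_matrix(nmf.components_)     # (n_topics, vocabulary_size)
document_topic_matrix = csr_matrix(topic_document)  # (n_documents, n_topics)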
Example 14: reduceDimensionality
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def reduceDimensionality(n_components=100):
    # import the csv into a pandas df
    df = pd.read_csv('data/gameData.csv')

    # Min-max scale the numeric columns, then halve (values end up in [0, 0.5])
    numericColumns = ['maxPlayers', 'maxPlaytime', 'minAge', 'minPlayers', 'minPlaytime', 'playtime']
    colsToNormalize = [col for col in numericColumns if col in df.columns]
    df[colsToNormalize] = df[colsToNormalize].apply(lambda x: (x - x.min()) / (x.max() - x.min()) / 2)

    # Drop string columns
    colsToDrop = ['artists', 'categories', 'designers', 'families', 'publishers',
                  'mechanics', 'boardGameId', 'yearPublished']

    # Convert df to an array for NMF and store the board game id column to attach later
    boardGameIds = df['boardGameId']
    arr = df[[col for col in df.columns if col not in colsToDrop]].to_numpy()
    arr = np.nan_to_num(arr)

    # Perform NMF with n_components dimensions
    model = NMF(n_components=n_components)
    W = model.fit_transform(arr)
    W = np.insert(W, 0, boardGameIds, axis=1)
    np.savetxt("data/reducedGameFeatures.csv", W, delimiter=",")
Example 15: extract_tfidf_nmf_feats
# Required imports: from sklearn.decomposition import NMF [as alias]
# or: from sklearn.decomposition.NMF import fit_transform [as alias]
def extract_tfidf_nmf_feats(self, df_data, n_components):
    """
    Extract tfidf features using nmf.
    """
    df_feat = pd.DataFrame(index=range(df_data.shape[0]))
    tfidf = TfidfVectorizer(ngram_range=(2, 3), stop_words='english')
    nmf = NMF(solver='cd', n_components=n_components, init='nndsvda',
              random_state=0, tol=1e-3)

    # fit the tfidf vocabulary on the 'q', 't' and 'd' columns
    df_data['q'].to_csv('q', index=False)
    df_data['t'].to_csv('t', index=False)
    df_data['d'].to_csv('d', index=False)
    print('fitting in tfidf')
    tfidf.set_params(input='filename')
    tfidf.fit(['q', 't', 'd'])
    tfidf.set_params(input='content')

    for col in ['d', 't', 'q', 'b']:
        print('process column', col)
        txt = df_data[col]
        tfidf_mat = tfidf.transform(txt)
        nd_feat = nmf.fit_transform(tfidf_mat)
        tmp = pd.DataFrame(nd_feat, columns=[col + '_tfidf_nmf_comp' + str(i)
                                             for i in range(n_components)])
        df_feat = pd.merge(df_feat, tmp, left_index=True, right_index=True)
    saveit(df_feat, 'df_tfidf_nmf_feats')