This article collects typical usage examples of the sklearn.decomposition.NMF class in Python. If you have been wondering what the Python NMF class does, how to use it, or what it looks like in real code, the curated class examples here may help.
Fifteen code examples of the NMF class are shown below, sorted by popularity by default.
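Before the examples, here is a minimal, self-contained sketch of the basic NMF workflow: fit a model on a non-negative matrix and read off the W (sample-to-component) and H (component-to-feature) factors. The matrix X below is made-up toy data, not taken from any of the quoted projects. Note that the excerpts that follow are taken out of larger code bases, so they generally assume imports such as import numpy as np, import pandas as pd, and from sklearn.decomposition import NMF, plus project-specific helpers.

import numpy as np
from sklearn.decomposition import NMF

# Toy non-negative matrix: 6 samples x 5 features (illustrative only)
X = np.abs(np.random.RandomState(0).randn(6, 5))

model = NMF(n_components=2, init='nndsvd', random_state=0, max_iter=500)
W = model.fit_transform(X)        # (6, 2) sample weights per component
H = model.components_             # (2, 5) feature weights per component
print(model.reconstruction_err_)  # Frobenius-norm error of the W @ H approximation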
Example 1: reduceDimensionality
def reduceDimensionality(n_components=100):
    # import the csv into a pandas df
    df = pd.read_csv('data/gameData.csv')
    # Scale the numeric columns to [0, 0.5]: min-max normalization, then halved
    numericColumns = ['maxPlayers','maxPlaytime','minAge','minPlayers','minPlaytime','playtime']
    colsToNormalize = []
    for col in numericColumns:
        if col in df.columns:
            colsToNormalize.append(col)
    df[colsToNormalize] = df[colsToNormalize].apply(lambda x: (x - x.min())/(x.max() - x.min())/2)
    # Drop string columns
    colsToDrop = ['artists','categories','designers','families','publishers','mechanics','boardGameId','yearPublished']
    # Convert df to an array for NMF and store the board game id column to attach later
    boardGameIds = df['boardGameId']
    arr = df[[col for col in df.columns if col not in colsToDrop]].to_numpy()
    arr = np.nan_to_num(arr)
    # Perform NMF with n_components dimensions
    model = NMF(n_components=n_components)
    W = model.fit_transform(arr)
    W = np.insert(W, 0, boardGameIds, axis=1)
    np.savetxt("data/reducedGameFeatures.csv", W, delimiter=",")
Example 2: nmf_model2
def nmf_model2(n_topics, document_term_mat):
    # print("\n\n---------\n decomposition")
    nmf = NMF(n_components=n_topics, l1_ratio=0.0)
    W_sklearn = nmf.fit_transform(document_term_mat)
    H_sklearn = nmf.components_
    # describe_nmf_results(document_term_mat, W_sklearn, H_sklearn)
    return W_sklearn, H_sklearn
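In topic-modeling terms, W_sklearn maps each document onto the n_topics latent topics and H_sklearn maps each topic onto the vocabulary. A common follow-up, sketched below under the assumption that document_term_mat came from a fitted vectorizer exposing get_feature_names_out (the vectorizer name is hypothetical here), is to print each topic's top terms:

feature_names = vectorizer.get_feature_names_out()
for k, topic in enumerate(H_sklearn):
    top_terms = [feature_names[i] for i in topic.argsort()[::-1][:10]]
    print("Topic %d: %s" % (k, ", ".join(top_terms)))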
Example 3: extractTemplate
def extractTemplate(y, w=d_w, h=d_h, n_components=nc):
    # d_w, d_h, nc, max_iter and beta are module-level settings in the original project;
    # beta was an NMF parameter in older scikit-learn releases and has since been removed
    model = NMF(n_components=n_components, max_iter=max_iter, beta=beta)
    S = librosa.core.stft(y, n_fft=w, hop_length=h)
    # fit on the magnitude spectrogram, transposed to frames x frequency bins
    model.fit_transform(np.abs(S).T)
    components = model.components_.T
    #components, activation = librosa.decompose.decompose(np.abs(S), n_components=3)
    return components
Example 4: do_NMF
def do_NMF(sparse_matrix):
    t0 = time.time()
    print("* Performing NMF on sparse matrix ... ")
    nmf = NMF(n_components=3)
    coordinates = nmf.fit_transform(sparse_matrix)
    print("done in %0.3fs." % (time.time() - t0))
    return coordinates
Example 5: __Factorize_NMF
def __Factorize_NMF(self, K):
    model = NMF(n_components=K, max_iter=self._iteration)
    # fit_transform fits the model and returns the user factors in one step,
    # so a separate fit() call would train the model twice
    user_fmat = model.fit_transform(self._mat)
    item_fmat = model.components_.T
    return user_fmat, item_fmat
Example 6: applyNMF
def applyNMF(self, number_of_clusters, country_specific_tweets):
    train, feature_names = self.extractFeatures(country_specific_tweets, False)
    name = "nmf"
    # Fit the NMF model
    if self.results:
        print("Fitting the NMF model", end=" - ")
    t0 = time()
    nmf = NMF(n_components=number_of_clusters, random_state=1, alpha=.1, l1_ratio=.5).fit(train)
    if self.results:
        print("done in %0.3fs." % (time() - t0))
        print("\nNMF:")
    parameters = nmf.get_params()
    if self.results:
        print("Parameter: " + str(parameters))
    topics = nmf.components_
    doc_topic = nmf.transform(train)
    top10, labels = self.printTopicCluster(topics, doc_topic, feature_names)
    labels = numpy.asarray(labels)
    if self.results:
        print("Silhouette Coefficient {0}: {1}".format(name, metrics.silhouette_score(train, labels)))
    return name, parameters, top10, labels
Example 7: nmf_df
def nmf_df(sym, k, coll):
    data = [item for item in coll.find({'text': {'$in': [re.compile(sym)]}})]
    sents = [sentence['text'] for sentence in data]
    dates = [str(text['created_at']) for text in data]
    d = np.array(dates).T
    d = d.reshape(len(dates), 1)
    vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
    X = vectorizer.fit_transform(sents)
    #features = vectorizer.get_feature_names()
    model = NMF(n_components=k, init='random', random_state=0)
    latent_features = model.fit_transform(X)
    # lat0 = list(latent_features[:,0])
    # lat1 = list(latent_features[:,1])
    # lat2 = list(latent_features[:,2])
    # lat3 = list(latent_features[:,3])
    df = pd.DataFrame(latent_features)  #np.concatenate((d, latent_features), axis=1)
    df.columns = ['lat' + str(n) for n in range(len(df.columns))]
    df['time_stamp'] = d
    #print df.head()
    # normalize each timestamp to midnight so tweets can be grouped by day
    # (the original used pd.datetools.normalize_date, removed in modern pandas)
    df['date'] = pd.to_datetime(df['time_stamp']).dt.normalize()
    df.pop('time_stamp')
    #print df.head()
    grouped_data = df.groupby(['date']).mean()
    grouped_data['sym'] = sym
    return grouped_data
Example 8: tfidf_nmf
def tfidf_nmf(release_texts, n_components=10, max_features=None):
    '''
    Creates and fits tfidf and NMF models.
    INPUT:
    - release_texts: list of document texts to vectorize
    - n_components: number of latent features for the NMF model to find
    - max_features: max number of features (vocabulary size) for the tfidf model to consider
    OUTPUT:
    - tfidf_vectorizer: tfidf model object
    - tfidf_sparse: tfidf sparse matrix
    - nmf: NMF model object
    - W: feature matrix output from NMF factorization into W and H matrices
    '''
    # tfidf model
    custom_stop_words = make_stop_words()
    tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words=custom_stop_words, max_features=max_features)
    tfidf_sparse = tfidf_vectorizer.fit_transform(release_texts)
    # normalize row-wise so each row sums to one
    tfidf_sparse = normalize(tfidf_sparse, axis=1, norm='l1')
    # nmf model
    nmf = NMF(n_components=n_components, random_state=1)
    nmf.fit(tfidf_sparse)
    W = nmf.transform(tfidf_sparse)
    return tfidf_vectorizer, tfidf_sparse, nmf, W
Example 9: hog2hognmf
def hog2hognmf(hog_feature):
    """Transform HOG feature into HOG-NMF feature.
    Parameters
    ----------
    hog_feature: np.ndarray
        HOG feature.
    """
    mat = np.zeros((500, 8), dtype=np.float32)
    NMFmodel = NMF(n_components=2, init="random", random_state=0)
    # Reshape the 3780-dim HOG vector into a 500 x 8 matrix (last column zero-padded)
    for i in range(7):
        mat[:, i] = hog_feature[i * 500 : (i + 1) * 500]
    mat[:280, 7] = hog_feature[3500:]
    W = NMFmodel.fit_transform(mat)
    H = NMFmodel.components_
    hognmf_feature = np.array([], dtype=np.float32)
    # L1-normalize each column of H and append it to the feature vector
    for i in range(8):
        _sum = np.sum(H[:, i])
        if _sum == 0:
            H[:, i] *= 0.0
        else:
            H[:, i] /= _sum
        hognmf_feature = np.append(hognmf_feature, H[:, i])
    # L1-normalize each row of W and append it to the feature vector
    for i in range(500):
        _sum = np.sum(W[i, :])
        if _sum == 0:
            W[i, :] *= 0.0
        else:
            W[i, :] /= _sum
        hognmf_feature = np.append(hognmf_feature, W[i, :])
    return hognmf_feature
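For reference on the shapes involved: W is 500 x 2 and H is 2 x 8, so the returned hognmf_feature concatenates 8 normalized columns of H (2 values each) with 500 normalized rows of W (2 values each), giving a 1016-dimensional descriptor.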
Example 10: nmf
def nmf(self, **kwargs):
    """Perform dimensionality reduction using NMF."""
    nmf = NMF(**kwargs)
    reduced_matrix = nmf.fit_transform(self.matrix)
    # TODO: it is incorrect to pass self.column_labels! There are no column labels.
    return Space(reduced_matrix, self.row_labels, self.column_labels)
Example 11: get_topics_nmf
def get_topics_nmf(urls, num_topics):
    '''Input: URLs containing links to each document (pdf) in the
    corpus (i.e. arxiv). Output: the num_topics most important latent
    topics from the corpus (via NMF)
    '''
    article_info = []
    for url in urls:
        article_info.append(get_text(url))
    text = []
    for thing in article_info:
        text.extend(thing[0])
    text = clean_pdf_text(text)
    tfidf_math = TfidfVectorizer(max_features=100, stop_words=math_stop(),
                                 ngram_range=(1, 1), decode_error='ignore')
    M = tfidf_math.fit_transform(text)
    feature_names = tfidf_math.get_feature_names()
    feature_names = [WordNetLemmatizer().lemmatize(word)
                     for word in feature_names]
    nmf = NMF(n_components=num_topics)
    nmf.fit(M)
    topics = []
    for topic_idx, topic in enumerate(nmf.components_):
        topics.append(" ".join([feature_names[i] for i in
                                topic.argsort()[:-10 - 1:-1]]))
    # title_list is assumed to be defined elsewhere in the original module
    return M, topics, text, title_list, urls
Example 12: get_LDA
def get_LDA(X, num_components=10, show_topics=True):
    ''' Latent Dirichlet Allocation by NMF.
    21 Nov 2015, Keunwoo Choi
    LDA for a song-tag matrix. The motivation is the same as get_LSI.
    With NMF, it is easier to explain what each topic represents - by inspecting the 'H' matrix,
    where X ~= X' = W*H as a result of NMF.
    It is also good to have non-negative elements, straightforward for both W and H.
    (Note: this snippet is Python 2 code; cP is the cPickle module.)
    '''
    from sklearn.decomposition import NMF
    nmf = NMF(init='nndsvd', n_components=num_components, max_iter=400)  # 400 is too large, but it doesn't hurt.
    W = nmf.fit_transform(X)
    H = nmf.components_
    print '='*60
    print "NMF done with k=%d, average error:%2.4f" % (num_components, nmf.reconstruction_err_/(X.shape[0]*X.shape[1]))
    term_rankings = []
    moodnames = cP.load(open(PATH_DATA + FILE_DICT['sorted_tags'], 'r'))  # list, 100
    for topic_index in range(H.shape[0]):
        top_indices = np.argsort(H[topic_index, :])[::-1][0:10]
        term_ranking = [moodnames[i] for i in top_indices]
        term_rankings.append(term_ranking)
        if show_topics:
            print "Topic %d: %s" % (topic_index, ", ".join(term_ranking))
    print '='*60
    cP.dump(nmf, open(PATH_DATA + 'NMF_object.cP', 'w'))
    cP.dump(term_rankings, open(PATH_DATA + ('topics_strings_%d_components.cP' % num_components), 'w'))
    # normalise each nonzero row of W to a max of 1, then the whole matrix to [0, 1]
    for row_idx, row in enumerate(W):
        if np.max(row) != 0:
            W[row_idx] = row / np.max(row)
    return W / np.max(W)
Example 13: test_nmf_fit_close
def test_nmf_fit_close(solver):
    rng = np.random.mtrand.RandomState(42)
    # Test that the fit is not too far away
    # (assert_less comes from scikit-learn's old testing utilities)
    pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0,
               max_iter=600)
    X = np.abs(rng.randn(6, 5))
    assert_less(pnmf.fit(X).reconstruction_err_, 0.1)
Example 14: fit_nmf
def fit_nmf(tfidf):
    '''takes in a tfidf sparse matrix and finds the top topics'''
    # n_topics, tfidf_vectorizer and n_top_words are module-level globals in the original project
    nmf = NMF(n_components=n_topics, random_state=1, alpha=.1, l1_ratio=.5)
    nmf.fit(tfidf)
    tfidf_feature_names = tfidf_vectorizer.get_feature_names()
    nmf_topic_dict = print_top_words(nmf, tfidf_feature_names, n_top_words)
    return nmf, nmf_topic_dict
Example 15: produceEncoding
def produceEncoding(trainX, nComponents):
    '''Produces an NMF encoding from the training
    data matrix'''
    model = NMF(n_components=nComponents, solver='cd',
                tol=1e-4, max_iter=200, alpha=0.0)
    model.fit(trainX)
    return model
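Since produceEncoding returns the fitted model rather than the transformed data, encoding new data is a separate transform call. A minimal usage sketch with made-up non-negative arrays trainX and testX, assuming a scikit-learn version that still accepts the alpha parameter used above:

import numpy as np
trainX = np.abs(np.random.RandomState(1).randn(20, 10))
testX = np.abs(np.random.RandomState(2).randn(5, 10))
model = produceEncoding(trainX, nComponents=4)
codes = model.transform(testX)  # (5, 4) NMF encoding of the held-out rows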