当前位置: 首页>>代码示例>>Python>>正文


Python NMF.fit_transform方法代码示例

本文整理汇总了Python中sklearn.decomposition.NMF.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python NMF.fit_transform方法的具体用法?Python NMF.fit_transform怎么用?Python NMF.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.decomposition.NMF的用法示例。


在下文中一共展示了NMF.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: nmf_new

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def nmf_new(mut_final, mut_diff, mut_mean_qn, mut_median_qn, n_components,
            init='nndsvdar', random_state=0):
    """Factorize four mutation matrices with NMF and stratify patients.

    For each input matrix X (patients x genes, non-negative), compute
    X ~= W * H and keep:
      * H (``model.components_``) as the gene components,
      * the argmax over each row of W as the patient's stratum.

    Parameters
    ----------
    mut_final, mut_diff, mut_mean_qn, mut_median_qn : array-like
        Non-negative matrices to factorize.
    n_components : int
        Number of NMF components.
    init : str, default 'nndsvdar'
        NMF initialization scheme ('cd' coordinate-descent solver is the
        sklearn default; the deprecated 'pg' solver is not used).
    random_state : int, default 0
        Seed for reproducible factorizations.

    Returns
    -------
    tuple
        (gene_comp, patient_strat) pairs for the four inputs, in order:
        final, diff, mean_qn, median_qn.
    """
    model = NMF(n_components=n_components, init=init,
                random_state=random_state)

    def _decompose(X):
        # One purpose per call: fit_transform both fits the model and
        # returns W. The original called fit(X) and then fit_transform(X),
        # fitting every matrix twice for no benefit.
        W = model.fit_transform(X)
        gene_comp = model.components_.copy()
        patient_strat = np.argmax(W, axis=1).copy()
        return gene_comp, patient_strat

    gene_comp, patient_strat = _decompose(mut_final)
    gene_comp_diff, patient_strat_diff = _decompose(mut_diff)
    gene_comp_mean_qn, patient_strat_mean_qn = _decompose(mut_mean_qn)
    gene_comp_median_qn, patient_strat_median_qn = _decompose(mut_median_qn)

    return (gene_comp, patient_strat,
            gene_comp_diff, patient_strat_diff,
            gene_comp_mean_qn, patient_strat_mean_qn,
            gene_comp_median_qn, patient_strat_median_qn)
开发者ID:0m1n0,项目名称:netstra,代码行数:33,代码来源:decomposition.py

示例2: extractTemplate

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def extractTemplate(y, w=d_w, h=d_h, n_components=nc):
    """Learn spectral template components from an audio signal.

    Computes the magnitude STFT of ``y`` (FFT size ``w``, hop ``h``),
    factorizes the frame-by-bin matrix with NMF, and returns the learned
    spectral components as columns.
    """
    spectrum = librosa.core.stft(y, n_fft=w, hop_length=h)
    model = NMF(n_components=n_components, max_iter=max_iter, beta=beta)
    # Fit on frames-as-rows; the returned activations are discarded on
    # purpose — only the learned components are needed.
    model.fit_transform(np.abs(spectrum).T)
    return model.components_.T
开发者ID:LemonATsu,项目名称:VIOCLA-source-separation,代码行数:9,代码来源:NMF.py

示例3: test_nmf_inverse_transform

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def test_nmf_inverse_transform():
    # Transforming and then inverse-transforming should approximately
    # recover the original non-negative data.
    rng = np.random.RandomState(0)
    A = np.abs(rng.randn(6, 4))
    model = NMF(n_components=4, init="random", random_state=0)
    model.fit_transform(A)
    reconstructed = model.inverse_transform(model.transform(A))
    assert_array_almost_equal(A, reconstructed, decimal=2)
开发者ID:jnothman,项目名称:scikit-learn,代码行数:11,代码来源:test_nmf.py

示例4: TopicEmbeddingModel

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
class TopicEmbeddingModel():
    '''
    Wrapper class for different topic models (kernel PCA or NMF) built
    on top of a bag-of-words representation.
    '''
    def __init__(self, folder='model', modeltype='kpca', topics=10):
        """Create the BoW transformer and the chosen embedding model.

        INPUT
        folder      model folder passed to the Vectorizer
        modeltype   'kpca' or 'nmf'
        topics      number of components/topics
        """
        # the classifier, which also contains the trained BoW transformer
        self.bow = Vectorizer(folder=folder, steps=['hashing', 'tfidf'])
        self.folder = folder
        self.modeltype = modeltype
        self.topics = topics

        # BUG FIX: the original compared strings with `is`, which relies
        # on CPython interning and is not guaranteed; use `==` instead.
        if self.modeltype == 'kpca':
            from sklearn.decomposition import KernelPCA
            self.model = KernelPCA(kernel='rbf', gamma=1., n_components=topics)
        elif self.modeltype == 'nmf':
            from sklearn.decomposition import NMF
            self.model = NMF(n_components=topics)

    def fit(self, X):
        '''
        fits a topic model

        INPUT
        X   list of strings
        '''
        # transform list of strings into sparse BoW matrix
        X = self.bow.transform(X)
        # both supported models train via fit_transform; the transformed
        # data itself is not stored (matches original behavior)
        if self.modeltype in ('kpca', 'nmf'):
            self.model.fit_transform(X)

    def predict(self, X):
        '''
        predicts cluster assignment from list of strings

        INPUT
        X   list of strings
        '''
        # BUG FIX: the original used `X is not list`, an identity check
        # against the type object that is True for *every* value —
        # including actual lists, which were then wrongly wrapped again.
        # isinstance wraps only bare (non-list) inputs.
        if not isinstance(X, list):
            X = [X]
        X = self.bow.transform(X)
        if self.modeltype in ('kpca', 'nmf'):
            return self.model.transform(X)
开发者ID:christinakraus,项目名称:political-affiliation-prediction,代码行数:57,代码来源:embedding.py

示例5: test_nmf_transform_custom_init

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def test_nmf_transform_custom_init():
    # Smoke test: NMF.transform must work after fitting with
    # user-supplied W and H initialization matrices.
    n_components = 4
    # Keep the same order of random_state.randn calls as before so the
    # shared RNG stream produces identical matrices: A, then H, then W.
    A = np.abs(random_state.randn(6, 5))
    scale = np.sqrt(A.mean() / n_components)
    H0 = np.abs(scale * random_state.randn(n_components, 5))
    W0 = np.abs(scale * random_state.randn(6, n_components))

    model = NMF(solver="cd", n_components=n_components, init="custom",
                random_state=0)
    model.fit_transform(A, W=W0, H=H0)
    model.transform(A)
开发者ID:jnothman,项目名称:scikit-learn,代码行数:13,代码来源:test_nmf.py

示例6: get_features

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
    def get_features(head_and_body):
        """Build NMF topic features for headline/body pairs.

        Fits (or loads from a pickle cache) an NMF topic model on the
        combined corpus, projects headlines and bodies into topic space,
        and returns either concatenated topic vectors or per-pair cosine
        distances, depending on the enclosing-scope flag ``cosinus_dist``.

        NOTE(review): this closure reads many names from the enclosing
        scope (n_topics, include_holdout, include_unlbled_test,
        features_dir, headlines, bodies, cosinus_dist, get_all_data,
        get_vocab) — behavior depends on how the caller binds them.
        """
        # Cache file name encodes the configuration used to build the corpus.
        filename = "NMF_topics" + str(n_topics) + "topics"

        if include_holdout == True:
            filename += "_holdout"

        if include_unlbled_test == True:
            filename += "unlbled_test"

        # Fit-and-cache path: no pickled model yet for this configuration.
        if not (os.path.exists(features_dir + "/" + filename + ".pkl")):
            X_all, vocab = get_all_data(head_and_body, filename)

            # calculates n most important topics of the bodies. Each topic contains all words but ordered by importance. The
            # more important topic words a body contains of a certain topic, the higher its value for this topic
            nfm = NMF(n_components=n_topics, random_state=1, alpha=.1)

            print("NMF_topics: fit and transform body")
            t0 = time()
            # Only the fitted components are needed; the returned W is discarded.
            nfm.fit_transform(X_all)
            print("done in %0.3fs." % (time() - t0))

            # Persist the fitted model so later runs can skip the fit.
            with open(features_dir + "/" + filename + ".pkl", 'wb') as handle:
                joblib.dump(nfm, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            # Cached path: reload vocabulary and the pickled NMF model.
            vocab = get_vocab(head_and_body, filename)
            with open(features_dir + "/" + filename + ".pkl", 'rb') as handle:
                nfm = joblib.load(handle)

        # Vectorize headlines and bodies with the shared vocabulary so the
        # tfidf columns line up with the topic model's features.
        vectorizer_head = TfidfVectorizer(vocabulary=vocab, norm='l2')
        X_train_head = vectorizer_head.fit_transform(headlines)

        vectorizer_body = TfidfVectorizer(vocabulary=vocab, norm='l2')
        X_train_body = vectorizer_body.fit_transform(bodies)

        print("NMF_topics: transform head and body")
        # use the NMF model trained for body topics on the headlines => if the
        # headlines and bodies share topics their vectors should be similar
        nfm_head_matrix = nfm.transform(X_train_head)
        nfm_body_matrix = nfm.transform(X_train_body)

        if cosinus_dist == False:
            return np.concatenate([nfm_head_matrix, nfm_body_matrix], axis=1)
        else:
            # calculate cosine distance between the body and head
            X = []
            for i in range(len(nfm_head_matrix)):
                X_head_vector = np.array(nfm_head_matrix[i]).reshape((1, -1))  # 1d array is deprecated
                X_body_vector = np.array(nfm_body_matrix[i]).reshape((1, -1))
                cos_dist = cosine_distances(X_head_vector, X_body_vector).flatten()
                X.append(cos_dist.tolist())
            return X
开发者ID:paris5020,项目名称:athene_system,代码行数:53,代码来源:topic_models.py

示例7: test_nmf_transform

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def test_nmf_transform():
    # fit_transform and a subsequent transform on the same data should
    # produce nearly identical embeddings.
    A = np.abs(random_state.randn(6, 5))
    model = NMF(n_components=4, init="nndsvd", random_state=0)
    W_fit = model.fit_transform(A)
    W_transform = model.transform(A)
    assert_array_almost_equal(W_fit, W_transform, decimal=2)
开发者ID:jnothman,项目名称:scikit-learn,代码行数:9,代码来源:test_nmf.py

示例8: nmf

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
    def nmf(self, **kwargs):
        """Perform dimensionality reduction using NMF.

        Keyword arguments are forwarded verbatim to sklearn's NMF.
        """
        model = NMF(**kwargs)
        transformed = model.fit_transform(self.matrix)
        # TODO: it is incorrect to pass self.column_labels! There are not column labels.
        return Space(transformed, self.row_labels, self.column_labels)
开发者ID:masteradamo,项目名称:fowler.corpora,代码行数:9,代码来源:models.py

示例9: test_nmf_fit_nn_output

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def test_nmf_fit_nn_output():
    # Whatever the initialization scheme, both factors W and H must
    # remain non-negative.
    A = np.c_[5 * np.ones(5) - np.arange(1, 6),
              5 * np.ones(5) + np.arange(1, 6)]
    for init in (None, "nndsvd", "nndsvda", "nndsvdar"):
        model = NMF(n_components=2, init=init, random_state=0)
        W = model.fit_transform(A)
        assert_false((model.components_ < 0).any() or (W < 0).any())
开发者ID:jnothman,项目名称:scikit-learn,代码行数:9,代码来源:test_nmf.py

示例10: get_LDA

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def get_LDA(X, num_components=10, show_topics=True):
	""" Topic extraction for a song-tag matrix via NMF.
	21 Nov 2015, Keunwoo Choi

	(Despite the name, this is NMF, not Latent Dirichlet Allocation.)
	LDA for a song-tag matrix. The motivation is same as get_LSI. 
	With NMF, it is easier to explain what each topic represent - by inspecting 'H' matrix,
	where X ~= X' = W*H as a result of NMF. 
	It is also good to have non-negative elements, straight-forward for both W and H.

	Side effects: prints the per-topic top-10 mood terms (when
	show_topics) and pickles the term rankings under PATH_DATA.
	Returns W normalised by its global maximum (values in [0, 1]).
	"""

	from sklearn.decomposition import NMF
	# NOTE(review): 'X == None' should be 'X is None'; with a NumPy
	# array this comparison is elementwise and may raise — confirm.
	if X == None:
		print 'X is omitted, so just assume it is the mood tag mtx w audio.'
		X = np.load(PATH_DATA + FILE_DICT["mood_tags_matrix"]) #np matrix, 9320-by-100
	nmf = NMF(init='nndsvd', n_components=num_components, max_iter=400) # 400 is too large, but it doesn't hurt.
	W = nmf.fit_transform(X)
	H = nmf.components_
	print '='*60
	print "NMF done with k=%d, average error:%2.4f" % (num_components, nmf.reconstruction_err_/(X.shape[0]*X.shape[1]))

	# Rank the mood terms within each topic by their weight in H.
	term_rankings = []
	moodnames = cP.load(open(PATH_DATA + FILE_DICT["moodnames"], 'r')) #list, 100
	for topic_index in range( H.shape[0] ):
		# Indices of the 10 largest weights, descending.
		top_indices = np.argsort( H[topic_index,:] )[::-1][0:10]
		term_ranking = [moodnames[i] for i in top_indices]
		term_rankings.append(term_ranking)
		if show_topics:	
			print "Topic %d: %s" % ( topic_index, ", ".join( term_ranking ) )
	print '='*60
	# Persist rankings; the filename pattern embeds num_components.
	cP.dump(term_rankings, open(PATH_DATA + (FILE_DICT["mood_topics_strings"] % num_components), 'w'))
	return W / np.max(W) # return normalised matrix, [0, 1]
开发者ID:keunwoochoi,项目名称:embedding,代码行数:35,代码来源:main_prepare_y.py

示例11: hog2hognmf

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def hog2hognmf(hog_feature):
    """Transform HOG feature into HOG-NMF feature.

    Reshapes the 3780-dimensional HOG vector into a 500x8 matrix
    (zero-padding the last column), factorizes it with rank-2 NMF,
    and returns the L1-normalized columns of H followed by the
    L1-normalized rows of W, flattened into one feature vector.

    Parameters
    ----------
    hog_feature: np.ndarray
      HOG feature.
    """
    # Transform 3780 into 500 * 8: seven full columns of 500 values,
    # then 280 leftovers in the top of column 7 (rest stays zero).
    reshaped = np.zeros((500, 8), dtype=np.float32)
    for col in range(7):
        reshaped[:, col] = hog_feature[col * 500 : (col + 1) * 500]
    reshaped[:280, 7] = hog_feature[3500:]

    model = NMF(n_components=2, init="random", random_state=0)
    W = model.fit_transform(reshaped)
    H = model.components_

    out = np.array([], dtype=np.float32)
    # Append each column of H normalized to unit sum (zero columns stay zero).
    for col in range(8):
        total = np.sum(H[:, col])
        if total == 0:
            H[:, col] *= 0.0
        else:
            H[:, col] /= total
        out = np.append(out, H[:, col])
    # Append each row of W, likewise L1-normalized.
    for row in range(500):
        total = np.sum(W[row, :])
        if total == 0:
            W[row, :] *= 0.0
        else:
            W[row, :] /= total
        out = np.append(out, W[row, :])
    return out
开发者ID:eriche2016,项目名称:yatpd,代码行数:34,代码来源:hog2hognmf.py

示例12: get_LDA

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def get_LDA(X, num_components=10, show_topics=True):
	''' Topic extraction for a song-tag matrix via NMF.
	21 Nov 2015, Keunwoo Choi

	(Despite the name, this is NMF, not Latent Dirichlet Allocation.)
	LDA for a song-tag matrix. The motivation is same as get_LSI. 
	With NMF, it is easier to explain what each topic represent - by inspecting 'H' matrix,
	where X ~= X' = W*H as a result of NMF. 
	It is also good to have non-negative elements, straight-forward for both W and H.

	Side effects: prints per-topic top-10 tags (when show_topics) and
	pickles both the NMF object and the term rankings under PATH_DATA.
	Returns W normalised so values lie in [0, 1].
	'''

	from sklearn.decomposition import NMF
	
	nmf = NMF(init='nndsvd', n_components=num_components, max_iter=400) # 400 is too large, but it doesn't hurt.
	W = nmf.fit_transform(X)
	H = nmf.components_
	print '='*60
	print "NMF done with k=%d, average error:%2.4f" % (num_components, nmf.reconstruction_err_/(X.shape[0]*X.shape[1]))

	# Rank the tags within each topic by their weight in H.
	term_rankings = []
	moodnames = cP.load(open(PATH_DATA + FILE_DICT['sorted_tags'], 'r')) #list, 100
	for topic_index in range( H.shape[0] ):
		# Indices of the 10 largest weights, descending.
		top_indices = np.argsort( H[topic_index,:] )[::-1][0:10]
		term_ranking = [moodnames[i] for i in top_indices]
		term_rankings.append(term_ranking)
		if show_topics:	
			print "Topic %d: %s" % ( topic_index, ", ".join( term_ranking ) )
	print '='*60
	# Persist the fitted model and the rankings for later inspection.
	cP.dump(nmf, open(PATH_DATA + 'NMF_object.cP', 'w'))
	cP.dump(term_rankings, open(PATH_DATA + ('topics_strings_%d_components.cP' % num_components), 'w'))
	# Normalise each row by its own maximum (rows of all zeros are kept).
	for row_idx, row in enumerate(W):
		if np.max(row) != 0:
			W[row_idx] = row / np.max(row)
	return W / np.max(W) # return normalised matrix, [0, 1]
	''''''
开发者ID:keunwoochoi,项目名称:magnatagatune,代码行数:37,代码来源:main_prepare.py

示例13: infer_topics

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
 def infer_topics(self, num_topics=10):
     """Infer ``num_topics`` topics from the corpus via NMF.

     Fits NMF on self.corpus.sklearn_vector_space and stores two CSR
     matrices on self: topic_word_matrix (nb_topics x vocabulary size)
     and document_topic_matrix (corpus size x nb_topics).
     """
     self.nb_topics = num_topics
     nmf = NMF(n_components=num_topics)
     doc_topics = nmf.fit_transform(self.corpus.sklearn_vector_space)
     vocab_size = len(self.corpus.vocabulary)

     # Topic-word weights: entry (t, w) holds the weight of word w in
     # topic t (dense weights stored through a sparse COO, as before).
     tw_rows = [t for t in range(len(nmf.components_))
                for _ in range(vocab_size)]
     tw_cols = [w for _ in range(len(nmf.components_))
                for w in range(vocab_size)]
     tw_data = [topic[w] for topic in nmf.components_
                for w in range(vocab_size)]
     self.topic_word_matrix = coo_matrix(
         (tw_data, (tw_rows, tw_cols)),
         shape=(self.nb_topics, vocab_size)).tocsr()

     # Document-topic weights straight from the NMF transform (W matrix).
     dt_rows = [d for d, doc in enumerate(doc_topics)
                for _ in range(len(doc))]
     dt_cols = [t for doc in doc_topics for t in range(len(doc))]
     dt_data = [weight for doc in doc_topics for weight in doc]
     self.document_topic_matrix = coo_matrix(
         (dt_data, (dt_rows, dt_cols)),
         shape=(self.corpus.size, self.nb_topics)).tocsr()
开发者ID:CATS-Project,项目名称:CATS-TextMiningServices,代码行数:33,代码来源:topic_model.py

示例14: reduceDimensionality

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
def reduceDimensionality(n_components=100):
	"""Reduce the board-game feature table to n_components via NMF.

	Reads data/gameData.csv, min-max normalizes the numeric columns
	(the extra "/2" scales values into [0, 0.5], kept from the original
	behavior), drops string-valued columns, factorizes the remaining
	matrix with NMF, re-attaches the board-game ids as the first column,
	and writes the result to data/reducedGameFeatures.csv.

	Parameters
	----------
	n_components : int, default 100
		Number of NMF components to keep.
	"""
	# import the csv into a pandas df
	df = pd.read_csv('data/gameData.csv')

	# Normalize only the numeric columns actually present in the file.
	numericColumns = ['maxPlayers', 'maxPlaytime', 'minAge', 'minPlayers',
		'minPlaytime', 'playtime']
	colsToNormalize = [col for col in numericColumns if col in df.columns]
	df[colsToNormalize] = df[colsToNormalize].apply(
		lambda x: (x - x.min()) / (x.max() - x.min()) / 2)

	# String-valued columns cannot be factorized; drop them from the matrix.
	colsToDrop = ['artists', 'categories', 'designers', 'families',
		'publishers', 'mechanics', 'boardGameId', 'yearPublished']

	# Keep the id column aside so it can be re-attached after reduction.
	boardGameIds = df['boardGameId']
	# BUG FIX: DataFrame.as_matrix was deprecated in pandas 0.23 and
	# removed in 1.0; select the columns and use to_numpy() instead.
	arr = df[[col for col in df.columns if col not in colsToDrop]].to_numpy()
	arr = np.nan_to_num(arr)

	# Perform NMF with n_components dimensions; W is the reduced matrix.
	model = NMF(n_components=n_components)
	W = model.fit_transform(arr)
	W = np.insert(W, 0, boardGameIds, axis=1)

	np.savetxt("data/reducedGameFeatures.csv", W, delimiter=",")
开发者ID:hutchasaurus1,项目名称:boardGameRecommender,代码行数:29,代码来源:dimensionalityReduction.py

示例15: extract_tfidf_nmf_feats

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import fit_transform [as 别名]
 def extract_tfidf_nmf_feats(self, df_data, n_components):
     """
     Extract tfidf features using nmf.

     Fits a tfidf vectorizer on the 'q', 't' and 'd' text columns
     (dumped to temporary files so one vocabulary covers all three),
     then for each of the columns 'd', 't', 'q', 'b' transforms the
     text and reduces it with NMF to n_components features per column.
     The combined frame is persisted via saveit; nothing is returned.

     Parameters
     ----------
     df_data : pandas.DataFrame
         Must contain text columns 'q', 't', 'd' and 'b'.
     n_components : int
         Number of NMF components per text column.
     """
     df_feat = pd.DataFrame(index=range(df_data.shape[0]))
     tfidf = TfidfVectorizer(ngram_range=(2, 3), stop_words='english')
     # NOTE: removed an unused TruncatedSVD instance present in the
     # original — it was constructed but never fitted or applied.
     nmf = NMF(solver='cd', n_components=n_components, init='nndsvda',
                 random_state=0, tol=1e-3)
     # Dump each text column to a file so tfidf can fit on all three.
     df_data['q'].to_csv('q', index=False)
     df_data['t'].to_csv('t', index=False)
     df_data['d'].to_csv('d', index=False)
     print('fitting in tfidf')
     tfidf.set_params(input='filename')
     tfidf.fit(['q','t','d'])
     tfidf.set_params(input='content')
     for col in ['d', 't', 'q', 'b']:
         print('process column', col)
         txt = df_data[col]
         tfidf_mat = tfidf.transform(txt)
         # NMF is refit per column on that column's tfidf matrix.
         nd_feat = nmf.fit_transform(tfidf_mat)
         tmp = pd.DataFrame(nd_feat, columns=[col+'_tfidf_nmf_comp'+str(i) \
                                     for i in range(n_components)])
         df_feat = pd.merge(df_feat, tmp, left_index=True, right_index=True)
     saveit(df_feat, 'df_tfidf_nmf_feats')
开发者ID:amsqr,项目名称:hd,代码行数:27,代码来源:feature_generator.py


注:本文中的sklearn.decomposition.NMF.fit_transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。