当前位置: 首页>>代码示例>>Python>>正文


Python decomposition.NMF类代码示例

本文整理汇总了Python中sklearn.decomposition.NMF的典型用法代码示例。如果您正苦于以下问题：Python NMF类的具体用法？Python NMF怎么用？Python NMF使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了NMF类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: reduceDimensionality

def reduceDimensionality(n_components=100):
    """Reduce the board-game feature table with NMF and save the result.

    Reads ``data/gameData.csv``, rescales the known numeric columns, leaves
    out the string/identifier columns, factorizes the remaining matrix with
    NMF and writes the reduced features (board game id in the first column)
    to ``data/reducedGameFeatures.csv``.

    Parameters
    ----------
    n_components : int
        Number of NMF components to keep.
    """
    # import the csv into a pandas df
    df = pd.read_csv('data/gameData.csv')

    # Min-max scale the numeric columns that are present and halve the
    # result, so the values land in [0, 0.5] (not [0, 1]).
    numericColumns = ['maxPlayers','maxPlaytime','minAge','minPlayers','minPlaytime','playtime']
    colsToNormalize = [col for col in numericColumns if col in df.columns]

    df[colsToNormalize] = df[colsToNormalize].apply(lambda x: (x - x.min())/(x.max() - x.min())/2)

    # String columns and identifiers are excluded from the NMF input
    colsToDrop = ['artists','categories','designers','families','publishers','mechanics','boardGameId','yearPublished']

    # Convert df to an array for NMF and store the board game id column to attach later
    boardGameIds = df['boardGameId']
    featureCols = [col for col in df.columns if col not in colsToDrop]
    # DataFrame.as_matrix() was removed in pandas 1.0; selecting the columns
    # and taking .values works on both old and new pandas versions.
    arr = df[featureCols].values
    # NaNs (missing data, or 0/0 from constant columns above) become 0
    arr = np.nan_to_num(arr)

    # Perform NMF with n_components dimensions
    model = NMF(n_components=n_components)
    W = model.fit_transform(arr)
    W = np.insert(W, 0, boardGameIds, axis=1)

    np.savetxt("data/reducedGameFeatures.csv", W, delimiter=",")
开发者ID:hutchasaurus1,项目名称:boardGameRecommender,代码行数:27,代码来源:dimensionalityReduction.py

示例2: nmf_model2

def nmf_model2(n_topics,document_term_mat):
    """Factorize a document-term matrix with NMF.

    Returns the pair ``(W, H)``: the transformed input returned by
    ``fit_transform`` and the fitted model's ``components_``.
    """
    factorizer = NMF(l1_ratio=0.0, n_components=n_topics)
    doc_topic_weights = factorizer.fit_transform(document_term_mat)
    return doc_topic_weights, factorizer.components_
开发者ID:maryamre,项目名称:final_project,代码行数:7,代码来源:main0.py

示例3: extractTemplate

def extractTemplate(y, w=d_w, h=d_h, n_components=nc):
    """Fit NMF on the magnitude STFT of ``y`` and return its components.

    ``w`` is passed to librosa as ``n_fft`` and ``h`` as ``hop_length``.
    The returned array is the transpose of the fitted ``components_``.
    """
    factorizer = NMF(n_components=n_components, max_iter=max_iter, beta=beta)
    spectrogram = librosa.core.stft(y, n_fft=w, hop_length=h)
    magnitude = np.abs(spectrogram)
    # The activations returned by fit_transform are not needed here.
    factorizer.fit_transform(magnitude.T)
    return factorizer.components_.T
开发者ID:LemonATsu,项目名称:VIOCLA-source-separation,代码行数:7,代码来源:NMF.py

示例4: do_NMF

def do_NMF(sparse_matrix):
    """Run a 3-component NMF on ``sparse_matrix`` and report the elapsed time.

    Returns the array produced by ``NMF.fit_transform``.
    """
    started = time.time()
    print("* Performing NMF on sparse matrix ... ")
    coordinates = NMF(n_components=3).fit_transform(sparse_matrix)
    elapsed = time.time() - started
    print("done in %0.3fs." % elapsed)
    return coordinates
开发者ID:hclent,项目名称:BioNLP-literature-tool,代码行数:7,代码来源:kmeans1.py

示例5: __Factorize_NMF

	def __Factorize_NMF(self,K):
		"""Factorize ``self._mat`` into K-dimensional user and item factors.

		Returns ``(user_fmat, item_fmat)``: the output of ``fit_transform`` on
		``self._mat`` and the transpose of the fitted ``components_``.
		"""
		model = NMF(n_components=K,max_iter=self._iteration)
		# fit_transform() fits the model itself; a separate fit() call before
		# it would only run the whole factorization a second time.
		user_fmat = model.fit_transform(self._mat)
		item_fmat = model.components_.T

		return user_fmat,item_fmat
开发者ID:BinXia,项目名称:KeepLearning,代码行数:7,代码来源:MatrixFactorization.py

示例6: applyNMF

 def applyNMF(self, number_of_clusters, country_specific_tweets):
     """Fit an NMF topic model on the tweets and return its clustering.

     Returns ``(name, parameters, top10, labels)`` where ``name`` is "nmf",
     ``parameters`` are the fitted model's ``get_params()``, and ``top10`` /
     ``labels`` come from ``self.printTopicCluster``. Progress and a
     silhouette score are printed only when ``self.results`` is truthy.
     """
     name = "nmf"
     train, feature_names = self.extractFeatures(country_specific_tweets,False)

     # Fit the NMF model
     if self.results:
         print("Fitting the NMF model", end=" - ")

     started = time()
     model = NMF(n_components=number_of_clusters, random_state=1, alpha=.1, l1_ratio=.5)
     model = model.fit(train)

     if self.results:
         print("done in %0.3fs." % (time() - started))
         print("\nNMF:")

     parameters = model.get_params()
     if self.results:
         print("Parameter: " + str(parameters))

     doc_topic = model.transform(train)
     top10, raw_labels = self.printTopicCluster(model.components_, doc_topic, feature_names)
     labels = numpy.asarray(raw_labels)

     if self.results:
         score = metrics.silhouette_score(train, labels)
         print("Silhouette Coefficient {0}: {1}".format(name, score))

     return name, parameters, top10, labels
开发者ID:michaelprummer,项目名称:datascience,代码行数:30,代码来源:clustering.py

示例7: nmf_df

def nmf_df(sym, k, coll):
    """Build per-day mean NMF latent features for documents matching ``sym``.

    Parameters
    ----------
    sym : str
        Used as a regular expression to select documents by their 'text'
        field, and stored in the 'sym' column of the result.
    k : int
        Number of NMF components (latent features).
    coll : collection object
        Must provide ``find(query)`` yielding documents with 'text' and
        'created_at' keys (presumably a MongoDB collection -- confirm).

    Returns
    -------
    pandas.DataFrame
        Indexed by normalized date, with columns 'lat0'..'lat{k-1}' holding
        the mean latent features for that day, plus a constant 'sym' column.
    """
    data = list(coll.find({'text': {'$in': [re.compile(sym)]}}))
    sents = [item['text'] for item in data]
    dates = [str(item['created_at']) for item in data]

    vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
    X = vectorizer.fit_transform(sents)

    model = NMF(n_components=k, init='random', random_state=0)
    latent_features = model.fit_transform(X)

    df = pd.DataFrame(latent_features)
    # range() instead of the Python-2-only xrange()
    df.columns = ['lat' + str(n) for n in range(len(df.columns))]

    # pd.datetools.normalize_date no longer exists in pandas; .dt.normalize()
    # performs the same truncation of each timestamp to midnight.
    timestamps = pd.to_datetime(pd.Series(dates, index=df.index))
    df['date'] = timestamps.dt.normalize()

    grouped_data = df.groupby(['date']).mean()
    grouped_data['sym'] = sym

    return grouped_data
开发者ID:gravity226,项目名称:NASDAQ,代码行数:31,代码来源:clustering.py

示例8: tfidf_nmf

def tfidf_nmf(release_texts, n_components=10, max_features=None):
    '''
        Fit a tfidf vectorizer and an NMF model on the given texts.

        INPUT:
        - release_texts: the documents handed to the tfidf vectorizer
        - n_components: number of latent features for the NMF model to find
        - max_features: max number of features (vocabulary size) for the tfidf model to consider

        OUTPUT:
        - tfidf_vectorizer: fitted tfidf model object
        - tfidf_sparse: tfidf matrix after row-wise l1 normalization
        - nmf: fitted NMF model object
        - W: result of nmf.transform on the normalized tfidf matrix
    '''
    # tfidf model, using the project's custom stop word list
    vectorizer_options = dict(
        max_df=0.95,
        min_df=2,
        stop_words=make_stop_words(),
        max_features=max_features,
    )
    tfidf_vectorizer = TfidfVectorizer(**vectorizer_options)
    raw_tfidf = tfidf_vectorizer.fit_transform(release_texts)

    # l1-normalize each row
    tfidf_sparse = normalize(raw_tfidf, axis=1, norm='l1')

    # nmf model: fit first, then project the same matrix
    nmf = NMF(n_components=n_components, random_state=1)
    W = nmf.fit(tfidf_sparse).transform(tfidf_sparse)
    return tfidf_vectorizer, tfidf_sparse, nmf, W
开发者ID:NashC,项目名称:startup_press_release_analyzer,代码行数:27,代码来源:model.py

示例9: hog2hognmf

def hog2hognmf(hog_feature):
    """Transform HOG feature into HOG-NMF feature.

    The 3780-element HOG vector is laid out column by column in a
    zero-padded 500 x 8 matrix, factorized with a 2-component NMF, and the
    normalized factors are flattened into one vector.

    Parameters
    ----------
    hog_feature: np.ndarray
      HOG feature (the slicing below expects 3780 elements).

    Returns
    -------
    np.ndarray
      The 8 columns of H (each scaled to sum to one) followed by the 500
      rows of W (each scaled to sum to one); all-zero columns/rows stay zero.
    """
    mat = np.zeros((500, 8), dtype=np.float32)
    NMFmodel = NMF(n_components=2, init="random", random_state=0)
    # Transform 3780 into 500 * 8 (the last column is only partly filled)
    for i in range(7):
        mat[:, i] = hog_feature[i * 500 : (i + 1) * 500]
    mat[:280, 7] = hog_feature[3500:]
    W = NMFmodel.fit_transform(mat)
    H = NMFmodel.components_

    # Normalize in one vectorized step instead of growing the result with
    # np.append inside a loop (which re-copies the whole array every time).
    col_sums = H.sum(axis=0, keepdims=True)
    H_norm = np.divide(H, col_sums, out=np.zeros_like(H), where=col_sums != 0)
    row_sums = W.sum(axis=1, keepdims=True)
    W_norm = np.divide(W, row_sums, out=np.zeros_like(W), where=row_sums != 0)

    # H is emitted column by column, W row by row.
    return np.concatenate((H_norm.T.ravel(), W_norm.ravel()))
开发者ID:eriche2016,项目名称:yatpd,代码行数:32,代码来源:hog2hognmf.py

示例10: nmf

    def nmf(self, **kwargs):
        """Return a new Space holding the NMF-reduced matrix.

        All keyword arguments are forwarded to the NMF constructor.
        """
        reduced_matrix = NMF(**kwargs).fit_transform(self.matrix)
        # TODO: it is incorrect to pass self.column_labels! There are not column labels.
        return Space(
            reduced_matrix,
            self.row_labels,
            self.column_labels,
        )
开发者ID:masteradamo,项目名称:fowler.corpora,代码行数:7,代码来源:models.py

示例11: get_topics_nmf

def get_topics_nmf(urls, num_topics):
    '''Extract latent topics from a corpus of documents via NMF.

    Input: urls, an iterable of document URLs (each is passed to get_text),
    and num_topics, the number of NMF components to fit.
    Output: the tuple (M, topics, text, title_list, urls), where M is the
    tfidf matrix, topics is a list with one space-joined string of the ten
    highest-weighted (lemmatized) terms per topic, and text is the cleaned
    corpus text.
    '''
    article_info = [get_text(url) for url in urls]

    text = []
    for thing in article_info:
        text.extend(thing[0])
    text = clean_pdf_text(text)

    tfidf_math = TfidfVectorizer(max_features=100, stop_words=math_stop(),
                                 ngram_range=(1, 1), decode_error='ignore')
    M = tfidf_math.fit_transform(text)

    # Build the lemmatizer once instead of once per vocabulary word.
    lemmatizer = WordNetLemmatizer()
    feature_names = [lemmatizer.lemmatize(word)
                     for word in tfidf_math.get_feature_names()]

    nmf = NMF(n_components=num_topics)
    nmf.fit(M)

    # argsort()[:-11:-1] yields the indices of the 10 largest weights,
    # largest first.
    topics = [" ".join(feature_names[i] for i in topic.argsort()[:-10 - 1:-1])
              for topic in nmf.components_]

    # NOTE(review): title_list is not defined in this function; presumably it
    # is a module-level name -- confirm, otherwise this raises NameError.
    return M, topics, text, title_list, urls
开发者ID:ColinFerguson,项目名称:Project,代码行数:28,代码来源:math_scraping_and_recommending_functions.py

示例12: get_LDA

def get_LDA(X, num_components=10, show_topics=True):
    ''' Topic decomposition of a song-tag matrix by NMF.
    21 Nov 2015, Keunwoo Choi

    Despite the name (a nod to Latent Dirichlet Allocation), the
    factorization is done with NMF. The motivation is same as get_LSI.
    With NMF, it is easier to explain what each topic represents - by
    inspecting the 'H' matrix, where X ~= X' = W*H as a result of NMF.
    It is also good to have non-negative elements, straight-forward for
    both W and H.

    Parameters:
        X: matrix to factorize.
        num_components: number of NMF components (topics).
        show_topics: when true, print the top 10 tags of every topic.

    Side effects: pickles the fitted NMF object and the per-topic tag
    rankings under PATH_DATA.

    Returns W with every non-zero row scaled by its own maximum, then
    divided by the overall maximum, so all values lie in [0, 1].
    '''

    from sklearn.decomposition import NMF

    nmf = NMF(init='nndsvd', n_components=num_components, max_iter=400) # 400 is too large, but it doesn't hurt.
    W = nmf.fit_transform(X)
    H = nmf.components_
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('=' * 60)
    print("NMF done with k=%d, average error:%2.4f" % (num_components, nmf.reconstruction_err_/(X.shape[0]*X.shape[1])))

    # Context managers make sure the pickle files get closed (and flushed).
    with open(PATH_DATA + FILE_DICT['sorted_tags'], 'r') as f_tags:
        moodnames = cP.load(f_tags) #list, 100

    term_rankings = []
    for topic_index in range(H.shape[0]):
        # indices of the 10 largest weights of this topic, largest first
        top_indices = np.argsort(H[topic_index, :])[::-1][0:10]
        term_ranking = [moodnames[i] for i in top_indices]
        term_rankings.append(term_ranking)
        if show_topics:
            print("Topic %d: %s" % (topic_index, ", ".join(term_ranking)))
    print('=' * 60)

    with open(PATH_DATA + 'NMF_object.cP', 'w') as f_model:
        cP.dump(nmf, f_model)
    with open(PATH_DATA + ('topics_strings_%d_components.cP' % num_components), 'w') as f_topics:
        cP.dump(term_rankings, f_topics)

    # Scale each row by its own maximum; rows whose maximum is 0 are left as is.
    row_max = W.max(axis=1, keepdims=True)
    np.divide(W, row_max, out=W, where=row_max != 0)

    overall_max = np.max(W)
    if overall_max == 0:
        # All-zero W: dividing would produce NaNs, so return it unchanged.
        return W
    return W / overall_max # return normalised matrix, [0, 1]
开发者ID:keunwoochoi,项目名称:magnatagatune,代码行数:35,代码来源:main_prepare.py

示例13: test_nmf_fit_close

def test_nmf_fit_close(solver):
    """Check that the reconstruction error of a fitted NMF stays below 0.1."""
    random_gen = np.random.mtrand.RandomState(42)
    # Test that the fit is not too far away
    estimator = NMF(
        5,
        solver=solver,
        init='nndsvdar',
        random_state=0,
        max_iter=600,
    )
    data = np.abs(random_gen.randn(6, 5))
    fitted = estimator.fit(data)
    assert_less(fitted.reconstruction_err_, 0.1)
开发者ID:kjacks21,项目名称:scikit-learn,代码行数:7,代码来源:test_nmf.py

示例14: fit_nmf

def fit_nmf(tfidf):
    '''Fit an NMF model on a tfidf matrix and collect its top words.

    Relies on the module-level n_topics, n_top_words and tfidf_vectorizer.
    Returns the fitted model and the result of print_top_words.
    '''
    model = NMF(n_components=n_topics, random_state=1, alpha=.1, l1_ratio=.5)
    model.fit(tfidf)
    vocabulary = tfidf_vectorizer.get_feature_names()
    topic_words = print_top_words(model, vocabulary, n_top_words)
    return model, topic_words
开发者ID:scsherm,项目名称:Congress_work,代码行数:7,代码来源:topic_modeling2.py

示例15: produceEncoding

def produceEncoding( trainX, nComponents ):
    '''Fit an NMF model on the training data matrix
    and return the fitted model'''
    nmf_options = dict(
        n_components=nComponents,
        solver='cd',
        tol=1e-4,
        max_iter=200,
        alpha=0.0,
    )
    encoder = NMF(**nmf_options)
    encoder.fit(trainX)
    return encoder
开发者ID:potachen,项目名称:COS424_Project,代码行数:7,代码来源:nmf.py


注:本文中的sklearn.decomposition.NMF类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。