当前位置: 首页>>代码示例>>Python>>正文


Python NMF.predict方法代码示例

本文整理汇总了Python中sklearn.decomposition.NMF.predict方法的典型用法代码示例。如果您正苦于以下问题:Python NMF.predict方法的具体用法?Python NMF.predict怎么用?Python NMF.predict使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.decomposition.NMF的用法示例。


在下文中一共展示了NMF.predict方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Topicmodel

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import predict [as 别名]
class Topicmodel():
    '''
    Wrapper class for different topic models.

    Supported values of `modeltype`:
        'kmeans'      KMeans clustering on the tf-idf BoW features
        'kpcakmeans'  RBF KernelPCA followed by KMeans
        'nmf'         NMF; a document's topic is its strongest component

    '''
    def __init__(self,folder='model',modeltype='kmeans',topics=100,topwords=10):
        '''
        loads the pickled BoW transformer and instantiates the topic model

        INPUT
        folder      directory holding BoW_transformer.pickle; the topic
                    statistics written by fit() are stored here as well
        modeltype   one of 'kmeans', 'kpcakmeans', 'nmf'
        topics      number of clusters / components
        topwords    number of top words kept per topic

        RAISES
        ValueError  if modeltype is not one of the supported names
        '''
        # the classifier, which also contains the trained BoW transformer
        # NOTE(security): unpickling executes arbitrary code; only point
        # `folder` at model directories you trust.
        with open(folder + '/BoW_transformer.pickle', 'rb') as fh:
            self.bow = cPickle.load(fh)
        self.folder = folder
        self.modeltype = modeltype
        self.topics = topics
        self.topwords = topwords
        # compare strings by value: `is` only works by accident for
        # interned literals and fails e.g. for names read from argv
        if modeltype == 'kmeans':
            from sklearn.cluster import KMeans
            self.model = KMeans(n_clusters=topics,n_init=50)
        elif modeltype == 'kpcakmeans':
            from sklearn.cluster import KMeans
            from sklearn.decomposition import KernelPCA
            self.model = {'kpca':KernelPCA(kernel='rbf',gamma=.1),
                'kmeans':KMeans(n_clusters=topics,n_init=50)}
        elif modeltype == 'nmf':
            from sklearn.decomposition import NMF
            self.model = NMF(n_components=topics)
        else:
            raise ValueError('Unknown modeltype %r' % (modeltype,))

    def fit(self,X):
        '''
        fits a topic model, prints the top words of every topic and writes
        the topic statistics to a time-stamped JSON file in self.folder

        INPUT
        X   list of strings

        RAISES
        ValueError  if self.modeltype is not one of the supported names
        '''

        # transform list of strings into sparse BoW matrix
        counts = self.bow['count_vectorizer'].fit_transform(X)
        X = self.bow['tfidf_transformer'].fit_transform(counts)

        # transform word to BoW index into reverse lookup table
        vocabulary = self.bow['count_vectorizer'].vocabulary_
        self.idx2word = {idx: word for word, idx in vocabulary.items()}

        # depending on the model, train
        if self.modeltype == 'kmeans':
            Xc = self.model.fit_predict(X)
        elif self.modeltype == 'kpcakmeans':
            Xc = self.model['kpca'].fit_transform(X)
            Xc = self.model['kmeans'].fit_predict(Xc)
        elif self.modeltype == 'nmf':
            # fit_transform yields one row per document; the topic of a
            # document is the column with the largest weight, so the
            # argmax has to run along axis 1 (one label per document)
            Xc = self.model.fit_transform(X).argmax(axis=1)
        else:
            raise ValueError('Unknown modeltype %r' % (self.modeltype,))

        # for each cluster/topic compute covariance of word with cluster label
        # this measure is indicative of the importance of the word for the topic
        self.topicstats = []
        for cluster in range(self.topics):
            # this is a binary vector, true if a data point was in this cluster
            y = (Xc == cluster).astype(float)
            # this is the covariance of the data with the cluster label
            Xcov = X.T.dot(y)
            # find the most strongly covarying (with the cluster label) words,
            # strongest first
            top_idx = Xcov.argsort()[-self.topwords:][::-1]
            ranked = [(self.idx2word[idx], Xcov[idx]) for idx in top_idx]
            self.topicstats.append({'assignments':y.sum(),'clusterid':cluster,
                'words': dict(ranked)})

            # print from the ranked list so the words appear by importance
            print('Topic %d: %3d Assignments ' % (cluster, y.sum())
                + 'Topwords: ' + ' '.join(word for word, _ in ranked[:10]))

        datestr = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        fn = self.folder+'/topicmodel-%s-'%self.modeltype +datestr+'.json'
        print("Saving model stats to " + fn)
        # json.dumps returns text, so write in text mode and close the file
        with open(fn, 'w') as fh:
            fh.write(json.dumps(self.topicstats))

    def predict(self,X):
        '''
        predicts cluster assignment from list of strings

        INPUT
        X   list of strings (a single string is treated as one document)

        OUTPUT
        array with one cluster/topic index per document

        RAISES
        ValueError  if self.modeltype is not one of the supported names
        '''
        # wrap a single document only; a list must be passed through as is
        if not isinstance(X, (list, tuple)):
            X = [X]
        X = self.bow['tfidf_transformer'].transform(
            self.bow['count_vectorizer'].transform(X))
        if self.modeltype == 'kmeans':
            return self.model.predict(X)
        if self.modeltype == 'kpcakmeans':
            return self.model['kmeans'].predict(self.model['kpca'].transform(X))
        if self.modeltype == 'nmf':
            # one topic per document: argmax over the component axis
            return self.model.transform(X).argmax(axis=1)
        raise ValueError('Unknown modeltype %r' % (self.modeltype,))
开发者ID:christinakraus,项目名称:political-affiliation-prediction,代码行数:90,代码来源:topicmodel.py

示例2: rows

# 需要导入模块: from sklearn.decomposition import NMF [as 别名]
# 或者: from sklearn.decomposition.NMF import predict [as 别名]

#.........这里部分代码省略.........
    >>> explo.fit() # fitting!
    >>> W = explo.model.transform(explo.X_train_sc) # getting the mixture array
    >>> H = explo.X_scaler.inverse_transform(explo.model.components_) # components in the original space
    >>> plt.plot(X,H.T) # plot the two components

    """

    def __init__(self,x,**kwargs):
        """
        Parameters
        ----------
        x : array{Float64}
            the spectra organised in rows (1 row = one spectrum). The spectra should share a common X axis.
        X_test : 2-D array, optional
            testing dataset with the same number of columns as `x`. When it
            is omitted (or the legacy placeholder ``[0.0]`` is given), `x`
            is split into training and testing subsets.
        algorithm : str, optional
            name of the algorithm used by ``fit``. Default "PCA".
        test_size : float, optional
            fraction of `x` put aside for testing when `x` is split. Default 0.3.
        scaling : bool, optional
            stored on the instance; default True.
        scaler : str, optional
            "MinMaxScaler" (default) or "StandardScaler".
        rand_state : int, optional
            random state passed to the train/test split. Default 42.
        n_components : int, optional
            number of components, stored as ``nb_compo``. Default 2.

        Raises
        ------
        ValueError
            if `X_test` is not a 2-D array with as many columns as `x`, or
            if `scaler` is not one of the two supported names.

        """
        self.x = x
        #
        # Kwargs extractions
        #
        x_test = kwargs.get("X_test")
        self.algorithm = kwargs.get("algorithm","PCA")
        self.test_size = kwargs.get("test_size",0.3)
        self.scaling = kwargs.get("scaling",True)
        self.scaler = kwargs.get("scaler","MinMaxScaler")
        self.rand_state = kwargs.get("rand_state",42)
        self.nb_compo = kwargs.get("n_components",2)

        # "no test set" is either a missing keyword or the historical 1-D
        # placeholder [0.0]; a real one-row 2-D test set must not be
        # mistaken for the placeholder and silently replaced by a split.
        no_test_set = x_test is None or (np.ndim(x_test) == 1 and len(x_test) == 1)
        if no_test_set:
            self.X_train, self.X_test = sklearn.model_selection.train_test_split(
                self.x, test_size=self.test_size, random_state=self.rand_state)
        elif np.ndim(x_test) == 2 and np.shape(x_test)[1] == np.shape(self.x)[1]:
            self.X_test = x_test
            self.X_train = np.copy(self.x)
        else:
            # the exception has to be raised, not merely instantiated
            raise ValueError("You tried to provide a testing dataset that has a different number of features (in columns) than the training set. Please correct this.")

        # initialising the preprocessor scaler
        if self.scaler == "StandardScaler":
            self.X_scaler = sklearn.preprocessing.StandardScaler()
        elif self.scaler == "MinMaxScaler":
            self.X_scaler = sklearn.preprocessing.MinMaxScaler()
        else:
            raise ValueError("Choose the scaler between MinMaxScaler and StandardScaler")

        # fitting scaler
        self.X_scaler.fit(self.X_train)

        # scaling the data in all cases, it may not be used during the fit later
        self.X_train_sc = self.X_scaler.transform(self.X_train)
        self.X_test_sc = self.X_scaler.transform(self.X_test)

    def fit(self):
        """Instantiate the model named by ``self.algorithm`` and train it.

        The model is created with ``self.nb_compo`` components and fitted on
        the scaled training data when ``self.scaling`` equals True, on the
        raw training data otherwise.

        Do not forget to tune the hyperparameters.

        Raises
        ------
        ValueError
            if ``self.algorithm`` is neither "PCA" nor "NMF".

        """
        if self.algorithm == "PCA":
            self.model = PCA(n_components=self.nb_compo)
        elif self.algorithm == "NMF":
            self.model = NMF(n_components=self.nb_compo,init = "nndsvd")
        else:
            # without this guard an unknown name either fails later with an
            # AttributeError or silently re-trains a previously built model
            raise ValueError(
                "Unknown algorithm %r: choose between PCA and NMF" % (self.algorithm,))

        # `== True` is kept on purpose: only a real True (or 1) enables
        # scaling, any other value selects the unscaled data
        if self.scaling == True:
            self.model.fit(self.X_train_sc)
        else:
            self.model.fit(self.X_train)

    def refit(self):
        """Fit the existing ``self.model`` again on the training data.

        No new model is created here: the object already stored in
        ``self.model`` is re-trained, on ``self.X_train_sc`` when
        ``self.scaling`` equals True and on ``self.X_train`` otherwise.

        """
        use_scaled = self.scaling == True
        training_data = self.X_train_sc if use_scaled else self.X_train
        self.model.fit(training_data)

    def predict(self,X):
        """Apply the trained model to new samples.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array
            Output of ``self.model.predict`` when the model has such a
            method, otherwise output of ``self.model.transform``.

        Remark
        ------
        if self.scaling == True, X is scaled with ``self.X_scaler`` before it
        is given to the model. Predictions are mapped back with
        ``self.Y_scaler`` only when the instance actually has one.
        """
        # use the samples passed in; the class stores its data as `self.x`,
        # there is no `self.X` attribute to fall back on
        if self.scaling == True:
            data = self.X_scaler.transform(X)
        else:
            data = X

        predictor = getattr(self.model, "predict", None)
        if predictor is None:
            # decomposition models such as PCA and NMF have no predict();
            # projecting the samples with transform() is their equivalent
            return self.model.transform(data)

        pred = predictor(data)
        # the constructor never creates a Y_scaler, so only undo the target
        # scaling when one has been attached to the instance
        y_scaler = getattr(self, "Y_scaler", None)
        if self.scaling == True and y_scaler is not None:
            return y_scaler.inverse_transform(pred.reshape(-1,1))
        return pred
开发者ID:charlesll,项目名称:rampy,代码行数:104,代码来源:ml_exploration.py


注:本文中的sklearn.decomposition.NMF.predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。