当前位置: 首页>>代码示例>>Python>>正文


Python TruncatedSVD.partial_fit方法代码示例

本文整理汇总了Python中sklearn.decomposition.TruncatedSVD.partial_fit方法的典型用法代码示例。如果您正苦于以下问题:Python TruncatedSVD.partial_fit方法的具体用法?Python TruncatedSVD.partial_fit怎么用?Python TruncatedSVD.partial_fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.decomposition.TruncatedSVD的用法示例。


在下文中一共展示了TruncatedSVD.partial_fit方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: TopicModel

# 需要导入模块: from sklearn.decomposition import TruncatedSVD [as 别名]
# 或者: from sklearn.decomposition.TruncatedSVD import partial_fit [as 别名]

#.........这里部分代码省略.........
                random_state=kwargs.get('random_state', 1),
                learning_method=kwargs.get('learning_method', 'online'),
                learning_offset=kwargs.get('learning_offset', 10.0),
                batch_size=kwargs.get('batch_size', 128),
                n_jobs=kwargs.get('n_jobs', 1))
        elif model == 'lsa':
            self.model = TruncatedSVD(
                n_components=n_topics,
                algorithm=kwargs.get('algorithm', 'randomized'),
                n_iter=kwargs.get('n_iter', 5),
                random_state=kwargs.get('random_state', 1))
        else:
            msg = 'model "{}" invalid; must be {}'.format(
                model, {'nmf', 'lda', 'lsa'})
            raise ValueError(msg)

    def __repr__(self):
        """Return a compact description, e.g. ``TopicModel(n_topics=10, model=NMF)``."""
        # str(estimator) looks like "NMF(alpha=0.1, ...)"; keep only the class name.
        model_name = str(self.model).split('(', 1)[0]
        return 'TopicModel(n_topics={}, model={})'.format(self.n_topics, model_name)

    def save(self, filename):
        """Persist the wrapped sklearn estimator to ``filename`` via joblib."""
        # compress=3 trades a little CPU for a much smaller file on disk
        joblib.dump(self.model, filename, compress=3)
        logger.info('{} model saved to {}'.format(self.model, filename))

    @classmethod
    def load(cls, filename):
        """Load a joblib-saved estimator from ``filename`` and wrap it.

        The topic count is read from ``n_topics`` when the estimator exposes
        it, falling back to sklearn's ``n_components`` attribute otherwise.
        """
        model = joblib.load(filename)
        _missing = object()
        topic_count = getattr(model, 'n_topics', _missing)
        if topic_count is _missing:
            topic_count = model.n_components
        return cls(model, n_topics=topic_count)

    def fit(self, doc_term_matrix):
        """Fit the underlying estimator on a full document-term matrix."""
        self.model.fit(doc_term_matrix)

    def partial_fit(self, doc_term_matrix):
        """Incrementally fit on a mini-batch of documents.

        Raises:
            TypeError: for non-LDA estimators, which lack ``partial_fit``
        """
        # guard clause: only sklearn's LDA supports online/mini-batch updates
        if not isinstance(self.model, LatentDirichletAllocation):
            raise TypeError('only LatentDirichletAllocation models have partial_fit')
        self.model.partial_fit(doc_term_matrix)

    def transform(self, doc_term_matrix):
        """Project a document-term matrix into the fitted topic space."""
        return self.model.transform(doc_term_matrix)

    @property
    def n_topics(self):
        """Topic count, regardless of the estimator's parameter spelling.

        Older sklearn LDA exposes ``n_topics``; other estimators use
        ``n_components``.
        """
        if hasattr(self.model, 'n_topics'):
            return self.model.n_topics
        return self.model.n_components

    def get_doc_topic_matrix(self, doc_term_matrix, normalize=True):
        """
        Transform a document-term matrix into a document-topic matrix, where rows
        correspond to documents and columns to the topics in the topic model.

        Args:
            doc_term_matrix (array-like or sparse matrix): corpus represented as a
                document-term matrix with shape (n_docs, n_terms); NOTE: LDA expects
                tf-weighting, while NMF and LSA may do better with tfidf-weighting!
            normalize (bool): if True, the values in each row are normalized,
                i.e. topic weights on each document sum to 1

        Returns:
            ``numpy.ndarray``: document-topic matrix with shape (n_docs, n_topics)
        """
        doc_topic_matrix = self.transform(doc_term_matrix)
        if normalize is True:
开发者ID:chartbeat-labs,项目名称:textacy,代码行数:70,代码来源:topic_model.py

示例2: TopicModel

# 需要导入模块: from sklearn.decomposition import TruncatedSVD [as 别名]
# 或者: from sklearn.decomposition.TruncatedSVD import partial_fit [as 别名]
class TopicModel(object):
    """
    Args:
        model ({'nmf', 'lda', 'lsa'} or ``sklearn.decomposition.<model>``)
        n_topics (int, optional): number of topics in the model to be initialized
        kwargs:
            variety of parameters used to initialize the model; see individual
            sklearn pages for full details

    Raises:
        ValueError: if ``model`` not in ``{'nmf', 'lda', 'lsa'}`` or is not an
            NMF, LatentDirichletAllocation, or TruncatedSVD instance

    Notes:
        - http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html
        - http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.LatentDirichletAllocation.html
        - http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html
    """
    def __init__(self, model, n_topics=10, **kwargs):
        """Wrap an existing sklearn estimator, or build one from a string name."""
        supported_types = (NMF, LatentDirichletAllocation, TruncatedSVD)
        if not isinstance(model, supported_types):
            # ``model`` is a string key ('nmf'/'lda'/'lsa'); delegate construction
            self.init_model(model, n_topics=n_topics, **kwargs)
        else:
            self.model = model

    def init_model(self, model, n_topics=10, **kwargs):
        """Instantiate the underlying sklearn estimator from its string name.

        Args:
            model (str): one of 'nmf', 'lda', or 'lsa'
            n_topics (int): number of topics/components for the estimator
            kwargs: estimator-specific overrides; anything not supplied falls
                back to the defaults listed per estimator below

        Raises:
            ValueError: if ``model`` is not a recognized name
        """
        if model == 'nmf':
            defaults = (('alpha', 0.1), ('l1_ratio', 0.5), ('max_iter', 200),
                        ('random_state', 1), ('shuffle', False))
            params = {name: kwargs.get(name, value) for name, value in defaults}
            self.model = NMF(n_components=n_topics, **params)
        elif model == 'lda':
            defaults = (('max_iter', 10), ('random_state', 1),
                        ('learning_method', 'online'), ('learning_offset', 10.0),
                        ('batch_size', 128), ('n_jobs', 1))
            params = {name: kwargs.get(name, value) for name, value in defaults}
            # NOTE(review): ``n_topics`` is the pre-0.19 sklearn spelling;
            # newer releases renamed it to ``n_components`` -- confirm version
            self.model = LatentDirichletAllocation(n_topics=n_topics, **params)
        elif model == 'lsa':
            defaults = (('algorithm', 'randomized'), ('n_iter', 5),
                        ('random_state', 1))
            params = {name: kwargs.get(name, value) for name, value in defaults}
            self.model = TruncatedSVD(n_components=n_topics, **params)
        else:
            msg = 'model "{}" invalid; must be {}'.format(
                model, {'nmf', 'lda', 'lsa'})
            raise ValueError(msg)

    def save(self, filename):
        """Persist the wrapped sklearn estimator to ``filename`` via joblib."""
        # compress=3 trades a little CPU for a much smaller file on disk
        joblib.dump(self.model, filename, compress=3)
        logger.info('{} model saved to {}'.format(self.model, filename))

    @classmethod
    def load(cls, filename):
        """Load a joblib-saved estimator from ``filename`` and wrap it.

        The topic count is read from ``n_topics`` when the estimator exposes
        it, falling back to sklearn's ``n_components`` attribute otherwise.
        """
        model = joblib.load(filename)
        _missing = object()
        topic_count = getattr(model, 'n_topics', _missing)
        if topic_count is _missing:
            topic_count = model.n_components
        return cls(model, n_topics=topic_count)

    def fit(self, doc_term_matrix):
        self.model.fit(doc_term_matrix)

    def partial_fit(self, doc_term_matrix):
        """Incrementally fit on a mini-batch of documents.

        Raises:
            TypeError: for non-LDA estimators, which lack ``partial_fit``
        """
        # guard clause: only sklearn's LDA supports online/mini-batch updates
        if not isinstance(self.model, LatentDirichletAllocation):
            raise TypeError('only LatentDirichletAllocation models have partial_fit')
        self.model.partial_fit(doc_term_matrix)

    def transform(self, doc_term_matrix):
        return self.model.transform(doc_term_matrix)

    @property
    def n_topics(self):
        try:
            return self.model.n_topics
        except AttributeError:
            return self.model.n_components

    def get_doc_topic_matrix(self, doc_term_matrix, normalize=True):
        """
        Transform a document-term matrix into a document-topic matrix, where rows
        correspond to documents and columns to the topics in the topic model.

        Args:
            doc_term_matrix (array-like or sparse matrix): corpus represented as a
                document-term matrix with shape (n_docs, n_terms); NOTE: LDA expects
                tf-weighting, while NMF and LSA may do better with tfidf-weighting!
            normalize (bool, optional): if True, the values in each row are normalized,
                i.e. topic weights on each document sum to 1

        Returns:
            ``numpy.ndarray``: document-topic matrix with shape (n_docs, n_topics)
        """
        doc_topic_matrix = self.transform(doc_term_matrix)
        if normalize is True:
            return doc_topic_matrix / np.sum(doc_topic_matrix, axis=1, keepdims=True)
#.........这里部分代码省略.........
开发者ID:markcassar,项目名称:textacy,代码行数:103,代码来源:topic_model.py

示例3: TopicModel

# 需要导入模块: from sklearn.decomposition import TruncatedSVD [as 别名]
# 或者: from sklearn.decomposition.TruncatedSVD import partial_fit [as 别名]
class TopicModel(object):
    """
    Args:
        model ({'nmf', 'lda', 'lsa'} or ``sklearn.decomposition.<model>``)
        n_topics (int, optional): number of topics in the model to be initialized
        kwargs:
            variety of parameters used to initialize the model; see individual
            sklearn pages for full details

    Raises:
        ValueError: if ``model`` not in ``{'nmf', 'lda', 'lsa'}`` or is not an
            NMF, LatentDirichletAllocation, or TruncatedSVD instance

    Notes:
        - http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html
        - http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.LatentDirichletAllocation.html
        - http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html
    """
    def __init__(self, model, n_topics=10, **kwargs):
        """Wrap an existing sklearn estimator, or build one from a string name."""
        supported_types = (NMF, LatentDirichletAllocation, TruncatedSVD)
        if not isinstance(model, supported_types):
            # ``model`` is a string key ('nmf'/'lda'/'lsa'); delegate construction
            self.init_model(model, n_topics=n_topics, **kwargs)
        else:
            self.model = model

    def init_model(self, model, n_topics=10, **kwargs):
        """Instantiate the underlying sklearn estimator from its string name.

        Args:
            model (str): one of 'nmf', 'lda', or 'lsa'
            n_topics (int): number of topics/components for the estimator
            kwargs: estimator-specific overrides; anything not supplied falls
                back to the defaults listed per estimator below

        Raises:
            ValueError: if ``model`` is not a recognized name
        """
        if model == 'nmf':
            defaults = (('alpha', 0.1), ('l1_ratio', 0.5), ('max_iter', 200),
                        ('random_state', 1), ('shuffle', False))
            params = {name: kwargs.get(name, value) for name, value in defaults}
            self.model = NMF(n_components=n_topics, **params)
        elif model == 'lda':
            defaults = (('max_iter', 10), ('random_state', 1),
                        ('learning_method', 'online'), ('learning_offset', 10.0),
                        ('batch_size', 128), ('n_jobs', 1))
            params = {name: kwargs.get(name, value) for name, value in defaults}
            # NOTE(review): ``n_topics`` is the pre-0.19 sklearn spelling;
            # newer releases renamed it to ``n_components`` -- confirm version
            self.model = LatentDirichletAllocation(n_topics=n_topics, **params)
        elif model == 'lsa':
            defaults = (('algorithm', 'randomized'), ('n_iter', 5),
                        ('random_state', 1))
            params = {name: kwargs.get(name, value) for name, value in defaults}
            self.model = TruncatedSVD(n_components=n_topics, **params)
        else:
            msg = 'model "{}" invalid; must be {}'.format(
                model, {'nmf', 'lda', 'lsa'})
            raise ValueError(msg)

    def save(self, filename):
        """Persist the wrapped sklearn estimator to ``filename`` via joblib."""
        # compress=3 trades a little CPU for a much smaller file on disk
        joblib.dump(self.model, filename, compress=3)
        logger.info('{} model saved to {}'.format(self.model, filename))

    @classmethod
    def load(cls, filename):
        """Load a joblib-saved estimator from ``filename`` and wrap it.

        The topic count is read from ``n_topics`` when the estimator exposes
        it, falling back to sklearn's ``n_components`` attribute otherwise.
        """
        model = joblib.load(filename)
        _missing = object()
        topic_count = getattr(model, 'n_topics', _missing)
        if topic_count is _missing:
            topic_count = model.n_components
        return cls(model, n_topics=topic_count)

    def fit(self, doc_term_matrix):
        self.model.fit(doc_term_matrix)

    def partial_fit(self, doc_term_matrix):
        """Incrementally fit on a mini-batch of documents.

        Raises:
            TypeError: for non-LDA estimators, which lack ``partial_fit``
        """
        # guard clause: only sklearn's LDA supports online/mini-batch updates
        if not isinstance(self.model, LatentDirichletAllocation):
            raise TypeError('only LatentDirichletAllocation models have partial_fit')
        self.model.partial_fit(doc_term_matrix)

    def transform(self, doc_term_matrix):
        return self.model.transform(doc_term_matrix)

    @property
    def n_topics(self):
        try:
            return self.model.n_topics
        except AttributeError:
            return self.model.n_components

    def get_doc_topic_matrix(self, doc_term_matrix, normalize=True):
        """
        Transform a document-term matrix into a document-topic matrix, where rows
        correspond to documents and columns to the topics in the topic model.

        Args:
            doc_term_matrix (array-like or sparse matrix): corpus represented as a
                document-term matrix with shape (n_docs, n_terms); NOTE: LDA expects
                tf-weighting, while NMF and LSA may do better with tfidf-weighting!
            normalize (bool, optional): if True, the values in each row are normalized,
                i.e. topic weights on each document sum to 1

        Returns:
            ``numpy.ndarray``: document-topic matrix with shape (n_docs, n_topics)
        """
        doc_topic_matrix = self.transform(doc_term_matrix)
        if normalize is True:
            return doc_topic_matrix / np.sum(doc_topic_matrix, axis=1, keepdims=True)
#.........这里部分代码省略.........
开发者ID:EricSchles,项目名称:textacy,代码行数:103,代码来源:topic_model.py


注:本文中的sklearn.decomposition.TruncatedSVD.partial_fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。