

Python LatentDirichletAllocation.tolist Method Code Example

This article collects a typical usage example of the Python pattern sklearn.decomposition.LatentDirichletAllocation with .tolist. Strictly speaking, tolist is not a method of the LatentDirichletAllocation estimator itself: fit_transform returns the document-topic matrix as a NumPy ndarray, and tolist() is called on that array to convert it into plain Python lists. If you are wondering how this pattern is used in practice, the curated example below may help; you can also explore other usage examples of sklearn.decomposition.LatentDirichletAllocation.


The following shows 1 code example of the LatentDirichletAllocation.tolist pattern, drawn from an open-source project.
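Before the full example, here is a minimal, self-contained sketch of the pattern on hypothetical stand-in data (a small random term-count matrix; the variable names are illustrative only, not from the project below):

import numpy as np
from sklearn.decomposition import LatentDirichletAllocation

# Hypothetical stand-in data: 10 documents over a 20-term vocabulary.
X = np.random.RandomState(0).randint(0, 5, size=(10, 20))

lda = LatentDirichletAllocation(n_components=3, max_iter=10, random_state=0)
doc_topic = lda.fit_transform(X)     # NumPy array of shape (10, 3)
doc_topic_list = doc_topic.tolist()  # nested plain-Python lists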

Example 1: weka_lda

This ClowdFlows widget function aligns the attribute sets of a train and a test ARFF dataset, projects both into LDA topic space, and returns the transformed splits as ARFF strings.

# Required imports (the original snippet assumed these were already in scope):
from io import StringIO

import numpy
import arff  # liac-arff, used to serialize the output ARFF strings
from scipy.io import arff as scipy_arff
from sklearn.decomposition import LatentDirichletAllocation
def weka_lda(input_dict):
    arff_file_train = StringIO(str(input_dict['train']))
    arff_file_test = StringIO(str(input_dict['test']))
    data_train, meta_train = scipy_arff.loadarff(arff_file_train)
    data_test, meta_test = scipy_arff.loadarff(arff_file_test)
    n_topics = int(input_dict['n_topics'])
    n_iter = int(input_dict['n_iter'])
    relation_name = input_dict['relation_name']
    random_state = int(input_dict['random_state'])
    keep_original_dimensions = input_dict['keep_original']
    # Note: scikit-learn 0.19+ renamed the n_topics parameter to n_components.
    model = LatentDirichletAllocation(n_components=n_topics, max_iter=n_iter, random_state=random_state)
    # model = TruncatedSVD(n_components=n_topics, n_iter=n_iter, random_state=random_state)
    # Work with plain lists of rows and attribute names from here on.
    data_train, meta_train, data_test, meta_test = list(data_train), list(meta_train), list(data_test), list(meta_test)
    dataTable = []
    yTable = []
    
    # Pad each train row with 0.0 for attributes that appear only in the test set.
    for instance in data_train:
        row = []
        for attribute in instance:
            row.append(attribute)
        row = row[:-1]  # drop the class value (last attribute)
        for attribute_name in meta_test:
            if attribute_name not in meta_train:
                row.append(0.0)
        dataTable.append(row)
        yTable.append(instance[-1])

    splitIndex = len(dataTable)  # boundary between train and test rows

    # Rebuild each test row in train-attribute order, then append test-only attributes.
    for instance in data_test:
        row = []
        for attribute in meta_train:
            try:
                idx = meta_test.index(attribute)
                row.append(instance[idx])
            except ValueError:  # the attribute exists only in the train set
                row.append(0.0)
        row = row[:-1]  # drop the class value (last attribute)
        for i, attribute_name in enumerate(meta_test):
            if attribute_name not in meta_train:
                row.append(instance[i])
        dataTable.append(row)
        yTable.append(instance[-1])

    
    # fit_transform returns the document-topic matrix as a NumPy array;
    # its tolist() call converts it into nested plain-Python lists.
    dataset = numpy.array(dataTable)
    doc_topic = model.fit_transform(dataset)
    lda_list = doc_topic.tolist()
    attributes = []
    attributes_train = []
    attributes_test = []
    attribute_dict = {}
    for i in range(n_topics):
        attributes.append(('topic_' + str(i), u'REAL'))
    if keep_original_dimensions:
        # Keep the original attributes alongside the topics; attributes shared
        # by train and test are renamed consistently via attribute_dict.
        for i, attribute in enumerate(meta_train[:-1]):
            attribute_dict[attribute] = "old_attribute_" + str(i)
            attributes_train.append(("old_attribute_" + str(i), u'REAL'))
        for i, attribute in enumerate(meta_test[:-1]):
            if attribute in attribute_dict:
                attributes_test.append((attribute_dict[attribute], u'REAL'))
            else:
                attributes_test.append(("old_test_attribute_" + str(i), u'REAL'))
        # Append the original feature values after the topic weights.
        for i, row in enumerate(lda_list[:splitIndex]):
            for old_attribute in list(data_train[i])[:-1]:
                row.append(old_attribute)
        for i, row in enumerate(lda_list[splitIndex:]):
            for old_attribute in list(data_test[i])[:-1]:
                row.append(old_attribute)

    # Re-attach the class value as the last column and assemble the headers.
    for i, row in enumerate(lda_list):
        row.append(yTable[i])
    attributes_train = attributes + attributes_train
    attributes_test = attributes + attributes_test
    attributes_train.append(('class', list(set(yTable))))
    attributes_test.append(('class', list(set(yTable))))

    # Serialize both splits back to ARFF strings via liac-arff.
    data_dict_train = {}
    data_dict_train['attributes'] = attributes_train
    data_dict_train['data'] = lda_list[:splitIndex]
    data_dict_train['description'] = u''
    data_dict_train['relation'] = relation_name

    data_dict_test = {}
    data_dict_test['attributes'] = attributes_test
    data_dict_test['data'] = lda_list[splitIndex:]
    data_dict_test['description'] = u''
    data_dict_test['relation'] = relation_name

    return {'test': arff.dumps(data_dict_test), 'train': arff.dumps(data_dict_train)}
Developer: xflows · Project: clowdflows · Lines of code: 96 · Source: library.py
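The return value is produced by arff.dumps, which (assuming the arff module here is the liac-arff package) serializes a dictionary with relation, description, attributes, and data keys into an ARFF string. A minimal, self-contained sketch of that dictionary format, with hypothetical attribute names and values not taken from the example above:

import arff  # liac-arff

data_dict = {
    'relation': 'demo',
    'description': '',
    'attributes': [
        ('topic_0', 'REAL'),
        ('topic_1', 'REAL'),
        ('class', ['pos', 'neg']),  # nominal attribute: list of allowed values
    ],
    'data': [
        [0.9, 0.1, 'pos'],
        [0.2, 0.8, 'neg'],
    ],
}

print(arff.dumps(data_dict))  # prints the @RELATION/@ATTRIBUTE/@DATA sections

weka_lda builds exactly this structure twice, once for the transformed train split and once for the test split.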


Note: The sklearn.decomposition.LatentDirichletAllocation.tolist example in this article was compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippet comes from an open-source project contributed by its authors; copyright remains with the original authors, and redistribution or use should follow the corresponding project's license. Do not republish without permission.