This page collects typical usage examples of the tolist call that appears with sklearn.decomposition.LatentDirichletAllocation in Python. If you have been wondering how LatentDirichletAllocation.tolist is used in practice, the curated example below may help. You can also read further about the containing class, sklearn.decomposition.LatentDirichletAllocation.
One code example involving LatentDirichletAllocation.tolist is shown below; examples are sorted by popularity by default.
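Note that tolist is not actually defined on the LatentDirichletAllocation estimator itself: fit_transform returns the document-topic matrix as a numpy array, and tolist is that array's method. A minimal sketch of the pattern, using a made-up toy count matrix (all values are illustrative only):

import numpy
from sklearn.decomposition import LatentDirichletAllocation

# Tiny made-up document-term count matrix: 4 documents, 5 terms.
counts = numpy.array([[2, 1, 0, 0, 3],
                      [0, 0, 4, 1, 0],
                      [1, 0, 0, 2, 2],
                      [0, 3, 1, 0, 0]])

lda = LatentDirichletAllocation(n_components=2, max_iter=10, random_state=0)
doc_topics = lda.fit_transform(counts)  # numpy array of shape (4, 2)
doc_topic_list = doc_topics.tolist()    # plain nested Python lists
print(doc_topic_list[0])                # topic weights of the first document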
Example 1: weka_lda
# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import tolist [as alias]
# The body below additionally assumes these imports:
# from io import StringIO
# from scipy.io import arff as scipy_arff
# import numpy
# import arff  # the liac-arff package, which provides arff.dumps
def weka_lda(input_dict):
    # Read the train/test ARFF strings handed in through the input dictionary.
    arff_file_train = StringIO(str(input_dict['train']))
    arff_file_test = StringIO(str(input_dict['test']))
    data_train, meta_train = scipy_arff.loadarff(arff_file_train)
    data_test, meta_test = scipy_arff.loadarff(arff_file_test)
    n_topics = int(input_dict['n_topics'])
    n_iter = int(input_dict['n_iter'])
    relation_name = input_dict['relation_name']
    random_state = int(input_dict['random_state'])
    keep_original_dimensions = input_dict['keep_original']
    # scikit-learn 0.19 renamed the n_topics parameter to n_components
    # (n_topics was removed entirely in 0.21).
    model = LatentDirichletAllocation(n_components=n_topics, max_iter=n_iter, random_state=random_state)
    #model = TruncatedSVD(n_components=n_topics, n_iter=n_iter, random_state=random_state)
    data_train, meta_train, data_test, meta_test = list(data_train), list(meta_train), list(data_test), list(meta_test)
    dataTable = []
    yTable = []
    # Add missing attributes from the test set to the training set (as zeros).
    for instance in data_train:
        row = []
        for attribute in instance:
            row.append(attribute)
        row = row[:-1]  # drop the class value from the feature row
        for attribute_name in meta_test:
            if attribute_name not in meta_train:
                row.append(0.0)
        dataTable.append(row)
        yTable.append(instance[-1])
    splitIndex = len(dataTable)
    # Add missing attributes from the training set to the test set,
    # re-ordering test rows to the training attribute order.
    for instance in data_test:
        row = []
        for attribute in meta_train:
            try:
                idx = meta_test.index(attribute)
                row.append(instance[idx])
            except ValueError:  # attribute absent from the test set
                row.append(0.0)
        row = row[:-1]  # drop the class value
        for i, attribute_name in enumerate(meta_test):
            if attribute_name not in meta_train:
                row.append(instance[i])
        dataTable.append(row)
        yTable.append(instance[-1])
    dataset = numpy.array(dataTable)
    # fit_transform returns the document-topic matrix as a numpy array;
    # tolist() is a method of that array, not of the estimator.
    topic_matrix = model.fit_transform(dataset)
    lda_list = topic_matrix.tolist()
    attributes = []
    attributes_train = []
    attributes_test = []
    attribute_dict = {}
    for i in range(n_topics):
        attributes.append(('topic_' + str(i), u'REAL'))
    if keep_original_dimensions:
        attributes_train = []
        attributes_test = []
        # Shared attributes reuse one generated name so the train and test
        # headers stay aligned.
        for i, attribute in enumerate(meta_train[:-1]):
            attribute_dict[attribute] = "old_attribute_" + str(i)
            attributes_train.append(("old_attribute_" + str(i), u'REAL'))
        for i, attribute in enumerate(meta_test[:-1]):
            if attribute in attribute_dict:
                attributes_test.append((attribute_dict[attribute], u'REAL'))
            else:
                attributes_test.append(("old_test_attribute_" + str(i), u'REAL'))
        # Append the original feature values after the topic weights.
        for i, row in enumerate(lda_list[:splitIndex]):
            for old_attribute in list(data_train[i])[:-1]:
                row.append(old_attribute)
        for i, row in enumerate(lda_list[splitIndex:]):
            for old_attribute in list(data_test[i])[:-1]:
                row.append(old_attribute)
    for i, row in enumerate(lda_list):
        row.append(yTable[i])
    attributes_train = attributes + attributes_train
    attributes_test = attributes + attributes_test
    attributes_train.append(('class', list(set(yTable))))
    attributes_test.append(('class', list(set(yTable))))
    data_dict_train = {}
    data_dict_train['attributes'] = attributes_train
    data_dict_train['data'] = lda_list[:splitIndex]
    data_dict_train['description'] = u''
    data_dict_train['relation'] = relation_name
    data_dict_test = {}
    data_dict_test['attributes'] = attributes_test
    data_dict_test['data'] = lda_list[splitIndex:]
    data_dict_test['description'] = u''
    data_dict_test['relation'] = relation_name
    return {'test': arff.dumps(data_dict_test), 'train': arff.dumps(data_dict_train)}
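A hypothetical invocation of the example above, assuming train_arff and test_arff already hold the contents of two ARFF files that share a class attribute (the variable names and parameter values here are made up for illustration):

# Hypothetical call; train_arff and test_arff are ARFF file contents as strings.
outputs = weka_lda({
    'train': train_arff,
    'test': test_arff,
    'n_topics': 5,
    'n_iter': 10,
    'relation_name': 'lda_topics',
    'random_state': 0,
    'keep_original': False,
})
train_with_topics = outputs['train']  # ARFF string with features topic_0 .. topic_4
test_with_topics = outputs['test']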