本文整理汇总了 Java 中 cc.mallet.topics.ParallelTopicModel.getAlphabet 方法的典型用法代码示例。如果您正苦于以下问题：Java ParallelTopicModel.getAlphabet 方法的具体用法？Java ParallelTopicModel.getAlphabet 怎么用？Java ParallelTopicModel.getAlphabet 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 cc.mallet.topics.ParallelTopicModel 的用法示例。
在下文中一共展示了 ParallelTopicModel.getAlphabet 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: selectTopLDAFeatures
import cc.mallet.topics.ParallelTopicModel; //导入方法依赖的package包/类
/**
 * Selects the highest-ranked LDA topic words that also exist in the vector dataset alphabet.
 *
 * <p>Topics are visited round-robin by rank: the top word of every topic is considered first,
 * then the second word of every topic, and so on. Each word is looked up in {@code alphabet}
 * (without adding it); words that are missing there, and feature indices that were already
 * selected, are skipped.
 *
 * @param numSelFeatures Number of features to select. Selection stops as soon as this many
 *        features have been collected. The stop check runs only right after an insertion, so
 *        zero or a negative value never triggers it and every matching feature is returned.
 * @param lda The topic model to take the ranked words from. Its own alphabet (the sequence
 *        dataset alphabet) may be different from the vector dataset alphabet.
 * @param alphabet The vector dataset alphabet used to map words to feature indices.
 * @return ArrayList with the int indices of the selected features, in selection order.
 */
public static ArrayList<Integer> selectTopLDAFeatures(int numSelFeatures, ParallelTopicModel lda, Alphabet alphabet) {
    ArrayList<Integer> features = new ArrayList<Integer>();
    Alphabet seqAlphabet = lda.getAlphabet();
    int numTopics = lda.getNumTopics();
    // Ask for as many words per topic as the model's alphabet holds, i.e. the full ranking.
    int maxRank = seqAlphabet.size();
    Object[][] sorted = lda.getTopWords(maxRank);
    for (int pos = 0; pos < maxRank; pos++) {
        for (int ti = 0; ti < numTopics; ti++) {
            // A topic's row may hold fewer words than requested (getTopWords caps it at the
            // number of words the topic actually has), so skip topics that are exhausted
            // instead of indexing past the end of the row.
            if (pos >= sorted[ti].length) {
                continue;
            }
            String feat = sorted[ti][pos].toString();
            // addIfNotPresent == false: a word unknown to the vector alphabet yields -1.
            int fi = alphabet.lookupIndex(feat, false);
            if ((fi >= 0) && (!features.contains(fi))) {
                logger.info("Selected feature: " + feat);
                features.add(fi);
                if (features.size() == numSelFeatures) {
                    return features;
                }
            }
        }
    }
    return features;
}
示例2: printTopics
import cc.mallet.topics.ParallelTopicModel; //导入方法依赖的package包/类
/**
 * Writes the document-topic probabilities and the topic-term rankings of a topic model to
 * text files.
 *
 * <p>Four files are produced:
 * <ul>
 *   <li>{@code writePathDocTopic}: a comma-separated table with the header
 *       {@code Class,Document,T0,T1,...} and one row per entry of {@code this.textList},
 *       holding the class from {@code this.classDocMapping}, the document name from
 *       {@code this.idDocMapping} and the topic probabilities of that document.</li>
 *   <li>{@code writePathTopicTerm}: one line per topic listing all of its sorted terms.</li>
 *   <li>A "short" file next to {@code writePathTopicTerm}, named
 *       {@code <base name>-T<maxCount>.txt}, listing only the first {@code maxCount} terms
 *       of every topic.</li>
 *   <li>{@code writePathTopicTermMatrix}: like the topic-term file, but every term is
 *       followed by its weight in parentheses.</li>
 * </ul>
 * All files are written as UTF-8.
 *
 * @param model the topic model to export
 * @param writePathDocTopic path of the document-topic output file
 * @param writePathTopicTerm path of the full topic-term output file; also determines the
 *        directory and base name of the short topic-term file
 * @param writePathTopicTermMatrix path of the weighted topic-term output file
 * @throws Exception if any of the files cannot be opened or written
 */
public void printTopics(ParallelTopicModel model, String writePathDocTopic, String writePathTopicTerm, String writePathTopicTermMatrix) throws Exception {
    // Build the short file's name from the topic-term file name without its extension.
    // Cutting at the last dot (instead of blindly dropping four characters) also copes
    // with names that have no extension or an extension of a different length.
    File topicTermFile = new File(writePathTopicTerm);
    String fileName = topicTermFile.getName();
    int dot = fileName.lastIndexOf('.');
    String baseName = (dot > 0) ? fileName.substring(0, dot) : fileName;
    String shortName = baseName + "-T" + String.valueOf(maxCount) + ".txt";
    // Resolve to an absolute file first: getParentFile() is null for a bare relative name.
    File shortFile = new File(topicTermFile.getAbsoluteFile().getParentFile(), shortName);

    // try-with-resources closes every writer even when a write fails half-way through.
    try (BufferedWriter writerDocTopic = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(writePathDocTopic), "UTF8"));
         BufferedWriter writerTopicTerm = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(writePathTopicTerm), "UTF8"));
         BufferedWriter writerTopicTermShort = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(shortFile), "UTF8"));
         BufferedWriter writerTopicTermMatrix = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(writePathTopicTermMatrix), "UTF8"))) {

        /* Write header */
        writerDocTopic.write("Class,Document");
        for (int j = 0; j < model.numTopics; j++) {
            writerDocTopic.write(",T" + j);
        }
        writerDocTopic.newLine();

        /* Write document-topic probabilities to file */
        for (int i = 0; i < this.textList.size(); i++) {
            double[] topicProbs = model.getTopicProbabilities(i);
            String docName = this.idDocMapping.get(i);
            writerDocTopic.write(this.classDocMapping.get(docName) + ",");
            writerDocTopic.write(docName);
            for (int j = 0; j < topicProbs.length; j++) {
                writerDocTopic.write("," + topicProbs[j]);
            }
            writerDocTopic.newLine();
        }

        /* Write topic term associations */
        Alphabet alphabet = model.getAlphabet();
        // Fetch the sorted words once; calling getSortedWords() per iteration repeats the work.
        ArrayList<TreeSet<IDSorter>> sortedWords = model.getSortedWords();
        for (int i = 0; i < sortedWords.size(); i++) {
            String topicHeader = "TOPIC " + i + ": ";
            writerTopicTerm.write(topicHeader);
            writerTopicTermShort.write(topicHeader);
            writerTopicTermMatrix.write(topicHeader);

            int count = 0;
            for (IDSorter s : sortedWords.get(i)) {
                Object term = alphabet.lookupObject(s.getID());
                // Strictly less than: the short file holds exactly maxCount terms per topic,
                // matching the "-T<maxCount>" suffix in its name.
                if (count < maxCount) {
                    writerTopicTermShort.write(term + ", ");
                }
                count++;
                writerTopicTerm.write(term + ", ");
                writerTopicTermMatrix.write(term + " (" + s.getWeight() + "), ");
            }

            writerTopicTerm.newLine();
            writerTopicTermShort.newLine();
            writerTopicTermMatrix.newLine();
        }
    }
}