本文整理汇总了Java中cc.mallet.topics.ParallelTopicModel类的典型用法代码示例。如果您正苦于以下问题:Java ParallelTopicModel类的具体用法?Java ParallelTopicModel怎么用?Java ParallelTopicModel使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
ParallelTopicModel类属于cc.mallet.topics包,在下文中一共展示了ParallelTopicModel类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: selectTopLDAFeatures
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Selects features by walking the LDA topics' top-word lists rank by rank.
 *
 * <p>For each rank position (best words first) every topic is visited in turn; a word is
 * selected when it is present in {@code alphabet} and has not been selected before. The
 * words are looked up by their {@code toString()} form without adding them to
 * {@code alphabet}.
 *
 * @param numSelFeatures Number of features to select. Selection stops as soon as this many
 *        have been collected; a value that is never reached (including zero or a negative
 *        value) results in every matching feature being returned.
 * @param lda The LDA model whose alphabet and per-topic top words are used.
 * @param alphabet The vector dataset alphabet, which may differ from the model's alphabet.
 * @return ArrayList with the int indices (in {@code alphabet}) of the selected features.
 */
public static ArrayList<Integer> selectTopLDAFeatures(int numSelFeatures, ParallelTopicModel lda, Alphabet alphabet) {
    ArrayList<Integer> features = new ArrayList<Integer>();
    Alphabet seqAlphabet = lda.getAlphabet();
    int numTopics = lda.getNumTopics();
    int numWords = seqAlphabet.size();
    Object[][] sorted = lda.getTopWords(numWords);
    for (int pos = 0; pos < numWords; pos++) {
        for (int ti = 0; ti < numTopics; ti++) {
            // A topic's list can be shorter than the number of words requested (MALLET caps it
            // at the number of words actually assigned to the topic), so skip exhausted topics
            // instead of indexing past the end of the row.
            if (pos >= sorted[ti].length) {
                continue;
            }
            Object feat = sorted[ti][pos].toString();
            int fi = alphabet.lookupIndex(feat, false);
            if ((fi >= 0) && (!features.contains(fi))) {
                logger.info("Selected feature: " + feat);
                features.add(fi);
                if (features.size() == numSelFeatures) {
                    return features;
                }
            }
        }
    }
    return features;
}
示例2: predictValuesProbs
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Trains an LDA model on {@code this.instances} and stores the per-document topic list and,
 * optionally, the per-topic term list on this object.
 *
 * @param topicCreation when {@code true} the per-topic term list is not computed and
 *        {@code this.topicList} is set to {@code null}; when {@code false} it is filled from
 *        {@code getMaxTermsByTopics}.
 */
public void predictValuesProbs(boolean topicCreation) {
    ParallelTopicModel lda = new ParallelTopicModel(this.numTopics, ALPHA * this.numTopics, BETA); // TODO
    // The seed must be set before the instances are added: MALLET draws the initial topic
    // assignments inside addInstances using the seed configured at that moment, so seeding
    // afterwards leaves the initialisation (and therefore the result) non-reproducible.
    lda.setRandomSeed(43);
    lda.addInstances(this.instances);
    lda.setNumThreads(1);
    lda.setNumIterations(NUM_ITERATIONS);
    try {
        lda.estimate();
    } catch (Exception e) {
        // Best effort: the failure is reported and the (possibly untrained) model is still queried below.
        e.printStackTrace();
    }
    this.docList = getMaxTopicsByDocs(lda, this.numTopics);
    System.out.println("Fetched Doc-List");
    this.topicList = !topicCreation ? getMaxTermsByTopics(lda, MAX_TERMS) : null;
    System.out.println("Fetched Topic-List");
}
示例3: predictValuesProbs
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Trains an LDA model on {@code this.instances} and caches the results on this object:
 * {@code this.docList} receives the per-document topics and {@code this.topicList} the
 * per-topic terms.
 *
 * @param topicCreation if {@code true}, the per-topic term extraction is skipped and
 *        {@code this.topicList} is set to {@code null}; if {@code false}, it is populated via
 *        {@code getMaxTermsByTopics}.
 */
public void predictValuesProbs(boolean topicCreation) {
    ParallelTopicModel lda = new ParallelTopicModel(this.numTopics, ALPHA * this.numTopics, BETA); // TODO
    // Seed first, then add instances. MALLET samples the initial topic assignment of every
    // token while the instances are added, using whatever seed is configured at that time;
    // calling setRandomSeed afterwards would leave that initialisation unseeded.
    lda.setRandomSeed(43);
    lda.addInstances(this.instances);
    lda.setNumThreads(1);
    lda.setNumIterations(NUM_ITERATIONS);
    try {
        lda.estimate();
    } catch (Exception e) {
        // Best effort: report the failure and continue with whatever state the model has.
        e.printStackTrace();
    }
    this.docList = getMaxTopicsByDocs(lda, this.numTopics);
    System.out.println("Fetched Doc-List");
    if (topicCreation) {
        this.topicList = null;
    } else {
        this.topicList = getMaxTermsByTopics(lda, MAX_TERMS);
    }
    System.out.println("Fetched Topic-List");
}
示例4: extractTopics
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Reads the documents under {@code inputPath}, obtains a topic model and writes its topics
 * to the given output files.
 *
 * <p>Any exception raised while browsing the directory, obtaining the model or printing the
 * topics is caught here and only its stack trace is printed.
 *
 * @param inputPath directory handed to {@code browseDirectory}
 * @param writePathDocTopic output path passed to {@code printTopics} for the document-topic data
 * @param writePathTopicTerm output path passed to {@code printTopics} for the topic-term data
 * @param writePathTopicTermMatrix output path passed to {@code printTopics} for the topic-term matrix
 * @param numTopics value stored in {@code this.numTopics}
 * @param maxCount value stored in {@code this.maxCount}
 * @throws IOException declared by the signature
 */
public void extractTopics(String inputPath, String writePathDocTopic, String writePathTopicTerm, String writePathTopicTermMatrix,
        int numTopics, int maxCount) throws IOException {
    this.numTopics = numTopics;
    this.maxCount = maxCount;
    try {
        browseDirectory(new File(inputPath));
        printTopics(getOrCreateModel(), writePathDocTopic, writePathTopicTerm, writePathTopicTermMatrix);
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
示例5: getMaxTopicsByDocs
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Builds, for every document in {@code this.instances}, a map from topic index to the
 * topic's probability in that document.
 *
 * <p>Only the first {@code maxTopicsPerDoc} topic indices are considered. Each per-document
 * map is a {@code TreeMap} ordered by {@code DoubleMapComparator} (defined elsewhere;
 * presumably it orders keys by their probability - confirm against that class).
 *
 * @param LDA the model queried for per-document topic probabilities
 * @param maxTopicsPerDoc upper bound on the number of topic indices read per document
 * @return one map per document, in document order
 */
private List<Map<Integer, Double>> getMaxTopicsByDocs(ParallelTopicModel LDA, int maxTopicsPerDoc) {
    final List<Map<Integer, Double>> perDocument = new ArrayList<Map<Integer, Double>>();
    final int documentCount = this.instances.size();
    for (int docIndex = 0; docIndex < documentCount; docIndex++) {
        final Map<Integer, Double> byTopic = new LinkedHashMap<Integer, Double>();
        final double[] probabilities = LDA.getTopicProbabilities(docIndex);
        final int limit = Math.min(probabilities.length, maxTopicsPerDoc);
        for (int t = 0; t < limit; t++) {
            byTopic.put(t, probabilities[t]);
        }
        // The comparator is constructed over the filled map before the entries are copied in.
        final Map<Integer, Double> ordered = new TreeMap<Integer, Double>(new DoubleMapComparator(byTopic));
        ordered.putAll(byTopic);
        perDocument.add(ordered);
    }
    return perDocument;
}
示例6: predictValuesProbs
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Trains an LDA model on {@code this.instances} and stores the per-document topic list in
 * {@code this.docList} and the per-topic term list in {@code this.topicList}.
 */
public void predictValuesProbs() {
    ParallelTopicModel lda = new ParallelTopicModel(this.numTopics, ALPHA * this.numTopics, BETA); // TODO
    // Configure the seed before adding the instances: MALLET assigns the initial topics
    // while instances are added, using the seed set at that point. Seeding afterwards would
    // leave that step unseeded and make runs non-reproducible.
    lda.setRandomSeed(43);
    lda.addInstances(this.instances);
    lda.setNumThreads(1);
    lda.setNumIterations(NUM_ITERATIONS);
    try {
        lda.estimate();
    } catch (Exception e) {
        // Best effort: the failure is printed and the model is still queried below.
        e.printStackTrace();
    }
    this.docList = getMaxTopicsByDocs(lda, this.numTopics);
    System.out.println("Fetched Doc-List");
    this.topicList = getMaxTermsByTopics(lda, MAX_TERMS);
    System.out.println("Fetched Topic-List");
}
示例7: getMaxTopicsByDocs
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Builds the per-document topic maps and updates {@code this.mostPopularTopics}.
 *
 * <p>For each document, topics among the first {@code maxTopicsPerDoc} indices whose
 * probability exceeds {@code TOPIC_THRESHOLD} are recorded, and their probabilities are
 * summed per topic across all documents. The summed totals are then copied into
 * {@code this.mostPopularTopics} for as long as it holds fewer than
 * {@code MAX_RECOMMENDATIONS} entries. Ordering of both kinds of map is delegated to
 * {@code DoubleMapComparator} (defined elsewhere).
 *
 * @param LDA the model queried for per-document topic probabilities
 * @param maxTopicsPerDoc upper bound on the number of topic indices read per document
 * @return one topic-to-probability map per document, in document order
 */
private List<Map<Integer, Double>> getMaxTopicsByDocs(ParallelTopicModel LDA, int maxTopicsPerDoc) {
    List<Map<Integer, Double>> perDocument = new ArrayList<Map<Integer, Double>>();
    Map<Integer, Double> topicTotals = new LinkedHashMap<Integer, Double>();
    int documentCount = this.instances.size();
    for (int docIndex = 0; docIndex < documentCount; docIndex++) {
        Map<Integer, Double> docTopics = new LinkedHashMap<Integer, Double>();
        double[] probabilities = LDA.getTopicProbabilities(docIndex);
        int limit = Math.min(probabilities.length, maxTopicsPerDoc);
        for (int t = 0; t < limit; t++) {
            if (probabilities[t] > TOPIC_THRESHOLD) { // TODO
                double probability = probabilities[t];
                docTopics.put(t, probability);
                // Accumulate this topic's probability over all documents.
                Double runningTotal = topicTotals.get(t);
                if (runningTotal == null) {
                    topicTotals.put(t, probability);
                } else {
                    topicTotals.put(t, runningTotal.doubleValue() + probability);
                }
            }
        }
        Map<Integer, Double> orderedDocTopics = new TreeMap<Integer, Double>(new DoubleMapComparator(docTopics));
        orderedDocTopics.putAll(docTopics);
        perDocument.add(orderedDocTopics);
    }
    Map<Integer, Double> orderedTotals = new TreeMap<Integer, Double>(new DoubleMapComparator(topicTotals));
    orderedTotals.putAll(topicTotals);
    Iterator<Map.Entry<Integer, Double>> totals = orderedTotals.entrySet().iterator();
    while (totals.hasNext()) {
        Map.Entry<Integer, Double> total = totals.next();
        if (this.mostPopularTopics.size() < MAX_RECOMMENDATIONS) {
            this.mostPopularTopics.put(total.getKey(), total.getValue());
        }
    }
    return perDocument;
}
示例8: getMaxTopicsByDocs
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Computes the topics of every document in {@code this.instances} and, as a side effect,
 * fills {@code this.mostPopularTopics}.
 *
 * <p>A topic is kept for a document when its index is below {@code maxTopicsPerDoc} and its
 * probability is greater than {@code TOPIC_THRESHOLD}. The kept probabilities are also added
 * up per topic over all documents; those totals are put into {@code this.mostPopularTopics}
 * while it has fewer than {@code MAX_RECOMMENDATIONS} entries.
 *
 * @param LDA model providing the topic probabilities of each document
 * @param maxTopicsPerDoc maximum number of topic indices inspected per document
 * @return a list with one topic-to-probability map per document, each ordered by
 *         {@code DoubleMapComparator} (defined elsewhere)
 */
private List<Map<Integer, Double>> getMaxTopicsByDocs(ParallelTopicModel LDA, int maxTopicsPerDoc){
    final Map<Integer, Double> summedByTopic = new LinkedHashMap<Integer, Double>();
    final List<Map<Integer, Double>> result = new ArrayList<Map<Integer, Double>>();
    final int total = this.instances.size();
    for (int d = 0; d < total; d++) {
        final Map<Integer, Double> kept = new LinkedHashMap<Integer, Double>();
        final double[] probs = LDA.getTopicProbabilities(d);
        for (int topicId = 0; topicId < maxTopicsPerDoc && topicId < probs.length; topicId++) {
            if (!(probs[topicId] > TOPIC_THRESHOLD)) { // TODO
                continue;
            }
            final double current = probs[topicId];
            kept.put(topicId, current);
            final Double previous = summedByTopic.get(topicId);
            final double sum = (previous == null) ? current : previous.doubleValue() + current;
            summedByTopic.put(topicId, sum);
        }
        final Map<Integer, Double> sortedKept = new TreeMap<Integer, Double>(new DoubleMapComparator(kept));
        sortedKept.putAll(kept);
        result.add(sortedKept);
    }
    final Map<Integer, Double> sortedSums = new TreeMap<Integer, Double>(new DoubleMapComparator(summedByTopic));
    sortedSums.putAll(summedByTopic);
    for (Map.Entry<Integer, Double> candidate : sortedSums.entrySet()) {
        if (this.mostPopularTopics.size() >= MAX_RECOMMENDATIONS) {
            continue;
        }
        this.mostPopularTopics.put(candidate.getKey(), candidate.getValue());
    }
    return result;
}
示例9: getOrCreateModel
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Returns the topic model stored as {@code mallet-lda.model} in the given directory, or
 * trains and stores a new one.
 *
 * <p>A new model is created (and written to the file) when the file does not exist or when
 * {@code keepOldModel} is false; otherwise the existing file is read.
 *
 * @param directoryPath directory that holds the serialized model; created, including any
 *        missing parent directories, if it does not exist
 * @return the loaded or newly created model
 * @throws Exception if creating or reading the model fails
 */
private ParallelTopicModel getOrCreateModel(String directoryPath) throws Exception {
    File directory = new File(directoryPath);
    // mkdirs (rather than mkdir) also creates missing parents; the isDirectory re-check covers
    // the case where the directory appeared between the exists() test and the creation attempt.
    if (!directory.exists() && !directory.mkdirs() && !directory.isDirectory()) {
        // Best effort: training can still proceed, but the model will probably not be persisted.
        System.err.println("Could not create model directory: " + directory.getAbsolutePath());
    }
    File file = new File(directory, "mallet-lda.model");
    if (file.exists() && keepOldModel) {
        // NOTE(review): read() deserializes the file; only load model files from a trusted location.
        return ParallelTopicModel.read(file);
    }
    ParallelTopicModel model = createNewModel();
    model.write(file);
    return model;
}
示例10: createLDAModel
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Trains an LDA topic model on the given document corpus.
 *
 * @param texts the documents to train on
 * @param numTopics the number of topics the model should have
 * @param numIterations the number of LDA iterations to run
 * @return the estimated LDA topic model
 * @throws IOException if building the instance list or estimating the model fails with an I/O error
 */
private ParallelTopicModel createLDAModel(List<String> texts, int numTopics, int numIterations) throws IOException {
    final ParallelTopicModel lda = new ParallelTopicModel(numTopics);
    lda.addInstances(createInstanceList(texts));
    lda.setNumIterations(numIterations);
    lda.estimate();
    return lda;
}
示例11: LDA
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Creates an untrained topic model wrapper with {@code K} topics.
 *
 * @param K number of topics; stored in {@code numTopics}
 */
public LDA(int K) {
    final double alphaPerTopic = 0.01;
    final double betaPerWord = 0.01;
    numTopics = K;
    // The model is given K * 0.01 as its second argument (MALLET documents it as the alpha sum)
    // and 0.01 as beta.
    _model = new ParallelTopicModel(numTopics, numTopics * alphaPerTopic, betaPerWord);
}
示例12: loadModel
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Replaces the current model with the one read from the given file and refreshes
 * {@code numTopics} from it.
 *
 * @param serializedModelFile path of the serialized {@code ParallelTopicModel}
 * @throws Exception if the model cannot be read
 */
@Override
public void loadModel(String serializedModelFile) throws Exception {
    final File source = new File(serializedModelFile);
    _model = ParallelTopicModel.read(source);
    numTopics = _model.getNumTopics();
}
示例13: createNewModel
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Trains a new topic model on {@code textList} using this object's settings
 * ({@code numTopics}, {@code numIterations}, {@code optimizeInterval}) and four threads.
 *
 * <p>The topic count and the resulting model log likelihood are printed to standard output.
 * If {@code loggingHandler} is set, it is attached to the model's logger before estimation.
 *
 * @return the estimated model
 * @throws IOException if building the instance list or estimating the model fails with an I/O error
 */
private ParallelTopicModel createNewModel() throws IOException {
    final InstanceList trainingData = createInstanceList(textList);
    final ParallelTopicModel lda = new ParallelTopicModel(numTopics);
    System.out.println(" NUMBER OF TOPICS "+numTopics);
    lda.addInstances(trainingData);
    lda.setNumIterations(this.numIterations);
    lda.setOptimizeInterval(this.optimizeInterval);
    lda.setNumThreads(4);
    if (loggingHandler != null) {
        lda.logger.addHandler(loggingHandler);
    }
    lda.estimate();
    final double logLikelihood = lda.modelLogLikelihood();
    System.out.println("Model log likelihood: " + logLikelihood);
    return lda;
}
示例14: printTopics
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Writes the document-topic probabilities and the per-topic term rankings of a model to
 * UTF-8 text files.
 *
 * <p>Four files are written:
 * <ul>
 * <li>{@code writePathDocTopic}: a header line {@code Class,Document,T0,T1,...} followed by
 *     one line per entry of {@code this.textList} with the document's class, its name and
 *     its topic probabilities;</li>
 * <li>{@code writePathTopicTerm}: one line per topic listing all of its terms;</li>
 * <li>a "short" file next to {@code writePathTopicTerm}, named after it with the extension
 *     replaced by {@code -T<maxCount>.txt}, listing only the first {@code maxCount} terms
 *     of each topic;</li>
 * <li>{@code writePathTopicTermMatrix}: one line per topic listing every term together with
 *     its weight.</li>
 * </ul>
 *
 * @param model the model to export
 * @param writePathDocTopic path of the document-topic file
 * @param writePathTopicTerm path of the topic-term file; also determines the location and
 *        name of the short topic-term file
 * @param writePathTopicTermMatrix path of the topic-term file that includes weights
 * @throws Exception if any file cannot be opened or written
 */
public void printTopics(ParallelTopicModel model, String writePathDocTopic, String writePathTopicTerm, String writePathTopicTermMatrix) throws Exception {
    File topicTermFile = new File(writePathTopicTerm);
    // Strip the extension (if any) instead of blindly cutting four characters, which failed
    // for names shorter than four characters and mangled names without a 3-letter extension.
    String baseName = topicTermFile.getName();
    int extensionStart = baseName.lastIndexOf('.');
    if (extensionStart > 0) {
        baseName = baseName.substring(0, extensionStart);
    }
    // Resolve the parent via the absolute file so that a bare file name (no parent component)
    // maps to the working directory instead of causing a NullPointerException.
    File shortTopicTermFile = new File(topicTermFile.getAbsoluteFile().getParentFile(),
            baseName + "-T" + String.valueOf(maxCount) + ".txt");

    // try-with-resources closes every writer even when a write fails part-way through.
    try (BufferedWriter writerDocTopic = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(writePathDocTopic), "UTF8"));
         BufferedWriter writerTopicTerm = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(writePathTopicTerm), "UTF8"));
         // Explicit UTF8 for consistency with the other writers (previously the platform default charset).
         BufferedWriter writerTopicTermShort = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(shortTopicTermFile), "UTF8"));
         BufferedWriter writerTopicTermMatrix = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(writePathTopicTermMatrix), "UTF8"))) {

        /* Write header */
        writerDocTopic.write("Class,Document");
        for (int j = 0; j < model.numTopics; j++) {
            writerDocTopic.write(",T" + j);
        }
        writerDocTopic.newLine();

        /* Write document-topic probabilities to file */
        for (int i = 0; i < this.textList.size(); i++) {
            double[] topicProbs = model.getTopicProbabilities(i);
            String docName = this.idDocMapping.get(i);
            writerDocTopic.write(this.classDocMapping.get(docName) + ",");
            writerDocTopic.write(docName);
            for (int j = 0; j < topicProbs.length; j++) {
                writerDocTopic.write("," + topicProbs[j]);
            }
            writerDocTopic.newLine();
        }

        /* Write topic term associations */
        Alphabet alphabet = model.getAlphabet();
        // Fetch the sorted words once rather than on every loop test and iteration.
        ArrayList<TreeSet<IDSorter>> sortedWords = model.getSortedWords();
        for (int i = 0; i < sortedWords.size(); i++) {
            String topicHeader = "TOPIC " + i + ": ";
            writerTopicTerm.write(topicHeader);
            writerTopicTermShort.write(topicHeader);
            writerTopicTermMatrix.write(topicHeader);
            int count = 0;
            for (IDSorter s : sortedWords.get(i)) {
                Object term = alphabet.lookupObject(s.getID());
                // Strictly fewer than maxCount already written, so the short file holds exactly
                // maxCount terms per topic (the previous <= test wrote one term too many).
                if (count < maxCount) {
                    writerTopicTermShort.write(term + ", ");
                }
                count++;
                writerTopicTerm.write(term + ", ");
                writerTopicTermMatrix.write(term + " (" + s.getWeight() + "), ");
            }
            writerTopicTerm.newLine();
            writerTopicTermShort.newLine();
            writerTopicTermMatrix.newLine();
        }
    }
}
示例15: estimate
import cc.mallet.topics.ParallelTopicModel; //导入依赖的package包/类
/**
 * Estimates a topic model from collaborative filtering preference data.
 *
 * <p>The model is built with {@code k} topics, {@code alpha * k} as its second constructor
 * argument and {@code beta} as its third, and is estimated using as many threads as the
 * runtime reports available processors.
 *
 * @param <U> user type
 * @param <I> item type
 * @param preferences preference data wrapped into an {@code LDAInstanceList}
 * @param k number of topics
 * @param alpha alpha in model (multiplied by {@code k} before being passed on)
 * @param beta beta in model
 * @param numIterations number of iterations
 * @param burninPeriod burnin period
 * @return the estimated topic model
 * @throws IOException when internal IO error occurs
 */
public static <U, I> ParallelTopicModel estimate(FastPreferenceData<U, I> preferences, int k, double alpha, double beta, int numIterations, int burninPeriod) throws IOException {
    final ParallelTopicModel lda = new ParallelTopicModel(k, alpha * k, beta);
    lda.addInstances(new LDAInstanceList<>(preferences));
    // Display interval is set to one more than the iteration count, with zero words to show.
    lda.setTopicDisplay(numIterations + 1, 0);
    lda.setNumIterations(numIterations);
    lda.setBurninPeriod(burninPeriod);
    final int workerThreads = Runtime.getRuntime().availableProcessors();
    lda.setNumThreads(workerThreads);
    lda.estimate();
    return lda;
}