当前位置: 首页>>代码示例>>Java>>正文


Java LabelSequence.getFeatures方法代码示例

本文整理汇总了Java中cc.mallet.types.LabelSequence.getFeatures方法的典型用法代码示例。如果您正苦于以下问题:Java LabelSequence.getFeatures方法的具体用法?Java LabelSequence.getFeatures怎么用?Java LabelSequence.getFeatures使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cc.mallet.types.LabelSequence的用法示例。


在下文中一共展示了LabelSequence.getFeatures方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: printTheta

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
public void printTheta(ArrayList<Topication> dataset, File f, double threshold, int max) throws IOException{
	PrintWriter pw = new PrintWriter(new FileWriter(f));
	int[] topicCounts = new int[ numTopics ];
	int docLen;
	
	for (int di = 0; di < dataset.size(); di++) {
		LabelSequence topicSequence = dataset.get(di).topicSequence;
		int[] currentDocTopics = topicSequence.getFeatures();
		docLen = currentDocTopics.length;
		for (int token=0; token < docLen; token++) {
			topicCounts[ currentDocTopics[token] ]++;
		}
		pw.println(dataset.get(di).instance.getName());
		// n(t|d)+alpha(t) / docLen + alphaSum
		for (int topic = 0; topic < numTopics; topic++) {
			double prob = (double) (topicCounts[topic]+alpha[topic]) / (docLen + alphaSum);
			pw.println("topic"+ topic + "\t" + prob);
		}

		pw.println();
		Arrays.fill(topicCounts, 0);
	}
	pw.close();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:25,代码来源:LDAStream.java

示例2: sampleTopicsForOneTestDocAll

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneTestDocAll(FeatureSequence tokenSequence,
		LabelSequence topicSequence) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	TIntIntHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();

	//		populate topic counts
	int[] localTopicCounts = new int[numTopics];
	for (int ti = 0; ti < numTopics; ti++){
		localTopicCounts[ti] = 0;
	}
	for (int position = 0; position < docLength; position++) {
		localTopicCounts[oneDocTopics[position]] ++;
	}

	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];

		// Remove this token from all counts
		localTopicCounts[oldTopic] --;

		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.adjustValue(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * ((localTopicCounts[ti] + alpha[ti])); // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.adjustOrPutValue(newTopic, 1, 1);
		localTopicCounts[newTopic] ++;
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:61,代码来源:LDAStream.java

示例3: sampleTopicsForOneTestDoc

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneTestDoc(FeatureSequence tokenSequence,
		LabelSequence topicSequence) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	TIntIntHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();

	//		populate topic counts
	int[] localTopicCounts = new int[numTopics];
	for (int ti = 0; ti < numTopics; ti++){
		localTopicCounts[ti] = 0;
	}
	for (int position = 0; position < docLength; position++) {
		if(oneDocTopics[position] != -1) {
			localTopicCounts[oneDocTopics[position]] ++;
		}
	}

	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];
		if(oldTopic == -1) {
			continue;
		}

		// Remove this token from all counts
    		localTopicCounts[oldTopic] --;
    		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.adjustValue(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * ((localTopicCounts[ti] + alpha[ti])); // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.adjustOrPutValue(newTopic, 1, 1);
		localTopicCounts[newTopic] ++;
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:65,代码来源:LDAStream.java

示例4: sampleTopicsForOneDocWithTheta

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneDocWithTheta(FeatureSequence tokenSequence,
		LabelSequence topicSequence, double[] topicDistribution) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	TIntIntHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();
	
	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];
		if(oldTopic == -1) {
			continue;
		}

 		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.adjustValue(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * topicDistribution[ti]; // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.adjustOrPutValue(newTopic, 1, 1);
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:51,代码来源:LDAStream.java

示例5: printDocumentTopics

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
/**
 *  @param pw          A print writer
 *  @param threshold   Only print topics with proportion greater than this number
 *  @param max         Print no more than this many topics
 */
public void printDocumentTopics (ArrayList<Topication> dataset, PrintWriter pw, double threshold, int max)	{
	pw.print ("#doc source topic proportion ...\n");
	int docLen;
	int[] topicCounts = new int[ numTopics ];

	IDSorter[] sortedTopics = new IDSorter[ numTopics ];
	for (int topic = 0; topic < numTopics; topic++) {
		// Initialize the sorters with dummy values
		sortedTopics[topic] = new IDSorter(topic, topic);
	}

	if (max < 0 || max > numTopics) {
		max = numTopics;
	}

	for (int di = 0; di < dataset.size(); di++) {
		LabelSequence topicSequence = dataset.get(di).topicSequence;
		int[] currentDocTopics = topicSequence.getFeatures();

		pw.print (di); pw.print (' ');

		if (dataset.get(di).instance.getSource() != null) {
			pw.print (dataset.get(di).instance.getSource());
		}
		else {
			pw.print ("null-source");
		}

		pw.print (' ');
		docLen = currentDocTopics.length;

		// Count up the tokens
		int realDocLen = 0;
		for (int token=0; token < docLen; token++) {
			if(currentDocTopics[token] != -1) {
				topicCounts[ currentDocTopics[token] ]++;
				realDocLen ++;
			}
		}
		assert(realDocLen == docLen);
           alphaSum=0.0;
		for(int topic=0; topic < numTopics; topic++){
			alphaSum+=alpha[topic];
		}
		
		// And normalize and smooth by Dirichlet prior alpha
		for (int topic = 0; topic < numTopics; topic++) {
			sortedTopics[topic].set(topic, (double) (topicCounts[topic]+alpha[topic]) / (docLen + alphaSum));
		}
    
		Arrays.sort(sortedTopics);

		for (int i = 0; i < max; i++) {
			if (sortedTopics[i].getWeight() < threshold) { break; }

			pw.print (sortedTopics[i].getID() + " " +
					  sortedTopics[i].getWeight() + " ");
		}
		pw.print (" \n");

		Arrays.fill(topicCounts, 0);
	}
       pw.close();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:70,代码来源:LDAStream.java

示例6: sampleTopicsForOneTestDocAll

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneTestDocAll(FeatureSequence tokenSequence,
		LabelSequence topicSequence) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	IntIntOpenHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();

	//		populate topic counts
	int[] localTopicCounts = new int[numTopics];
	for (int ti = 0; ti < numTopics; ti++){
		localTopicCounts[ti] = 0;
	}
	for (int position = 0; position < docLength; position++) {
		localTopicCounts[oneDocTopics[position]] ++;
	}

	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];

		// Remove this token from all counts
		localTopicCounts[oldTopic] --;

		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.addTo(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * ((localTopicCounts[ti] + alpha[ti])); // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
		localTopicCounts[newTopic] ++;
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:cmoen,项目名称:mallet,代码行数:61,代码来源:LDAStream.java

示例7: sampleTopicsForOneTestDoc

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneTestDoc(FeatureSequence tokenSequence,
		LabelSequence topicSequence) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	IntIntOpenHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();

	//		populate topic counts
	int[] localTopicCounts = new int[numTopics];
	for (int ti = 0; ti < numTopics; ti++){
		localTopicCounts[ti] = 0;
	}
	for (int position = 0; position < docLength; position++) {
		if(oneDocTopics[position] != -1) {
			localTopicCounts[oneDocTopics[position]] ++;
		}
	}

	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];
		if(oldTopic == -1) {
			continue;
		}

		// Remove this token from all counts
    		localTopicCounts[oldTopic] --;
    		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.addTo(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * ((localTopicCounts[ti] + alpha[ti])); // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
		localTopicCounts[newTopic] ++;
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:cmoen,项目名称:mallet,代码行数:65,代码来源:LDAStream.java

示例8: sampleTopicsForOneDocWithTheta

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneDocWithTheta(FeatureSequence tokenSequence,
		LabelSequence topicSequence, double[] topicDistribution) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

   IntIntOpenHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();
	
	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];
		if(oldTopic == -1) {
			continue;
		}

 		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.addTo(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * topicDistribution[ti]; // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:cmoen,项目名称:mallet,代码行数:51,代码来源:LDAStream.java

示例9: sampleTopicsForOneTestDocAll

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneTestDocAll(FeatureSequence tokenSequence,
		LabelSequence topicSequence) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	IntIntHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();

	//		populate topic counts
	int[] localTopicCounts = new int[numTopics];
	for (int ti = 0; ti < numTopics; ti++){
		localTopicCounts[ti] = 0;
	}
	for (int position = 0; position < docLength; position++) {
		localTopicCounts[oneDocTopics[position]] ++;
	}

	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];

		// Remove this token from all counts
		localTopicCounts[oldTopic] --;

		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.addTo(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * ((localTopicCounts[ti] + alpha[ti])); // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
		localTopicCounts[newTopic] ++;
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:61,代码来源:LDAStream.java

示例10: sampleTopicsForOneTestDoc

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneTestDoc(FeatureSequence tokenSequence,
		LabelSequence topicSequence) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	IntIntHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();

	//		populate topic counts
	int[] localTopicCounts = new int[numTopics];
	for (int ti = 0; ti < numTopics; ti++){
		localTopicCounts[ti] = 0;
	}
	for (int position = 0; position < docLength; position++) {
		if(oneDocTopics[position] != -1) {
			localTopicCounts[oneDocTopics[position]] ++;
		}
	}

	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];
		if(oldTopic == -1) {
			continue;
		}

		// Remove this token from all counts
    		localTopicCounts[oldTopic] --;
    		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.addTo(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * ((localTopicCounts[ti] + alpha[ti])); // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
		localTopicCounts[newTopic] ++;
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:65,代码来源:LDAStream.java

示例11: sampleTopicsForOneDocWithTheta

import cc.mallet.types.LabelSequence; //导入方法依赖的package包/类
private void sampleTopicsForOneDocWithTheta(FeatureSequence tokenSequence,
		LabelSequence topicSequence, double[] topicDistribution) {
	// TODO Auto-generated method stub
	int[] oneDocTopics = topicSequence.getFeatures();

	IntIntHashMap currentTypeTopicCounts;
	int type, oldTopic, newTopic;
	double tw;
	double[] topicWeights = new double[numTopics];
	double topicWeightsSum;
	int docLength = tokenSequence.getLength();
	
	// Iterate over the positions (words) in the document
	for (int si = 0; si < docLength; si++) {
		type = tokenSequence.getIndexAtPosition(si);
		oldTopic = oneDocTopics[si];
		if(oldTopic == -1) {
			continue;
		}

 		currentTypeTopicCounts = typeTopicCounts[type];
		assert(currentTypeTopicCounts.get(oldTopic) >= 0);

		if (currentTypeTopicCounts.get(oldTopic) == 1) {
			currentTypeTopicCounts.remove(oldTopic);
		}
		else {
			currentTypeTopicCounts.addTo(oldTopic, -1);
		}
		tokensPerTopic[oldTopic]--;

		// Build a distribution over topics for this token
		Arrays.fill (topicWeights, 0.0);
		topicWeightsSum = 0;

		for (int ti = 0; ti < numTopics; ti++) {
			tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
			      * topicDistribution[ti]; // (/docLen-1+tAlpha); is constant across all topics
			topicWeightsSum += tw;
			topicWeights[ti] = tw;
		}
		// Sample a topic assignment from this distribution
		newTopic = random.nextDiscrete (topicWeights, topicWeightsSum);

		// Put that new topic into the counts
		oneDocTopics[si] = newTopic;
		currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
		tokensPerTopic[newTopic]++;
	}
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:51,代码来源:LDAStream.java


注:本文中的cc.mallet.types.LabelSequence.getFeatures方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。