This page collects typical usage examples of the Java method cc.mallet.types.LabelSequence.getFeatures. If you have been wondering what LabelSequence.getFeatures does, how to use it, or where to find examples of it, the curated code samples below should help. You can also explore the containing class, cc.mallet.types.LabelSequence, for further usage examples.
The following shows 11 code examples of LabelSequence.getFeatures, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java examples.
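Before the examples, a minimal sketch of the call itself. It assumes MALLET's ParallelTopicModel API, where getData() returns TopicAssignment objects exposing public instance and topicSequence fields; the model parameter and the dumpAssignments helper are hypothetical stand-ins for however you obtain a trained model. Note that getFeatures() returns the live backing int[] of label indices, not a copy, which is why the sampling examples below can overwrite topic assignments in place; the array may be longer than the logical sequence, so iterate up to getLength().

import cc.mallet.topics.ParallelTopicModel;
import cc.mallet.topics.TopicAssignment;

// A minimal sketch, not part of the examples below.
static void dumpAssignments(ParallelTopicModel model) {
    for (TopicAssignment ta : model.getData()) {
        int[] topics = ta.topicSequence.getFeatures(); // live backing array: one topic index per token
        int numTokens = ta.topicSequence.getLength();  // may be shorter than topics.length
        StringBuilder line = new StringBuilder(String.valueOf(ta.instance.getName()));
        for (int i = 0; i < numTokens; i++) {
            line.append(' ').append(topics[i]);
        }
        System.out.println(line);
    }
}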
Example 1: printTheta
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
public void printTheta(ArrayList<Topication> dataset, File f, double threshold, int max) throws IOException {
    PrintWriter pw = new PrintWriter(new FileWriter(f));
    int[] topicCounts = new int[numTopics];
    int docLen;
    for (int di = 0; di < dataset.size(); di++) {
        LabelSequence topicSequence = dataset.get(di).topicSequence;
        int[] currentDocTopics = topicSequence.getFeatures();
        docLen = currentDocTopics.length;
        for (int token = 0; token < docLen; token++) {
            topicCounts[currentDocTopics[token]]++;
        }
        pw.println(dataset.get(di).instance.getName());
        // theta(t|d) = (n(t|d) + alpha(t)) / (docLen + alphaSum)
        for (int topic = 0; topic < numTopics; topic++) {
            double prob = (double) (topicCounts[topic] + alpha[topic]) / (docLen + alphaSum);
            pw.println("topic" + topic + "\t" + prob);
        }
        pw.println();
        Arrays.fill(topicCounts, 0);
    }
    pw.close();
}
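The proportion written for each topic is the standard smoothed estimate of the document-topic distribution from a collapsed Gibbs sample. In LaTeX, with n(t|d) = topicCounts[topic], N_d = docLen, and the alpha prior summing to alphaSum:

\hat{\theta}_{t \mid d} = \frac{n(t \mid d) + \alpha_t}{N_d + \sum_{t'} \alpha_{t'}}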
Example 2: sampleTopicsForOneTestDocAll
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneTestDocAll(FeatureSequence tokenSequence,
                                          LabelSequence topicSequence) {
    int[] oneDocTopics = topicSequence.getFeatures();
    TIntIntHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Populate the per-document topic counts (Java int arrays are zero-initialized)
    int[] localTopicCounts = new int[numTopics];
    for (int position = 0; position < docLength; position++) {
        localTopicCounts[oneDocTopics[position]]++;
    }

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];

        // Remove this token from all counts
        localTopicCounts[oldTopic]--;
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.adjustValue(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * (localTopicCounts[ti] + alpha[ti]);
            // the document denominator (docLength - 1 + alphaSum) is omitted:
            // it is constant across topics and cancels during normalization
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.adjustOrPutValue(newTopic, 1, 1);
        localTopicCounts[newTopic]++;
        tokensPerTopic[newTopic]++;
    }
}
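The weight computed for each topic is the usual collapsed-Gibbs full conditional for LDA, up to the constant document-length denominator the code comment mentions. With n_{w,t} = currentTypeTopicCounts.get(ti), n_t = tokensPerTopic[ti], V\beta = betaSum, and n_{t|d} = localTopicCounts[ti], all counts excluding the current token i:

P(z_i = t \mid \mathbf{z}_{-i}, \mathbf{w}) \;\propto\; \frac{n^{-i}_{w_i,t} + \beta}{n^{-i}_{t} + V\beta} \,\left(n^{-i}_{t \mid d} + \alpha_t\right)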
Example 3: sampleTopicsForOneTestDoc
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneTestDoc(FeatureSequence tokenSequence,
                                       LabelSequence topicSequence) {
    int[] oneDocTopics = topicSequence.getFeatures();
    TIntIntHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Populate the per-document topic counts, skipping unassigned (-1) tokens
    int[] localTopicCounts = new int[numTopics];
    for (int position = 0; position < docLength; position++) {
        if (oneDocTopics[position] != -1) {
            localTopicCounts[oneDocTopics[position]]++;
        }
    }

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];
        if (oldTopic == -1) {
            continue; // leave unassigned tokens untouched
        }

        // Remove this token from all counts
        localTopicCounts[oldTopic]--;
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.adjustValue(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * (localTopicCounts[ti] + alpha[ti]);
            // the constant document denominator (docLength - 1 + alphaSum) is omitted
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.adjustOrPutValue(newTopic, 1, 1);
        localTopicCounts[newTopic]++;
        tokensPerTopic[newTopic]++;
    }
}
Example 4: sampleTopicsForOneDocWithTheta
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneDocWithTheta(FeatureSequence tokenSequence,
                                            LabelSequence topicSequence,
                                            double[] topicDistribution) {
    int[] oneDocTopics = topicSequence.getFeatures();
    TIntIntHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];
        if (oldTopic == -1) {
            continue; // leave unassigned tokens untouched
        }

        // Remove this token from the type-topic counts
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.adjustValue(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token, weighting by the
        // fixed document-topic distribution instead of local topic counts
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * topicDistribution[ti];
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.adjustOrPutValue(newTopic, 1, 1);
        tokensPerTopic[newTopic]++;
    }
}
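Compared with Examples 2 and 3, this variant replaces the document-topic count term with the fixed, externally supplied topicDistribution (a per-document theta), so the sampling proposal becomes:

P(z_i = t \mid \mathbf{z}_{-i}, \mathbf{w}, \theta) \;\propto\; \frac{n^{-i}_{w_i,t} + \beta}{n^{-i}_{t} + V\beta} \;\theta_t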
Example 5: printDocumentTopics
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
/**
 * @param dataset   The analyzed documents with their topic assignments
 * @param pw        A print writer
 * @param threshold Only print topics with proportion greater than this number
 * @param max       Print no more than this many topics
 */
public void printDocumentTopics(ArrayList<Topication> dataset, PrintWriter pw, double threshold, int max) {
    pw.print("#doc source topic proportion ...\n");
    int docLen;
    int[] topicCounts = new int[numTopics];
    IDSorter[] sortedTopics = new IDSorter[numTopics];
    for (int topic = 0; topic < numTopics; topic++) {
        // Initialize the sorters with dummy values
        sortedTopics[topic] = new IDSorter(topic, topic);
    }
    if (max < 0 || max > numTopics) {
        max = numTopics;
    }
    for (int di = 0; di < dataset.size(); di++) {
        LabelSequence topicSequence = dataset.get(di).topicSequence;
        int[] currentDocTopics = topicSequence.getFeatures();
        pw.print(di);
        pw.print(' ');
        if (dataset.get(di).instance.getSource() != null) {
            pw.print(dataset.get(di).instance.getSource());
        } else {
            pw.print("null-source");
        }
        pw.print(' ');
        docLen = currentDocTopics.length;

        // Count up the tokens, skipping unassigned (-1) topics
        int realDocLen = 0;
        for (int token = 0; token < docLen; token++) {
            if (currentDocTopics[token] != -1) {
                topicCounts[currentDocTopics[token]]++;
                realDocLen++;
            }
        }
        assert (realDocLen == docLen); // holds only if no token is unassigned

        alphaSum = 0.0;
        for (int topic = 0; topic < numTopics; topic++) {
            alphaSum += alpha[topic];
        }

        // Normalize and smooth by the Dirichlet prior alpha
        for (int topic = 0; topic < numTopics; topic++) {
            sortedTopics[topic].set(topic, (double) (topicCounts[topic] + alpha[topic]) / (docLen + alphaSum));
        }
        Arrays.sort(sortedTopics);
        for (int i = 0; i < max; i++) {
            if (sortedTopics[i].getWeight() < threshold) {
                break;
            }
            pw.print(sortedTopics[i].getID() + " " + sortedTopics[i].getWeight() + " ");
        }
        pw.print(" \n");
        Arrays.fill(topicCounts, 0);
    }
    pw.close();
}
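A hypothetical call site for the two printing methods above, assuming model is an instance of the (unnamed) topic-model class that defines them and dataset is its ArrayList<Topication> of analyzed documents; the file names and thresholds are illustrative only:

try (PrintWriter pw = new PrintWriter(new FileWriter("doc-topics.txt"))) {
    // threshold 0.05: skip topics below 5%; max 10: print at most ten topics per document
    model.printDocumentTopics(dataset, pw, 0.05, 10);
}
model.printTheta(dataset, new File("theta.txt"), 0.0, numTopics);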
Example 6: sampleTopicsForOneTestDocAll
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneTestDocAll(FeatureSequence tokenSequence,
                                          LabelSequence topicSequence) {
    int[] oneDocTopics = topicSequence.getFeatures();
    IntIntOpenHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Populate the per-document topic counts (Java int arrays are zero-initialized)
    int[] localTopicCounts = new int[numTopics];
    for (int position = 0; position < docLength; position++) {
        localTopicCounts[oneDocTopics[position]]++;
    }

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];

        // Remove this token from all counts
        localTopicCounts[oldTopic]--;
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.addTo(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * (localTopicCounts[ti] + alpha[ti]);
            // the constant document denominator (docLength - 1 + alphaSum) is omitted
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
        localTopicCounts[newTopic]++;
        tokensPerTopic[newTopic]++;
    }
}
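Examples 2-4 use GNU Trove's TIntIntHashMap for the sparse type-topic counts, while Examples 6-8 use HPPC's IntIntOpenHashMap (renamed IntIntHashMap in later HPPC releases, which is what Examples 9-11 use). The increment/decrement idioms correspond one-to-one; a small self-contained sketch, assuming Trove 3.x and a pre-0.7 HPPC on the classpath:

import com.carrotsearch.hppc.IntIntOpenHashMap; // later HPPC releases: IntIntHashMap
import gnu.trove.map.hash.TIntIntHashMap;       // Trove 2.x instead uses gnu.trove.TIntIntHashMap

public class CountMapIdioms {
    public static void main(String[] args) {
        TIntIntHashMap trove = new TIntIntHashMap();
        trove.adjustOrPutValue(7, 1, 1); // key present: add 1; key absent: put 1
        trove.adjustValue(7, -1);        // add -1; returns false if the key is absent

        IntIntOpenHashMap hppc = new IntIntOpenHashMap();
        hppc.putOrAdd(7, 1, 1);          // key absent: put 1; key present: add 1
        hppc.addTo(7, -1);               // equivalent to putOrAdd(7, -1, -1)

        System.out.println(trove.get(7) + " " + hppc.get(7)); // prints "0 0"
    }
}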
Example 7: sampleTopicsForOneTestDoc
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneTestDoc(FeatureSequence tokenSequence,
                                       LabelSequence topicSequence) {
    int[] oneDocTopics = topicSequence.getFeatures();
    IntIntOpenHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Populate the per-document topic counts, skipping unassigned (-1) tokens
    int[] localTopicCounts = new int[numTopics];
    for (int position = 0; position < docLength; position++) {
        if (oneDocTopics[position] != -1) {
            localTopicCounts[oneDocTopics[position]]++;
        }
    }

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];
        if (oldTopic == -1) {
            continue; // leave unassigned tokens untouched
        }

        // Remove this token from all counts
        localTopicCounts[oldTopic]--;
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.addTo(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * (localTopicCounts[ti] + alpha[ti]);
            // the constant document denominator (docLength - 1 + alphaSum) is omitted
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
        localTopicCounts[newTopic]++;
        tokensPerTopic[newTopic]++;
    }
}
Example 8: sampleTopicsForOneDocWithTheta
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneDocWithTheta(FeatureSequence tokenSequence,
                                            LabelSequence topicSequence,
                                            double[] topicDistribution) {
    int[] oneDocTopics = topicSequence.getFeatures();
    IntIntOpenHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];
        if (oldTopic == -1) {
            continue; // leave unassigned tokens untouched
        }

        // Remove this token from the type-topic counts
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.addTo(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token, weighting by the
        // fixed document-topic distribution instead of local topic counts
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * topicDistribution[ti];
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
        tokensPerTopic[newTopic]++;
    }
}
Example 9: sampleTopicsForOneTestDocAll
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneTestDocAll(FeatureSequence tokenSequence,
                                          LabelSequence topicSequence) {
    int[] oneDocTopics = topicSequence.getFeatures();
    IntIntHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Populate the per-document topic counts (Java int arrays are zero-initialized)
    int[] localTopicCounts = new int[numTopics];
    for (int position = 0; position < docLength; position++) {
        localTopicCounts[oneDocTopics[position]]++;
    }

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];

        // Remove this token from all counts
        localTopicCounts[oldTopic]--;
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.addTo(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * (localTopicCounts[ti] + alpha[ti]);
            // the constant document denominator (docLength - 1 + alphaSum) is omitted
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
        localTopicCounts[newTopic]++;
        tokensPerTopic[newTopic]++;
    }
}
Example 10: sampleTopicsForOneTestDoc
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneTestDoc(FeatureSequence tokenSequence,
                                       LabelSequence topicSequence) {
    int[] oneDocTopics = topicSequence.getFeatures();
    IntIntHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Populate the per-document topic counts, skipping unassigned (-1) tokens
    int[] localTopicCounts = new int[numTopics];
    for (int position = 0; position < docLength; position++) {
        if (oneDocTopics[position] != -1) {
            localTopicCounts[oneDocTopics[position]]++;
        }
    }

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];
        if (oldTopic == -1) {
            continue; // leave unassigned tokens untouched
        }

        // Remove this token from all counts
        localTopicCounts[oldTopic]--;
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.addTo(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * (localTopicCounts[ti] + alpha[ti]);
            // the constant document denominator (docLength - 1 + alphaSum) is omitted
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
        localTopicCounts[newTopic]++;
        tokensPerTopic[newTopic]++;
    }
}
Example 11: sampleTopicsForOneDocWithTheta
import cc.mallet.types.LabelSequence; // import the package/class the method depends on
private void sampleTopicsForOneDocWithTheta(FeatureSequence tokenSequence,
                                            LabelSequence topicSequence,
                                            double[] topicDistribution) {
    int[] oneDocTopics = topicSequence.getFeatures();
    IntIntHashMap currentTypeTopicCounts;
    int type, oldTopic, newTopic;
    double tw;
    double[] topicWeights = new double[numTopics];
    double topicWeightsSum;
    int docLength = tokenSequence.getLength();

    // Iterate over the positions (words) in the document
    for (int si = 0; si < docLength; si++) {
        type = tokenSequence.getIndexAtPosition(si);
        oldTopic = oneDocTopics[si];
        if (oldTopic == -1) {
            continue; // leave unassigned tokens untouched
        }

        // Remove this token from the type-topic counts
        currentTypeTopicCounts = typeTopicCounts[type];
        assert (currentTypeTopicCounts.get(oldTopic) >= 0);
        if (currentTypeTopicCounts.get(oldTopic) == 1) {
            currentTypeTopicCounts.remove(oldTopic);
        } else {
            currentTypeTopicCounts.addTo(oldTopic, -1);
        }
        tokensPerTopic[oldTopic]--;

        // Build a distribution over topics for this token, weighting by the
        // fixed document-topic distribution instead of local topic counts
        Arrays.fill(topicWeights, 0.0);
        topicWeightsSum = 0;
        for (int ti = 0; ti < numTopics; ti++) {
            tw = ((currentTypeTopicCounts.get(ti) + beta) / (tokensPerTopic[ti] + betaSum))
                    * topicDistribution[ti];
            topicWeightsSum += tw;
            topicWeights[ti] = tw;
        }

        // Sample a topic assignment from this distribution
        newTopic = random.nextDiscrete(topicWeights, topicWeightsSum);

        // Put the new topic into the counts
        oneDocTopics[si] = newTopic;
        currentTypeTopicCounts.putOrAdd(newTopic, 1, 1);
        tokensPerTopic[newTopic]++;
    }
}