当前位置: 首页>>代码示例>>Java>>正文


Java Clustering类代码示例

本文整理汇总了Java中cc.mallet.cluster.Clustering的典型用法代码示例。如果您正苦于以下问题:Java Clustering类的具体用法?Java Clustering怎么用?Java Clustering使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


Clustering类属于cc.mallet.cluster包,在下文中一共展示了Clustering类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: pipe

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Rewrites the carrier in place: its data (an {@link AgglomerativeNeighbor})
 * is replaced by a feature vector built from the records of the merged
 * cluster, and its target is set to the label "YES" or "NO".
 *
 * The target is "YES" when the first instance of each of the two old
 * clusters carries the same label in the original clustering, "NO" otherwise.
 *
 * @param carrier instance whose data is an AgglomerativeNeighbor
 * @return the same carrier, with data and target replaced
 */
public Instance pipe(Instance carrier) {
	AgglomerativeNeighbor merge = (AgglomerativeNeighbor) carrier.getData();
	Clustering before = merge.getOriginal();
	int[] firstOld = merge.getOldClusters()[0];
	int[] secondOld = merge.getOldClusters()[1];
	InstanceList allInstances = before.getInstances();
	int[] mergedIndices = merge.getNewCluster();

	// The alphabets are taken from the first merged record.
	Record[] mergedRecords = array2Records(mergedIndices, allInstances);
	Record firstRecord = mergedRecords[0];
	Alphabet fields = firstRecord.fieldAlphabet();
	Alphabet values = firstRecord.valueAlphabet();

	// Each helper returns the property list extended with its own features.
	PropertyList props = null;
	props = addExactMatch(mergedRecords, fields, values, props);
	props = addApproxMatch(mergedRecords, fields, values, props);
	props = addSubstringMatch(mergedRecords, fields, values, props);
	FeatureVector vector = new FeatureVector(getDataAlphabet(), props, true);
	carrier.setData(vector);

	LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
	String answer;
	if (before.getLabel(firstOld[0]) == before.getLabel(secondOld[0])) {
		answer = "YES";
	} else {
		answer = "NO";
	}
	carrier.setTarget(labels.lookupLabel(answer));
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:27,代码来源:Clusterings2Clusterer.java

示例2: getEvaluationScores

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Pairwise accuracy: the fraction of instance pairs (i, j), i &lt; j, on which
 * the two clusterings agree about whether the pair shares a cluster.
 *
 * The per-call counts are also added to the running totals kept on this
 * evaluator. With fewer than two instances there are no pairs and the
 * single returned score is NaN (0.0 / 0).
 *
 * @param truth     reference clustering; its instance count drives the loops
 * @param predicted clustering being scored
 * @return a one-element array holding correct / comparisons
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
	int agreeing = 0;
	int pairs = 0;

	for (int a = 0; a < truth.getNumInstances(); a++) {
		for (int b = a + 1; b < truth.getNumInstances(); b++) {
			boolean togetherInTruth = truth.getLabel(a) == truth.getLabel(b);
			boolean togetherInPredicted = predicted.getLabel(a) == predicted.getLabel(b);
			if (togetherInTruth == togetherInPredicted) {
				agreeing++;
			}
			pairs++;
		}
	}

	this.correctTotal += agreeing;
	this.comparisonsTotal += pairs;

	double accuracy = (double) agreeing / pairs;
	return new double[]{accuracy};
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:19,代码来源:AccuracyEvaluator.java

示例3: sampleClustering

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Samples an InstanceList and builds its "true" clustering.
 *
 * Starting from one cluster per instance, every pair (i, j), i &lt; j, is
 * visited once. The pair's clusters are merged when both feature vectors
 * contain "feature0". When neither contains it, they are merged only if a
 * uniform random draw falls below {@code noise}.
 *
 * @param alph alphabet passed to the InstanceList constructor
 * @return the clustering after all merges
 */
private Clustering sampleClustering (Alphabet alph) {
	InstanceList generated = new InstanceList(random, alph, new String[]{"foo", "bar"}, 30);
	InstanceList instances = generated.subList(0, 20);
	Clustering clustering = ClusterUtils.createSingletonClustering(instances);

	for (int i = 0; i < instances.size(); i++) {
		FeatureVector left = (FeatureVector) instances.get(i).getData();
		for (int j = i + 1; j < instances.size(); j++) {
			FeatureVector right = (FeatureVector) instances.get(j).getData();

			boolean shouldMerge;
			if (left.contains("feature0") && right.contains("feature0")) {
				// Merge instances that both have feature0.
				shouldMerge = true;
			} else if (left.contains("feature0") || right.contains("feature0")) {
				shouldMerge = false;
			} else {
				// Random noise: only drawn when neither instance has feature0.
				shouldMerge = random.nextUniform() < noise;
			}

			if (shouldMerge) {
				clustering = ClusterUtils.mergeClusters(clustering,
						clustering.getLabel(i),
						clustering.getLabel(j));
			}
		}
	}
	return clustering;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:33,代码来源:FirstOrderClusterExample.java

示例4: getMedWeights

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Scores every entry of {@code indices} against the entry at position
 * {@code medIdx}.
 *
 * @param medIdx   position within {@code indices} of the reference entry
 * @param indices  entries to score
 * @param original clustering passed as both arguments of each pairwise neighbor
 * @return one weight per entry: 1 at position {@code medIdx}, otherwise the
 *         score of the neighbor built from the reference entry and that entry
 */
private double[] getMedWeights(int medIdx,int[] indices,Clustering original)
{
	double[] weights = new double[indices.length];
	for (int pos = 0; pos < weights.length; pos++) {
		if (pos != medIdx) {
			AgglomerativeNeighbor pair =
				new AgglomerativeNeighbor(original, original, indices[medIdx], indices[pos]);
			weights[pos] = getScore(pair);
		} else {
			// The reference entry gets a fixed weight of 1 for itself.
			weights[pos] = 1;
		}
	}
	return weights;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:16,代码来源:MedoidEvaluator.java

示例5: getCentroid

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Finds the position within {@code indices} whose average pairwise score
 * against all the other listed entries is highest.
 *
 * Fix: the inner loop previously left via {@code break} when it reached the
 * self-pair, so only entries at lower positions were summed although the sum
 * was still divided by {@code indices.length - 1}. Position 0 therefore
 * always averaged 0. The self-pair is now skipped with {@code continue}, so
 * every other entry contributes.
 *
 * @param indices  entries to compare
 * @param original clustering passed as both arguments of each pairwise neighbor
 * @return a position in {@code indices} (not the value stored there);
 *         0 when there are fewer than two entries; -1 if no average compares
 *         greater than negative infinity (for example when all scores are NaN).
 *         On ties the lowest position wins.
 */
private int getCentroid(int[] indices,Clustering original)
{
	if (indices.length < 2)
		return 0;

	double bestAverage = Double.NEGATIVE_INFINITY;
	int bestPos = -1;
	for (int i = 0; i < indices.length; i++) {
		double total = 0;
		for (int k = 0; k < indices.length; k++) {
			if (i == k)
				continue; // skip only the self-pair, keep scoring later entries
			AgglomerativeNeighbor pair =
				new AgglomerativeNeighbor(original, original, indices[i], indices[k]);
			total += getScore(pair);
		}
		// Each entry is compared with the (length - 1) other entries.
		double average = total / (indices.length - 1);
		if (average > bestAverage) {
			bestAverage = average;
			bestPos = i;
		}
	}
	return bestPos;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:35,代码来源:MedoidEvaluator.java

示例6: AgglomerativeNeighbor

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 *
 * @param original
 * @param modified
 * @param cluster1 Instance indices for one cluster that was merged.
 * @param cluster2 Instance indices for other cluster that was merged.
 * @return
 */
public AgglomerativeNeighbor (Clustering original,
															Clustering modified,
															int[][] oldClusters) {
	super(original, modified);
	if (oldClusters.length != 2)
		throw new IllegalArgumentException("Agglomerations of more than 2 clusters not yet implemented.");
	this.oldClusters = oldClusters;
	this.newCluster = ArrayUtils.append(oldClusters[0], oldClusters[1]);	
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:18,代码来源:AgglomerativeNeighbor.java

示例7: getEvaluationScores

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Computes MUC-style precision, recall and F1 for a predicted clustering.
 *
 * Precision = \sum_i [ |siprime| - |pOfsiprime| ] / \sum_i [ |siprime| - 1 ],
 * where siprime is a predicted cluster and pOfsiprime is the set of true
 * clusters that contain elements of siprime. Recall is the same quantity
 * with the roles of the two clusterings swapped.
 *
 * The numerators and denominators of this call are also added to the
 * running precision/recall totals kept on this evaluator. A zero
 * denominator (or precision + recall == 0) yields NaN entries, as before.
 *
 * @param truth     reference clustering
 * @param predicted clustering being scored
 * @return {precision, recall, f1}
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
	int[] precisionCounts = mucCounts(predicted, truth);
	precisionNumerator += precisionCounts[0];
	precisionDenominator += precisionCounts[1];
	double precision = (double) precisionCounts[0] / precisionCounts[1];

	int[] recallCounts = mucCounts(truth, predicted);
	recallNumerator += recallCounts[0];
	recallDenominator += recallCounts[1];
	double recall = (double) recallCounts[0] / recallCounts[1];

	return new double[]{precision,recall,(2 * precision * recall / (precision + recall))};
}

/**
 * Sums, over every cluster of {@code clusters}, the cluster size minus the
 * number of distinct {@code other} labels among its members (numerator) and
 * the cluster size minus one (denominator).
 *
 * @return {numerator, denominator}
 */
private int[] mucCounts(Clustering clusters, Clustering other) {
	int numerator = 0;
	int denominator = 0;
	for (int i = 0; i < clusters.getNumClusters(); i++) {
		int[] members = clusters.getIndicesWithLabel(i);
		HashSet<Integer> otherLabels = new HashSet<Integer>();
		for (int j = 0; j < members.length; j++) {
			// Autoboxing replaces the deprecated new Integer(int) constructor.
			otherLabels.add(other.getLabel(members[j]));
		}
		numerator += members.length - otherLabels.size();
		denominator += members.length - 1;
	}
	return new int[]{numerator, denominator};
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:37,代码来源:MUCEvaluator.java

示例8: getEvaluationScores

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Computes B-Cubed style precision, recall and F1.
 *
 * For every instance, the members of its predicted cluster and of its true
 * cluster are compared. The number of indices present in both is divided by
 * the predicted cluster size (precision) and by the true cluster size
 * (recall). The per-instance values are summed, added to the running macro
 * totals on this evaluator, and then averaged over the instance count.
 *
 * @param truth     reference clustering; also supplies the instance list
 * @param predicted clustering being scored
 * @return {precision, recall, f1}
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
	InstanceList instances = truth.getInstances();
	double precisionSum = 0.0;
	double recallSum = 0.0;

	for (int i = 0; i < instances.size(); i++) {
		int[] trueMembers = truth.getIndicesWithLabel(truth.getLabel(i));
		int[] predMembers = predicted.getIndicesWithLabel(predicted.getLabel(i));

		// Count index pairs that appear in both clusters.
		int shared = 0;
		for (int predMember : predMembers) {
			for (int trueMember : trueMembers) {
				if (trueMember == predMember) {
					shared++;
				}
			}
		}
		precisionSum += (double) shared / predMembers.length;
		recallSum += (double) shared / trueMembers.length;
	}

	macroPrecision += precisionSum;
	macroRecall += recallSum;
	macroNumInstances += instances.size();

	double precision = precisionSum / instances.size();
	double recall = recallSum / instances.size();
	double f1 = (2 * precision * recall / (precision + recall));
	return new double[]{precision, recall, f1};
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:32,代码来源:BCubedEvaluator.java

示例9: generatePredicted

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Builds four fixed clusterings over the given instances, each described by
 * a cluster count and a 12-entry label array: two two-cluster splits, one
 * with everything in a single cluster, and one with every instance in its
 * own cluster.
 *
 * @param instances instances shared by all four clusterings
 * @return the four clusterings, in the order listed above
 */
private Clustering[] generatePredicted (InstanceList instances) {
	return new Clustering[] {
		new Clustering(instances, 2, new int[]{0,0,0,0,0,1,1,1,1,1,1,1}),
		new Clustering(instances, 2, new int[]{0,0,0,0,0,1,1,0,0,0,0,0}),
		new Clustering(instances, 1, new int[]{0,0,0,0,0,0,0,0,0,0,0,0}),
		new Clustering(instances, 12, new int[]{0,1,2,3,4,5,6,7,8,9,10,11})
	};
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:9,代码来源:TestClusteringEvaluators.java

示例10: testEvaluators

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Runs the pairwise-F1, B-Cubed and MUC evaluators over each generated
 * prediction, logs the per-prediction results and the totals to stderr,
 * and asserts that each evaluator's totals string contains the expected
 * f1 value.
 */
public void testEvaluators ()
{
	InstanceList instances = new InstanceList(new Randoms(1), 100, 2).subList(0,12);
	System.err.println(instances.size() + " instances");
	Clustering gold = generateTruth(instances);
	System.err.println("truth=" + gold);

	Clustering[] candidates = generatePredicted(instances);
	ClusteringEvaluator pairwise = new PairF1Evaluator();
	ClusteringEvaluator bcubed = new BCubedEvaluator();
	ClusteringEvaluator muc = new MUCEvaluator();

	// The evaluators accumulate across calls, so every candidate is scored
	// by each of them, in this order, before the totals are read.
	int position = 0;
	for (Clustering candidate : candidates) {
		System.err.println("\npred" + position + "=" + candidate);
		System.err.println("pairs: " + pairwise.evaluate(gold, candidate));
		System.err.println("bcube: " + bcubed.evaluate(gold, candidate));
		System.err.println("  muc: " + muc.evaluate(gold, candidate));
		position++;
	}

	System.err.println("totals:");
	System.err.println("pairs: " + pairwise.evaluateTotals());
	System.err.println("bcube: " + bcubed.evaluateTotals());
	System.err.println("  muc: " + muc.evaluateTotals());

	assertTrue(pairwise.evaluateTotals().matches(".*f1=0\\.5550.*"));
	assertTrue(bcubed.evaluateTotals().matches(".*f1=0\\.7404.*"));
	assertTrue(muc.evaluateTotals().matches(".*f1=0\\.8059.*"));
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TestClusteringEvaluators.java

示例11: getEvaluationScores

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Computes pairwise precision, recall and F1.
 *
 * Every pair inside a predicted cluster is a true positive when the pair
 * shares a label in {@code truth} and a false positive otherwise. Every
 * pair inside a true cluster whose predicted labels differ is a false
 * negative. The three counts are also added to the running totals kept on
 * this evaluator.
 *
 * @param truth     reference clustering
 * @param predicted clustering being scored
 * @return {precision, recall, f1}
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
	int truePos = 0;
	int falsePos = 0;
	int falseNeg = 0;

	// Pairs placed together by the prediction.
	for (int label = 0; label < predicted.getNumClusters(); label++) {
		int[] members = predicted.getIndicesWithLabel(label);
		for (int a = 0; a < members.length; a++) {
			for (int b = a + 1; b < members.length; b++) {
				if (truth.getLabel(members[a]) == truth.getLabel(members[b])) {
					truePos++;
				} else {
					falsePos++;
				}
			}
		}
	}

	// Pairs placed together by the truth but split by the prediction.
	for (int label = 0; label < truth.getNumClusters(); label++) {
		int[] members = truth.getIndicesWithLabel(label);
		for (int a = 0; a < members.length; a++) {
			for (int b = a + 1; b < members.length; b++) {
				if (predicted.getLabel(members[a]) == predicted.getLabel(members[b])) {
					continue;
				}
				falseNeg++;
			}
		}
	}

	this.tpTotal += truePos;
	this.fpTotal += falsePos;
	this.fnTotal += falseNeg;

	double precision = (double) truePos / (truePos + falsePos);
	double recall = (double) truePos / (truePos + falseNeg);
	double f1 = 2 * precision * recall / (precision + recall);
	return new double[]{precision, recall, f1};
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:34,代码来源:PairF1Evaluator.java

示例12: evaluate

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Runs every configured evaluator on the pair of clusterings and
 * concatenates their reports, one per line.
 *
 * Each line has the form {@code SimpleClassName: report}, where the simple
 * class name is the evaluator's class name after its last '.'.
 *
 * @param truth     reference clustering
 * @param predicted clustering being scored
 * @return A String summarizing the evaluation metric of each evaluator.
 */
public String evaluate (Clustering truth, Clustering predicted) {
	// A StringBuilder avoids re-copying the accumulated text on every iteration.
	StringBuilder results = new StringBuilder();
	for (int i = 0; i < evaluators.length; i++) {
		String name = evaluators[i].getClass().getName();
		results.append(name.substring(name.lastIndexOf('.') + 1))
				.append(": ")
				.append(evaluators[i].evaluate(truth, predicted))
				.append("\n");
	}
	return results.toString();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:16,代码来源:ClusteringEvaluators.java

示例13: mergeInstances

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Merges the clusters that contain the specified instances.
 *
 * Every pair of listed instances is visited. For each pair, the clusters
 * currently holding the two instances are merged. Labels are looked up
 * again for every pair, against the clustering returned by the previous
 * merge.
 *
 * @param clustering clustering to start from
 * @param instances  indices of the instances whose clusters are merged
 * @return Modified Clustering.
 */
public static Clustering mergeInstances (Clustering clustering,
																				 int[] instances) {
	Clustering current = clustering;
	for (int a = 0; a < instances.length; a++) {
		for (int b = a + 1; b < instances.length; b++) {
			current = mergeClusters(current,
					current.getLabel(instances[a]),
					current.getLabel(instances[b]));
		}
	}
	return current;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:18,代码来源:ClusterUtils.java

示例14: getCombinedInstances

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Returns the instance indices of two clusters as one array.
 *
 * @param clustering clustering to read from
 * @param i          label of the first cluster
 * @param j          label of the second cluster
 * @return a new array holding the indices with label {@code i} followed by
 *         the indices with label {@code j}
 */
public static int[] getCombinedInstances (Clustering clustering, int i, int j) {
	int[] first = clustering.getIndicesWithLabel(i);
	int[] second = clustering.getIndicesWithLabel(j);
	int firstLength = first.length;
	int secondLength = second.length;

	int[] combined = new int[firstLength + secondLength];
	System.arraycopy(first, 0, combined, 0, firstLength);
	System.arraycopy(second, 0, combined, firstLength, secondLength);
	return combined;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:9,代码来源:ClusterUtils.java

示例15: createSingletonClustering

import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Builds a Clustering with one cluster per Instance: instance {@code k}
 * receives label {@code k}, and the cluster count equals the number of
 * instances.
 *
 * @param instances instances to cluster
 * @return Singleton Clustering.
 */
public static Clustering createSingletonClustering (InstanceList instances) {
	int[] ownLabels = new int[instances.size()];
	int position = 0;
	while (position < ownLabels.length) {
		ownLabels[position] = position;
		position++;
	}
	return new Clustering(instances, ownLabels.length, ownLabels);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:14,代码来源:ClusterUtils.java


注:本文中的cc.mallet.cluster.Clustering类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。