本文整理汇总了Java中cc.mallet.cluster.Clustering类的典型用法代码示例。如果您正苦于以下问题:Java Clustering类的具体用法?Java Clustering怎么用?Java Clustering使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Clustering类属于cc.mallet.cluster包,在下文中一共展示了Clustering类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: pipe
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Replaces the carrier's {@link AgglomerativeNeighbor} data with a binary
 * {@link FeatureVector} built from the records of the merged cluster, and sets
 * the target to the label "YES" or "NO".
 *
 * <p>The target is "YES" when the first instance of each of the two old
 * clusters carries the same label in the original clustering, "NO" otherwise.
 *
 * @param carrier instance whose data is an {@link AgglomerativeNeighbor}
 * @return the same carrier, with data and target overwritten
 */
public Instance pipe(Instance carrier) {
    AgglomerativeNeighbor merge = (AgglomerativeNeighbor) carrier.getData();
    Clustering before = merge.getOriginal();
    int[][] oldClusters = merge.getOldClusters();
    int[] left = oldClusters[0];
    int[] right = oldClusters[1];

    InstanceList allInstances = before.getInstances();
    Record[] records = array2Records(merge.getNewCluster(), allInstances);
    Alphabet fields = records[0].fieldAlphabet();
    Alphabet values = records[0].valueAlphabet();

    // Each helper receives the property list built so far and returns the extended list.
    PropertyList matches = addExactMatch(records, fields, values, (PropertyList) null);
    matches = addApproxMatch(records, fields, values, matches);
    matches = addSubstringMatch(records, fields, values, matches);
    FeatureVector vector = new FeatureVector(getDataAlphabet(), matches, true);
    carrier.setData(vector);

    LabelAlphabet targets = (LabelAlphabet) getTargetAlphabet();
    String answer;
    if (before.getLabel(left[0]) == before.getLabel(right[0])) {
        answer = "YES";
    } else {
        answer = "NO";
    }
    carrier.setTarget(targets.lookupLabel(answer));
    return carrier;
}
示例2: getEvaluationScores
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Scores a predicted clustering by the fraction of instance pairs on which it
 * agrees with the truth (both place the pair together, or both place it apart).
 *
 * <p>The per-call counts are also added to {@code correctTotal} and
 * {@code comparisonsTotal}.
 *
 * @param truth the reference clustering
 * @param predicted the clustering to evaluate
 * @return a one-element array holding correct / comparisons (NaN when there are
 *         no pairs, because of the 0/0 floating-point division)
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
    final int instanceCount = truth.getNumInstances();
    int agreeing = 0;
    int pairs = 0;
    for (int a = 0; a < instanceCount; a++) {
        for (int b = a + 1; b < instanceCount; b++) {
            boolean togetherInTruth = truth.getLabel(a) == truth.getLabel(b);
            boolean togetherInPrediction = predicted.getLabel(a) == predicted.getLabel(b);
            if (togetherInTruth == togetherInPrediction) {
                agreeing++;
            }
            pairs++;
        }
    }
    this.correctTotal += agreeing;
    this.comparisonsTotal += pairs;
    double accuracy = (double) agreeing / pairs;
    return new double[] {accuracy};
}
示例3: sampleClustering
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Samples an InstanceList together with a clustering to treat as its truth.
 *
 * <p>Starts from one cluster per instance, then merges the clusters of every
 * pair of instances that both contain "feature0". Pairs in which neither
 * instance contains "feature0" are merged with probability {@code noise}.
 *
 * @param alph alphabet passed to the InstanceList constructor
 * @return the clustering after all merges
 */
private Clustering sampleClustering (Alphabet alph) {
    InstanceList generated = new InstanceList(random, alph, new String[]{"foo", "bar"}, 30);
    InstanceList instances = generated.subList(0, 20);
    Clustering clustering = ClusterUtils.createSingletonClustering(instances);

    final int count = instances.size();
    for (int a = 0; a < count; a++) {
        FeatureVector first = (FeatureVector) instances.get(a).getData();
        boolean firstHasFeature = first.contains("feature0");
        for (int b = a + 1; b < count; b++) {
            FeatureVector second = (FeatureVector) instances.get(b).getData();
            boolean secondHasFeature = second.contains("feature0");
            // Short-circuiting keeps the random draw limited to pairs where
            // neither instance has the feature.
            boolean shouldMerge =
                (firstHasFeature && secondHasFeature)
                || (!firstHasFeature && !secondHasFeature && random.nextUniform() < noise);
            if (shouldMerge) {
                clustering = ClusterUtils.mergeClusters(clustering,
                                                        clustering.getLabel(a),
                                                        clustering.getLabel(b));
            }
        }
    }
    return clustering;
}
示例4: getMedWeights
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Computes one weight per cluster member relative to a chosen member.
 *
 * @param medIdx position within {@code indices} of the chosen member
 * @param indices instance indices of the cluster members
 * @param original clustering used as both the original and the modified
 *        clustering of each pairwise {@link AgglomerativeNeighbor}
 * @return an array parallel to {@code indices}: 1 at position {@code medIdx},
 *         and elsewhere the {@code getScore} of the neighbor pairing
 *         {@code indices[medIdx]} with that member
 */
private double[] getMedWeights(int medIdx, int[] indices, Clustering original) {
    double[] weights = new double[indices.length];
    for (int pos = 0; pos < weights.length; pos++) {
        if (pos == medIdx) {
            weights[pos] = 1;
            continue;
        }
        AgglomerativeNeighbor pair =
            new AgglomerativeNeighbor(original, original, indices[medIdx], indices[pos]);
        weights[pos] = getScore(pair);
    }
    return weights;
}
示例5: getCentroid
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Finds the cluster member whose average pairwise score against every other
 * member is highest.
 *
 * @param indices instance indices of the cluster members
 * @param original clustering used as both the original and the modified
 *        clustering of each pairwise {@link AgglomerativeNeighbor}
 * @return the position within {@code indices} (not the instance index itself)
 *         of the best-scoring member; 0 when {@code indices} has fewer than two
 *         elements; -1 when no average exceeds negative infinity (for example
 *         when every average is NaN)
 */
private int getCentroid(int[] indices, Clustering original) {
    if (indices.length < 2) {
        return 0;
    }

    double bestAverage = Double.NEGATIVE_INFINITY;
    int bestPosition = -1;
    for (int i = 0; i < indices.length; i++) {
        double total = 0;
        for (int k = 0; k < indices.length; k++) {
            if (i == k) {
                // Skip only the self-pair. Breaking out here would drop every
                // k > i from the sum even though the average below divides by
                // all (length - 1) other members, and would pin position 0 to
                // an average of 0.
                continue;
            }
            AgglomerativeNeighbor pair =
                new AgglomerativeNeighbor(original, original, indices[i], indices[k]);
            total += getScore(pair);
        }
        double average = total / (indices.length - 1);
        // Strict comparison keeps the earliest position on ties.
        if (average > bestAverage) {
            bestAverage = average;
            bestPosition = i;
        }
    }
    return bestPosition;
}
示例6: AgglomerativeNeighbor
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
*
* @param original
* @param modified
* @param cluster1 Instance indices for one cluster that was merged.
* @param cluster2 Instance indices for other cluster that was merged.
* @return
*/
public AgglomerativeNeighbor (Clustering original,
Clustering modified,
int[][] oldClusters) {
super(original, modified);
if (oldClusters.length != 2)
throw new IllegalArgumentException("Agglomerations of more than 2 clusters not yet implemented.");
this.oldClusters = oldClusters;
this.newCluster = ArrayUtils.append(oldClusters[0], oldClusters[1]);
}
示例7: getEvaluationScores
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Computes precision, recall and F1 of a predicted clustering from the number
 * of distinct clusters each cluster is split across in the other clustering.
 *
 * <p>The per-call numerators and denominators are also added to
 * {@code precisionNumerator}, {@code precisionDenominator},
 * {@code recallNumerator} and {@code recallDenominator}.
 *
 * @param truth the reference clustering
 * @param predicted the clustering to evaluate
 * @return {@code {precision, recall, f1}}; a value is NaN when its division is
 *         0/0 (for example precision when every predicted cluster has exactly
 *         one member)
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
    // Precision = \sum_i [ |siprime| - |pOfsiprime| ] / \sum_i [ |siprime| - 1 ]
    // where siprime is a predicted cluster, pOfsiprime is the set of
    // true clusters that contain elements of siprime.
    int numerator = 0;
    int denominator = 0;
    for (int i = 0; i < predicted.getNumClusters(); i++) {
        int[] siprime = predicted.getIndicesWithLabel(i);
        HashSet<Integer> pOfsiprime = new HashSet<Integer>();
        for (int j = 0; j < siprime.length; j++) {
            pOfsiprime.add(truth.getLabel(siprime[j]));
        }
        numerator += siprime.length - pOfsiprime.size();
        denominator += siprime.length - 1;
    }
    precisionNumerator += numerator;
    precisionDenominator += denominator;
    double precision = (double) numerator / denominator;

    // Recall = \sum_i [ |si| - |pOfsi| ] / \sum_i [ |si| - 1 ]
    // where si is a true cluster, pOfsi is the set of predicted
    // clusters that contain elements of si.
    numerator = 0;
    denominator = 0;
    for (int i = 0; i < truth.getNumClusters(); i++) {
        int[] si = truth.getIndicesWithLabel(i);
        HashSet<Integer> pOfsi = new HashSet<Integer>();
        for (int j = 0; j < si.length; j++) {
            // Autoboxing, as in the precision loop; the Integer(int)
            // constructor is deprecated.
            pOfsi.add(predicted.getLabel(si[j]));
        }
        numerator += si.length - pOfsi.size();
        denominator += si.length - 1;
    }
    recallNumerator += numerator;
    recallDenominator += denominator;
    double recall = (double) numerator / denominator;

    double f1 = 2 * precision * recall / (precision + recall);
    return new double[]{precision, recall, f1};
}
示例8: getEvaluationScores
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Computes per-instance precision and recall, averaged over all instances of
 * the truth clustering, plus their F1.
 *
 * <p>For each instance, "correct" is the number of instances shared by its true
 * cluster and its predicted cluster. Precision divides that by the predicted
 * cluster's size and recall by the true cluster's size. The un-averaged sums
 * and the instance count are added to {@code macroPrecision},
 * {@code macroRecall} and {@code macroNumInstances}.
 *
 * @param truth the reference clustering
 * @param predicted the clustering to evaluate
 * @return {@code {precision, recall, f1}}
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
    InstanceList instances = truth.getInstances();
    final int total = instances.size();
    double precisionSum = 0.0;
    double recallSum = 0.0;

    for (int i = 0; i < total; i++) {
        int[] trueMembers = truth.getIndicesWithLabel(truth.getLabel(i));
        int[] predictedMembers = predicted.getIndicesWithLabel(predicted.getLabel(i));

        // Size of the intersection of the two clusters containing instance i.
        int shared = 0;
        for (int predictedMember : predictedMembers) {
            for (int trueMember : trueMembers) {
                if (trueMember == predictedMember) {
                    shared++;
                }
            }
        }
        precisionSum += (double) shared / predictedMembers.length;
        recallSum += (double) shared / trueMembers.length;
    }

    macroPrecision += precisionSum;
    macroRecall += recallSum;
    macroNumInstances += total;

    double precision = precisionSum / total;
    double recall = recallSum / total;
    double f1 = 2 * precision * recall / (precision + recall);
    return new double[]{precision, recall, f1};
}
示例9: generatePredicted
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Builds four fixed clusterings over the given instances, each described by a
 * 12-element label array.
 *
 * @param instances instances shared by every returned clustering
 * @return clusterings with, in order: a 5/7 split into two clusters, a
 *         different two-cluster split, a single cluster, and twelve singletons
 */
private Clustering[] generatePredicted (InstanceList instances) {
    int[] fiveThenSeven = {0,0,0,0,0,1,1,1,1,1,1,1};
    int[] twoInTheMiddle = {0,0,0,0,0,1,1,0,0,0,0,0};
    int[] allTogether = {0,0,0,0,0,0,0,0,0,0,0,0};
    int[] allApart = {0,1,2,3,4,5,6,7,8,9,10,11};
    return new Clustering[] {
        new Clustering(instances, 2, fiveThenSeven),
        new Clustering(instances, 2, twoInTheMiddle),
        new Clustering(instances, 1, allTogether),
        new Clustering(instances, 12, allApart)
    };
}
示例10: testEvaluators
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Runs the pair-F1, B-cubed and MUC evaluators over each generated predicted
 * clustering, printing every result to standard error, then asserts that each
 * evaluator's totals string contains the expected F1 value.
 */
public void testEvaluators ()
{
    InstanceList generated = new InstanceList(new Randoms(1), 100, 2);
    InstanceList instances = generated.subList(0,12);
    System.err.println(instances.size() + " instances");

    Clustering truth = generateTruth(instances);
    System.err.println("truth=" + truth);
    Clustering[] predicted = generatePredicted(instances);

    ClusteringEvaluator pweval = new PairF1Evaluator();
    ClusteringEvaluator bceval = new BCubedEvaluator();
    ClusteringEvaluator muceval = new MUCEvaluator();

    int index = 0;
    for (Clustering guess : predicted) {
        System.err.println("\npred" + index + "=" + guess);
        System.err.println("pairs: " + pweval.evaluate(truth, guess));
        System.err.println("bcube: " + bceval.evaluate(truth, guess));
        System.err.println(" muc: " + muceval.evaluate(truth, guess));
        index++;
    }

    System.err.println("totals:");
    System.err.println("pairs: " + pweval.evaluateTotals());
    System.err.println("bcube: " + bceval.evaluateTotals());
    System.err.println(" muc: " + muceval.evaluateTotals());

    assertTrue(pweval.evaluateTotals().matches(".*f1=0\\.5550.*"));
    assertTrue(bceval.evaluateTotals().matches(".*f1=0\\.7404.*"));
    assertTrue(muceval.evaluateTotals().matches(".*f1=0\\.8059.*"));
}
示例11: getEvaluationScores
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Computes pairwise precision, recall and F1 of a predicted clustering.
 *
 * <p>A pair inside one predicted cluster is a true positive when the truth
 * gives both instances the same label, a false positive otherwise. A pair
 * inside one true cluster is a false negative when the prediction gives the
 * two instances different labels. The counts are also added to
 * {@code tpTotal}, {@code fpTotal} and {@code fnTotal}.
 *
 * @param truth the reference clustering
 * @param predicted the clustering to evaluate
 * @return {@code {precision, recall, f1}}
 */
@Override
public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
    int truePositives = 0;
    int falsePositives = 0;
    int falseNegatives = 0;

    for (int label = 0; label < predicted.getNumClusters(); label++) {
        int[] members = predicted.getIndicesWithLabel(label);
        for (int a = 0; a < members.length; a++) {
            for (int b = a + 1; b < members.length; b++) {
                boolean trulyTogether = truth.getLabel(members[a]) == truth.getLabel(members[b]);
                if (trulyTogether) {
                    truePositives++;
                } else {
                    falsePositives++;
                }
            }
        }
    }

    for (int label = 0; label < truth.getNumClusters(); label++) {
        int[] members = truth.getIndicesWithLabel(label);
        for (int a = 0; a < members.length; a++) {
            for (int b = a + 1; b < members.length; b++) {
                boolean predictedApart =
                    predicted.getLabel(members[a]) != predicted.getLabel(members[b]);
                if (predictedApart) {
                    falseNegatives++;
                }
            }
        }
    }

    double precision = (double) truePositives / (truePositives + falsePositives);
    double recall = (double) truePositives / (truePositives + falseNegatives);
    double f1 = 2 * precision * recall / (precision + recall);

    this.tpTotal += truePositives;
    this.fpTotal += falsePositives;
    this.fnTotal += falseNegatives;
    return new double[]{precision, recall, f1};
}
示例12: evaluate
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Runs every configured evaluator on the given pair of clusterings.
 *
 * @param truth the reference clustering
 * @param predicted the clustering to evaluate
 * @return one line per evaluator, in the form
 *         {@code <simple class name>: <evaluator result>} followed by a newline
 */
public String evaluate (Clustering truth, Clustering predicted) {
    StringBuilder summary = new StringBuilder();
    for (int i = 0; i < evaluators.length; i++) {
        String qualifiedName = evaluators[i].getClass().getName();
        // Keep only the part after the last '.', i.e. drop the package prefix.
        String simpleName = qualifiedName.substring(qualifiedName.lastIndexOf('.') + 1);
        summary.append(simpleName)
               .append(": ")
               .append(evaluators[i].evaluate(truth, predicted))
               .append("\n");
    }
    return summary.toString();
}
示例13: mergeInstances
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Merges the clusters that contain the specified instances.
 *
 * <p>Every pair of the given instances is visited in turn, and the clusters
 * currently holding the two instances are passed to {@code mergeClusters}.
 *
 * @param clustering the clustering to start from
 * @param instances indices of the instances whose clusters should be merged
 * @return the clustering returned by the last merge, or {@code clustering}
 *         itself when fewer than two instances are given
 */
public static Clustering mergeInstances (Clustering clustering,
                                         int[] instances) {
    Clustering current = clustering;
    for (int a = 0; a < instances.length; a++) {
        for (int b = a + 1; b < instances.length; b++) {
            // Labels are looked up on every pass because each merge yields a
            // clustering that may label the instances differently.
            current = mergeClusters(current,
                                    current.getLabel(instances[a]),
                                    current.getLabel(instances[b]));
        }
    }
    return current;
}
示例14: getCombinedInstances
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Concatenates the instance indices of two clusters.
 *
 * @param clustering the clustering to read from
 * @param i label of the first cluster
 * @param j label of the second cluster
 * @return a new array holding the indices with label {@code i} followed by the
 *         indices with label {@code j}
 */
public static int[] getCombinedInstances (Clustering clustering, int i, int j) {
    int[] first = clustering.getIndicesWithLabel(i);
    int[] second = clustering.getIndicesWithLabel(j);
    // Copy the first cluster into an array long enough for both, then fill the tail.
    int[] combined = java.util.Arrays.copyOf(first, first.length + second.length);
    System.arraycopy(second, 0, combined, first.length, second.length);
    return combined;
}
示例15: createSingletonClustering
import cc.mallet.cluster.Clustering; //导入依赖的package包/类
/**
 * Creates a clustering in which every instance is in a cluster of its own.
 *
 * @param instances the instances to cluster
 * @return a clustering with {@code instances.size()} clusters, where instance
 *         {@code k} has label {@code k}
 */
public static Clustering createSingletonClustering (InstanceList instances) {
    final int count = instances.size();
    int[] ownLabel = new int[count];
    int next = 0;
    while (next < count) {
        ownLabel[next] = next;
        next++;
    }
    return new Clustering(instances, ownLabel.length, ownLabel);
}