当前位置: 首页>>代码示例>>Java>>正文


Java Dataset类代码示例

本文整理汇总了Java中net.sf.javaml.core.Dataset的典型用法代码示例。如果您正苦于以下问题:Java Dataset类的具体用法?Java Dataset怎么用?Java Dataset使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


Dataset类属于net.sf.javaml.core包,在下文中一共展示了Dataset类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters {@code data} around {@code numberOfClusters} medoids.
 *
 * <p>The starting medoids are drawn independently at random from
 * {@code data}, so the same instance may be picked more than once. The
 * assign / recalculate cycle then repeats until
 * {@code recalculateMedoids} reports no change or {@code maxIterations}
 * rounds have run. The final medoids are stored in {@code this.medoids}
 * and the number of rounds is printed to standard output.
 *
 * @param data the instances to cluster
 * @return the array handed to {@code recalculateMedoids} as its output
 *         argument (presumably filled with one data set per cluster
 *         there); its entries are still {@code null} when the loop never
 *         runs, i.e. when {@code maxIterations <= 0}
 */
public Dataset[] cluster(Dataset data) {
	Dataset[] output = new DefaultDataset[numberOfClusters];
	Instance[] currentMedoids = new Instance[numberOfClusters];
	for (int slot = 0; slot < numberOfClusters; slot++) {
		int pick = rg.nextInt(data.size());
		currentMedoids[slot] = data.instance(pick);
	}

	int rounds = 0;
	for (boolean moved = true; moved && rounds < maxIterations; rounds++) {
		int[] membership = assign(currentMedoids, data);
		moved = recalculateMedoids(membership, currentMedoids, output, data);
	}
	System.out.print("Count: " + rounds + "\n");

	// Publish the medoids only once the iteration has finished.
	this.medoids = currentMedoids;
	return output;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:24,代码来源:CustomKMedoids.java

示例2: assign

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Maps every instance of the data set to the index of its best medoid.
 *
 * <p>Medoid 0 is the initial choice; a later medoid replaces it only when
 * {@code dm.compare(candidate, best)} returns {@code true}.
 *
 * @param medoids candidate medoids; must contain at least one element
 * @param data the data to assign to the medoids
 * @return for each position in {@code data}, the index into
 *         {@code medoids} of the selected medoid
 */
private int[] assign(Instance[] medoids, Dataset data) {
	int total = data.size();
	int[] chosen = new int[total];
	for (int row = 0; row < total; row++) {
		Instance current = data.instance(row);
		int winner = 0;
		double winningDistance = dm.measure(current, medoids[0]);
		for (int m = 1; m < medoids.length; m++) {
			double candidate = dm.measure(current, medoids[m]);
			if (!dm.compare(candidate, winningDistance)) {
				continue;
			}
			winningDistance = candidate;
			winner = m;
		}
		chosen[row] = winner;
	}
	return chosen;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:26,代码来源:CustomKMedoids.java

示例3: average

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Picks the member of {@code set} whose summed distance to all other
 * members is smallest.
 *
 * <p>Despite its name this returns an existing instance, not a computed
 * mean. Pairs that are {@code equals} to each other are skipped, so
 * duplicates of the candidate add nothing to its total. On ties the
 * first candidate encountered wins.
 *
 * @param set the instances to choose from
 * @return the selected instance, or {@code null} when iterating
 *         {@code set} yields no instances
 */
private Instance average(Dataset set) {
	Instance best = null;
	double bestTotal = 0.0;
	for (Instance candidate : set) {
		double total = 0.0;
		for (Instance other : set) {
			if (candidate.equals(other)) {
				continue;
			}
			total += dm.measure(candidate, other);
		}

		boolean improves = best == null || total < bestTotal;
		if (improves) {
			best = candidate;
			bestTotal = total;
		}
	}
	return best;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:21,代码来源:CustomKMedoids.java

示例4: clustering

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters the given objectives with a {@code CustomKMean} that uses a
 * {@code SpearmanDistance}.
 *
 * <p>Each objective becomes a {@code DenseInstance} built from
 * {@code obj.getArray()} with the objective itself as second constructor
 * argument. The elapsed wall-clock time in milliseconds is printed to
 * standard output.
 *
 * @param objectives the objectives to cluster
 * @return the clusters produced by {@code CustomKMean.cluster}
 */
@SuppressWarnings("rawtypes")
@Override
public List[] clustering(List<Objective> objectives) {
	Dataset instances = new DefaultDataset();
	for (Objective objective : objectives) {
		DenseInstance row = new DenseInstance(objective.getArray(), objective);
		instances.add(row);
	}

	long startedAt = System.currentTimeMillis();
	// NOTE(review): 2 and 1000 are presumably the cluster count and the
	// iteration limit -- confirm against the CustomKMean constructor.
	CustomKMean clusterer = new CustomKMean(2, 1000, new SpearmanDistance());
	Dataset[] clusters = clusterer.cluster(instances);
	long elapsedMillis = System.currentTimeMillis() - startedAt;
	System.out.print("Time taken on clustering: " + elapsedMillis + "\n");

	return clusters;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:18,代码来源:JavaMLNeighborhood.java

示例5: main

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Runs {@code KMeans} with its no-argument constructor on the iris data
 * file and prints how many clusters were returned.
 *
 * @param args ignored
 * @throws Exception if loading the data set or clustering fails
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): 4 and "," are presumably the class-label column index
    // and the field separator -- confirm against FileHandler.loadDataset.
    File irisFile = new File("devtools/data/iris.data");
    Dataset data = FileHandler.loadDataset(irisFile, 4, ",");

    // No options are passed; the cluster count is whatever KMeans
    // defaults to (an earlier comment here claimed 4 -- unverified).
    Clusterer km = new KMeans();

    // One data set per cluster comes back from the clusterer.
    Dataset[] clusters = km.cluster(data);
    System.out.println("Cluster count: " + clusters.length);
}
 
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:25,代码来源:TutorialKMeans.java

示例6: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Runs {@code KMeans} once for every cluster count from {@code kMin} to
 * {@code kMax} (inclusive) and returns the preferred result.
 *
 * <p>The run for {@code kMin} always happens and is the initial best. A
 * later run replaces it when {@code ce.compareScore(bestScore, newScore)}
 * returns {@code true}.
 *
 * @param data the instances to cluster
 * @return the clusters of the run that was kept
 */
public Dataset[] cluster(Dataset data) {
    Dataset[] bestClusters = new KMeans(kMin, iterations, dm).cluster(data);
    double bestScore = ce.score(bestClusters);

    int k = kMin + 1;
    while (k <= kMax) {
        Dataset[] candidate = new KMeans(k, iterations, dm).cluster(data);
        double candidateScore = ce.score(candidate);
        boolean candidateWins = ce.compareScore(bestScore, candidateScore);
        if (candidateWins) {
            bestClusters = candidate;
            bestScore = candidateScore;
        }
        k++;
    }
    return bestClusters;
}
 
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:IterativeKMeans.java

示例7: assign

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Maps every instance of the data set to the index of its best medoid.
 *
 * <p>Medoid 0 is the initial choice; a later medoid replaces it only when
 * {@code dm.compare(candidate, best)} returns {@code true}. One INFO
 * message is logged per call and one FINE message per distance computed
 * against medoids 1 and up.
 *
 * @param medoids candidate medoids; must contain at least one element
 * @param data the data to assign to the medoids
 * @return for each position in {@code data}, the index into
 *         {@code medoids} of the selected medoid
 */
private int[] assign(Instance[] medoids, Dataset data) {
	LOGGER.log( Level.INFO, "Assign all instances from the data set to the medoids.");
	int total = data.size();
	int[] chosen = new int[total];
	for (int row = 0; row < total; row++) {
		Instance current = data.instance(row);
		int winner = 0;
		double winningDistance = dm.measure(current, medoids[0]);
		for (int m = 1; m < medoids.length; m++) {
			double candidate = dm.measure(current, medoids[m]);
			LOGGER.log(Level.FINE, "Distance:{0}", candidate);
			if (!dm.compare(candidate, winningDistance)) {
				continue;
			}
			winningDistance = candidate;
			winner = m;
		}
		chosen[row] = winner;
	}
	return chosen;
}
 
开发者ID:eracle,项目名称:gap,代码行数:28,代码来源:KMedoids.java

示例8: recalculateMedoids

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Rebuilds the clusters from an assignment and updates each medoid.
 *
 * <p>For every cluster index a fresh {@code DefaultDataset} is stored in
 * {@code output} and filled with the instances assigned to that index.
 * An empty cluster gets a randomly drawn instance of {@code data} as its
 * new medoid and always counts as a change. Otherwise the new medoid is
 * the first element returned by {@code data.kNearest(1, centroid, dm)},
 * where the centroid is {@code DatasetTools.average} of the cluster.
 *
 * @param assignment
 *            cluster index for each position in {@code data}
 * @param medoids
 *            the current medoids; overwritten in place
 * @param output
 *            receives the rebuilt clusters; overwritten in place
 * @param data
 *            the full data set the assignment refers to
 * @return {@code true} if any cluster was empty or any medoid differs
 *         (by {@code equals}) from its previous value
 */
public boolean recalculateMedoids(int[] assignment, Instance[] medoids,
		Dataset[] output, Dataset data) {
	boolean anyMoved = false;
	for (int c = 0; c < numberOfClusters; c++) {
		Dataset members = new DefaultDataset();
		output[c] = members;
		for (int idx = 0; idx < assignment.length; idx++) {
			if (assignment[idx] == c) {
				members.add(data.instance(idx));
			}
		}

		if (members.size() == 0) {
			// Nothing was assigned here: reseed the medoid at random.
			medoids[c] = data.instance(rg.nextInt(data.size()));
			anyMoved = true;
			continue;
		}

		Instance centroid = DatasetTools.average(members);
		Instance previous = medoids[c];
		medoids[c] = data.kNearest(1, centroid, dm).iterator().next();
		if (!medoids[c].equals(previous)) {
			anyMoved = true;
		}
	}
	return anyMoved;
}
 
开发者ID:eracle,项目名称:gap,代码行数:38,代码来源:KMedoids.java

示例9: score

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Sums, over all clusters, the average pairwise measure within the
 * cluster.
 *
 * <p>For each cluster, {@code dm.measure} is evaluated for every ordered
 * pair of members (including each member with itself) and the total is
 * divided by the cluster size. Empty clusters are skipped: they have no
 * pairs, and dividing their zero total by a size of zero would yield
 * {@code NaN} and turn the whole score into {@code NaN}.
 *
 * @param datas the clusters to score
 * @return the sum of the per-cluster averages; {@code 0} when every
 *         cluster is empty or {@code datas} has no elements
 */
public double score(Dataset[] datas) {
    double sum = 0;
    for (int i = 0; i < datas.length; i++) {
        int size = datas[i].size();
        if (size == 0) {
            // 0.0 / 0 is NaN in Java; an empty cluster contributes nothing.
            continue;
        }
        double tmpSum = 0;
        for (int j = 0; j < size; j++) {
            for (int k = 0; k < size; k++) {
                tmpSum += dm.measure(datas[i].instance(j), datas[i].instance(k));
            }
        }
        sum += tmpSum / size;
    }
    return sum;
}
 
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:SumOfAveragePairwiseSimilarities.java

示例10: score

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Sums the measure between every instance and the centroid of its own
 * cluster.
 *
 * <p>All centroids are computed first with {@code DatasetTools.average};
 * only then are the per-instance values of {@code dm.measure}
 * accumulated.
 *
 * @param datas the clusters to score
 * @return the accumulated measure over all instances of all clusters
 */
public double score(Dataset[] datas) {
	int clusterCount = datas.length;

	// First pass: one centroid per cluster.
	Instance[] centroids = new Instance[clusterCount];
	for (int c = 0; c < clusterCount; c++) {
		centroids[c] = DatasetTools.average(datas[c]);
	}

	// Second pass: add up each member's measure to its centroid.
	double total = 0;
	for (int c = 0; c < clusterCount; c++) {
		Dataset members = datas[c];
		Instance centroid = centroids[c];
		for (int m = 0; m < members.size(); m++) {
			total += dm.measure(members.instance(m), centroid);
		}
	}
	return total;
}
 
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:SumOfCentroidSimilarities.java

示例11: score

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes a BIC-style score for a clustering as
 * {@code -2 * logL + log10(N) * k}.
 *
 * <p>{@code N} is the total number of instances over all clusters,
 * {@code logL} comes from
 * {@code LogLikelihoodFunction.loglikelihoodsum(clusters)}, and the
 * number of free parameters {@code k} is fixed at 1.
 *
 * @param clusters the clusters to score
 * @return the computed score
 */
public double score(Dataset[] clusters) {
	// Sample size N: instances summed over every cluster.
	double sampleSize = 0;
	for (Dataset cluster : clusters) {
		sampleSize += cluster.size();
	}

	// Number of free parameters K.
	// NOTE(review): hard-coded to 1 regardless of cluster count -- confirm
	// this is intended.
	double freeParameters = 1;

	// Log-likelihood log(L) of the whole clustering.
	double logL = new LogLikelihoodFunction().loglikelihoodsum(clusters);

	// NOTE(review): the textbook BIC uses the natural log of N; this uses
	// log10. Left unchanged -- confirm which base loglikelihoodsum uses.
	return -2 * logL + Math.log10(sampleSize) * freeParameters;
}
 
开发者ID:eracle,项目名称:gap,代码行数:17,代码来源:BICScore.java

示例12: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Runs the same {@code KMeans} instance {@code repeats + 1} times on
 * {@code data} and returns the preferred result.
 *
 * <p>The first run is the initial best. Each further run replaces it
 * when {@code ce.compareScore(bestScore, newScore)} returns {@code true}.
 * Different outcomes between runs presumably rely on {@code KMeans}
 * being randomised internally.
 *
 * @param data the instances to cluster
 * @return the clusters of the run that was kept
 */
public Dataset[] cluster(Dataset data) {
	KMeans km = new KMeans(this.clusters, this.iterations, this.dm);
	Dataset[] winner = km.cluster(data);
	double winnerScore = ce.score(winner);

	int run = 0;
	while (run < repeats) {
		Dataset[] challenger = km.cluster(data);
		double challengerScore = ce.score(challenger);
		boolean challengerWins = ce.compareScore(winnerScore, challengerScore);
		if (challengerWins) {
			winner = challenger;
			winnerScore = challengerScore;
		}
		run++;
	}
	return winner;
}
 
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:MultiKMeans.java

示例13: transformDataset

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Converts a data set into an {@code svm_problem}.
 *
 * <p>Row {@code r} of the problem corresponds to {@code data.instance(r)}:
 * {@code y[r]} is {@code data.classIndex} of the instance's class value,
 * and {@code x[r]} holds one {@code svm_node} per key in the instance's
 * key set, in the key set's iteration order, carrying that key as index
 * and the instance's value at that key.
 *
 * @param data the data set to convert
 * @return the populated problem, with {@code l} equal to {@code data.size()}
 */
private static svm_problem transformDataset(Dataset data) {
	int rows = data.size();
	svm_problem problem = new svm_problem();
	problem.l = rows;
	problem.y = new double[rows];
	problem.x = new svm_node[rows][];

	for (int r = 0; r < rows; r++) {
		Instance inst = data.instance(r);
		problem.y[r] = data.classIndex(inst.classValue());

		SortedSet<Integer> keys = inst.keySet();
		svm_node[] nodes = new svm_node[keys.size()];
		int slot = 0;
		for (int key : keys) {
			svm_node node = new svm_node();
			node.index = key;
			node.value = inst.value(key);
			nodes[slot++] = node;
		}
		problem.x[r] = nodes;
	}
	return problem;
}
 
开发者ID:eracle,项目名称:gap,代码行数:23,代码来源:LibSVM.java

示例14: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters {@code data} by feeding its instances one at a time into the
 * clusterer's tree and then collecting the resulting clusters.
 *
 * <p>{@code filter.filter(data)} is applied to the caller's data set
 * first; its return value, if any, is discarded, so any effect is
 * presumably an in-place modification. The tree, cluster count and
 * split/merge counters are reset on every call.
 *
 * @param data the instances to cluster
 * @return the clusters gathered by {@code createClusters} from the tree
 */
public Dataset[] cluster(Dataset data) {
    filter.filter(data);

    // Start from a clean state so that repeated calls do not accumulate.
    m_numberOfClusters = -1;
    m_cobwebTree = null;
    m_numberSplits = 0;
    m_numberMerges = 0;

    for (int position = 0; position < data.size(); position++) {
        updateClusterer(data.instance(position));
    }
    determineNumberOfClusters();

    Vector<Dataset> collected = new Vector<Dataset>();
    createClusters(m_cobwebTree, collected);
    return collected.toArray(new Dataset[collected.size()]);
}
 
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:Cobweb.java

示例15: logLikelihood

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes the log-likelihood of one cluster from the count, sum and sum
 * of squares of its values.
 *
 * <p>The fields {@code count}, {@code sum} and {@code sum2} are
 * overwritten on every call. The number of values read per instance is
 * the {@code size()} of the first instance. An infinite result from
 * {@code logLikelihoodFunction} is replaced by 0.
 *
 * <p>NOTE(review): {@code cluster.instance(0)} is read unconditionally,
 * so an empty cluster presumably fails here -- confirm callers never
 * pass one.
 *
 * @param cluster the cluster to evaluate
 * @return the log-likelihood, or 0 if it was positive or negative infinity
 */
public double logLikelihood(Dataset cluster) {
	double valuesPerInstance = cluster.instance(0).size();
	this.count = valuesPerInstance * cluster.size();
	sum = 0;
	sum2 = 0;

	for (int row = 0; row < cluster.size(); row++) {
		for (int column = 0; column < valuesPerInstance; column++) {
			double value = cluster.instance(row).value(column);
			sum += value;
			sum2 += value * value;
		}
	}

	double result = logLikelihoodFunction(count, sum, sum2);
	return Double.isInfinite(result) ? 0 : result;
}
 
开发者ID:eracle,项目名称:gap,代码行数:22,代码来源:LogLikelihoodFunction.java


注:本文中的net.sf.javaml.core.Dataset类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。