当前位置: 首页>>代码示例>>Java>>正文


Java Instances.add方法代码示例

本文整理汇总了Java中weka.core.Instances.add方法的典型用法代码示例。如果您正苦于以下问题:Java Instances.add方法的具体用法?Java Instances.add怎么用?Java Instances.add使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在weka.core.Instances的用法示例。


在下文中一共展示了Instances.add方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: relationToInstances

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Builds a Weka {@code Instances} relation named {@code name} from a list of tuples.
 * Each tuple is converted with {@code tupleToInstance} and inserted at its list position,
 * so the resulting instances keep the order of {@code ts}. The attribute list is derived
 * from {@code td} restricted to {@code fields}; {@code td} is also used to check that every
 * tuple shares the same TupleDesc.
 *
 * @param name the name of the resulting Instances object
 * @param ts list of Tuples to convert
 * @param td TupleDesc every tuple is expected to carry
 * @param fields indices identifying which fields should be included in the new Instances object.
 * @return new Instances object holding one instance per tuple
 * @throws RuntimeException if a tuple's TupleDesc does not equal {@code td}
 */
public static Instances relationToInstances(String name, List<Tuple> ts, TupleDesc td,
        List<Integer> fields){
    ArrayList<Attribute> attrs = tupleDescToAttributeList(td, fields);
    // pre-size the relation to the number of tuples
    Instances instances = new Instances(name, attrs, ts.size());

    int position = 0;
    for (Tuple tuple : ts) {
        boolean sameSchema = tuple.getTupleDesc().equals(td);
        if (!sameSchema) {
            throw new RuntimeException("All TupleDescs must match.");
        }
        instances.add(position, tupleToInstance(tuple, attrs, fields));
        position++;
    }

    return instances;
}
 
开发者ID:mitdbg,项目名称:imputedb,代码行数:28,代码来源:WekaUtil.java

示例2: testCOMT2

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Manual end-to-end check of the COMT2 model: trains it on a fixed ARFF training set,
 * prints an evaluation summary to stderr, saves the instance returned by
 * {@code getInstanceWithPossibleMaxY}, and finally saves a copy of the training set whose
 * class values are replaced by the model's predictions.
 *
 * @throws Exception propagated from data loading, model building, evaluation or saving
 */
public static void testCOMT2() throws Exception{
	BestConf conf = new BestConf();

	// training data: the last attribute is used as the class
	Instances training = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
	training.setClassIndex(training.numAttributes()-1);

	// sample points get the training set's class attribute appended as their own class
	Instances samples = LHSInitializer.getMultiDimContinuous(conf.getAttributes(), InitialSampleSetSize, false);
	samples.insertAttributeAt(training.classAttribute(), samples.numAttributes());
	samples.setClassIndex(samples.numAttributes()-1);

	COMT2 model = new COMT2(samples, COMT2Iteration);
	model.buildClassifier(training);

	// evaluate on the training data itself and report the summary
	Evaluation evaluation = new Evaluation(training);
	evaluation.evaluateModel(model, training);
	System.err.println(evaluation.toSummaryString());

	Instance bestPoint = model.getInstanceWithPossibleMaxY(samples.firstInstance());
	Instances bestOnly = new Instances(training,2);
	bestOnly.add(bestPoint);
	// NOTE(review): an XRFF writer is given a file name ending in ".arff" -- confirm this is intended
	DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestOnly);

	// output the training set with the class value (last position) replaced by the prediction
	Instances predicted = new Instances(training, training.numInstances());
	for(Enumeration<Instance> rows = training.enumerateInstances(); rows.hasMoreElements(); ){
		Instance row = rows.nextElement();
		double[] rowValues = row.toDoubleArray();
		rowValues[rowValues.length-1] = model.classifyInstance(row);
		predicted.add(row.copy(rowValues));
	}
	DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", predicted);
}
 
开发者ID:zhuyuqing,项目名称:BestConfig,代码行数:34,代码来源:BestConf.java

示例3: retrieveMore

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Moves up to {@code toGen} randomly chosen instances out of the unlabeled pool.
 * Every drawn instance is removed from {@code unlabeldPool}, so the pool shrinks
 * by the number of instances returned.
 *
 * @param toGen the number of instances requested
 * @return a new Instances object (same header as the pool) holding the drawn instances;
 *         it contains fewer than {@code toGen} instances when the pool runs dry
 */
private Instances retrieveMore(int toGen){
	Instances retval = new Instances(this.unlabeldPool, toGen);
	// Stop as soon as the pool is empty: drawing from an empty pool would call
	// rand.nextInt(0), which is rejected when rand is a java.util.Random.
	for(int i=0; i<toGen && this.unlabeldPool.size()>0; i++){
		retval.add(this.unlabeldPool.remove(rand.nextInt(this.unlabeldPool.size())));
	}
	return retval;
}
 
开发者ID:zhuyuqing,项目名称:bestconf,代码行数:8,代码来源:COMT2.java

示例4: processCollection

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Builds the training data for concept selection and writes it as an ARFF file.
 * <p>
 * The extracted concepts are ranked and merged, each concept is labelled by matching its
 * name against the concepts of the gold concept map (the first {@code .cmap} file found in
 * {@code documentLocation}), one instance is created per concept, and the resulting data
 * set is written to {@code <targetLocation>/<name>.arff}.
 *
 * @throws IllegalStateException if {@code documentLocation} contains no {@code .cmap} file
 */
@Override
public void processCollection() {

	File loc = new File(this.parent.getTargetLocation());
	String cluster = loc.getName();

	// prepare concepts
	concepts = this.parent.getComponent(ConceptExtractor.class).getConcepts();

	ConceptRanker.rankConcepts(concepts, RankingStrategy.CF, false, this.parent, "");
	ConceptMerger.mergeConcepts(concepts, conceptMapping, MergeStrategy.STEM_SW);
	Map<Concept, Set<Concept>> groupLookup = ClassifierUtils.buildConceptGroupingLookup(concepts, conceptMapping);
	this.parent.log(this, "concepts: " + this.concepts.size());

	ConceptDict cd = parent.getComponent(ConceptDict.class);
	TextRankScorer tr = parent.getComponent(TextRankScorer.class);
	tr.compute();

	// load gold data
	FilenameFilter filter = new FilenameFilter() {
		public boolean accept(File dir, String name) {
			return name.endsWith(".cmap");
		}
	};
	// listFiles returns null for a non-directory / I/O error and an empty array when nothing matches
	File[] goldCandidates = documentLocation.listFiles(filter);
	if (goldCandidates == null || goldCandidates.length == 0) {
		throw new IllegalStateException("No .cmap gold file found in " + documentLocation);
	}
	File goldFile = new File(goldCandidates[0].getPath());
	ConceptMap mapGold = ConceptMapReader.readFromFile(goldFile, Format.TSV);

	// create data
	this.parent.log(this, "computing features");

	URL sw = getClass().getResource("lists/stopwords_en_eval.txt");
	StemSWMatch match = new StemSWMatch(sw);
	int matched = 0;

	String topicFile = documentLocation.getParent() + "/topics.tsv";
	String clusterSizeFile = documentLocation.getParent() + "/cluster_size.txt";
	ClassifierUtils util = new ClassifierUtils(clusterSizeFile, topicFile);

	Instances data = util.createEmptyDataset("ConceptSelectionTrain");
	for (Concept c : concepts) {

		// label: a concept is positive if it matches any gold concept
		boolean isGold = false;
		for (Concept cg : mapGold.getConcepts()) {
			if (match.isMatch(cg.name, c.name)) {
				isGold = true;
				matched++;
				break;
			}
		}

		Instance instance = util.createInstance(c, isGold, cd, tr, cluster, groupLookup);
		data.add(instance);
	}

	// try-with-resources closes the writer even when write() fails
	String arffPath = parent.getTargetLocation() + "/" + parent.getName() + ".arff";
	try (BufferedWriter writer = new BufferedWriter(new FileWriter(arffPath))) {
		writer.write(data.toString());
	} catch (IOException e) {
		e.printStackTrace();
	}
	this.parent.log(this,
			"arff-file created: " + data.numInstances() + ", " + data.numAttributes() + ", " + data.numClasses());
	this.parent.log(this, "positive instances: " + matched);
}
 
开发者ID:UKPLab,项目名称:emnlp2017-cmapsum-corpus,代码行数:68,代码来源:TrainingDataGenerator.java

示例5: getMultiDimContinuousDiv

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over bounded numeric attributes.
 * At current version, we assume all attributes are numeric attributes with bounds.
 * 
 * Several candidate sample sets are generated and the one whose minimum pairwise
 * distance (as reported by {@code minDistForSet}) is largest is kept. Each attribute's
 * range is then cut into {@code sampleSetSize} subdomains of equal width
 * (PACE = (upper-lower) DIVided by the sampleSetSize), and every sample takes one value
 * from the subdomain the selected set assigns to it.
 * 
 * @param atts the attributes to sample over; each must provide lower/upper numeric bounds
 * @param sampleSetSize the number of sample points (and subdomains per attribute)
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
private static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always accepted so
		//that a set is available even when no candidate has a distance above the initial 0
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize;
		for(int j=1;j<sampleSetSize;j++){
			bounds[i][j] = bounds[i][j-1] + pace;
		}
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain that sample i uses for attribute j
			int cell = setWithMaxMinDist[j].get(i);
			double lower = bounds[j][cell];
			double upper = bounds[j][cell+1];
			vals[j] = useMid?
					(lower+upper)/2:
						lower+((upper-lower)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
开发者ID:zhuyuqing,项目名称:BestConfig,代码行数:61,代码来源:LHSSampler.java

示例6: updateModels

import weka.core.Instances; //导入方法依赖的package包/类
/**
     * Trains and persists the model for one variable, together with its feature weights.
     * Nothing happens when the model file for the given session/user/variable already exists.
     * Each highlight span feedback is treated as a "normal" feedback: the instances of
     * the feedback data set are appended to the instance data set and the combined
     * set is used as the training set for the new model.
     * 
     * @param sessionID session the model belongs to
     * @param userID user the model belongs to
     * @param varID variable the model predicts
     * @param model predictor that is trained, then saved to the model file
     * @throws UnsupportedOperationException if the instance or the feedback data set file is missing
     * @throws Exception propagated from loading, training or saving
     */
    public void updateModels(String sessionID, String userID, String varID,
            SVMPredictor model) throws Exception {
        String fn_model = getModelFileName(sessionID, userID, varID);
        if(Util.fileExists(fn_model)) {
            // the model exists already, nothing to do
            return;
        }

        // both data sets must be present before they can be merged
        String fn_instanceDS = getInstanceDSFileName(sessionID, userID, varID);
        String fn_feedbackDS = getFeedbackDSFileName(sessionID, userID, varID);
        boolean instanceDSPresent = Util.fileExists(fn_instanceDS);
        if(! instanceDSPresent) {
            throw new UnsupportedOperationException("Training data set does not exist. "
                    + "Create the (instance) variable value data set for " +
                    fn_model + " before re-train it");
        }
        boolean feedbackDSPresent = Util.fileExists(fn_feedbackDS);
        if(! feedbackDSPresent) {
            throw new UnsupportedOperationException("Training data set does not exist. "
                    + "Create the (feedback) highlight span data set for " +
                    fn_model + " before re-train it");
        }

        // append every feedback instance to the instance data set
        Instances trainingDS = loadInstancesFromFile(fn_instanceDS);
        Instances feedbackDS = loadInstancesFromFile(fn_feedbackDS);
        int next = 0;
        while(next < feedbackDS.numInstances()) {
            trainingDS.add(feedbackDS.instance(next));
            next++;
        }

        // train on the merged set, then persist the model
        model.train((Object)trainingDS);
        model.saveModel(fn_model);

        // persist the feature weights, labelled with the global feature names
        String fn_featureWeight = getFeatureWeightFileName(sessionID, userID, varID);
        String[] globalFeatureName = Util.loadList(fn_globalFeatureName);
        model.saveFeatureWeights(globalFeatureName, fn_featureWeight);
    }
 
开发者ID:NLPReViz,项目名称:emr-nlp-server,代码行数:78,代码来源:TextFileFeedbackManager.java

示例7: transformToWeka

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Converts a feature vector data set into a Weka data set.
 * Every record becomes one dense instance with weight 1.0; each non-label attribute
 * value is parsed as a double, and a missing ({@code null}) value is stored as 0.
 * 
 * @param features
 *            Holds all features including a label, if training data is
 *            created.
 * @param datasetName
 *            Holds the data set's name; when it equals the training set name the
 *            record's label is stored as well
 * @return returns the created data set
 */
public Instances transformToWeka(FeatureVectorDataSet features, String datasetName) {
	Instances dataset = defineDataset(features, datasetName);

	for (Record record : features.get()) {
		// one slot per schema attribute
		Collection<Attribute> attributes = features.getSchema().get();
		double[] values = new double[attributes.size()];

		int index = 0;
		for (Attribute attr : attributes) {
			if (attr.equals(FeatureVectorDataSet.ATTRIBUTE_LABEL)) {
				// the label is handled separately below
				continue;
			}
			String feature = record.getValue(attr);
			// convert to double if a value is present, otherwise fall back to 0
			values[index] = (feature != null) ? Double.parseDouble(feature) : 0;
			index++;
		}

		Instance inst = new DenseInstance(1.0, values);
		// The label is a special case: it goes to the position right after the features.
		// NOTE(review): the array is written after the instance was created, so this
		// presumably relies on DenseInstance keeping a reference to `values` -- confirm.
		if (datasetName.equals(this.trainingSet)) {
			String labelRecord = record.getValue(FeatureVectorDataSet.ATTRIBUTE_LABEL);
			values[index] = dataset.attribute(index).indexOfValue(labelRecord);
		}

		dataset.add(inst);
	}

	return dataset;
}
 
开发者ID:olehmberg,项目名称:winter,代码行数:54,代码来源:WekaMatchingRule.java

示例8: orderByCompactClass

import weka.core.Instances; //导入方法依赖的package包/类
/** 
 * Reorder the data by compactness of each class using Euclidean distance.
 * For every class, compactness is the average over its instances of the squared
 * Euclidean distance to the class mean, minus the squared difference at index 0.
 * Classes are sorted by compactness with {@code QuickSort.sort} and appended to the
 * result walking the sorted labels from last to first.
 * <p>
 * Side effect: {@code data} itself is sorted by its class attribute.
 * 
 * @param data the data set to reorder; it must have a nominal class attribute
 * @return a new data set with the same header holding the instances grouped by class
 */
public static Instances orderByCompactClass(Instances data) {
	Instances newData = new Instances(data, data.numInstances());
	
	// get the number of class in the data
	int nbClass = data.numClasses();
	int[] instancePerClass = new int[nbClass];
	int[] labels = new int[nbClass];
	int[] classIndex = new int[nbClass];
	double[] compactness = new double[nbClass];
	
	// sort the data base on its class
	data.sort(data.classAttribute());
	
	// the class counts do not change inside the loop, so compute the statistics once
	int[] classCounts = data.attributeStats(data.classIndex()).nominalCounts;
	
	int start = 0;
	// get the number of instances per class in the data
	for (int i = 0; i < nbClass; i++) {
		instancePerClass[i] = classCounts[i];
		labels[i] = i;
		// offset of the first instance of class i within the sorted data
		if (i > 0) 
			classIndex[i] = classIndex[i-1] + instancePerClass[i-1];
		int end = start + instancePerClass[i];
		int counter = 0;
		// rows are filled with the arrays returned by toDoubleArray()
		double[][] dataPerClass = new double[instancePerClass[i]][];
		for (int j = start; j < end; j++) {
			dataPerClass[counter++] = data.instance(j).toDoubleArray();
		}
		double[] mean = arithmeticMean(dataPerClass);
		double d = 0;
		for (int j = 0; j < instancePerClass[i]; j++) {
			double temp = euclideanDistance(mean, dataPerClass[j]);
			temp *= temp;
			// remove the contribution of index 0
			// NOTE(review): presumably index 0 holds the class attribute -- confirm
			temp -= (mean[0] - dataPerClass[j][0]) * (mean[0] - dataPerClass[j][0]);
			d += temp;
		}
		// NOTE(review): a class without instances divides by zero here (NaN) -- confirm it cannot occur
		compactness[i] = d / instancePerClass[i];
		start = end;
	}
	
	QuickSort.sort(compactness, labels);
	
	// emit the classes from the last sorted label to the first
	for (int i = nbClass-1; i >=0 ; i--) {
		for (int j = 0; j < instancePerClass[labels[i]]; j++) {
			newData.add(data.instance(classIndex[labels[i]] + j));
		}
	}
	
	return newData;
}
 
开发者ID:ChangWeiTan,项目名称:FastWWSearch,代码行数:54,代码来源:Sampling.java

示例9: train

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Trains the ModelNum M5P models on the labeled instances and then refines them for at
 * most comtIterations rounds: in each round every model picks the unlabeled instance
 * whose addition to its training set yields the largest positive delta (as computed by
 * computeOmegaDelta), the picked instances are added to the training sets, the models
 * are rebuilt, and the unlabeled set is replenished via retrieveMore.
 * The loop ends early when no model picked an instance in a round.
 *
 * @throws Exception propagated from model building or classification
 */
private void train() throws Exception{
	// initial models: one per entry of M, all built from the labeled instances
	models = new M5P[ModelNum];
	for(int i=0;i<ModelNum;i++){
		models[i] = buildModel(labeledInstances, M[i]);
	}
	
	for(int i=0;i<this.comtIterations;i++){
		// per model, the instances selected in this round
		ArrayList<Instance>[] InstancePiSet = new ArrayList[ModelNum];
		for(int j=0;j<ModelNum;j++)
			InstancePiSet[j] = new ArrayList<Instance>();
		
		for(int m=0;m<ModelNum;m++){
			double maxDelta = 0;
			Instance maxDeltaXY = null;
			Enumeration<Instance> enu = this.unlabeledInstances.enumerateInstances();
			
			while(enu.hasMoreElements()){
				Instance ulIns = enu.nextElement();
				Instances omega = getSiblings(models[m], ulIns);
				double y = models[m].classifyInstance(ulIns);
				// the class index is looked up once, on first use
				if(indexOfClass==-1)
					indexOfClass = labeledInstances.classIndex();
				// the prediction is written into the unlabeled instance itself
				ulIns.setValue(indexOfClass, y);
				
				// candidate model: this model's training set plus the newly labeled instance
				Instances instancesPi = new Instances(models[m].getM5RootNode().zyqGetTrainingSet());
				instancesPi.add(ulIns);
				M5P modelPi = buildModel(instancesPi, M[m]);
				double delta = computeOmegaDelta(models[m],modelPi,omega);
				// keep the instance with the largest delta seen so far
				if(maxDelta<delta){
					maxDelta = delta;
					maxDeltaXY = ulIns;
				}
			}
			
			//now check facts about delta
			if(maxDelta>0){
				// the selected instance leaves the unlabeled set
				InstancePiSet[m].add(maxDeltaXY);
				this.unlabeledInstances.delete(this.unlabeledInstances.indexOf(maxDeltaXY));
			}
		}//check for both model
		
		// stop when no model selected an instance in this round
		boolean toExit = true;
		for(int m=0;m<ModelNum;m++){
			if(InstancePiSet[m].size()>0){
				toExit = false;
				break;
			}
		}
		
		if(toExit)
			break;
		else{
			//update the models
			int toGen = 0;
			for(int m=0;m<ModelNum;m++){
				// NOTE(review): model m is rebuilt from its own training set plus its own
				// selections (InstancePiSet[m]) -- confirm this is the intended pairing
				Instances set = models[m].getM5RootNode().zyqGetTrainingSet();
				toGen += InstancePiSet[m].size();
				for(Instance ins : InstancePiSet[m])
					set.add(ins);
				
				models[m] = buildModel(set, M[m]);
			}
			
			//Replenish pool U' to size p
			Instances toAdd = retrieveMore(toGen);
			unlabeledInstances.addAll(toAdd);
		}//we will go to another round of iteration
	}//iterate for a number of rounds or break out on empty InstancesPiSets
	
	//now we have the model as y = 0.5*sum(models[m].predict(x))
}
 
开发者ID:zhuyuqing,项目名称:bestconf,代码行数:72,代码来源:COMT2.java

示例10: getMultiDim

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over a mix of numeric and nominal attributes.
 * Assumptions:(1)Numeric is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * 
 * Several candidate sample sets are generated and the one whose minimum pairwise
 * distance (as reported by {@code minDistForSet}) is largest is kept. For a numeric
 * attribute the {@code bounds} row holds the borders of {@code sampleSetSize} subdomains
 * of equal width; for any other attribute it holds indices of nominal values.
 * 
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of sample points
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always accepted so
		//that a set is available even when no candidate has a distance above the initial 0
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain (or nominal slot) that sample i uses for attribute j
			int cell = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				double lower = bounds[j][cell];
				double upper = bounds[j][cell+1];
				vals[j] = useMid?
						(lower+upper)/2:
							lower+((upper-lower)*uniRand.nextDouble());
			}else{//isNominal()
				vals[j] = bounds[j][cell];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
开发者ID:zhuyuqing,项目名称:bestconf,代码行数:79,代码来源:LHSSampler.java

示例11: sampleMultiDimContinuous

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over bounded numeric attributes.
 * At current version, we assume all attributes are numeric attributes with bounds.
 * 
 * Several candidate sample sets are generated and the one whose minimum pairwise
 * distance (as reported by {@code minDistForSet}) is largest is kept. The subdomain
 * borders of each attribute are filled in by {@code uniBoundsGeneration}. When the span
 * between an attribute's first and last border exceeds {@code sampleSetSize}, its sampled
 * values are truncated to integers.
 * 
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of sample points (and subdomains per attribute)
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "SamplesByLHS" holding the generated points
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always accepted so
		//that a set is available even when no candidate has a distance above the initial 0
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	boolean[] roundToInt = new boolean[atts.size()];
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
		//flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
		
		//wide ranges are sampled as whole numbers
		if(bounds[i][sampleSetSize]-bounds[i][0]>sampleSetSize)
			roundToInt[i]=true;
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain that sample i uses for attribute j
			int cell = setWithMaxMinDist[j].get(i);
			double lower = bounds[j][cell];
			double upper = bounds[j][cell+1];
			vals[j] = useMid?
					(lower+upper)/2:
						lower+((upper-lower)*uniRand.nextDouble());
			if(roundToInt[j])
				vals[j] = (int)vals[j];
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
开发者ID:zhuyuqing,项目名称:bestconf,代码行数:59,代码来源:LHSSampler.java

示例12: getMultiDimContinuousDiv

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over bounded numeric attributes.
 * At current version, we assume all attributes are numeric attributes with bounds.
 * 
 * Several candidate sample sets are generated and the one whose minimum pairwise
 * distance (as reported by {@code minDistForSet}) is largest is kept. Each attribute's
 * range is then cut into {@code sampleSetSize} subdomains of equal width
 * (PACE = (upper-lower) DIVided by the sampleSetSize), and every sample takes one value
 * from the subdomain the selected set assigns to it.
 * 
 * @param atts the attributes to sample over; each must provide lower/upper numeric bounds
 * @param sampleSetSize the number of sample points (and subdomains per attribute)
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always accepted so
		//that a set is available even when no candidate has a distance above the initial 0
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize;
		for(int j=1;j<sampleSetSize;j++){
			bounds[i][j] = bounds[i][j-1] + pace;
		}
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain that sample i uses for attribute j
			int cell = setWithMaxMinDist[j].get(i);
			double lower = bounds[j][cell];
			double upper = bounds[j][cell+1];
			vals[j] = useMid?
					(lower+upper)/2:
						lower+((upper-lower)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
开发者ID:zhuyuqing,项目名称:BestConfig,代码行数:61,代码来源:LHSInitializer.java

示例13: getMultiDim

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over a mix of numeric and nominal attributes.
 * Assumptions:(1)Numeric is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * 
 * Several candidate sample sets are generated and the one whose minimum pairwise
 * distance (as reported by {@code minDistForSet}) is largest is kept. For a numeric
 * attribute the {@code bounds} row holds the borders of {@code sampleSetSize} subdomains
 * of equal width; for any other attribute it holds indices of nominal values.
 * 
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of sample points
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always accepted so
		//that a set is available even when no candidate has a distance above the initial 0
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain (or nominal slot) that sample i uses for attribute j
			int cell = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				double lower = bounds[j][cell];
				double upper = bounds[j][cell+1];
				vals[j] = useMid?
						(lower+upper)/2:
							lower+((upper-lower)*uniRand.nextDouble());
			}else{//isNominal()
				vals[j] = bounds[j][cell];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
开发者ID:zhuyuqing,项目名称:bestconf,代码行数:79,代码来源:LHSInitializer.java

示例14: getMultiDimContinuousLog

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over bounded numeric attributes, with
 * subdomain borders that grow multiplicatively instead of by a constant pace.
 * At current version, we assume all attributes are numeric attributes with bounds.
 * 
 * The number of multiplication-by-10 steps is derived from the integer part of
 * log10(upper - lower).
 * NOTE(review): the original description said "Let PACE be log10(upper/lower)", which
 * does not match the difference used in the code -- confirm which one is intended.
 * 
 * Several candidate sample sets are generated and the one whose minimum pairwise
 * distance (as reported by {@code minDistForSet}) is largest is kept.
 * 
 * @param atts the attributes to sample over; each must provide lower/upper numeric bounds
 * @param sampleSetSize the number of sample points (and subdomains per attribute)
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuousLog(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always accepted so
		//that a set is available even when no candidate has a distance above the initial 0
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	int step, crntStep;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		crntStep = (int)Math.log10(bounds[i][sampleSetSize] - bounds[i][0]);
		//NOTE(review): crntStep is 0 when (upper - lower) < 10, which makes the two integer
		//operations below throw ArithmeticException -- confirm ranges are always >= 10
		step = sampleSetSize/crntStep;//num of points drawn after the multiplication of 10
		int left = sampleSetSize%crntStep;
		//a zero lower bound is replaced by a random integer in [0,10)
		//NOTE(review): the draw can be 0 again, leaving the products below at 0 -- confirm
		if(bounds[i][0]==0)
			bounds[i][0]=uniRand.nextInt(10);
		crntStep = 1;
		double theBound = bounds[i][sampleSetSize]/10;
		for(int j=1;j<sampleSetSize;j++){
			//start a new decade once step points were drawn and another factor of 10 still fits
			if(crntStep>=step && bounds[i][j-1]<=theBound)
				crntStep=0;
			
			if(crntStep==0)
				bounds[i][j] = bounds[i][j-step] * 10;
			else if(crntStep<step)
				bounds[i][j] = bounds[i][j-crntStep] * ((double)crntStep*10./((double)step+1.));
			else if(crntStep>=step)
				bounds[i][j] = bounds[i][j-crntStep] * ((double)crntStep*10./(double)(left+step+1));
			
			if(bounds[i][j]>=bounds[i][sampleSetSize])
				System.err.println("be careful!!!!");
			crntStep++;
		}
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain that sample i uses for attribute j
			int cell = setWithMaxMinDist[j].get(i);
			double lower = bounds[j][cell];
			double upper = bounds[j][cell+1];
			vals[j] = useMid?
					(lower+upper)/2:
						lower+((upper-lower)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
开发者ID:zhuyuqing,项目名称:BestConfig,代码行数:79,代码来源:LHSInitializer.java

示例15: getMultiDimContinuous

import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over bounded numeric attributes.
 * At current version, we assume all attributes are numeric attributes with bounds.
 * 
 * Several candidate sample sets are generated and the one whose minimum pairwise
 * distance (as reported by {@code minDistForSet}) is largest is kept. The subdomain
 * borders of each attribute are filled in by {@code uniBoundsGeneration}. When the span
 * between an attribute's first and last border exceeds {@code sampleSetSize}, its sampled
 * values are truncated to integers.
 * 
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of sample points (and subdomains per attribute)
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always accepted so
		//that a set is available even when no candidate has a distance above the initial 0
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	boolean[] roundToInt = new boolean[atts.size()];
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
		//flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
		
		//wide ranges are sampled as whole numbers
		if(bounds[i][sampleSetSize]-bounds[i][0]>sampleSetSize)
			roundToInt[i]=true;
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain that sample i uses for attribute j
			int cell = setWithMaxMinDist[j].get(i);
			double lower = bounds[j][cell];
			double upper = bounds[j][cell+1];
			vals[j] = useMid?
					(lower+upper)/2:
						lower+((upper-lower)*uniRand.nextDouble());
			if(roundToInt[j])
				vals[j] = (int)vals[j];
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
开发者ID:zhuyuqing,项目名称:bestconf,代码行数:61,代码来源:LHSInitializer.java


注:本文中的weka.core.Instances.add方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。