本文整理汇总了Java中weka.core.Instances.add方法的典型用法代码示例。如果您正苦于以下问题:Java Instances.add方法的具体用法?Java Instances.add怎么用?Java Instances.add使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.core.Instances的用法示例。
在下文中一共展示了Instances.add方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: relationToInstances
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Builds an {@code Instances} relation called {@code name} out of a list of tuples.
 * <p>
 * The attribute list is derived from {@code td} and {@code fields}; each tuple is
 * converted with {@code tupleToInstance} and inserted at the position it has in
 * {@code ts}. {@code td} additionally serves as a consistency check: every tuple must
 * report a TupleDesc equal to it.
 *
 * @param name   the name of the resulting Instances object
 * @param ts     the tuples to convert, in output order
 * @param td     the TupleDesc all tuples are required to share
 * @param fields indices identifying which fields are carried over into the result
 * @return a new Instances object holding one instance per tuple
 * @throws RuntimeException if a tuple's TupleDesc does not equal {@code td}
 */
public static Instances relationToInstances(String name, List<Tuple> ts, TupleDesc td,
        List<Integer> fields) {
    ArrayList<Attribute> attributes = tupleDescToAttributeList(td, fields);
    Instances result = new Instances(name, attributes, ts.size());

    int position = 0;
    for (Tuple tuple : ts) {
        boolean sameSchema = tuple.getTupleDesc().equals(td);
        if (!sameSchema) {
            throw new RuntimeException("All TupleDescs must match.");
        }
        result.add(position, tupleToInstance(tuple, attributes, fields));
        position++;
    }
    return result;
}
示例2: testCOMT2
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Manual driver: trains a COMT2 model on a stored training set, prints an evaluation
 * summary to stderr, and writes two result files (the instance reported as having the
 * possibly maximal class value, and the training set re-labelled with predictions).
 *
 * @throws Exception propagated from loading, training, evaluation or saving
 */
public static void testCOMT2() throws Exception {
    BestConf bestconf = new BestConf();

    // Training data: the last attribute is the class.
    Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    // Unlabeled sample points, extended by the training set's class attribute as last column.
    Instances samplePoints = LHSInitializer.getMultiDimContinuous(
            bestconf.getAttributes(), InitialSampleSetSize, false);
    samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
    samplePoints.setClassIndex(samplePoints.numAttributes() - 1);

    COMT2 comt = new COMT2(samplePoints, COMT2Iteration);
    comt.buildClassifier(trainingSet);

    // Evaluate on the training data itself and report.
    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(comt, trainingSet);
    System.err.println(eval.toSummaryString());

    // Persist the single best instance found by the model.
    Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
    Instances bestInstances = new Instances(trainingSet, 2);
    bestInstances.add(best);
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

    // Output the training set with the class value replaced by the predicted value.
    Instances output = new Instances(trainingSet, trainingSet.numInstances());
    for (int row = 0; row < trainingSet.numInstances(); row++) {
        Instance ins = trainingSet.instance(row);
        double[] values = ins.toDoubleArray();
        values[values.length - 1] = comt.classifyInstance(ins);
        output.add(ins.copy(values));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
示例3: retrieveMore
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Moves up to {@code toGen} randomly chosen instances out of the unlabeled pool into a
 * new data set that shares the pool's header.
 * <p>
 * Each pick removes the instance from {@code unlabeldPool}, so no instance is returned
 * twice. If the pool runs out before {@code toGen} instances have been drawn, the
 * method stops early and returns what it has (previously this case failed inside
 * {@code rand.nextInt(0)} with an {@code IllegalArgumentException}).
 *
 * @param toGen the number of instances requested
 * @return the instances removed from the pool; may hold fewer than {@code toGen}
 */
private Instances retrieveMore(int toGen){
    Instances retval = new Instances(this.unlabeldPool, toGen);
    for (int i = 0; i < toGen; i++) {
        int remaining = this.unlabeldPool.size();
        if (remaining == 0) {
            // Pool exhausted: nothing left to draw from.
            break;
        }
        retval.add(this.unlabeldPool.remove(rand.nextInt(remaining)));
    }
    return retval;
}
示例4: processCollection
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Builds the concept-selection training data for the current collection and writes it
 * to {@code <targetLocation>/<name>.arff}.
 * <p>
 * Steps: rank and merge the extracted concepts, run the TextRank scorer, load the gold
 * concept map (first {@code .cmap} file in {@code documentLocation}), label each concept
 * as gold/non-gold via stem/stop-word matching, create one instance per concept, and
 * dump the data set as ARFF text.
 *
 * @throws IllegalStateException if {@code documentLocation} contains no {@code .cmap} file
 */
@Override
public void processCollection() {
    File loc = new File(this.parent.getTargetLocation());
    String cluster = loc.getName();

    // prepare concepts
    concepts = this.parent.getComponent(ConceptExtractor.class).getConcepts();
    ConceptRanker.rankConcepts(concepts, RankingStrategy.CF, false, this.parent, "");
    ConceptMerger.mergeConcepts(concepts, conceptMapping, MergeStrategy.STEM_SW);
    Map<Concept, Set<Concept>> groupLookup = ClassifierUtils.buildConceptGroupingLookup(concepts, conceptMapping);
    this.parent.log(this, "concepts: " + this.concepts.size());

    ConceptDict cd = parent.getComponent(ConceptDict.class);
    TextRankScorer tr = parent.getComponent(TextRankScorer.class);
    tr.compute();

    // load gold data
    FilenameFilter filter = new FilenameFilter() {
        public boolean accept(File dir, String name) {
            return name.endsWith(".cmap");
        }
    };
    // listFiles returns null when the location is not a readable directory.
    File[] goldFiles = documentLocation.listFiles(filter);
    if (goldFiles == null || goldFiles.length == 0) {
        throw new IllegalStateException("No .cmap gold file found in " + documentLocation);
    }
    File goldFile = new File(goldFiles[0].getPath());
    ConceptMap mapGold = ConceptMapReader.readFromFile(goldFile, Format.TSV);

    // create data
    this.parent.log(this, "computing features");

    URL sw = getClass().getResource("lists/stopwords_en_eval.txt");
    StemSWMatch match = new StemSWMatch(sw);
    int matched = 0;

    String topicFile = documentLocation.getParent() + "/topics.tsv";
    String clusterSizeFile = documentLocation.getParent() + "/cluster_size.txt";
    ClassifierUtils util = new ClassifierUtils(clusterSizeFile, topicFile);
    Instances data = util.createEmptyDataset("ConceptSelectionTrain");

    for (Concept c : concepts) {
        // label: a concept is positive if it matches any gold concept
        boolean isGold = false;
        for (Concept cg : mapGold.getConcepts()) {
            if (match.isMatch(cg.name, c.name)) {
                isGold = true;
                matched++;
                break;
            }
        }
        Instance instance = util.createInstance(c, isGold, cd, tr, cluster, groupLookup);
        data.add(instance);
    }

    // try-with-resources guarantees the writer is closed even when write() fails.
    String arffPath = parent.getTargetLocation() + "/" + parent.getName() + ".arff";
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(arffPath))) {
        writer.write(data.toString());
    } catch (IOException e) {
        e.printStackTrace();
    }

    this.parent.log(this,
            "arff-file created: " + data.numInstances() + ", " + data.numAttributes() + ", " + data.numClasses());
    this.parent.log(this, "positive instances: " + matched);
}
示例5: getMultiDimContinuousDiv
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over numeric attributes by splitting each
 * attribute's [lower, upper] range into {@code sampleSetSize} equal-width subdomains.
 * <p>
 * The pace (subdomain width) is (upper - lower) DIVided by {@code sampleSetSize}.
 * Several candidate index sets are produced with {@code generateOneSampleSet}; the one
 * whose {@code minDistForSet} value is largest is used to decide which subdomain each
 * sample takes per attribute. If no candidate has a minimum distance above zero, the
 * first candidate is used (previously this case ended in a NullPointerException).
 * <p>
 * At the current version all attributes are assumed to be numeric with bounds.
 *
 * @param atts          the attributes to sample; also the header of the result
 * @param sampleSetSize the number of samples (and subdomains per attribute)
 * @param useMid        true to use the middle point of a subdomain, false to use a random
 *                      point within the subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
private static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;

    //generate L candidate sets of sampleSetSize points and keep the one with the maximum minDist
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        crntMinDist = minDistForSet(setPerm);
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        } else if (setWithMaxMinDist == null) {
            //fallback so that a selection exists even when no candidate beats the threshold
            setWithMaxMinDist = setPerm;
        }
    }

    //first, divide the domain of each attribute into sampleSetSize equal subdomains
    double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
    for (int i = 0; i < bounds.length; i++) {
        Attribute crntAttr = atts.get(i);
        bounds[i][0] = crntAttr.getLowerNumericBound();
        bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
        double pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize;
        for (int j = 1; j < sampleSetSize; j++) {
            bounds[i][j] = bounds[i][j-1] + pace;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain chosen for sample i on attribute j
            double lower = bounds[j][cell];
            double upper = bounds[j][cell+1];
            vals[j] = useMid
                    ? (lower + upper) / 2
                    : lower + ((upper - lower) * uniRand.nextDouble());
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}
示例6: updateModels
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Trains and stores the model for one variable, unless a model file already exists.
 * <p>
 * Each highlight-span feedback is treated as a "normal" training example: the feedback
 * data set is appended to the instance data set and the merged set is used for
 * training. After training, the model and its feature weights are written to their
 * respective files.
 *
 * @param sessionID session identifier used to derive the file names
 * @param userID    user identifier used to derive the file names
 * @param varID     variable identifier used to derive the file names
 * @param model     the predictor to train and persist
 * @throws UnsupportedOperationException if the instance or feedback data set file is missing
 * @throws Exception propagated from loading, training or saving
 */
public void updateModels(String sessionID, String userID, String varID,
        SVMPredictor model) throws Exception {
    String fn_model = getModelFileName(sessionID, userID, varID);
    if (Util.fileExists(fn_model)) {
        // The model already exists: nothing to do.
        return;
    }

    String fn_instanceDS = getInstanceDSFileName(sessionID, userID, varID);
    String fn_feedbackDS = getFeedbackDSFileName(sessionID, userID, varID);

    boolean hasInstanceDS = Util.fileExists(fn_instanceDS);
    if (!hasInstanceDS) {
        String message = "Training data set does not exist. "
                + "Create the (instance) variable value data set for "
                + fn_model + " before re-train it";
        throw new UnsupportedOperationException(message);
    }
    boolean hasFeedbackDS = Util.fileExists(fn_feedbackDS);
    if (!hasFeedbackDS) {
        String message = "Training data set does not exist. "
                + "Create the (feedback) highlight span data set for "
                + fn_model + " before re-train it";
        throw new UnsupportedOperationException(message);
    }

    // Merge the two instance sets: feedback rows are appended to the instance rows.
    Instances instanceDS = loadInstancesFromFile(fn_instanceDS);
    Instances feedbackDS = loadInstancesFromFile(fn_feedbackDS);
    int row = 0;
    while (row < feedbackDS.numInstances()) {
        instanceDS.add(feedbackDS.instance(row));
        row++;
    }

    // Train on the merged set, then persist the model.
    model.train((Object) instanceDS);
    model.saveModel(fn_model);

    // Persist the feature weights, keyed by the global feature names.
    String fn_featureWeight = getFeatureWeightFileName(sessionID, userID, varID);
    String[] globalFeatureName = Util.loadList(fn_globalFeatureName);
    model.saveFeatureWeights(globalFeatureName, fn_featureWeight);
}
示例7: transformToWeka
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Converts a feature-vector data set into a Weka {@code Instances} object.
 * <p>
 * For every record, each non-label attribute value is parsed as a double (a missing
 * value becomes 0) and stored in schema iteration order. When {@code datasetName}
 * equals this object's training-set name, the label is written into the slot that
 * follows the features, encoded as the index of the label value in the corresponding
 * data set attribute.
 *
 * @param features
 *            holds all features, including a label if training data is created
 * @param datasetName
 *            the data set's name
 * @return the created data set
 */
public Instances transformToWeka(FeatureVectorDataSet features, String datasetName) {
    Instances dataset = defineDataset(features, datasetName);

    Iterator<Record> records = features.get().iterator();
    while (records.hasNext()) {
        Record record = records.next();

        // One slot per schema attribute; features fill the slots from the front.
        Collection<Attribute> attributes = features.getSchema().get();
        double[] values = new double[attributes.size()];
        int index = 0;
        for (Attribute attr : attributes) {
            if (attr.equals(FeatureVectorDataSet.ATTRIBUTE_LABEL)) {
                // The label is handled separately below.
                continue;
            }
            String feature = record.getValue(attr);
            values[index] = (feature == null) ? 0 : Double.parseDouble(feature);
            index++;
        }

        // NOTE(review): the label is written into `values` after the instance has been
        // created, which relies on DenseInstance keeping a reference to the array
        // rather than copying it - confirm against the Weka version in use.
        Instance inst = new DenseInstance(1.0, values);

        // The label is a special case and always sits right after the features.
        if (datasetName.equals(this.trainingSet)) {
            String labelRecord = record.getValue(FeatureVectorDataSet.ATTRIBUTE_LABEL);
            values[index] = dataset.attribute(index).indexOfValue(labelRecord);
        }
        dataset.add(inst);
    }
    return dataset;
}
示例8: orderByCompactClass
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Reorders the data class by class, according to each class's compactness.
 * <p>
 * The input is first sorted in place by its class attribute. For every class the
 * compactness is the average, over the class's instances, of the squared Euclidean
 * distance to the class mean with the contribution of the value at index 0 removed.
 * Classes are then emitted from the last to the first position of the order produced
 * by {@code QuickSort.sort(compactness, labels)}.
 * <p>
 * NOTE(review): index 0 is presumably the class attribute - confirm with callers.
 * NOTE(review): a class with zero instances yields 0/0 (NaN) compactness, and
 * {@code arithmeticMean} receives an empty array for it - verify this is handled.
 *
 * @param data the data set to reorder; it is sorted in place as a side effect
 * @return a new data set with the same header holding the reordered instances
 */
public static Instances orderByCompactClass(Instances data) {
    Instances newData = new Instances(data, data.numInstances());

    // get the number of classes in the data
    int nbClass = data.numClasses();
    int[] instancePerClass = new int[nbClass];
    int[] labels = new int[nbClass];
    int[] classIndex = new int[nbClass];
    double[] compactness = new double[nbClass];

    // sort the data based on its class so that every class occupies a contiguous range
    data.sort(data.classAttribute());

    // The per-class counts do not change inside the loop, so compute the stats once
    // instead of rescanning the data for every class.
    int[] nominalCounts = data.attributeStats(data.classIndex()).nominalCounts;

    int start = 0;
    for (int i = 0; i < nbClass; i++) {
        instancePerClass[i] = nominalCounts[i];
        labels[i] = i;
        // offset of the first instance of class i in the sorted data
        classIndex[i] = start;

        int end = start + instancePerClass[i];
        double[][] dataPerClass = new double[instancePerClass[i]][];
        for (int j = start; j < end; j++) {
            dataPerClass[j - start] = data.instance(j).toDoubleArray();
        }

        double[] mean = arithmeticMean(dataPerClass);
        double d = 0;
        for (double[] row : dataPerClass) {
            double temp = euclideanDistance(mean, row);
            temp *= temp;
            // remove the contribution of the value at index 0 from the squared distance
            temp -= (mean[0] - row[0]) * (mean[0] - row[0]);
            d += temp;
        }
        compactness[i] = d / instancePerClass[i];
        start = end;
    }

    QuickSort.sort(compactness, labels);

    // emit the classes from the last to the first position of the sorted order
    for (int i = nbClass - 1; i >= 0; i--) {
        int label = labels[i];
        for (int j = 0; j < instancePerClass[label]; j++) {
            newData.add(data.instance(classIndex[label] + j));
        }
    }
    return newData;
}
示例9: train
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Trains ModelNum M5P models and refines them for up to comtIterations rounds.
 *
 * Per round and per model, every instance of the unlabeled pool is labeled with the
 * model's own prediction, added to a copy of the model's training set, and a trial
 * model is built from that copy; the instance with the largest positive delta (as
 * computed by computeOmegaDelta over the instance's siblings) is selected, removed
 * from the pool and later added to that model's training set. The loop stops early
 * when no model selected anything in a round.
 *
 * @throws Exception propagated from model building and classification
 */
private void train() throws Exception{
// build the initial models, one per entry of M
models = new M5P[ModelNum];
for(int i=0;i<ModelNum;i++){
models[i] = buildModel(labeledInstances, M[i]);
}
for(int i=0;i<this.comtIterations;i++){
// instances selected in this round, one list per model
ArrayList<Instance>[] InstancePiSet = new ArrayList[ModelNum];
for(int j=0;j<ModelNum;j++)
InstancePiSet[j] = new ArrayList<Instance>();
for(int m=0;m<ModelNum;m++){
double maxDelta = 0;
Instance maxDeltaXY = null;
Enumeration<Instance> enu = this.unlabeledInstances.enumerateInstances();
while(enu.hasMoreElements()){
Instance ulIns = enu.nextElement();
Instances omega = getSiblings(models[m], ulIns);
double y = models[m].classifyInstance(ulIns);
// lazily resolve the class index from the labeled data
if(indexOfClass==-1)
indexOfClass = labeledInstances.classIndex();
// NOTE(review): this writes the prediction into the enumerated instance itself;
// presumably that is the object held by the pool - confirm
ulIns.setValue(indexOfClass, y);
// trial model: the model's training set plus the newly labeled instance
Instances instancesPi = new Instances(models[m].getM5RootNode().zyqGetTrainingSet());
instancesPi.add(ulIns);
M5P modelPi = buildModel(instancesPi, M[m]);
double delta = computeOmegaDelta(models[m],modelPi,omega);
// keep the instance with the largest delta seen so far
if(maxDelta<delta){
maxDelta = delta;
maxDeltaXY = ulIns;
}
}
//only a strictly positive delta selects an instance; it then leaves the unlabeled pool
if(maxDelta>0){
InstancePiSet[m].add(maxDeltaXY);
this.unlabeledInstances.delete(this.unlabeledInstances.indexOf(maxDeltaXY));
}
}//check for both model
// stop when no model selected an instance in this round
boolean toExit = true;
for(int m=0;m<ModelNum;m++){
if(InstancePiSet[m].size()>0){
toExit = false;
break;
}
}
if(toExit)
break;
else{
//update the models
int toGen = 0;//total number of instances taken from the pool in this round
for(int m=0;m<ModelNum;m++){
Instances set = models[m].getM5RootNode().zyqGetTrainingSet();
toGen += InstancePiSet[m].size();
// each model is retrained on its training set plus the instances in InstancePiSet[m]
for(Instance ins : InstancePiSet[m])
set.add(ins);
models[m] = buildModel(set, M[m]);
}
//Replenish pool U' to size p
Instances toAdd = retrieveMore(toGen);
unlabeledInstances.addAll(toAdd);
}//we will go to another round of iteration
}//iterate for a number of rounds or break out on empty InstancesPiSets
//now we have the model as y = 0.5*sum(models[m].predict(x))
}
示例10: getMultiDim
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over a mix of numeric and nominal attributes.
 * <p>
 * Assumptions: (1) numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 * <p>
 * Numeric ranges are split into {@code sampleSetSize} equal-width subdomains. For a
 * nominal attribute each slot holds the position of one nominal value: chosen at random
 * when the attribute has at least {@code sampleSetSize} values, otherwise round-robin
 * followed by random picks for the remaining slots. Several candidate index sets are
 * produced with {@code generateOneSampleSet}; the one with the largest
 * {@code minDistForSet} value decides which slot each sample uses per attribute. If no
 * candidate has a minimum distance above zero, the first candidate is used (previously
 * this case ended in a NullPointerException).
 *
 * @param atts          the attributes to sample; also the header of the result
 * @param sampleSetSize the number of samples (and slots per attribute)
 * @param useMid        true to use the middle point of a numeric subdomain, false to use a
 *                      random point within the subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;

    //generate L candidate sets of sampleSetSize points and keep the one with the maximum minDist
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        crntMinDist = minDistForSet(setPerm);
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        } else if (setWithMaxMinDist == null) {
            //fallback so that a selection exists even when no candidate beats the threshold
            setWithMaxMinDist = setPerm;
        }
    }

    //first, fill the slots of each attribute
    double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
    for (int i = 0; i < bounds.length; i++) {
        Attribute crntAttr = atts.get(i);
        if (crntAttr.isNumeric()) {
            bounds[i][0] = crntAttr.getLowerNumericBound();
            bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
            double pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
            for (int j = 1; j < sampleSetSize; j++) {
                bounds[i][j] = bounds[i][j-1] + pace;
            }
        } else {//crntAttr.isNominal()
            if (crntAttr.numValues() >= sampleSetSize) {
                //randomly select among the set
                for (int j = 0; j <= sampleSetSize; j++) {
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
                }
            } else {
                //first round-robin
                int lastPart = sampleSetSize % crntAttr.numValues();
                for (int j = 0; j < sampleSetSize - lastPart; j++) {
                    bounds[i][j] = j % crntAttr.numValues();
                }
                //then randomly select
                for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++) {
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
                }
            }
        }//nominal attribute
    }//get all subdomains

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//slot chosen for sample i on attribute j
            if (atts.get(j).isNumeric()) {
                double lower = bounds[j][cell];
                double upper = bounds[j][cell+1];
                vals[j] = useMid
                        ? (lower + upper) / 2
                        : lower + ((upper - lower) * uniRand.nextDouble());
            } else {//isNominal()
                vals[j] = bounds[j][cell];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}
示例11: sampleMultiDimContinuous
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over numeric attributes, with the
 * subdomain bounds of each attribute supplied by {@code uniBoundsGeneration}.
 * <p>
 * Several candidate index sets are produced with {@code generateOneSampleSet}; the one
 * with the largest {@code minDistForSet} value decides which subdomain each sample takes
 * per attribute. If no candidate has a minimum distance above zero, the first candidate
 * is used (previously this case ended in a NullPointerException). Values of attributes
 * whose overall range exceeds {@code sampleSetSize} are truncated to integers.
 * <p>
 * At the current version all attributes are assumed to be numeric with bounds.
 *
 * @param atts          the attributes to sample; also the header of the result
 * @param sampleSetSize the number of samples (and subdomains per attribute)
 * @param useMid        true to use the middle point of a subdomain, false to use a random
 *                      point within the subdomain
 * @return a data set named "SamplesByLHS" holding the generated points
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;

    //generate L candidate sets of sampleSetSize points and keep the one with the maximum minDist
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        crntMinDist = minDistForSet(setPerm);
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        } else if (setWithMaxMinDist == null) {
            //fallback so that a selection exists even when no candidate beats the threshold
            setWithMaxMinDist = setPerm;
        }
    }

    //first, obtain the subdomain bounds of each attribute
    double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
    boolean[] roundToInt = new boolean[atts.size()];
    for (int i = 0; i < bounds.length; i++) {
        Attribute crntAttr = atts.get(i);
        uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
        //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);

        //a range wider than the sample count marks the attribute for integer truncation
        if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize) {
            roundToInt[i] = true;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain chosen for sample i on attribute j
            double lower = bounds[j][cell];
            double upper = bounds[j][cell+1];
            vals[j] = useMid
                    ? (lower + upper) / 2
                    : lower + ((upper - lower) * uniRand.nextDouble());
            if (roundToInt[j]) {
                vals[j] = (int) vals[j];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}
示例12: getMultiDimContinuousDiv
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over numeric attributes by splitting each
 * attribute's [lower, upper] range into {@code sampleSetSize} equal-width subdomains.
 * <p>
 * The pace (subdomain width) is (upper - lower) DIVided by {@code sampleSetSize}.
 * Several candidate index sets are produced with {@code generateOneSampleSet}; the one
 * whose {@code minDistForSet} value is largest is used to decide which subdomain each
 * sample takes per attribute. If no candidate has a minimum distance above zero, the
 * first candidate is used (previously this case ended in a NullPointerException).
 * <p>
 * At the current version all attributes are assumed to be numeric with bounds.
 *
 * @param atts          the attributes to sample; also the header of the result
 * @param sampleSetSize the number of samples (and subdomains per attribute)
 * @param useMid        true to use the middle point of a subdomain, false to use a random
 *                      point within the subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;

    //generate L candidate sets of sampleSetSize points and keep the one with the maximum minDist
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        crntMinDist = minDistForSet(setPerm);
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        } else if (setWithMaxMinDist == null) {
            //fallback so that a selection exists even when no candidate beats the threshold
            setWithMaxMinDist = setPerm;
        }
    }

    //first, divide the domain of each attribute into sampleSetSize equal subdomains
    double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
    for (int i = 0; i < bounds.length; i++) {
        Attribute crntAttr = atts.get(i);
        bounds[i][0] = crntAttr.getLowerNumericBound();
        bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
        double pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize;
        for (int j = 1; j < sampleSetSize; j++) {
            bounds[i][j] = bounds[i][j-1] + pace;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain chosen for sample i on attribute j
            double lower = bounds[j][cell];
            double upper = bounds[j][cell+1];
            vals[j] = useMid
                    ? (lower + upper) / 2
                    : lower + ((upper - lower) * uniRand.nextDouble());
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}
示例13: getMultiDim
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over a mix of numeric and nominal attributes.
 * <p>
 * Assumptions: (1) numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 * <p>
 * Numeric ranges are split into {@code sampleSetSize} equal-width subdomains. For a
 * nominal attribute each slot holds the position of one nominal value: chosen at random
 * when the attribute has at least {@code sampleSetSize} values, otherwise round-robin
 * followed by random picks for the remaining slots. Several candidate index sets are
 * produced with {@code generateOneSampleSet}; the one with the largest
 * {@code minDistForSet} value decides which slot each sample uses per attribute. If no
 * candidate has a minimum distance above zero, the first candidate is used (previously
 * this case ended in a NullPointerException).
 *
 * @param atts          the attributes to sample; also the header of the result
 * @param sampleSetSize the number of samples (and slots per attribute)
 * @param useMid        true to use the middle point of a numeric subdomain, false to use a
 *                      random point within the subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;

    //generate L candidate sets of sampleSetSize points and keep the one with the maximum minDist
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        crntMinDist = minDistForSet(setPerm);
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        } else if (setWithMaxMinDist == null) {
            //fallback so that a selection exists even when no candidate beats the threshold
            setWithMaxMinDist = setPerm;
        }
    }

    //first, fill the slots of each attribute
    double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
    for (int i = 0; i < bounds.length; i++) {
        Attribute crntAttr = atts.get(i);
        if (crntAttr.isNumeric()) {
            bounds[i][0] = crntAttr.getLowerNumericBound();
            bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
            double pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
            for (int j = 1; j < sampleSetSize; j++) {
                bounds[i][j] = bounds[i][j-1] + pace;
            }
        } else {//crntAttr.isNominal()
            if (crntAttr.numValues() >= sampleSetSize) {
                //randomly select among the set
                for (int j = 0; j <= sampleSetSize; j++) {
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
                }
            } else {
                //first round-robin
                int lastPart = sampleSetSize % crntAttr.numValues();
                for (int j = 0; j < sampleSetSize - lastPart; j++) {
                    bounds[i][j] = j % crntAttr.numValues();
                }
                //then randomly select
                for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++) {
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
                }
            }
        }//nominal attribute
    }//get all subdomains

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//slot chosen for sample i on attribute j
            if (atts.get(j).isNumeric()) {
                double lower = bounds[j][cell];
                double upper = bounds[j][cell+1];
                vals[j] = useMid
                        ? (lower + upper) / 2
                        : lower + ((upper - lower) * uniRand.nextDouble());
            } else {//isNominal()
                vals[j] = bounds[j][cell];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}
示例14: getMultiDimContinuousLog
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over numeric attributes, with subdomain
 * bounds that grow multiplicatively (by factors of 10) rather than in equal-width steps.
 * <p>
 * The number of decades is taken as {@code (int) log10(upper - lower)}. Several candidate
 * index sets are produced with {@code generateOneSampleSet}; the one with the largest
 * {@code minDistForSet} value decides which subdomain each sample takes per attribute.
 * If no candidate has a minimum distance above zero, the first candidate is used
 * (previously this case ended in a NullPointerException).
 * <p>
 * At the current version all attributes are assumed to be numeric with bounds.
 * <p>
 * NOTE(review): when {@code (int) log10(upper - lower)} is 0 (range below 10, or not
 * positive) the integer division/modulo below throws an ArithmeticException - confirm
 * whether such attributes can occur.
 *
 * @param atts          the attributes to sample; also the header of the result
 * @param sampleSetSize the number of samples (and subdomains per attribute)
 * @param useMid        true to use the middle point of a subdomain, false to use a random
 *                      point within the subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuousLog(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;

    //generate L candidate sets of sampleSetSize points and keep the one with the maximum minDist
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        crntMinDist = minDistForSet(setPerm);
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        } else if (setWithMaxMinDist == null) {
            //fallback so that a selection exists even when no candidate beats the threshold
            setWithMaxMinDist = setPerm;
        }
    }

    //first, divide the domain of each attribute into sampleSetSize subdomains
    double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    int step, crntStep;
    for(int i=0;i<bounds.length;i++){
        crntAttr = itr.next();

        bounds[i][0] = crntAttr.getLowerNumericBound();
        bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
        //number of decades spanned by the range
        crntStep = (int)Math.log10(bounds[i][sampleSetSize] - bounds[i][0]);
        step = sampleSetSize/crntStep;//num of points drawn after the multiplication of 10
        int left = sampleSetSize%crntStep;
        //a zero lower bound cannot be scaled multiplicatively, so it is replaced by a random digit
        //NOTE(review): nextInt(10) can itself return 0 - confirm whether that is intended
        if(bounds[i][0]==0)
            bounds[i][0]=uniRand.nextInt(10);
        crntStep = 1;
        double theBound = bounds[i][sampleSetSize]/10;
        for(int j=1;j<sampleSetSize;j++){
            //start a new decade once `step` points were placed and there is still room below upper/10
            if(crntStep>=step && bounds[i][j-1]<=theBound)
                crntStep=0;

            if(crntStep==0)
                bounds[i][j] = bounds[i][j-step] * 10;
            else if(crntStep<step)
                bounds[i][j] = bounds[i][j-crntStep] * ((double)crntStep*10./((double)step+1.));
            else if(crntStep>=step)
                bounds[i][j] = bounds[i][j-crntStep] * ((double)crntStep*10./(double)(left+step+1));

            //a generated bound at or beyond the upper bound is only reported, not corrected
            if(bounds[i][j]>=bounds[i][sampleSetSize])
                System.err.println("be careful!!!!");
            crntStep++;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain chosen for sample i on attribute j
            double lower = bounds[j][cell];
            double upper = bounds[j][cell+1];
            vals[j] = useMid
                    ? (lower + upper) / 2
                    : lower + ((upper - lower) * uniRand.nextDouble());
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}
示例15: getMultiDimContinuous
import weka.core.Instances; //导入方法依赖的package包/类
/**
 * Generates {@code sampleSetSize} sample points over numeric attributes, with the
 * subdomain bounds of each attribute supplied by {@code uniBoundsGeneration}.
 * <p>
 * Several candidate index sets are produced with {@code generateOneSampleSet}; the one
 * with the largest {@code minDistForSet} value decides which subdomain each sample takes
 * per attribute. If no candidate has a minimum distance above zero, the first candidate
 * is used (previously this case ended in a NullPointerException). Values of attributes
 * whose overall range exceeds {@code sampleSetSize} are truncated to integers.
 * <p>
 * At the current version all attributes are assumed to be numeric with bounds.
 *
 * @param atts          the attributes to sample; also the header of the result
 * @param sampleSetSize the number of samples (and subdomains per attribute)
 * @param useMid        true to use the middle point of a subdomain, false to use a random
 *                      point within the subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;

    //generate L candidate sets of sampleSetSize points and keep the one with the maximum minDist
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        crntMinDist = minDistForSet(setPerm);
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        } else if (setWithMaxMinDist == null) {
            //fallback so that a selection exists even when no candidate beats the threshold
            setWithMaxMinDist = setPerm;
        }
    }

    //first, obtain the subdomain bounds of each attribute
    double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
    boolean[] roundToInt = new boolean[atts.size()];
    for (int i = 0; i < bounds.length; i++) {
        Attribute crntAttr = atts.get(i);
        uniBoundsGeneration(bounds[i], crntAttr, sampleSetSize);
        //flexibleBoundsGeneration(bounds[i], crntAttr, sampleSetSize);

        //a range wider than the sample count marks the attribute for integer truncation
        if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize) {
            roundToInt[i] = true;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            int cell = setWithMaxMinDist[j].get(i);//subdomain chosen for sample i on attribute j
            double lower = bounds[j][cell];
            double upper = bounds[j][cell+1];
            vals[j] = useMid
                    ? (lower + upper) / 2
                    : lower + ((upper - lower) * uniRand.nextDouble());
            if (roundToInt[j]) {
                vals[j] = (int) vals[j];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}