本文整理汇总了Java中weka.filters.unsupervised.attribute.ReplaceMissingValues.setInputFormat方法的典型用法代码示例。如果您正苦于以下问题：Java ReplaceMissingValues.setInputFormat方法的具体用法？Java ReplaceMissingValues.setInputFormat怎么用？Java ReplaceMissingValues.setInputFormat使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.filters.unsupervised.attribute.ReplaceMissingValues的用法示例。
在下文中一共展示了ReplaceMissingValues.setInputFormat方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: preProcessData
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Pre-processes a data set in four steps: runs {@code RemoveUseless},
 * runs {@code ReplaceMissingValues}, runs {@code Discretize} and finally
 * keeps only the attributes chosen by an information-gain ranking.
 *
 * @param data the data set to pre-process
 * @return the filtered data set, reduced to the selected attributes
 * @throws Exception if one of the filters or the attribute selection fails
 */
public static Instances preProcessData(Instances data) throws Exception {
    /*
     * Remove useless attributes
     */
    RemoveUseless removeUseless = new RemoveUseless();
    removeUseless.setOptions(new String[] { "-M", "99" }); // threshold
    removeUseless.setInputFormat(data);
    data = Filter.useFilter(data, removeUseless);

    /*
     * Replace missing values
     */
    ReplaceMissingValues fixMissing = new ReplaceMissingValues();
    fixMissing.setInputFormat(data);
    data = Filter.useFilter(data, fixMissing);

    /*
     * Discretize numeric attributes
     */
    Discretize discretizeNumeric = new Discretize();
    discretizeNumeric.setOptions(new String[] {
        "-O",
        "-M", "-1.0",
        "-B", "4", // no of bins
        "-R", "first-last" }); // range of attributes
    // Apply the discretizer itself; previously the missing-value filter was
    // re-applied here, so the configured Discretize filter was never used.
    discretizeNumeric.setInputFormat(data);
    data = Filter.useFilter(data, discretizeNumeric);

    /*
     * Select only informative attributes
     */
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    search.setOptions(new String[] { "-T", "0.001" }); // information gain threshold
    AttributeSelection attSelect = new AttributeSelection();
    attSelect.setEvaluator(eval);
    attSelect.setSearch(search);

    // apply attribute selection
    attSelect.SelectAttributes(data);

    // remove the attributes not selected in the last run
    data = attSelect.reduceDimensionality(data);

    return data;
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:52,代码来源:KddCup.java
示例2: normalizeDataSet
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Prepares a data set by running {@code Discretize} when at least one
 * enumerated attribute is not nominal, followed by
 * {@code ReplaceMissingValues}.
 *
 * @param instances data set to check and, where needed, quantize and/or
 *          fill in missing values for
 * @return the filtered instances
 * @throws Exception if a filter (Discretize, ReplaceMissingValues) fails
 */
protected Instances normalizeDataSet(Instances instances) throws Exception {
  // remember the index of the last non-nominal attribute seen (-1 if none)
  m_nNonDiscreteAttribute = -1;
  for (Enumeration<Attribute> attrs = instances.enumerateAttributes(); attrs
    .hasMoreElements();) {
    Attribute current = attrs.nextElement();
    if (current.type() == Attribute.NOMINAL) {
      continue;
    }
    m_nNonDiscreteAttribute = current.index();
  }

  boolean needsDiscretizing = (m_nNonDiscreteAttribute > -1)
    && (instances.attribute(m_nNonDiscreteAttribute).type() != Attribute.NOMINAL);
  if (needsDiscretizing) {
    m_DiscretizeFilter = new Discretize();
    m_DiscretizeFilter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_DiscretizeFilter);
  }

  m_MissingValuesFilter = new ReplaceMissingValues();
  m_MissingValuesFilter.setInputFormat(instances);
  return Filter.useFilter(instances, m_MissingValuesFilter);
}
示例3: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the clusterer from the given training data. Every field that is not
 * configured through options is initialised here.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_training = true;

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // filter a copy of the data whose class index is set to -1
  m_replaceMissing = new ReplaceMissingValues();
  Instances classless = new Instances(data);
  classless.setClassIndex(-1);
  m_replaceMissing.setInputFormat(classless);
  m_theInstances = weka.filters.Filter.useFilter(classless, m_replaceMissing);

  // start min/max tracking from the widest possible opposite bounds
  final int numAttributes = m_theInstances.numAttributes();
  m_minValues = new double[numAttributes];
  m_maxValues = new double[numAttributes];
  for (int att = 0; att < numAttributes; att++) {
    m_minValues[att] = Double.MAX_VALUE;
    m_maxValues[att] = -Double.MAX_VALUE;
  }
  for (int row = 0; row < m_theInstances.numInstances(); row++) {
    updateMinMax(m_theInstances.instance(row));
  }

  doEM();

  // save memory: keep only an empty copy of the data set
  m_theInstances = new Instances(m_theInstances, 0);
  m_training = false;
}
示例4: buildClassifier
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Trains the logistic regression model with LogitBoost after the training
 * data has been passed through ReplaceMissingValues and NominalToBinary.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // work on a copy from which instances with a missing class are removed
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // replace missing values
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);

  // convert nominal attributes
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);

  // create and configure the actual logistic model
  m_boostedModel = new LogisticBase(
    m_numBoostingIterations,
    m_useCrossValidation,
    m_errorOnProbabilities);
  m_boostedModel.setMaxIterations(m_maxBoostingIterations);
  m_boostedModel.setHeuristicStop(m_heuristicStop);
  m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
  m_boostedModel.setUseAIC(m_useAIC);
  m_boostedModel.setNumDecimalPlaces(m_numDecimalPlaces);

  // build logistic model
  m_boostedModel.buildClassifier(train);
}
示例5: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the clusterer from the given training data. Every field that is
 * not configured through options is initialised here.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // filter a copy of the data whose class index is set to -1
  m_replaceMissing = new ReplaceMissingValues();
  Instances withoutClass = new Instances(data);
  withoutClass.setClassIndex(-1);
  m_replaceMissing.setInputFormat(withoutClass);
  m_theInstances = weka.filters.Filter.useFilter(withoutClass, m_replaceMissing);

  // min and max of every attribute start out as NaN
  final int attributeCount = m_theInstances.numAttributes();
  m_minValues = new double[attributeCount];
  m_maxValues = new double[attributeCount];
  for (int a = 0; a < attributeCount; a++) {
    m_maxValues[a] = Double.NaN;
    m_minValues[a] = m_maxValues[a];
  }
  for (int idx = 0; idx < m_theInstances.numInstances(); idx++) {
    updateMinMax(m_theInstances.instance(idx));
  }

  doEM();

  // save memory: keep only an empty copy of the data set
  m_theInstances = new Instances(m_theInstances, 0);
}
示例6: buildClassifier
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Fits the logistic regression model using LogitBoost. The training data is
 * first run through ReplaceMissingValues and then NominalToBinary.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // copy the data and drop instances with a missing class
  Instances filtered = new Instances(data);
  filtered.deleteWithMissingClass();

  // replace missing values
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(filtered);
  filtered = Filter.useFilter(filtered, m_ReplaceMissingValues);

  // convert nominal attributes
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(filtered);
  filtered = Filter.useFilter(filtered, m_NominalToBinary);

  // create actual logistic model and hand over the configured settings
  m_boostedModel = new LogisticBase(
    m_numBoostingIterations,
    m_useCrossValidation,
    m_errorOnProbabilities);
  m_boostedModel.setMaxIterations(m_maxBoostingIterations);
  m_boostedModel.setHeuristicStop(m_heuristicStop);
  m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
  m_boostedModel.setUseAIC(m_useAIC);

  // build logistic model
  m_boostedModel.buildClassifier(filtered);
}
示例7: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the clusterer from the given training data. Every field that is not
 * configured through options is initialised here.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_training = true;

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // filter a copy of the data whose class index is set to -1
  m_replaceMissing = new ReplaceMissingValues();
  Instances unlabeled = new Instances(data);
  unlabeled.setClassIndex(-1);
  m_replaceMissing.setInputFormat(unlabeled);
  m_theInstances = weka.filters.Filter.useFilter(unlabeled, m_replaceMissing);

  // min and max of every attribute start out as NaN
  final int width = m_theInstances.numAttributes();
  m_minValues = new double[width];
  m_maxValues = new double[width];
  for (int col = 0; col < width; col++) {
    m_maxValues[col] = Double.NaN;
    m_minValues[col] = m_maxValues[col];
  }
  for (int row = 0; row < m_theInstances.numInstances(); row++) {
    updateMinMax(m_theInstances.instance(row));
  }

  doEM();

  // save memory: keep only an empty copy of the data set
  m_theInstances = new Instances(m_theInstances, 0);
  m_training = false;
}
示例8: cleanUpData
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Stores the given data in {@code m_Data} after passing it through
 * NominalToBinary and ReplaceMissingValues and then deleting the instances
 * with a missing class.
 *
 * @param data data to be cleaned up
 * @throws Exception if an error occurs
 */
private void cleanUpData(Instances data) throws Exception {
  Instances current = data;
  m_Data = current;

  // nominal -> binary
  m_TransformFilter = new NominalToBinary();
  m_TransformFilter.setInputFormat(current);
  current = Filter.useFilter(current, m_TransformFilter);
  m_Data = current;

  // fill in missing values
  m_MissingFilter = new ReplaceMissingValues();
  m_MissingFilter.setInputFormat(current);
  current = Filter.useFilter(current, m_MissingFilter);
  m_Data = current;

  m_Data.deleteWithMissingClass();
}
示例9: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the clusterer from the supplied data. When the data set is empty,
 * only the internal structures are initialised; otherwise every instance is
 * passed to {@code updateClusterer} after optional missing value replacement
 * and randomization.
 *
 * @param data the training data (may contain no instances)
 * @throws Exception if a filter or one of the update steps fails
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_t1 = m_userT1;
  m_t2 = m_userT2;

  if (data.numInstances() == 0) {
    if (m_userT2 < 0) {
      System.err
        .println("The heuristic for setting T2 based on std. dev. can't be used when "
          + "running in incremental mode. Using default of 1.0.");
      m_t2 = 1.0;
    }
  }

  m_canopyT2Density = new ArrayList<double[]>();
  m_canopyCenters = new ArrayList<double[][]>();
  m_canopyNumMissingForNumerics = new ArrayList<double[]>();

  if (data.numInstances() > 0) {
    if (!m_dontReplaceMissing) {
      m_missingValuesReplacer = new ReplaceMissingValues();
      m_missingValuesReplacer.setInputFormat(data);
      data = Filter.useFilter(data, m_missingValuesReplacer);
    }

    // draw and discard the first ten values of the generator before shuffling
    Random rng = new Random(getSeed());
    int discarded = 0;
    while (discarded < 10) {
      rng.nextInt();
      discarded++;
    }
    data.randomize(rng);

    if (m_userT2 < 0) {
      setT2T1BasedOnStdDev(data);
    }
  }

  // a non-positive user T1 is negated and multiplied by T2
  if (m_userT1 > 0) {
    m_t1 = m_userT1;
  } else {
    m_t1 = -m_userT1 * m_t2;
  }
  // NOTE: a check that rejected T1 < T2 with an exception is disabled here.

  m_distanceFunction.setInstances(data);
  m_canopies = new Instances(data, 0);
  if (data.numInstances() > 0) {
    m_trainingData = new Instances(data);
  }

  for (int pos = 0; pos < data.numInstances(); pos++) {
    if (getDebug() && pos % m_periodicPruningRate == 0) {
      System.err.println("Processed: " + pos);
    }
    updateClusterer(data.instance(pos));
  }

  updateFinished();
}
示例10: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the clusterer from the given training data. Every field that is not
 * configured through options is initialised here.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  m_ReplaceMissingFilter.setInputFormat(data);
  m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);

  initMinMax(m_instances);

  m_ClusterCentroids = new Instances(m_instances, m_NumClusters);

  final int numInstances = m_instances.numInstances();
  Random rng = new Random(getSeed());
  boolean[] taken = new boolean[numInstances];
  double[] nearest = new double[numInstances];
  for (int k = 0; k < nearest.length; k++) {
    nearest[k] = Double.MAX_VALUE;
  }

  // the first centroid is drawn at random
  int pick = rng.nextInt(numInstances);
  m_ClusterCentroids.add(m_instances.instance(pick));
  taken[pick] = true;
  updateMinDistance(nearest, taken, m_instances, m_instances.instance(pick));

  // there cannot be more clusters than instances
  if (m_NumClusters > numInstances) {
    m_NumClusters = numInstances;
  }

  // each further centroid is the index returned by farthestAway
  int centroidsChosen = 1;
  while (centroidsChosen < m_NumClusters) {
    pick = farthestAway(nearest, taken);
    m_ClusterCentroids.add(m_instances.instance(pick));
    taken[pick] = true;
    updateMinDistance(nearest, taken, m_instances, m_instances.instance(pick));
    centroidsChosen++;
  }

  // keep only an empty copy of the data set
  m_instances = new Instances(m_instances, 0);
}
示例11: buildClassifier
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the classifier: filters the training data (ReplaceMissingValues,
 * NominalToBinary, RemoveUseless), shuffles it and then builds either a list
 * of rules or a single tree, depending on {@code m_generateRules}.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the classifier has not been generated successfully
 */
@Override
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  data = new Instances(data);
  data.deleteWithMissingClass();

  m_instances = new Instances(data);

  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(m_instances);
  m_instances = Filter.useFilter(m_instances, m_replaceMissing);

  m_nominalToBinary = new NominalToBinary();
  m_nominalToBinary.setInputFormat(m_instances);
  m_instances = Filter.useFilter(m_instances, m_nominalToBinary);

  m_removeUseless = new RemoveUseless();
  m_removeUseless.setInputFormat(m_instances);
  m_instances = Filter.useFilter(m_instances, m_removeUseless);

  m_instances.randomize(new Random(1));

  m_ruleSet = new ArrayList<Rule>();

  if (!m_generateRules) {
    // just build a single tree
    Rule tree = new Rule();
    tree.setUseTree(true);
    tree.setSmoothing(!m_unsmoothedPredictions);
    tree.setSaveInstances(m_saveInstances);
    tree.setRegressionTree(m_regressionTree);
    tree.setUnpruned(m_useUnpruned);
    tree.setMinNumInstances(m_minNumInstances);
    tree.buildClassifier(m_instances);
    m_ruleSet.add(tree);
  } else {
    // keep building rules until no uncovered instances are left
    Instances remaining = m_instances;
    while (true) {
      Rule rule = new Rule();
      rule.setSmoothing(!m_unsmoothedPredictions);
      rule.setRegressionTree(m_regressionTree);
      rule.setUnpruned(m_useUnpruned);
      rule.setSaveInstances(false);
      rule.setMinNumInstances(m_minNumInstances);
      rule.buildClassifier(remaining);
      m_ruleSet.add(rule);

      remaining = rule.notCoveredInstances();
      rule.freeNotCoveredInstances();
      if (remaining.numInstances() <= 0) {
        break;
      }
    }
  }

  // save space
  m_instances = new Instances(m_instances, 0);
}
示例12: buildClassifier
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Trains the ensemble of perceptrons on the given data. Training stops after
 * {@code m_NumIterations} passes over the data or as soon as {@code m_K}
 * reaches {@code m_MaxK}.
 *
 * @param insts the data to train the classifier with
 * @throws Exception if something goes wrong during building
 */
public void buildClassifier(Instances insts) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(insts);

  // remove instances with missing class
  insts = new Instances(insts);
  insts.deleteWithMissingClass();

  // filter data
  m_Train = new Instances(insts);
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(m_Train);
  m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);

  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(m_Train);
  m_Train = Filter.useFilter(m_Train, m_NominalToBinary);

  // randomize training data
  m_Train.randomize(new Random(m_Seed));

  // make space to store perceptrons
  final int slots = m_MaxK + 1;
  m_Additions = new int[slots];
  m_IsAddition = new boolean[slots];
  m_Weights = new int[slots];

  // compute perceptrons
  m_K = 0;
  training:
  for (int pass = 0; pass < m_NumIterations; pass++) {
    for (int row = 0; row < m_Train.numInstances(); row++) {
      Instance example = m_Train.instance(row);
      if (example.classIsMissing()) {
        continue;
      }

      int predicted = makePrediction(m_K, example);
      int actual = (int) example.classValue();
      if (predicted != actual) {
        // mistake: record the instance and move on to the next perceptron
        m_IsAddition[m_K] = (actual == 1);
        m_Additions[m_K] = row;
        m_K++;
      }
      m_Weights[m_K]++;

      if (m_K == m_MaxK) {
        break training;
      }
    }
  }
}
示例13: buildClassifier
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the classifier. The data is copied, stripped of instances with a
 * missing class and passed through the configured filters before
 * {@code train} is called on the shuffled result. With no instances, only
 * the internal structures are set up.
 *
 * @param data the set of training instances.
 * @throws Exception if the classifier can't be built successfully.
 */
@Override
public void buildClassifier(Instances data) throws Exception {
  reset();

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  data = new Instances(data);
  data.deleteWithMissingClass();

  if (!m_dontReplaceMissing && data.numInstances() > 0) {
    m_replaceMissing = new ReplaceMissingValues();
    m_replaceMissing.setInputFormat(data);
    data = Filter.useFilter(data, m_replaceMissing);
  }

  // look for a non-numeric attribute other than the class
  boolean foundNonNumeric = false;
  for (int att = 0; att < data.numAttributes() && !foundNonNumeric; att++) {
    if (att != data.classIndex() && !data.attribute(att).isNumeric()) {
      foundNonNumeric = true;
    }
  }

  if (foundNonNumeric) {
    // the unsupervised variant is used when there are no instances
    if (data.numInstances() <= 0) {
      m_nominalToBinary = new weka.filters.unsupervised.attribute.NominalToBinary();
    } else {
      m_nominalToBinary = new weka.filters.supervised.attribute.NominalToBinary();
    }
    m_nominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_nominalToBinary);
  }

  if (!m_dontNormalize && data.numInstances() > 0) {
    m_normalize = new Normalize();
    m_normalize.setInputFormat(data);
    data = Filter.useFilter(data, m_normalize);
  }

  m_numInstances = data.numInstances();

  m_weights = new double[data.numAttributes() + 1];
  m_data = new Instances(data, 0);

  if (data.numInstances() > 0) {
    data.randomize(new Random(getSeed())); // randomize the data
    train(data);
  }
}
示例14: YATSIInstances
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Initializes the object: merges train and test instances into one sorted
 * array, records their weights and builds the training set from them with
 * ReplaceMissingValues applied.
 *
 * @param parent      the parent algorithm
 * @param train       the train instances
 * @param test        the test instances
 * @param setWeights  whether to set the weights for the training set
 *                    (the processed instances)
 * @throws Exception  if something goes wrong
 */
public YATSIInstances(YATSI parent, Instances train, Instances test,
                      boolean setWeights)
  throws Exception {

  super();

  m_Parent = parent;

  // weight assigned to every test instance
  final int trainCount = train.numInstances();
  final int testCount = test.numInstances();
  double weight = getParent().getNoWeights()
    ? 1.0
    : (double) trainCount / (double) testCount
        * getParent().getWeightingFactor();

  // build sorted array (train + test)
  m_Unprocessed = new Instance[trainCount + testCount];
  for (int t = 0; t < trainCount; t++) {
    m_Unprocessed[t] = train.instance(t);
  }
  for (int t = 0; t < testCount; t++) {
    int slot = trainCount + t;
    m_Unprocessed[slot] = test.instance(t);
    m_Unprocessed[slot].setWeight(weight);
  }
  Arrays.sort(m_Unprocessed, m_Comparator);

  // weights: remember them, and reset to 1 unless they should be kept
  m_Weights = new double[m_Unprocessed.length];
  for (int k = 0; k < m_Unprocessed.length; k++) {
    Instance current = m_Unprocessed[k];
    m_Weights[k] = current.weight();
    if (!setWeights) {
      current.setWeight(1);
    }
  }

  // filter data
  m_Trainset = new Instances(train, 0);
  for (int k = 0; k < m_Unprocessed.length; k++) {
    m_Trainset.add(m_Unprocessed[k]);
  }

  // set up filter
  m_Missing = new ReplaceMissingValues();
  m_Missing.setInputFormat(m_Trainset);
  m_Trainset = Filter.useFilter(m_Trainset, m_Missing);
}
示例15: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入方法依赖的package包/类
/**
 * Builds the clusterer from the given training data. Every field that is
 * not configured through options is initialised here.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  m_ReplaceMissingFilter.setInputFormat(data);
  m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);

  initMinMax(m_instances);

  m_ClusterCentroids = new Instances(m_instances, m_NumClusters);

  final int total = m_instances.numInstances();
  Random random = new Random(getSeed());
  boolean[] chosen = new boolean[total];
  double[] closest = new double[total];
  int fillPos = 0;
  while (fillPos < total) {
    closest[fillPos] = Double.MAX_VALUE;
    fillPos++;
  }

  // the first centroid is drawn at random
  int seedIndex = random.nextInt(total);
  m_ClusterCentroids.add(m_instances.instance(seedIndex));
  chosen[seedIndex] = true;
  updateMinDistance(closest, chosen, m_instances,
    m_instances.instance(seedIndex));

  // there cannot be more clusters than instances
  if (m_NumClusters > total) {
    m_NumClusters = total;
  }

  // each further centroid is the index returned by farthestAway
  for (int c = 1; c < m_NumClusters; c++) {
    int farthest = farthestAway(closest, chosen);
    m_ClusterCentroids.add(m_instances.instance(farthest));
    chosen[farthest] = true;
    updateMinDistance(closest, chosen, m_instances,
      m_instances.instance(farthest));
  }

  // keep only an empty copy of the data set
  m_instances = new Instances(m_instances, 0);
}