本文整理汇总了Java中weka.filters.unsupervised.attribute.ReplaceMissingValues类的典型用法代码示例。如果您正苦于以下问题：Java ReplaceMissingValues类的具体用法？Java ReplaceMissingValues怎么用？Java ReplaceMissingValues使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
ReplaceMissingValues类属于weka.filters.unsupervised.attribute包,在下文中一共展示了ReplaceMissingValues类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: preProcessData
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Applies up to three optional pre-processing filters to a dataset, always in
 * this order: ReplaceMissingValues, Standardize, NominalToBinary. Each enabled
 * step is run on the output of the previous one.
 *
 * @param data the data to transform
 * @param shouldImpute impute the data?
 * @param shouldStandardize standardize the numeric attributes?
 * @param shouldBinarize binarize the attributes?
 * @return the transformed data (the very same reference that was passed in
 *         when all three flags are false)
 * @throws Exception if one of the filters cannot be configured or applied
 */
public static Instances preProcessData(Instances data, boolean shouldImpute,
    boolean shouldStandardize, boolean shouldBinarize) throws Exception {
  if (shouldImpute) {
    Filter impute = new ReplaceMissingValues();
    impute.setInputFormat(data);
    data = Filter.useFilter(data, impute);
  }
  if (shouldStandardize) {
    Filter standardize = new Standardize();
    standardize.setInputFormat(data);
    data = Filter.useFilter(data, standardize);
  }
  if (shouldBinarize) {
    Filter binarize = new NominalToBinary();
    // make resulting binary attrs nominal, not numeric.
    // Options have to be set BEFORE setInputFormat(): the filter derives its
    // output format when the input format is declared, so options applied
    // afterwards are not guaranteed to be reflected in the result.
    binarize.setOptions(new String[] { "-N" });
    binarize.setInputFormat(data);
    data = Filter.useFilter(data, binarize);
  }
  return data;
}
示例2: preProcessData
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Runs the full pre-processing pipeline on a dataset: removes useless
 * attributes, replaces missing values, discretizes the attributes and finally
 * keeps only the attributes retained by an information-gain ranking.
 *
 * NOTE(review): the attribute selection step presumably requires the class
 * attribute of {@code data} to be set by the caller -- confirm.
 *
 * @param data the data to pre-process
 * @return the pre-processed data, reduced to the selected attributes
 * @throws Exception if a filter or the attribute selection fails
 */
public static Instances preProcessData(Instances data) throws Exception {
  /*
   * Remove useless attributes
   */
  RemoveUseless removeUseless = new RemoveUseless();
  removeUseless.setOptions(new String[] { "-M", "99" }); // threshold
  removeUseless.setInputFormat(data);
  data = Filter.useFilter(data, removeUseless);

  /*
   * Replace missing values
   */
  ReplaceMissingValues fixMissing = new ReplaceMissingValues();
  fixMissing.setInputFormat(data);
  data = Filter.useFilter(data, fixMissing);

  /*
   * Discretize attributes
   */
  Discretize discretizeNumeric = new Discretize();
  discretizeNumeric.setOptions(new String[] {
      "-O",
      "-M", "-1.0",
      "-B", "4", // no of bins
      "-R", "first-last"}); // range of attributes
  // Apply the discretizer that was just configured. Previously the
  // missing-value filter was re-applied here and the discretizer was never
  // used at all.
  discretizeNumeric.setInputFormat(data);
  data = Filter.useFilter(data, discretizeNumeric);

  /*
   * Select only informative attributes
   */
  InfoGainAttributeEval eval = new InfoGainAttributeEval();
  Ranker search = new Ranker();
  search.setOptions(new String[] { "-T", "0.001" }); // information gain threshold
  AttributeSelection attSelect = new AttributeSelection();
  attSelect.setEvaluator(eval);
  attSelect.setSearch(search);
  // apply attribute selection
  attSelect.SelectAttributes(data);
  // remove the attributes not selected in the last run
  data = attSelect.reduceDimensionality(data);
  return data;
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:52,代码来源:KddCup.java
示例3: normalizeDataSet
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Prepares a data set so that all variables are nominal and no values are
 * missing: if a non-nominal attribute is found the Discretize filter is
 * applied, and afterwards the ReplaceMissingValues filter is always applied.
 * The filters used are stored in member fields.
 *
 * @param instances data set to check and quantize and/or fill in missing
 *          values
 * @return filtered instances
 * @throws Exception if a filter (Discretize, ReplaceMissingValues) fails
 */
protected Instances normalizeDataSet(Instances instances) throws Exception {
  // Record the index of the last non-nominal attribute encountered; stays at
  // -1 when the enumeration yields only nominal attributes.
  m_nNonDiscreteAttribute = -1;
  for (Enumeration<Attribute> attrs = instances.enumerateAttributes(); attrs
    .hasMoreElements();) {
    Attribute candidate = attrs.nextElement();
    if (candidate.type() != Attribute.NOMINAL) {
      m_nNonDiscreteAttribute = candidate.index();
    }
  }

  boolean foundNonNominal = m_nNonDiscreteAttribute > -1;
  if (foundNonNominal
    && instances.attribute(m_nNonDiscreteAttribute).type() != Attribute.NOMINAL) {
    m_DiscretizeFilter = new Discretize();
    m_DiscretizeFilter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_DiscretizeFilter);
  }

  // Missing values are replaced regardless of whether discretization ran.
  m_MissingValuesFilter = new ReplaceMissingValues();
  m_MissingValuesFilter.setInputFormat(instances);
  return Filter.useFilter(instances, m_MissingValuesFilter);
}
示例4: initializeMembers
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Resets the members of this object to their initial state, after letting the
 * superclass initialize its own members. Sets up a fresh ReplaceMissingValues
 * filter and a freshly configured IBk base classifier, and registers the
 * additional measure "measureDeterminedKNN".
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  this.m_KNNdetermined = -1;
  this.m_NeighborsTestset = null;
  this.m_TrainsetNew = null;
  this.m_TestsetNew = null;
  this.m_UseNaiveSearch = false;
  this.m_LabeledTestset = null;
  this.m_Missing = new ReplaceMissingValues();

  // base classifier configuration
  this.m_Classifier = new IBk();
  this.m_Classifier.setKNN(10);
  this.m_Classifier.setCrossValidate(true);
  this.m_Classifier.setWindowSize(0);
  this.m_Classifier.setMeanSquared(false);

  // keep the local KNN setting in sync with the base classifier
  this.m_KNN = this.m_Classifier.getKNN();

  this.m_AdditionalMeasures.add("measureDeterminedKNN");
}
示例5: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * The training data is copied, its class index is unset, and missing values
 * are replaced before the per-attribute min/max arrays are filled and
 * doEM() is run. The training flag is raised for the duration of the call.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_training = true;

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // work on a copy without class attribute, with missing values replaced
  m_replaceMissing = new ReplaceMissingValues();
  Instances workingCopy = new Instances(data);
  workingCopy.setClassIndex(-1);
  m_replaceMissing.setInputFormat(workingCopy);
  m_theInstances = weka.filters.Filter.useFilter(workingCopy, m_replaceMissing);

  // calculate min and max values for attributes
  final int attributeCount = m_theInstances.numAttributes();
  m_minValues = new double[attributeCount];
  m_maxValues = new double[attributeCount];
  for (int att = 0; att < attributeCount; att++) {
    // start from the extremes so any observed value tightens the bounds
    m_minValues[att] = Double.MAX_VALUE;
    m_maxValues[att] = -Double.MAX_VALUE;
  }
  for (int row = 0; row < m_theInstances.numInstances(); row++) {
    updateMinMax(m_theInstances.instance(row));
  }

  doEM();

  // save memory: keep only the header information
  m_theInstances = new Instances(m_theInstances, 0);

  m_training = false;
}
示例6: buildClassifier
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the logistic regression using LogitBoost.
 *
 * The training data is copied, instances with a missing class are removed,
 * missing values are replaced and nominal attributes are converted before
 * the LogisticBase model is configured from the member settings and built.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // work on a copy and drop instances with missing class
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // replace missing values
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);

  // convert nominal attributes
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);

  // create actual logistic model and pass the configured settings on to it
  m_boostedModel = new LogisticBase(
    m_numBoostingIterations,
    m_useCrossValidation,
    m_errorOnProbabilities);
  m_boostedModel.setMaxIterations(m_maxBoostingIterations);
  m_boostedModel.setHeuristicStop(m_heuristicStop);
  m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
  m_boostedModel.setUseAIC(m_useAIC);
  m_boostedModel.setNumDecimalPlaces(m_numDecimalPlaces);

  // build logistic model
  m_boostedModel.buildClassifier(train);
}
示例7: initializeMembers
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Resets the members of this object to their initial state, after letting the
 * superclass initialize its own members: a fresh ReplaceMissingValues filter,
 * a fresh YATSI classifier, and no transformed train/test sets.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  this.m_Filter = new ReplaceMissingValues();
  this.m_Classifier = new YATSI();

  // no transformed datasets yet
  this.m_TrainsetNew = null;
  this.m_TestsetNew = null;
}
开发者ID:fracpete,项目名称:collective-classification-weka-package,代码行数:13,代码来源:FilteredCollectiveClassifier.java
示例8: PLSFilter
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Default constructor. Sets up the pre-processing members: a
 * ReplaceMissingValues filter and a Center filter.
 */
public PLSFilter() {
  super();

  // setup pre-processing
  this.m_Missing = new ReplaceMissingValues();
  this.m_Filter = new Center();
}
示例9: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer
 * that are not being set via options.
 *
 * The training data is copied, its class index is unset, and missing values
 * are replaced before the per-attribute min/max arrays are filled and
 * doEM() is run.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been
 * generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // work on a copy without class attribute, with missing values replaced
  m_replaceMissing = new ReplaceMissingValues();
  Instances workingCopy = new Instances(data);
  workingCopy.setClassIndex(-1);
  m_replaceMissing.setInputFormat(workingCopy);
  m_theInstances = weka.filters.Filter.useFilter(workingCopy, m_replaceMissing);

  // calculate min and max values for attributes
  final int attributeCount = m_theInstances.numAttributes();
  m_minValues = new double[attributeCount];
  m_maxValues = new double[attributeCount];
  for (int att = 0; att < attributeCount; att++) {
    // NaN marks "no value seen yet" for both bounds
    m_maxValues[att] = Double.NaN;
    m_minValues[att] = Double.NaN;
  }
  for (int row = 0; row < m_theInstances.numInstances(); row++) {
    updateMinMax(m_theInstances.instance(row));
  }

  doEM();

  // save memory: keep only the header information
  m_theInstances = new Instances(m_theInstances, 0);
}
示例10: buildClassifier
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the logistic regression using LogitBoost.
 *
 * The training data is copied, instances with a missing class are removed,
 * missing values are replaced and nominal attributes are converted before
 * the LogisticBase model is configured from the member settings and built.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // work on a copy and drop instances with missing class
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // replace missing values
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);

  // convert nominal attributes
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);

  // create actual logistic model and pass the configured settings on to it
  m_boostedModel = new LogisticBase(
    m_numBoostingIterations,
    m_useCrossValidation,
    m_errorOnProbabilities);
  m_boostedModel.setMaxIterations(m_maxBoostingIterations);
  m_boostedModel.setHeuristicStop(m_heuristicStop);
  m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
  m_boostedModel.setUseAIC(m_useAIC);

  // build logistic model
  m_boostedModel.buildClassifier(train);
}
示例11: getConfiguredFilterVariant
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Creates a configured MultiFilter (variant) that chains a
 * ReplaceMissingValues filter followed by a Center filter.
 *
 * @return the configured MultiFilter
 */
public Filter getConfiguredFilterVariant() {
  MultiFilter variant = new MultiFilter();
  variant.setFilters(new Filter[] {
    new ReplaceMissingValues(),
    new Center()
  });
  return variant;
}
示例12: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * The training data is copied, its class index is unset, and missing values
 * are replaced before the per-attribute min/max arrays are filled and
 * doEM() is run. The training flag is raised for the duration of the call.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_training = true;

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // work on a copy without class attribute, with missing values replaced
  m_replaceMissing = new ReplaceMissingValues();
  Instances workingCopy = new Instances(data);
  workingCopy.setClassIndex(-1);
  m_replaceMissing.setInputFormat(workingCopy);
  m_theInstances = weka.filters.Filter.useFilter(workingCopy, m_replaceMissing);

  // calculate min and max values for attributes
  final int attributeCount = m_theInstances.numAttributes();
  m_minValues = new double[attributeCount];
  m_maxValues = new double[attributeCount];
  for (int att = 0; att < attributeCount; att++) {
    // NaN marks "no value seen yet" for both bounds
    m_maxValues[att] = Double.NaN;
    m_minValues[att] = Double.NaN;
  }
  for (int row = 0; row < m_theInstances.numInstances(); row++) {
    updateMinMax(m_theInstances.instance(row));
  }

  doEM();

  // save memory: keep only the header information
  m_theInstances = new Instances(m_theInstances, 0);

  m_training = false;
}
示例13: cleanUpData
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Cleans up data: stores it in the data member, converts it with a
 * NominalToBinary filter, replaces missing values, and finally removes all
 * instances whose class value is missing. Both filters are kept in member
 * fields.
 *
 * @param data data to be cleaned up
 * @throws Exception if an error occurs
 */
private void cleanUpData(Instances data) throws Exception {
  this.m_Data = data;

  // step 1: nominal -> binary
  this.m_TransformFilter = new NominalToBinary();
  this.m_TransformFilter.setInputFormat(this.m_Data);
  this.m_Data = Filter.useFilter(this.m_Data, this.m_TransformFilter);

  // step 2: fill in missing values
  this.m_MissingFilter = new ReplaceMissingValues();
  this.m_MissingFilter.setInputFormat(this.m_Data);
  this.m_Data = Filter.useFilter(this.m_Data, this.m_MissingFilter);

  // step 3: drop instances without a class value
  this.m_Data.deleteWithMissingClass();
}
示例14: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the clusterer from the supplied data. When the data contains no
 * instances only the bookkeeping structures are initialized; otherwise
 * missing values are replaced (unless disabled), the data is shuffled and
 * every instance is passed to updateClusterer() before updateFinished() is
 * called.
 *
 * @param data the training data (may contain zero instances)
 * @throws Exception if a filter or one of the update steps fails
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_t1 = m_userT1;
  m_t2 = m_userT2;

  final boolean noDataWithHeuristicT2 =
    data.numInstances() == 0 && m_userT2 < 0;
  if (noDataWithHeuristicT2) {
    System.err.println(
      "The heuristic for setting T2 based on std. dev. can't be used when "
        + "running in incremental mode. Using default of 1.0.");
    m_t2 = 1.0;
  }

  m_canopyT2Density = new ArrayList<double[]>();
  m_canopyCenters = new ArrayList<double[][]>();
  m_canopyNumMissingForNumerics = new ArrayList<double[]>();

  if (data.numInstances() > 0) {
    if (!m_dontReplaceMissing) {
      m_missingValuesReplacer = new ReplaceMissingValues();
      m_missingValuesReplacer.setInputFormat(data);
      data = Filter.useFilter(data, m_missingValuesReplacer);
    }

    // discard the first ten draws of the generator before shuffling
    Random rng = new Random(getSeed());
    int discarded = 0;
    while (discarded < 10) {
      rng.nextInt();
      discarded++;
    }
    data.randomize(rng);

    if (m_userT2 < 0) {
      setT2T1BasedOnStdDev(data);
    }
  }

  // a positive user T1 is taken as is; otherwise its negation scales T2
  if (m_userT1 > 0) {
    m_t1 = m_userT1;
  } else {
    m_t1 = -m_userT1 * m_t2;
  }
  // if (m_t1 < m_t2) {
  // throw new Exception("T1 can't be less than T2. Computed T2 as " + m_t2
  // + " T1 is requested to be " + m_t1);
  // }

  m_distanceFunction.setInstances(data);
  m_canopies = new Instances(data, 0);
  if (data.numInstances() > 0) {
    m_trainingData = new Instances(data);
  }

  for (int idx = 0; idx < data.numInstances(); idx++) {
    if (getDebug() && idx % m_periodicPruningRate == 0) {
      System.err.println("Processed: " + idx);
    }
    updateClusterer(data.instance(idx));
  }

  updateFinished();
}
示例15: buildClusterer
import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * After replacing missing values, one instance is picked at random as the
 * first centroid; each further centroid is the instance returned by
 * farthestAway() given the minimum distances tracked so far. The number of
 * clusters is capped at the number of instances.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  m_ReplaceMissingFilter.setInputFormat(data);
  m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);

  initMinMax(m_instances);

  m_ClusterCentroids = new Instances(m_instances, m_NumClusters);

  final int instanceCount = m_instances.numInstances();
  Random rng = new Random(getSeed());
  boolean[] chosen = new boolean[instanceCount];
  double[] nearest = new double[instanceCount];
  java.util.Arrays.fill(nearest, Double.MAX_VALUE);

  // first centroid: a randomly drawn instance
  int seedIndex = rng.nextInt(instanceCount);
  m_ClusterCentroids.add(m_instances.instance(seedIndex));
  chosen[seedIndex] = true;
  updateMinDistance(nearest, chosen, m_instances,
    m_instances.instance(seedIndex));

  // cannot have more clusters than instances
  if (m_NumClusters > instanceCount) {
    m_NumClusters = instanceCount;
  }

  // remaining centroids: repeatedly take the instance chosen by farthestAway()
  int picked = 1;
  while (picked < m_NumClusters) {
    int candidate = farthestAway(nearest, chosen);
    m_ClusterCentroids.add(m_instances.instance(candidate));
    chosen[candidate] = true;
    updateMinDistance(nearest, chosen, m_instances,
      m_instances.instance(candidate));
    picked++;
  }

  // keep only the header information
  m_instances = new Instances(m_instances, 0);
}