当前位置: 首页>>代码示例>>Java>>正文


Java ReplaceMissingValues类代码示例

本文整理汇总了Java中weka.filters.unsupervised.attribute.ReplaceMissingValues的典型用法代码示例。如果您正苦于以下问题：Java ReplaceMissingValues类的具体用法？Java ReplaceMissingValues怎么用？Java ReplaceMissingValues使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


ReplaceMissingValues类属于weka.filters.unsupervised.attribute包,在下文中一共展示了ReplaceMissingValues类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: preProcessData

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Runs a data set through up to three optional Weka filters, always in the
 * order impute, standardize, binarize.
 *
 * @param data the data to transform
 * @param shouldImpute if true, filter the data with {@code ReplaceMissingValues}
 * @param shouldStandardize if true, filter the data with {@code Standardize}
 * @param shouldBinarize if true, filter the data with {@code NominalToBinary}
 *          configured with the {@code -N} option
 * @return the transformed data; the {@code data} argument itself when every
 *         flag is false
 * @throws Exception if configuring or applying one of the filters fails
 */
public static Instances preProcessData(Instances data, boolean shouldImpute,
        boolean shouldStandardize, boolean shouldBinarize) throws Exception {
    if (shouldImpute) {
        Filter impute = new ReplaceMissingValues();
        impute.setInputFormat(data);
        data = Filter.useFilter(data, impute);
    }
    if (shouldStandardize) {
        Filter standardize = new Standardize();
        standardize.setInputFormat(data);
        data = Filter.useFilter(data, standardize);
    }
    if (shouldBinarize) {
        Filter binarize = new NominalToBinary();
        // make resulting binary attrs nominal, not numeric
        // Options go first: Weka's usage notes require setInputFormat to be the
        // last call before the filter is applied, so the filter sees its final
        // configuration when the input format is set.
        binarize.setOptions(new String[] { "-N" });
        binarize.setInputFormat(data);
        data = Filter.useFilter(data, binarize);
    }
    return data;
}
 
开发者ID:christopher-beckham,项目名称:weka-pyscript,代码行数:31,代码来源:Utility.java

示例2: preProcessData

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Pre-processes a data set through a fixed pipeline: remove useless
 * attributes, replace missing values, discretize attributes, then keep only
 * the attributes selected by an information-gain ranking.
 *
 * NOTE(review): the attribute selection step presumably needs the class
 * index to be set on {@code data} by the caller - confirm.
 *
 * @param data the data set to pre-process
 * @return the filtered data set reduced to the selected attributes
 * @throws Exception if a filter or the attribute selection fails
 */
public static Instances preProcessData(Instances data) throws Exception{
	
	/* 
	 * Remove useless attributes
	 */
	RemoveUseless removeUseless = new RemoveUseless();
	removeUseless.setOptions(new String[] { "-M", "99" });	// threshold
	removeUseless.setInputFormat(data);
	data = Filter.useFilter(data, removeUseless);

	/* 
	 * Replace missing values
	 */
	ReplaceMissingValues fixMissing = new ReplaceMissingValues();
	fixMissing.setInputFormat(data);
	data = Filter.useFilter(data, fixMissing);

	/* 
	 * Discretize attributes
	 */
	Discretize discretizeNumeric = new Discretize();
	discretizeNumeric.setOptions(new String[] {
			"-O",
			"-M",  "-1.0", 
			"-B",  "4",  // no of bins
			"-R",  "first-last"}); //range of attributes
	// Apply the discretizer configured above; running the missing-value
	// filter again here would leave the discretizer unused.
	discretizeNumeric.setInputFormat(data);
	data = Filter.useFilter(data, discretizeNumeric);

	/* 
	 * Select only informative attributes
	 */
	InfoGainAttributeEval eval = new InfoGainAttributeEval();
	Ranker search = new Ranker();
	search.setOptions(new String[] { "-T", "0.001" });	// information gain threshold
	AttributeSelection attSelect = new AttributeSelection();
	attSelect.setEvaluator(eval);
	attSelect.setSearch(search);
	
	// apply attribute selection
	attSelect.SelectAttributes(data);
	
	// remove the attributes not selected in the last run
	data = attSelect.reduceDimensionality(data);

	return data;
}
 
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:52,代码来源:KddCup.java

示例3: normalizeDataSet

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Prepares a data set so that its variables are nominal and it has no
 * missing values: a Discretize filter is applied when a non-nominal
 * attribute is found, and a ReplaceMissingValues filter is always applied.
 * <p>
 * Updates m_nNonDiscreteAttribute, m_MissingValuesFilter and, when
 * discretization runs, m_DiscretizeFilter.
 * 
 * @param instances data set to check and quantize and/or fill in missing
 *          values
 * @return the filtered instances
 * @throws Exception if a filter (Discretize, ReplaceMissingValues) fails
 */
protected Instances normalizeDataSet(Instances instances) throws Exception {

  // remember the index of the last non-nominal attribute seen (-1 if none)
  m_nNonDiscreteAttribute = -1;
  for (Enumeration<Attribute> attrs = instances.enumerateAttributes(); attrs
    .hasMoreElements();) {
    Attribute candidate = attrs.nextElement();
    if (candidate.type() == Attribute.NOMINAL) {
      continue;
    }
    m_nNonDiscreteAttribute = candidate.index();
  }

  boolean needsDiscretizing = (m_nNonDiscreteAttribute > -1)
    && (instances.attribute(m_nNonDiscreteAttribute).type() != Attribute.NOMINAL);
  if (needsDiscretizing) {
    m_DiscretizeFilter = new Discretize();
    m_DiscretizeFilter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_DiscretizeFilter);
  }

  // missing values are replaced regardless of whether discretization ran
  m_MissingValuesFilter = new ReplaceMissingValues();
  m_MissingValuesFilter.setInputFormat(instances);
  return Filter.useFilter(instances, m_MissingValuesFilter);
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:33,代码来源:BayesNet.java

示例4: initializeMembers

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Initializes the members of this classifier: clears the data set
 * references, installs a fresh ReplaceMissingValues filter, configures a new
 * IBk base classifier and registers the additional measure name.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // scalar settings
  m_KNNdetermined  = -1;
  m_UseNaiveSearch = false;

  // data set references start out empty
  m_NeighborsTestset = null;
  m_LabeledTestset   = null;
  m_TrainsetNew      = null;
  m_TestsetNew       = null;

  m_Missing = new ReplaceMissingValues();

  // configure the base classifier before publishing it to the field
  IBk base = new IBk();
  base.setKNN(10);
  base.setCrossValidate(true);
  base.setWindowSize(0);
  base.setMeanSquared(false);
  m_Classifier = base;

  // keep the local KNN setting in sync with the classifier
  m_KNN = base.getKNN();

  m_AdditionalMeasures.add("measureDeterminedKNN");
}
 
开发者ID:fracpete,项目名称:collective-classification-weka-package,代码行数:26,代码来源:CollectiveIBk.java

示例5: buildClusterer

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the clusterer from the given training data. All fields that are not
 * set via options are initialized here.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_training = true;

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // filter a copy with the class index unset, leaving the caller's data alone
  m_replaceMissing = new ReplaceMissingValues();
  Instances unlabelled = new Instances(data);
  unlabelled.setClassIndex(-1);
  m_replaceMissing.setInputFormat(unlabelled);
  m_theInstances = weka.filters.Filter.useFilter(unlabelled, m_replaceMissing);
  unlabelled = null;

  // seed the per-attribute ranges with extreme values, then scan the data
  final int attributeCount = m_theInstances.numAttributes();
  m_minValues = new double[attributeCount];
  m_maxValues = new double[attributeCount];
  for (int att = 0; att < attributeCount; att++) {
    m_minValues[att] = Double.MAX_VALUE;
    m_maxValues[att] = -Double.MAX_VALUE;
  }
  int row = 0;
  while (row < m_theInstances.numInstances()) {
    updateMinMax(m_theInstances.instance(row));
    row++;
  }

  doEM();

  // save memory: keep only the header of the training data
  m_theInstances = new Instances(m_theInstances, 0);
  m_training = false;
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:41,代码来源:EM.java

示例6: buildClassifier

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the logistic regression using LogitBoost. The training data is
 * copied, stripped of instances with a missing class, passed through the
 * ReplaceMissingValues and NominalToBinary filters, and then handed to a
 * newly configured LogisticBase model.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // work on a copy without missing-class instances
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // fill in missing values
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);

  // turn nominal attributes into binary ones
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);

  // set up the boosted logistic model from this classifier's settings
  m_boostedModel = new LogisticBase(
    m_numBoostingIterations,
    m_useCrossValidation,
    m_errorOnProbabilities);
  m_boostedModel.setMaxIterations(m_maxBoostingIterations);
  m_boostedModel.setHeuristicStop(m_heuristicStop);
  m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
  m_boostedModel.setUseAIC(m_useAIC);
  m_boostedModel.setNumDecimalPlaces(m_numDecimalPlaces);

  // train it on the filtered data
  m_boostedModel.buildClassifier(train);
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:39,代码来源:SimpleLogistic.java

示例7: initializeMembers

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Initializes the members of this classifier: resets the new train/test set
 * references and installs a ReplaceMissingValues filter together with a
 * YATSI classifier.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // no new train/test sets yet
  m_TrainsetNew = null;
  m_TestsetNew  = null;

  // filter and classifier used by default
  m_Filter     = new ReplaceMissingValues();
  m_Classifier = new YATSI();
}
 
开发者ID:fracpete,项目名称:collective-classification-weka-package,代码行数:13,代码来源:FilteredCollectiveClassifier.java

示例8: PLSFilter

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Default constructor. Sets up the pre-processing members: a
 * ReplaceMissingValues filter in m_Missing and a Center filter in m_Filter.
 */
public PLSFilter() {
  super();

  // pre-processing filters
  m_Filter  = new Center();
  m_Missing = new ReplaceMissingValues();
}
 
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:11,代码来源:PLSFilter.java

示例9: buildClusterer

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the clusterer from the given training data. All fields that are
 * not set via options are initialized here.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the clusterer has not been 
 * generated successfully
 */
public void buildClusterer(Instances data) throws Exception {

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // replace missing values on a copy that has its class index unset
  m_replaceMissing = new ReplaceMissingValues();
  Instances copy = new Instances(data);
  copy.setClassIndex(-1);
  m_replaceMissing.setInputFormat(copy);
  m_theInstances = weka.filters.Filter.useFilter(copy, m_replaceMissing);
  copy = null;

  // per-attribute min/max start out as NaN and are then updated per instance
  final int numAtts = m_theInstances.numAttributes();
  m_minValues = new double[numAtts];
  m_maxValues = new double[numAtts];
  for (int att = 0; att < numAtts; att++) {
    m_maxValues[att] = Double.NaN;
    m_minValues[att] = Double.NaN;
  }
  for (int idx = 0; idx < m_theInstances.numInstances(); ++idx) {
    updateMinMax(m_theInstances.instance(idx));
  }

  doEM();

  // save memory: retain the header only
  m_theInstances = new Instances(m_theInstances, 0);
}
 
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:39,代码来源:EM.java

示例10: buildClassifier

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the logistic regression using LogitBoost. A copy of the training
 * data has its missing-class instances removed and is run through the
 * ReplaceMissingValues and NominalToBinary filters before the LogisticBase
 * model is built on it.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // copy the data and drop instances with a missing class
  Instances filtered = new Instances(data);
  filtered.deleteWithMissingClass();

  // replace missing values
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(filtered);
  filtered = Filter.useFilter(filtered, m_ReplaceMissingValues);

  // convert nominal attributes
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(filtered);
  filtered = Filter.useFilter(filtered, m_NominalToBinary);

  // create and configure the actual logistic model
  m_boostedModel = new LogisticBase(
    m_numBoostingIterations,
    m_useCrossValidation,
    m_errorOnProbabilities);
  m_boostedModel.setMaxIterations(m_maxBoostingIterations);
  m_boostedModel.setHeuristicStop(m_heuristicStop);
  m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
  m_boostedModel.setUseAIC(m_useAIC);

  // build logistic model
  m_boostedModel.buildClassifier(filtered);
}
 
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:35,代码来源:SimpleLogistic.java

示例11: getConfiguredFilterVariant

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Creates a configured MultiFilter (variant) holding, in array order, a
 * ReplaceMissingValues filter and a Center filter.
 *
 * @return the configured MultiFilter
 */
public Filter getConfiguredFilterVariant() {
  MultiFilter variant = new MultiFilter();
  variant.setFilters(new Filter[] {
    new ReplaceMissingValues(),
    new Center()
  });
  return variant;
}
 
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:13,代码来源:MultiFilterTest.java

示例12: buildClusterer

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the clusterer from the given training data. All fields that are not
 * set via options are initialized here.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_training = true;

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // missing values are replaced on a copy whose class index is unset
  m_replaceMissing = new ReplaceMissingValues();
  Instances classless = new Instances(data);
  classless.setClassIndex(-1);
  m_replaceMissing.setInputFormat(classless);
  m_theInstances = weka.filters.Filter.useFilter(classless, m_replaceMissing);
  classless = null;

  // min and max per attribute: NaN until updateMinMax has seen the instances
  final int width = m_theInstances.numAttributes();
  m_minValues = new double[width];
  m_maxValues = new double[width];
  for (int col = 0; col < width; col++) {
    m_maxValues[col] = Double.NaN;
    m_minValues[col] = Double.NaN;
  }
  int next = 0;
  while (next < m_theInstances.numInstances()) {
    updateMinMax(m_theInstances.instance(next));
    next++;
  }

  doEM();

  // save memory: keep the header only
  m_theInstances = new Instances(m_theInstances, 0);
  m_training = false;
}
 
开发者ID:umple,项目名称:umple,代码行数:40,代码来源:EM.java

示例13: cleanUpData

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Cleans up data and stores the result in m_Data: applies NominalToBinary,
 * then ReplaceMissingValues, and finally deletes the instances with a
 * missing class. The filters are kept in m_TransformFilter and
 * m_MissingFilter.
 *
 * @param data data to be cleaned up
 * @throws Exception if an error occurs
 */
private void cleanUpData(Instances data) throws Exception {

  this.m_Data = data;

  // step 1: nominal -> binary
  this.m_TransformFilter = new NominalToBinary();
  this.m_TransformFilter.setInputFormat(this.m_Data);
  this.m_Data = Filter.useFilter(this.m_Data, this.m_TransformFilter);

  // step 2: replace missing values
  this.m_MissingFilter = new ReplaceMissingValues();
  this.m_MissingFilter.setInputFormat(this.m_Data);
  this.m_Data = Filter.useFilter(this.m_Data, this.m_MissingFilter);

  // step 3: drop instances that have no class value
  this.m_Data.deleteWithMissingClass();
}
 
开发者ID:williamClanton,项目名称:jbossBA,代码行数:18,代码来源:LeastMedSq.java

示例14: buildClusterer

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the clusterer from the given data. The T1/T2 values are
 * initialized from the user-supplied settings, the canopy bookkeeping lists
 * are reset, and each instance in the data is passed to
 * {@code updateClusterer} before {@code updateFinished} is called.
 * <p>
 * When the data is non-empty it is, unless disabled, filtered with
 * ReplaceMissingValues and then randomized using the configured seed. When
 * the data is empty and the user T2 is negative, a warning is printed and T2
 * falls back to 1.0.
 *
 * @param data the training data (may contain zero instances)
 * @throws Exception if filtering or clustering fails
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  m_t1 = m_userT1;
  m_t2 = m_userT2;

  if (data.numInstances() == 0 && m_userT2 < 0) {
    System.err
      .println("The heuristic for setting T2 based on std. dev. can't be used when "
        + "running in incremental mode. Using default of 1.0.");
    m_t2 = 1.0;
  }

  // start with empty canopy bookkeeping
  m_canopyCenters = new ArrayList<double[][]>();
  m_canopyT2Density = new ArrayList<double[]>();
  m_canopyNumMissingForNumerics = new ArrayList<double[]>();

  if (data.numInstances() > 0) {
    if (!m_dontReplaceMissing) {
      m_missingValuesReplacer = new ReplaceMissingValues();
      m_missingValuesReplacer.setInputFormat(data);
      data = Filter.useFilter(data, m_missingValuesReplacer);
    }

    // discard the first ten draws before shuffling the data
    Random rng = new Random(getSeed());
    for (int discarded = 0; discarded < 10; discarded++) {
      rng.nextInt();
    }
    data.randomize(rng);

    if (m_userT2 < 0) {
      setT2T1BasedOnStdDev(data);
    }
  }

  // a positive user T1 is used as is; otherwise T1 is -userT1 times T2
  if (m_userT1 > 0) {
    m_t1 = m_userT1;
  } else {
    m_t1 = -m_userT1 * m_t2;
  }

  m_distanceFunction.setInstances(data);

  m_canopies = new Instances(data, 0);
  if (data.numInstances() > 0) {
    m_trainingData = new Instances(data);
  }

  int position = 0;
  while (position < data.numInstances()) {
    if (getDebug() && position % m_periodicPruningRate == 0) {
      System.err.println("Processed: " + position);
    }
    updateClusterer(data.instance(position));
    position++;
  }

  updateFinished();
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:55,代码来源:Canopy.java

示例15: buildClusterer

import weka.filters.unsupervised.attribute.ReplaceMissingValues; //导入依赖的package包/类
/**
 * Builds the clusterer from the given training data. All fields that are not
 * set via options are initialized here.
 * <p>
 * The first centroid is an instance drawn with a seeded random generator;
 * each further centroid is the instance returned by {@code farthestAway}
 * for the current minimum-distance table. The number of clusters is capped
 * at the number of instances.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  m_ReplaceMissingFilter.setInputFormat(data);
  m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);

  initMinMax(m_instances);

  m_ClusterCentroids = new Instances(m_instances, m_NumClusters);

  final int numInstances = m_instances.numInstances();
  Random rng = new Random(getSeed());
  boolean[] chosen = new boolean[numInstances];
  double[] nearest = new double[numInstances];
  for (int idx = 0; idx < numInstances; idx++) {
    nearest[idx] = Double.MAX_VALUE;
  }

  // first centroid: a random instance
  int seedIndex = rng.nextInt(numInstances);
  m_ClusterCentroids.add(m_instances.instance(seedIndex));
  chosen[seedIndex] = true;
  updateMinDistance(nearest, chosen, m_instances,
    m_instances.instance(seedIndex));

  // never ask for more clusters than there are instances
  m_NumClusters = Math.min(m_NumClusters, numInstances);

  // remaining centroids: repeatedly take the instance farthestAway reports
  int centroidsSoFar = 1;
  while (centroidsSoFar < m_NumClusters) {
    int farthest = farthestAway(nearest, chosen);
    m_ClusterCentroids.add(m_instances.instance(farthest));
    chosen[farthest] = true;
    updateMinDistance(nearest, chosen, m_instances,
      m_instances.instance(farthest));
    centroidsSoFar++;
  }

  // keep only the header of the training data
  m_instances = new Instances(m_instances, 0);
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:56,代码来源:FarthestFirst.java


注:本文中的weka.filters.unsupervised.attribute.ReplaceMissingValues类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。