当前位置: 首页>>代码示例>>Java>>正文


Java DefaultOptionCreator类代码示例

本文整理汇总了Java中org.apache.mahout.common.commandline.DefaultOptionCreator的典型用法代码示例。如果您正苦于以下问题:Java DefaultOptionCreator类的具体用法?Java DefaultOptionCreator怎么用?Java DefaultOptionCreator使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


DefaultOptionCreator类属于org.apache.mahout.common.commandline包,在下文中一共展示了DefaultOptionCreator类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the clustering post processor. The input to the post
 * processor is the output path that was given to the clustering job.
 *
 * @param args command-line arguments (input path, output path, execution method)
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
@Override
public int run(String[] args) throws Exception {
  
  addInputOption();
  addOutputOption();
  // -xm/--method: selects "sequential" or "mapreduce" execution.
  addOption(DefaultOptionCreator.methodOption().create());

  // parseArguments returns null when parsing fails or help was printed.
  if (parseArguments(args) == null) {
    return -1;
  }
  
  Path input = getInputPath();
  Path output = getOutputPath();

  // Fall back to a default Hadoop configuration when none was injected.
  if (getConf() == null) {
    setConf(new Configuration());
  }
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
    DefaultOptionCreator.SEQUENTIAL_METHOD);
  // Delegate to the overload that performs the actual post-processing.
  run(input, output, runSequential);
  return 0;
  
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:28,代码来源:ClusterOutputPostProcessorDriver.java

示例2: main

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Command-line entry point: builds the option group (input, output, vector
 * implementation, help), parses the arguments, and prints help on request or
 * on a parse error.
 *
 * NOTE(review): the actual job submission (runJob) is commented out below, so
 * as written this main only parses and validates the command line.
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();
  
  // Input/output are optional here; defaults are supplied at getValue() time.
  Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
  Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
  Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
    abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
    "The vector implementation to use.").withShortName("v").create();
  
  Option helpOpt = DefaultOptionCreator.helpOption();
  
  Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
    vectorOpt).withOption(helpOpt).create();
	 
  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    
    // Defaults: read from "testdata", write to "output", use a sparse vector.
    Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
    Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
    String vectorClassName = cmdLine.getValue(vectorOpt,
       "org.apache.mahout.math.RandomAccessSparseVector").toString();
    //runJob(input, output, vectorClassName);
  } catch (OptionException e) {
    // Bad arguments: log and show usage instead of propagating.
    InputDriver.log.error("Exception parsing command line: ", e);
    CommandLineUtil.printHelp(group);
  }
}
 
开发者ID:PacktPublishing,项目名称:HBase-High-Performance-Cookbook,代码行数:36,代码来源:InputDriver.java

示例3: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the Eigencuts driver: registers the clustering options,
 * parses the command line, optionally clears the output path, then delegates
 * to the worker overload of run().
 *
 * @param arg0 raw command-line arguments
 * @return 0 in all completed cases (including parse failure / --help)
 */
@Override
public int run(String[] arg0) throws Exception {

  // set up command line arguments
  addOption("half-life", "b", "Minimal half-life threshold", true);
  addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
  addOption("epsilon", "e", "Half-life threshold coefficient", Double.toString(EPSILON_DEFAULT));
  addOption("tau", "t", "Threshold for cutting affinities", Double.toString(TAU_DEFAULT));
  addOption("eigenrank", "k", "Number of top eigenvectors to use", true);
  addOption(DefaultOptionCreator.inputOption().create());
  addOption(DefaultOptionCreator.outputOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  Map<String, List<String>> parsedArgs = parseArguments(arg0);
  if (parsedArgs == null) {
    // NOTE(review): returns 0 even on a parse error; parseArguments also
    // returns null after printing --help, so treat this as a benign exit.
    return 0;
  }

  // read in the command line values
  Path input = getInputPath();
  Path output = getOutputPath();
  // --overwrite: delete any previous output before running.
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(getConf(), output);
  }
  int dimensions = Integer.parseInt(getOption("dimensions"));
  double halflife = Double.parseDouble(getOption("half-life"));
  double epsilon = Double.parseDouble(getOption("epsilon"));
  double tau = Double.parseDouble(getOption("tau"));
  int eigenrank = Integer.parseInt(getOption("eigenrank"));

  run(getConf(), input, output, eigenrank, dimensions, halflife, epsilon, tau);

  return 0;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:34,代码来源:EigencutsDriver.java

示例4: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the spectral k-means driver: registers the matrix /
 * clustering options, parses the command line, optionally clears the output
 * path, and delegates to the worker overload of run().
 *
 * @param arg0 raw command-line arguments
 * @return 0 in all completed cases (including parse failure / --help)
 */
@Override
public int run(String[] arg0) throws IOException, ClassNotFoundException, InterruptedException {
  // set up command line options
  Configuration conf = getConf();
  addInputOption();
  addOutputOption();
  addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
  addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
  addOption(DefaultOptionCreator.distanceMeasureOption().create());
  addOption(DefaultOptionCreator.convergenceOption().create());
  addOption(DefaultOptionCreator.maxIterationsOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  Map<String, List<String>> parsedArgs = parseArguments(arg0);
  if (parsedArgs == null) {
    // Null means parse failure or --help; exit without running the job.
    return 0;
  }

  Path input = getInputPath();
  Path output = getOutputPath();
  // --overwrite: delete any previous output before running.
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(conf, output);
  }
  int numDims = Integer.parseInt(getOption("dimensions"));
  int clusters = Integer.parseInt(getOption("clusters"));
  // The distance measure is instantiated reflectively from its class name.
  String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
  double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
  int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

  run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations);

  return 0;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:34,代码来源:SpectralKMeansDriver.java

示例5: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the Cluster Classification Driver: classifies input
 * vectors against pre-computed cluster centroids.
 *
 * @param args command-line arguments (input, output, method, clustersIn)
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
@Override
public int run(String[] args) throws Exception {
  
  addInputOption();
  addOutputOption();
  // -xm/--method: "sequential" or "mapreduce" execution.
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.")
      .create());
  
  if (parseArguments(args) == null) {
    return -1;
  }
  
  Path input = getInputPath();
  Path output = getOutputPath();
  
  // Fall back to a default Hadoop configuration when none was injected.
  if (getConf() == null) {
    setConf(new Configuration());
  }
  Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
      DefaultOptionCreator.SEQUENTIAL_METHOD);
  
  // NOTE(review): OUTLIER_THRESHOLD is never registered via addOption() in this
  // method, so this branch appears unreachable here — presumably kept for parity
  // with other drivers; confirm against AbstractJob before relying on it.
  double clusterClassificationThreshold = 0.0;
  if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
    clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
  }
  
  run(input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
  
  return 0;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:37,代码来源:ClusterClassificationDriver.java

示例6: getAnalyzerClassFromOption

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Resolves the Lucene Analyzer class selected on the command line, falling
 * back to {@code DefaultAnalyzer} when no analyzer option was given.
 *
 * @return the analyzer class to use
 * @throws ClassNotFoundException if the named class cannot be loaded
 */
protected Class<? extends Analyzer> getAnalyzerClassFromOption() throws ClassNotFoundException {
  if (!hasOption(DefaultOptionCreator.ANALYZER_NAME_OPTION)) {
    return DefaultAnalyzer.class;
  }
  String analyzerName = getOption(DefaultOptionCreator.ANALYZER_NAME_OPTION);
  Class<? extends Analyzer> analyzerClass = Class.forName(analyzerName).asSubclass(Analyzer.class);
  // Fail fast: there is no point in returning a class that cannot actually
  // be instantiated later on.
  ClassUtils.instantiateAs(analyzerClass, Analyzer.class);
  return analyzerClass;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:12,代码来源:AbstractJob.java

示例7: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the ImageToText driver: converts clustered image vectors
 * to text output using the supplied centroids.
 *
 * @param args command-line arguments (input, output, method, clustersIn)
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
public int run(String[] args) throws Exception {

  addInputOption();
  addOutputOption();
  // -xm/--method: "sequential" or "mapreduce" execution.
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(DefaultOptionCreator
      .clustersInOption()
      .withDescription(
          "The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.")
      .create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path input = getInputPath();
  Path output = getOutputPath();

  // Fall back to a default Hadoop configuration when none was injected.
  if (getConf() == null) {
    setConf(new Configuration());
  }
  Path clustersIn = new Path(
      getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
      .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  // NOTE(review): the original also parsed DefaultOptionCreator.OUTLIER_THRESHOLD
  // into a local that was never passed to run(...); since that option is never
  // registered via addOption() in this method, the branch was dead code and has
  // been removed.

  run(getConf(), input, clustersIn, output, runSequential);

  return 0;
}
 
开发者ID:pgorecki,项目名称:visearch,代码行数:40,代码来源:ImageToTextDriver.java

示例8: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the Cluster Classification Driver: classifies input
 * vectors against pre-computed cluster centroids, with an optional outlier
 * threshold.
 *
 * @param args command-line arguments (input, output, method, clustersIn)
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
public int run(String[] args) throws Exception {

	addInputOption();
	addOutputOption();
	// -xm/--method: "sequential" or "mapreduce" execution.
	addOption(DefaultOptionCreator.methodOption().create());
	addOption(DefaultOptionCreator
			.clustersInOption()
			.withDescription(
					"The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.")
			.create());

	if (parseArguments(args) == null) {
		return -1;
	}

	Path input = getInputPath();
	Path output = getOutputPath();

	// Fall back to a default Hadoop configuration when none was injected.
	if (getConf() == null) {
		setConf(new Configuration());
	}
	Path clustersIn = new Path(
			getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
	boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
			.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

	// 0.0 disables outlier removal unless a threshold option was supplied.
	double clusterClassificationThreshold = 0.0;
	if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
		clusterClassificationThreshold = Double
				.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
	}

	run(getConf(), input, clustersIn, output,
			clusterClassificationThreshold, true, runSequential);

	return 0;
}
 
开发者ID:pgorecki,项目名称:visearch,代码行数:41,代码来源:MyClusterClassificationDriver.java

示例9: complete

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Trident aggregator completion hook: runs ball k-means over the collected
 * centroids and emits the resulting cluster centers.
 *
 * @param list the centroids accumulated during this batch
 * @param tridentCollector collector that receives one Values per final centroid
 */
@Override
public void complete(List<Centroid> list, TridentCollector tridentCollector) {
  // k comes from the topology config; 100 is the maximum number of iterations.
  BallKMeans clusterer = new BallKMeans(StreamingKMeansBolt.searcherFromConfiguration(_conf),
      ((Number) _conf.get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)).intValue(), 100);
  clusterer.cluster(list);

  // Iterating the clusterer yields the final centroids.
  for (Centroid centroid : clusterer) {
    tridentCollector.emit(new Values(centroid));
  }
}
 
开发者ID:dfilimon,项目名称:streaming-storm,代码行数:11,代码来源:BallKMeansAggregator.java

示例10: searcherFromConfiguration

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Instantiates a searcher from a given configuration.
 *
 * The distance measure class name, projection count, search size and searcher
 * class name are all read from the (raw) configuration map; the searcher is
 * then constructed reflectively with the constructor signature appropriate to
 * its concrete type.
 *
 * @param conf the configuration
 * @return the instantiated searcher
 * @throws RuntimeException if the distance measure class cannot be instantiated
 * @throws IllegalStateException if an unknown searcher class was requested
 */
public static UpdatableSearcher searcherFromConfiguration(Map conf) {
  DistanceMeasure distanceMeasure;
  String distanceMeasureClass = (String) conf.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  try {
    distanceMeasure = (DistanceMeasure)Class.forName(distanceMeasureClass).newInstance();
  } catch (Exception e) {
    // Wrap reflection failures (missing class, no default ctor, access) uniformly.
    throw new RuntimeException("Failed to instantiate distanceMeasure", e);
  }

  Integer numProjections = ((Number) conf.get(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION)).intValue();
  Integer searchSize =  ((Number) conf.get(StreamingKMeansDriver.SEARCH_SIZE_OPTION)).intValue();

  String searcherClass = (String) conf.get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION);

  // Dispatch on the searcher class name: each family has a different
  // constructor signature, so the argument lists below must match exactly.
  if (searcherClass.equals(BruteSearch.class.getName())) {
    // BruteSearch(DistanceMeasure)
    return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
        new Class[]{DistanceMeasure.class}, new Object[]{distanceMeasure});
  } else if (searcherClass.equals(FastProjectionSearch.class.getName()) ||
      searcherClass.equals(ProjectionSearch.class.getName())) {
    // (DistanceMeasure, numProjections, searchSize)
    return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
        new Class[]{DistanceMeasure.class, int.class, int.class},
        new Object[]{distanceMeasure, numProjections, searchSize});
  } else if (searcherClass.equals(LocalitySensitiveHashSearch.class.getName())) {
    // (DistanceMeasure, searchSize)
    return ClassUtils.instantiateAs(searcherClass, LocalitySensitiveHashSearch.class,
        new Class[]{DistanceMeasure.class, int.class},
        new Object[]{distanceMeasure, searchSize});
  } else {
    throw new IllegalStateException("Unknown class instantiation requested");
  }
}
 
开发者ID:dfilimon,项目名称:streaming-storm,代码行数:38,代码来源:StreamingKMeansBolt.java

示例11: setupConfig

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Test fixture: builds the Storm topology configuration used by the streaming
 * k-means topology test — serializers for Hadoop/Mahout types, synthetic
 * hypercube input parameters, and the streaming k-means options.
 */
@Before
public void setupConfig() {
  _conf = new Config();
  _conf.setNumWorkers(2);

  // Kryo serializers for types Storm cannot serialize by default.
  _conf.registerSerialization(Path.class, FieldSerializer.class);
  _conf.registerSerialization(SequenceFile.Writer.class, FieldSerializer.class);
  _conf.registerSerialization(DenseVector.class, VectorSerializer.class);
  _conf.registerSerialization(Centroid.class, CentroidSerializer.class);

  _conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 5);

  // Synthetic input: 2000 points in a 500-dimensional hypercube.
  _conf.put(HypercubeSpout.NUM_POINTS, 2000);
  _conf.put(HypercubeSpout.NUM_DIMENSION, 500);
  _conf.put(HypercubeSpout.NUM_CLUSTERS, NUM_CLUSTERS);
  _conf.put(HypercubeSpout.RADIUS, 0.0001);
  _conf.put(HypercubeSpout.UNIFORM_FRACTION, 0.0);

  // Streaming k-means options read back by searcherFromConfiguration(...).
  _conf.put(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, SquaredEuclideanDistanceMeasure.class.getName());
  _conf.put(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION, 3);
  _conf.put(StreamingKMeansDriver.SEARCH_SIZE_OPTION, 2);
  _conf.put(StreamingKMeansDriver.SEARCHER_CLASS_OPTION, FastProjectionSearch.class.getName());
  _conf.put(StreamingKMeansDriver.ESTIMATED_NUM_MAP_CLUSTERS, ESTIMATED_NUM_MAP_CLUSTERS);
  _conf.put(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF, 1e-7);

  _conf.put(HypercubeSpout.OUTPUT_PATH, INPUT_PATH);
  _conf.put(LocalSequenceFileWriterBolt.OUTPUT_PATH, OUTPUT_PATH);
}
 
开发者ID:dfilimon,项目名称:streaming-storm,代码行数:29,代码来源:StreamingKMeansTopologyTest.java

示例12: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the SPCA driver: parses the matrix geometry, PCA size,
 * error-sampling, iteration and normalization options, then delegates to the
 * worker overload of run().
 *
 * When no error-sampling rate is given, a default is derived from the row
 * count: 1 for up to 4 digits, then one tenth per extra digit.
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(ROWSOPTION, "rows", "Number of rows");
  addOption(COLSOPTION, "cols", "Number of cols");
  addOption(PRINCIPALSOPTION, "pcs", "Number of principal components");
  addOption(SPLITFACTOROPTION, "sf", "Split each block to increase paralelism");
  addOption(ERRSAMPLE, "errSampleRate",
      "Sampling rate for computing the error (0-1]");
  addOption(MAXITER, "maxIter",
          "Maximum number of iterations before terminating, the default is 3");
  addOption(NORMALIZEOPTION, "normalize",
          "Choose whether you want the input matrix to be  normalized or not, 1 means normalize, 0 means don't normalize");
  if (parseArguments(args) == null) {
    return -1;
  }

  Path input = getInputPath();
  Path output = getOutputPath();
  final int nRows = Integer.parseInt(getOption(ROWSOPTION));
  final int nCols = Integer.parseInt(getOption(COLSOPTION));
  final int nPCs = Integer.parseInt(getOption(PRINCIPALSOPTION));

  // Optional knobs, each with a documented default.
  final int splitFactor = hasOption(SPLITFACTOROPTION)
      ? Integer.parseInt(getOption(SPLITFACTOROPTION, "1"))
      : 1;

  final float errSampleRate;
  if (hasOption(ERRSAMPLE)) {
    errSampleRate = Float.parseFloat(getOption(ERRSAMPLE));
  } else {
    // Derive the default from the magnitude of the row count.
    int length = String.valueOf(nRows).length();
    errSampleRate = length <= 4 ? 1 : (float) (1 / Math.pow(10, length - 4));
    log.warn("error sampling rate set to:  errRate=" + errSampleRate);
  }

  final int maxIterations = hasOption(MAXITER)
      ? Integer.parseInt(getOption(MAXITER))
      : 3;
  final int normalize = hasOption(NORMALIZEOPTION)
      ? Integer.parseInt(getOption(NORMALIZEOPTION))
      : 0;

  Configuration conf = getConf();
  if (conf == null) {
    throw new IOException("No Hadoop configuration present");
  }
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
      .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
  run(conf, input, output, nRows, nCols, nPCs, splitFactor, errSampleRate, maxIterations, normalize, runSequential);
  return 0;
}
 
开发者ID:SiddharthMalhotra,项目名称:sPCA,代码行数:66,代码来源:SPCADriver.java

示例13: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the Dirichlet clustering driver: registers all model /
 * clustering options, parses the command line, derives the model distribution
 * description, and delegates to the worker overload of run().
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.maxIterationsOption().create());
  addOption(DefaultOptionCreator.numClustersOption().withRequired(true).create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  addOption(DefaultOptionCreator.clusteringOption().create());
  addOption(ALPHA_OPTION, "a0", "The alpha0 value for the DirichletDistribution. Defaults to 1.0", "1.0");
  addOption(MODEL_DISTRIBUTION_CLASS_OPTION, "md",
      "The ModelDistribution class name. Defaults to GaussianClusterDistribution",
      GaussianClusterDistribution.class.getName());
  addOption(MODEL_PROTOTYPE_CLASS_OPTION, "mp",
      "The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector",
      RandomAccessSparseVector.class.getName());
  addOption(DefaultOptionCreator.distanceMeasureOption().withRequired(false).create());
  addOption(DefaultOptionCreator.emitMostLikelyOption().create());
  addOption(DefaultOptionCreator.thresholdOption().create());
  addOption(DefaultOptionCreator.methodOption().create());
  
  if (parseArguments(args) == null) {
    return -1;
  }
  
  Path input = getInputPath();
  Path output = getOutputPath();
  // --overwrite: delete any previous output before running.
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(getConf(), output);
  }
  String modelFactory = getOption(MODEL_DISTRIBUTION_CLASS_OPTION);
  String modelPrototype = getOption(MODEL_PROTOTYPE_CLASS_OPTION);
  String distanceMeasure = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  int numModels = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
  int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
  boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
  double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
  double alpha0 = Double.parseDouble(getOption(ALPHA_OPTION));
  boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
      DefaultOptionCreator.SEQUENTIAL_METHOD);
  // Prototype vector cardinality is inferred from the input data.
  int prototypeSize = readPrototypeSize(input);
  
  DistributionDescription description = new DistributionDescription(modelFactory, modelPrototype, distanceMeasure,
      prototypeSize);
  
  run(getConf(), input, output, description, numModels, maxIterations, alpha0, runClustering, emitMostLikely,
      threshold, runSequential);
  return 0;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:50,代码来源:DirichletDriver.java

示例14: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the Canopy clustering driver: registers the distance /
 * threshold options, parses the command line, and delegates to the worker
 * overload of run().
 *
 * t3/t4 default to t1/t2 respectively when not supplied explicitly.
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
@Override
public int run(String[] args) throws Exception {

  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.distanceMeasureOption().create());
  addOption(DefaultOptionCreator.t1Option().create());
  addOption(DefaultOptionCreator.t2Option().create());
  addOption(DefaultOptionCreator.t3Option().create());
  addOption(DefaultOptionCreator.t4Option().create());
  addOption(DefaultOptionCreator.clusterFilterOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  addOption(DefaultOptionCreator.clusteringOption().create());
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(DefaultOptionCreator.outlierThresholdOption().create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path input = getInputPath();
  Path output = getOutputPath();
  Configuration conf = getConf();
  // --overwrite: delete any previous output before running.
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(conf, output);
  }
  String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
  double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
  // t3/t4 are the reducer-side thresholds; they fall back to t1/t2.
  double t3 = t1;
  if (hasOption(DefaultOptionCreator.T3_OPTION)) {
    t3 = Double.parseDouble(getOption(DefaultOptionCreator.T3_OPTION));
  }
  double t4 = t2;
  if (hasOption(DefaultOptionCreator.T4_OPTION)) {
    t4 = Double.parseDouble(getOption(DefaultOptionCreator.T4_OPTION));
  }
  // 0 means "keep all canopies" unless a filter size is given.
  int clusterFilter = 0;
  if (hasOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION)) {
    clusterFilter = Integer
        .parseInt(getOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION));
  }
  boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
      .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
  DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
  // 0.0 disables outlier removal unless a threshold option was supplied.
  double clusterClassificationThreshold = 0.0;
  if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
    clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
  }
  run(conf, input, output, measure, t1, t2, t3, t4, clusterFilter,
      runClustering, clusterClassificationThreshold, runSequential );
  return 0;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:55,代码来源:CanopyDriver.java

示例15: run

import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI entry point for the fuzzy k-means driver: registers the clustering
 * options, parses the command line, optionally seeds random initial centroids
 * when -k is given, then delegates to the worker overload of run().
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 if argument parsing failed (or --help was requested)
 */
@Override
public int run(String[] args) throws Exception {

  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.distanceMeasureOption().create());
  addOption(DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
          + "If k is also specified, then a random set of vectors will be selected"
          + " and written out to this path first")
      .create());
  addOption(DefaultOptionCreator.numClustersOption()
      .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
          + " as the Centroid and written to the clusters input path.").create());
  addOption(DefaultOptionCreator.convergenceOption().create());
  addOption(DefaultOptionCreator.maxIterationsOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  // m: the fuzziness exponent; must be > 1 or memberships degenerate.
  addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);
  addOption(DefaultOptionCreator.clusteringOption().create());
  addOption(DefaultOptionCreator.emitMostLikelyOption().create());
  addOption(DefaultOptionCreator.thresholdOption().create());
  addOption(DefaultOptionCreator.methodOption().create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path input = getInputPath();
  Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
  Path output = getOutputPath();
  String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  // NOTE(review): distanceMeasureOption() presumably carries a default, in which
  // case this null check is belt-and-braces — confirm against DefaultOptionCreator.
  if (measureClass == null) {
    measureClass = SquaredEuclideanDistanceMeasure.class.getName();
  }
  double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
  float fuzziness = Float.parseFloat(getOption(M_OPTION));

  int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
  // --overwrite: delete any previous output before running.
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(getConf(), output);
  }
  boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
  double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
  DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);

  // -k given: seed the clusters-in path with k randomly selected input vectors.
  if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
    clusters = RandomSeedGenerator.buildRandom(getConf(),
                                               input,
                                               clusters,
                                               Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)),
                                               measure);
  }
  boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
      DefaultOptionCreator.SEQUENTIAL_METHOD);
  run(getConf(),
      input,
      clusters,
      output,
      measure,
      convergenceDelta,
      maxIterations,
      fuzziness,
      runClustering,
      emitMostLikely,
      threshold,
      runSequential);
  return 0;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:70,代码来源:FuzzyKMeansDriver.java


注:本文中的org.apache.mahout.common.commandline.DefaultOptionCreator类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。