本文整理汇总了Java中org.apache.mahout.common.commandline.DefaultOptionCreator类的典型用法代码示例。如果您正苦于以下问题：Java DefaultOptionCreator类的具体用法是什么？Java DefaultOptionCreator怎么用？有哪些Java DefaultOptionCreator的使用例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
DefaultOptionCreator类属于org.apache.mahout.common.commandline包，下文一共展示了DefaultOptionCreator类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Command-line entry point for the clustering post processor. The input handed to the post
 * processor is the output path that was specified to the clustering.
 *
 * @param args command-line arguments; input, output and method options are registered here
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if argument handling or the delegated {@code run} overload fails
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.methodOption().create());

  boolean parsed = parseArguments(args) != null;
  if (!parsed) {
    return -1;
  }

  Path inputPath = getInputPath();
  Path outputPath = getOutputPath();

  // Install a fresh Configuration when none has been set on this job yet.
  if (getConf() == null) {
    setConf(new Configuration());
  }

  String method = getOption(DefaultOptionCreator.METHOD_OPTION);
  boolean sequential = method.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  run(inputPath, outputPath, sequential);
  return 0;
}
示例2: main
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Parses the command line (input, output, vector and help options) and resolves the
 * effective values, falling back to built-in defaults for anything not supplied.
 *
 * <p>When the help option is present, the usage text is printed and the method returns.
 * When parsing fails with an {@code OptionException}, the error is logged and the usage
 * text is printed.
 *
 * @param args raw command-line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
  Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();
  Option vectorOpt = optionBuilder
      .withLongName("vector")
      .withRequired(false)
      .withArgument(argumentBuilder.withName("v").withMinimum(1).withMaximum(1).create())
      .withDescription("The vector implementation to use.")
      .withShortName("v")
      .create();

  Group optionGroup = new GroupBuilder()
      .withName("Options")
      .withOption(inputOpt)
      .withOption(outputOpt)
      .withOption(vectorOpt)
      .withOption(helpOpt)
      .create();

  try {
    Parser cliParser = new Parser();
    cliParser.setGroup(optionGroup);
    CommandLine parsed = cliParser.parse(args);

    if (parsed.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(optionGroup);
      return;
    }

    Path input = new Path(parsed.getValue(inputOpt, "testdata").toString());
    Path output = new Path(parsed.getValue(outputOpt, "output").toString());
    String vectorClassName =
        parsed.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector").toString();
    // The job launch is disabled in this snippet; the resolved values above would feed it:
    //runJob(input, output, vectorClassName);
  } catch (OptionException e) {
    InputDriver.log.error("Exception parsing command line: ", e);
    CommandLineUtil.printHelp(optionGroup);
  }
}
示例3: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Registers the driver's options, parses the command line and delegates to the
 * multi-argument {@code run} overload.
 *
 * @param arg0 raw command-line arguments
 * @return {@code 0}, both after a completed run and when {@code parseArguments} yields
 *         {@code null}
 * @throws Exception if option handling, output deletion or the delegated run fails
 */
@Override
public int run(String[] arg0) throws Exception {
  // Driver-specific options.
  addOption("half-life", "b", "Minimal half-life threshold", true);
  addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
  addOption("epsilon", "e", "Half-life threshold coefficient", Double.toString(EPSILON_DEFAULT));
  addOption("tau", "t", "Threshold for cutting affinities", Double.toString(TAU_DEFAULT));
  addOption("eigenrank", "k", "Number of top eigenvectors to use", true);
  // Shared options supplied by DefaultOptionCreator.
  addOption(DefaultOptionCreator.inputOption().create());
  addOption(DefaultOptionCreator.outputOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());

  if (parseArguments(arg0) == null) {
    return 0;
  }

  Path inputPath = getInputPath();
  Path outputPath = getOutputPath();
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(getConf(), outputPath);
  }

  // Convert the textual option values into their numeric form.
  int affinityDimensions = Integer.parseInt(getOption("dimensions"));
  double minHalfLife = Double.parseDouble(getOption("half-life"));
  double epsilonValue = Double.parseDouble(getOption("epsilon"));
  double tauValue = Double.parseDouble(getOption("tau"));
  int topEigenvectors = Integer.parseInt(getOption("eigenrank"));

  run(getConf(), inputPath, outputPath, topEigenvectors, affinityDimensions, minHalfLife,
      epsilonValue, tauValue);
  return 0;
}
示例4: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Registers the driver's options, parses the command line and delegates to the
 * multi-argument {@code run} overload.
 *
 * @param arg0 raw command-line arguments
 * @return {@code 0}, both after a completed run and when {@code parseArguments} yields
 *         {@code null}
 */
@Override
public int run(String[] arg0) throws IOException, ClassNotFoundException, InterruptedException {
  Configuration configuration = getConf();

  addInputOption();
  addOutputOption();
  addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
  addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
  addOption(DefaultOptionCreator.distanceMeasureOption().create());
  addOption(DefaultOptionCreator.convergenceOption().create());
  addOption(DefaultOptionCreator.maxIterationsOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());

  if (parseArguments(arg0) == null) {
    return 0;
  }

  Path inputPath = getInputPath();
  Path outputPath = getOutputPath();
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(configuration, outputPath);
  }

  int affinityDimensions = Integer.parseInt(getOption("dimensions"));
  int clusterCount = Integer.parseInt(getOption("clusters"));

  // The distance measure arrives as a class name and is instantiated reflectively.
  String measureClassName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  DistanceMeasure distanceMeasure = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);

  double delta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
  int iterationLimit = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

  run(configuration, inputPath, outputPath, affinityDimensions, clusterCount, distanceMeasure,
      delta, iterationLimit);
  return 0;
}
示例5: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI to run Cluster Classification Driver.
 *
 * <p>Registers the input, output, method, clusters-in and outlier-threshold options, parses
 * the command line and delegates to the multi-argument {@code run} overload.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if option handling or the delegated run fails
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.")
      .create());
  // The outlier threshold is looked up below, so it has to be registered as an option too;
  // it was previously read without ever being added.
  addOption(DefaultOptionCreator.outlierThresholdOption().create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path input = getInputPath();
  Path output = getOutputPath();

  // Install a fresh Configuration when none has been set on this job yet.
  if (getConf() == null) {
    setConf(new Configuration());
  }

  Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
      DefaultOptionCreator.SEQUENTIAL_METHOD);

  // Stays at 0.0 unless an outlier threshold was supplied.
  double clusterClassificationThreshold = 0.0;
  if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
    clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
  }

  run(input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
  return 0;
}
示例6: getAnalyzerClassFromOption
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Resolves the analyzer class to use.
 *
 * <p>Without the analyzer-name option this is {@code DefaultAnalyzer}. Otherwise the named
 * class is loaded, narrowed to an {@code Analyzer} subclass and instantiated once as a check.
 *
 * @return the analyzer class to use
 * @throws ClassNotFoundException if the class named by the option cannot be loaded
 */
protected Class<? extends Analyzer> getAnalyzerClassFromOption() throws ClassNotFoundException {
  if (!hasOption(DefaultOptionCreator.ANALYZER_NAME_OPTION)) {
    return DefaultAnalyzer.class;
  }

  String analyzerName = getOption(DefaultOptionCreator.ANALYZER_NAME_OPTION);
  Class<? extends Analyzer> chosen = Class.forName(analyzerName).asSubclass(Analyzer.class);
  // Instantiate it once up front: there is no point in selecting a class
  // that cannot be instantiated.
  ClassUtils.instantiateAs(chosen, Analyzer.class);
  return chosen;
}
示例7: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI to run ImageToText Driver.
 *
 * <p>Registers the input, output, method and clusters-in options, parses the command line
 * and delegates to the multi-argument {@code run} overload.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if option handling or the delegated run fails
 */
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(DefaultOptionCreator
      .clustersInOption()
      .withDescription(
          "The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.")
      .create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path input = getInputPath();
  Path output = getOutputPath();

  // Install a fresh Configuration when none has been set on this job yet.
  if (getConf() == null) {
    setConf(new Configuration());
  }

  Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
      .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  // No outlier threshold is read here: that option is not registered by this method and the
  // run overload invoked below takes no threshold argument.
  run(getConf(), input, clustersIn, output, runSequential);
  return 0;
}
示例8: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * CLI to run Cluster Classification Driver.
 *
 * <p>Registers the input, output, method, clusters-in and outlier-threshold options, parses
 * the command line and delegates to the multi-argument {@code run} overload.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if option handling or the delegated run fails
 */
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(DefaultOptionCreator
      .clustersInOption()
      .withDescription(
          "The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.")
      .create());
  // The outlier threshold is looked up below, so it has to be registered as an option too;
  // it was previously read without ever being added.
  addOption(DefaultOptionCreator.outlierThresholdOption().create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path input = getInputPath();
  Path output = getOutputPath();

  // Install a fresh Configuration when none has been set on this job yet.
  if (getConf() == null) {
    setConf(new Configuration());
  }

  Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
  boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
      .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  // Stays at 0.0 unless an outlier threshold was supplied.
  double clusterClassificationThreshold = 0.0;
  if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
    clusterClassificationThreshold = Double
        .parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
  }

  run(getConf(), input, clustersIn, output,
      clusterClassificationThreshold, true, runSequential);
  return 0;
}
示例9: complete
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Clusters the supplied centroids with a {@code BallKMeans} instance built from
 * {@code _conf}, then emits each centroid the clusterer iterates over as its own tuple.
 *
 * @param list centroids to cluster
 * @param tridentCollector collector receiving one {@code Values} per resulting centroid
 */
@Override
public void complete(List<Centroid> list, TridentCollector tridentCollector) {
  // The searcher is built first, then the cluster count is read from the configuration.
  BallKMeans ballKMeans = new BallKMeans(
      StreamingKMeansBolt.searcherFromConfiguration(_conf),
      ((Number) _conf.get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)).intValue(),
      100);

  ballKMeans.cluster(list);

  for (Centroid resultCentroid : ballKMeans) {
    Values tuple = new Values(resultCentroid);
    tridentCollector.emit(tuple);
  }
}
示例10: searcherFromConfiguration
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Instantiates a searcher from a given configuration.
 *
 * <p>The distance measure class name and the searcher class name are read from the
 * configuration. The numeric settings (number of projections, search size) are only read for
 * the searcher types whose constructors take them, so a configuration for a searcher that
 * does not use them may leave them out.
 *
 * @param conf the configuration
 * @return the instantiated searcher
 * @throws RuntimeException if the distance measure class cannot be instantiated
 * @throws IllegalStateException if an unknown (or no) searcher class was requested, or if a
 *         numeric setting needed by the requested searcher is missing
 */
public static UpdatableSearcher searcherFromConfiguration(Map<?, ?> conf) {
  String distanceMeasureClass = (String) conf.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  DistanceMeasure distanceMeasure;
  try {
    // Class.newInstance() is deprecated; go through the no-arg constructor instead.
    distanceMeasure =
        (DistanceMeasure) Class.forName(distanceMeasureClass).getDeclaredConstructor().newInstance();
  } catch (Exception e) {
    throw new RuntimeException("Failed to instantiate distanceMeasure", e);
  }

  String searcherClass = (String) conf.get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION);

  // Comparisons are written constant-first so that a missing searcher class falls through to
  // the IllegalStateException below instead of raising a NullPointerException.
  if (BruteSearch.class.getName().equals(searcherClass)) {
    return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
        new Class[]{DistanceMeasure.class}, new Object[]{distanceMeasure});
  }
  if (FastProjectionSearch.class.getName().equals(searcherClass)
      || ProjectionSearch.class.getName().equals(searcherClass)) {
    Integer numProjections = requiredInt(conf, StreamingKMeansDriver.NUM_PROJECTIONS_OPTION);
    Integer searchSize = requiredInt(conf, StreamingKMeansDriver.SEARCH_SIZE_OPTION);
    return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
        new Class[]{DistanceMeasure.class, int.class, int.class},
        new Object[]{distanceMeasure, numProjections, searchSize});
  }
  if (LocalitySensitiveHashSearch.class.getName().equals(searcherClass)) {
    Integer searchSize = requiredInt(conf, StreamingKMeansDriver.SEARCH_SIZE_OPTION);
    return ClassUtils.instantiateAs(searcherClass, LocalitySensitiveHashSearch.class,
        new Class[]{DistanceMeasure.class, int.class},
        new Object[]{distanceMeasure, searchSize});
  }
  throw new IllegalStateException("Unknown class instantiation requested");
}

/** Reads a numeric configuration value as an int, failing with a clear message when it is absent. */
private static int requiredInt(Map<?, ?> conf, Object key) {
  Object value = conf.get(key);
  if (value == null) {
    throw new IllegalStateException("Missing required configuration value: " + key);
  }
  return ((Number) value).intValue();
}
示例11: setupConfig
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Builds a fresh {@code Config} in {@code _conf}: worker count, serializer registrations,
 * spout settings, searcher/clustering settings and the input/output paths.
 * Statement order is left untouched because the values of the key constants are not visible
 * here and later entries could overwrite earlier ones.
 */
@Before
public void setupConfig() {
_conf = new Config();
_conf.setNumWorkers(2);
// Serializer registrations for the types listed below.
_conf.registerSerialization(Path.class, FieldSerializer.class);
_conf.registerSerialization(SequenceFile.Writer.class, FieldSerializer.class);
_conf.registerSerialization(DenseVector.class, VectorSerializer.class);
_conf.registerSerialization(Centroid.class, CentroidSerializer.class);
_conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 5);
// Settings keyed by HypercubeSpout constants.
_conf.put(HypercubeSpout.NUM_POINTS, 2000);
_conf.put(HypercubeSpout.NUM_DIMENSION, 500);
_conf.put(HypercubeSpout.NUM_CLUSTERS, NUM_CLUSTERS);
_conf.put(HypercubeSpout.RADIUS, 0.0001);
_conf.put(HypercubeSpout.UNIFORM_FRACTION, 0.0);
// Distance measure and searcher settings; the class-valued entries are stored as class names.
_conf.put(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, SquaredEuclideanDistanceMeasure.class.getName());
_conf.put(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION, 3);
_conf.put(StreamingKMeansDriver.SEARCH_SIZE_OPTION, 2);
_conf.put(StreamingKMeansDriver.SEARCHER_CLASS_OPTION, FastProjectionSearch.class.getName());
_conf.put(StreamingKMeansDriver.ESTIMATED_NUM_MAP_CLUSTERS, ESTIMATED_NUM_MAP_CLUSTERS);
_conf.put(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF, 1e-7);
// NOTE(review): the spout's OUTPUT_PATH key is given INPUT_PATH; presumably the spout writes
// the data that the rest of the test then reads as input; confirm against HypercubeSpout.
_conf.put(HypercubeSpout.OUTPUT_PATH, INPUT_PATH);
_conf.put(LocalSequenceFileWriterBolt.OUTPUT_PATH, OUTPUT_PATH);
}
示例12: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Registers the driver's options, parses the command line, fills in defaults for the
 * optional settings and delegates to the multi-argument {@code run} overload.
 *
 * <p>Defaults applied when an option is absent: split factor 1, maximum iterations 3,
 * normalize 0. When no error sampling rate is given it is derived from the number of
 * decimal characters in the row count: 1 for up to four characters, otherwise
 * {@code 1 / 10^(length - 4)}; the derived value is logged as a warning.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if option handling fails, no Hadoop configuration is present
 *         ({@code IOException}), or the delegated run fails
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(ROWSOPTION, "rows", "Number of rows");
  addOption(COLSOPTION, "cols", "Number of cols");
  addOption(PRINCIPALSOPTION, "pcs", "Number of principal components");
  addOption(SPLITFACTOROPTION, "sf", "Split each block to increase paralelism");
  addOption(ERRSAMPLE, "errSampleRate",
      "Sampling rate for computing the error (0-1]");
  addOption(MAXITER, "maxIter",
      "Maximum number of iterations before terminating, the default is 3");
  addOption(NORMALIZEOPTION, "normalize",
      "Choose whether you want the input matrix to be normalized or not, 1 means normalize, 0 means don't normalize");

  if (parseArguments(args) == null) {
    return -1;
  }

  Path inputPath = getInputPath();
  Path outputPath = getOutputPath();

  final int nRows = Integer.parseInt(getOption(ROWSOPTION));
  final int nCols = Integer.parseInt(getOption(COLSOPTION));
  final int nPCs = Integer.parseInt(getOption(PRINCIPALSOPTION));

  final int splitFactor = hasOption(SPLITFACTOROPTION)
      ? Integer.parseInt(getOption(SPLITFACTOROPTION, "1"))
      : 1;

  final float errSampleRate;
  if (hasOption(ERRSAMPLE)) {
    errSampleRate = Float.parseFloat(getOption(ERRSAMPLE));
  } else {
    // Derive a rate from the length of the row count's decimal representation.
    int rowCountLength = String.valueOf(nRows).length();
    if (rowCountLength <= 4) {
      errSampleRate = 1;
    } else {
      errSampleRate = (float) (1 / Math.pow(10, rowCountLength - 4));
    }
    log.warn("error sampling rate set to: errRate=" + errSampleRate);
  }

  final int maxIterations = hasOption(MAXITER)
      ? Integer.parseInt(getOption(MAXITER))
      : 3;
  final int normalize = hasOption(NORMALIZEOPTION)
      ? Integer.parseInt(getOption(NORMALIZEOPTION))
      : 0;

  Configuration configuration = getConf();
  if (configuration == null) {
    throw new IOException("No Hadoop configuration present");
  }

  String method = getOption(DefaultOptionCreator.METHOD_OPTION);
  boolean sequential = method.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  run(configuration, inputPath, outputPath, nRows, nCols, nPCs, splitFactor, errSampleRate,
      maxIterations, normalize, sequential);
  return 0;
}
示例13: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Registers the driver's options, parses the command line, assembles a
 * {@code DistributionDescription} and delegates to the multi-argument {@code run} overload.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if option handling, output deletion or the delegated run fails
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.maxIterationsOption().create());
  addOption(DefaultOptionCreator.numClustersOption().withRequired(true).create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  addOption(DefaultOptionCreator.clusteringOption().create());
  addOption(ALPHA_OPTION, "a0", "The alpha0 value for the DirichletDistribution. Defaults to 1.0", "1.0");
  addOption(MODEL_DISTRIBUTION_CLASS_OPTION, "md",
      "The ModelDistribution class name. Defaults to GaussianClusterDistribution",
      GaussianClusterDistribution.class.getName());
  addOption(MODEL_PROTOTYPE_CLASS_OPTION, "mp",
      "The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector",
      RandomAccessSparseVector.class.getName());
  addOption(DefaultOptionCreator.distanceMeasureOption().withRequired(false).create());
  addOption(DefaultOptionCreator.emitMostLikelyOption().create());
  addOption(DefaultOptionCreator.thresholdOption().create());
  addOption(DefaultOptionCreator.methodOption().create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path inputPath = getInputPath();
  Path outputPath = getOutputPath();
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(getConf(), outputPath);
  }

  // Class names that describe the model distribution.
  String modelFactoryName = getOption(MODEL_DISTRIBUTION_CLASS_OPTION);
  String modelPrototypeName = getOption(MODEL_PROTOTYPE_CLASS_OPTION);
  String distanceMeasureName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);

  // Numeric and boolean settings, converted from their textual option values.
  int modelCount = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
  int iterationLimit = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
  boolean mostLikelyOnly = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
  double pdfThreshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
  double alpha = Double.parseDouble(getOption(ALPHA_OPTION));
  boolean clusterPoints = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);

  String method = getOption(DefaultOptionCreator.METHOD_OPTION);
  boolean sequential = method.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  int prototypeSize = readPrototypeSize(inputPath);
  DistributionDescription description = new DistributionDescription(
      modelFactoryName, modelPrototypeName, distanceMeasureName, prototypeSize);

  run(getConf(), inputPath, outputPath, description, modelCount, iterationLimit, alpha,
      clusterPoints, mostLikelyOnly, pdfThreshold, sequential);
  return 0;
}
示例14: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Registers the driver's options, parses the command line and delegates to the
 * multi-argument {@code run} overload.
 *
 * <p>T3 and T4 fall back to T1 and T2 respectively when not supplied; the cluster filter
 * falls back to 0 and the outlier threshold to 0.0.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if option handling, output deletion or the delegated run fails
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.distanceMeasureOption().create());
  addOption(DefaultOptionCreator.t1Option().create());
  addOption(DefaultOptionCreator.t2Option().create());
  addOption(DefaultOptionCreator.t3Option().create());
  addOption(DefaultOptionCreator.t4Option().create());
  addOption(DefaultOptionCreator.clusterFilterOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  addOption(DefaultOptionCreator.clusteringOption().create());
  addOption(DefaultOptionCreator.methodOption().create());
  addOption(DefaultOptionCreator.outlierThresholdOption().create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path inputPath = getInputPath();
  Path outputPath = getOutputPath();
  Configuration configuration = getConf();
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(configuration, outputPath);
  }

  String measureClassName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);

  double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
  double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
  // T3/T4 default to T1/T2 when they are not given explicitly.
  double t3 = hasOption(DefaultOptionCreator.T3_OPTION)
      ? Double.parseDouble(getOption(DefaultOptionCreator.T3_OPTION))
      : t1;
  double t4 = hasOption(DefaultOptionCreator.T4_OPTION)
      ? Double.parseDouble(getOption(DefaultOptionCreator.T4_OPTION))
      : t2;

  int clusterFilter = hasOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION)
      ? Integer.parseInt(getOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION))
      : 0;

  boolean clusterPoints = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
  String method = getOption(DefaultOptionCreator.METHOD_OPTION);
  boolean sequential = method.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  DistanceMeasure distanceMeasure = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);

  double outlierThreshold = hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)
      ? Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD))
      : 0.0;

  run(configuration, inputPath, outputPath, distanceMeasure, t1, t2, t3, t4, clusterFilter,
      clusterPoints, outlierThreshold, sequential);
  return 0;
}
示例15: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; //导入依赖的package包/类
/**
 * Registers the driver's options, parses the command line and delegates to the
 * multi-argument {@code run} overload.
 *
 * <p>When no distance measure is supplied, {@code SquaredEuclideanDistanceMeasure} is used.
 * When the number-of-clusters option is present, the clusters path is replaced by the result
 * of {@code RandomSeedGenerator.buildRandom}.
 *
 * @param args raw command-line arguments
 * @return {@code -1} when {@code parseArguments} yields {@code null}, {@code 0} otherwise
 * @throws Exception if option handling, output deletion, seeding or the delegated run fails
 */
@Override
public int run(String[] args) throws Exception {
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.distanceMeasureOption().create());
  addOption(DefaultOptionCreator.clustersInOption()
      .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
          + "If k is also specified, then a random set of vectors will be selected"
          + " and written out to this path first")
      .create());
  addOption(DefaultOptionCreator.numClustersOption()
      .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
          + " as the Centroid and written to the clusters input path.").create());
  addOption(DefaultOptionCreator.convergenceOption().create());
  addOption(DefaultOptionCreator.maxIterationsOption().create());
  addOption(DefaultOptionCreator.overwriteOption().create());
  addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);
  addOption(DefaultOptionCreator.clusteringOption().create());
  addOption(DefaultOptionCreator.emitMostLikelyOption().create());
  addOption(DefaultOptionCreator.thresholdOption().create());
  addOption(DefaultOptionCreator.methodOption().create());

  if (parseArguments(args) == null) {
    return -1;
  }

  Path inputPath = getInputPath();
  Path clustersPath = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
  Path outputPath = getOutputPath();

  // Use the squared Euclidean measure when no distance measure was supplied.
  String requestedMeasure = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  String measureClassName = requestedMeasure == null
      ? SquaredEuclideanDistanceMeasure.class.getName()
      : requestedMeasure;

  double delta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
  float m = Float.parseFloat(getOption(M_OPTION));
  int iterationLimit = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));

  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    HadoopUtil.delete(getConf(), outputPath);
  }

  boolean mostLikelyOnly = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
  double pdfThreshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
  DistanceMeasure distanceMeasure = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);

  // With a cluster count given, the clusters path comes from RandomSeedGenerator instead.
  if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
    int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
    clustersPath = RandomSeedGenerator.buildRandom(getConf(), inputPath, clustersPath, k, distanceMeasure);
  }

  boolean clusterPoints = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
  String method = getOption(DefaultOptionCreator.METHOD_OPTION);
  boolean sequential = method.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);

  run(getConf(), inputPath, clustersPath, outputPath, distanceMeasure, delta, iterationLimit, m,
      clusterPoints, mostLikelyOnly, pdfThreshold, sequential);
  return 0;
}