本文整理汇总了Java中org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable类的典型用法代码示例。如果您正苦于以下问题:Java SequenceFileDirValueIterable类的具体用法?Java SequenceFileDirValueIterable怎么用?Java SequenceFileDirValueIterable使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
SequenceFileDirValueIterable类属于org.apache.mahout.common.iterator.sequencefile包,在下文中一共展示了SequenceFileDirValueIterable类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runSequential
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Runs sequential PPCA (based on the paper from Tipping and Bishop) on a matrix
 * that is first loaded into memory from sequence files.
 *
 * <p>Every value read from {@code input} is copied, in iteration order, into
 * consecutive rows of an {@code nRows x nCols} dense matrix. A random initial
 * matrix and a random initial {@code ss} are then drawn and handed to the
 * in-memory overload of this method.
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error returned by the in-memory run, or 0 when nothing is listed
 *         under {@code input}
 * @throws Exception
 *           propagated from file-system access, from reading the input, or
 *           from the in-memory run
 */
double runSequential(Configuration conf, Path input, Path output,
    final int nRows, final int nCols, final int nPCs) throws Exception {
  final Matrix yMatrix = new DenseMatrix(nRows, nCols);

  // Nothing is listed under the input path: report it and return a zero error.
  final FileSystem inputFs = FileSystem.get(input.toUri(), conf);
  if (inputFs.listStatus(input).length == 0) {
    System.err.println("No file under " + input);
    return 0;
  }

  // Copy the stored vectors into the dense matrix, one row per value.
  int nextRow = 0;
  for (VectorWritable rowValue : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, null, conf)) {
    yMatrix.assignRow(nextRow++, rowValue.get());
  }

  // Same draw order as before: the matrix first, then ss.
  final Matrix initialC = PCACommon.randomMatrix(nCols, nPCs);
  final double initialSs = PCACommon.randSS();
  // The trailing 100 is presumably the iteration cap of the in-memory overload.
  return runSequential(conf, yMatrix, new InitialValues(initialC, initialSs), 100);
}
示例2: runSequential_JacobVersion
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Runs sequential PPCA (based on the matlab implementation of Jacob Verbeek)
 * on a matrix that is first loaded into memory from sequence files.
 *
 * <p>The values under {@code input} fill the rows of an {@code nRows x nCols}
 * dense matrix in iteration order; randomly drawn initial values are then
 * passed, together with that matrix, to the in-memory overload.
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error returned by the in-memory run, or 0 when nothing is listed
 *         under {@code input}
 * @throws Exception
 *           propagated from file-system access, from reading the input, or
 *           from the in-memory run
 */
double runSequential_JacobVersion(Configuration conf, Path input,
    Path output, final int nRows, final int nCols, final int nPCs) throws Exception {
  Matrix loadedY = new DenseMatrix(nRows, nCols);

  FileSystem inputFs = FileSystem.get(input.toUri(), conf);
  boolean nothingListed = inputFs.listStatus(input).length == 0;
  if (nothingListed) {
    System.err.println("No file under " + input);
    return 0;
  }

  int rowIndex = 0;
  for (VectorWritable stored : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, null, conf)) {
    loadedY.assignRow(rowIndex, stored.get());
    rowIndex += 1;
  }

  // Same draw order as before: the matrix first, then ss.
  Matrix startC = PCACommon.randomMatrix(nCols, nPCs);
  double startSs = PCACommon.randSS();
  InitialValues start = new InitialValues(startC, startSs);
  return runSequential_JacobVersion(conf, loadedY, start, 100);
}
示例3: configureWithClusterInfo
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Fills {@code clusters} with SoftClusters built from whatever type is stored
 * as the prior.
 *
 * <p>Each value read from the files accepted by {@code PathFilters.partFilter()}
 * is unwrapped first if it is a {@code ClusterWritable}. A {@code Kluster} or
 * {@code Canopy} is converted into a new {@code SoftCluster} from its center,
 * id and measure; a {@code SoftCluster} is added as is. Matching is done on the
 * exact runtime class, not on assignability.
 *
 * @param conf
 *          the Configuration
 * @param clusterPath
 *          the path to the prior Clusters
 * @param clusters
 *          a {@code List<Cluster>} to put values into
 * @throws IllegalStateException
 *           if a stored value is of any other class
 */
public static void configureWithClusterInfo(Configuration conf, Path clusterPath, List<Cluster> clusters) {
  for (Writable stored : new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST,
      PathFilters.partFilter(), conf)) {
    // Peel off the ClusterWritable wrapper, when there is one.
    Writable prior = stored;
    if (ClusterWritable.class.equals(prior.getClass())) {
      prior = ((ClusterWritable) prior).getValue();
    }
    Class<? extends Writable> priorClass = prior.getClass();

    if (SoftCluster.class.equals(priorClass)) {
      clusters.add((SoftCluster) prior);
    } else if (Kluster.class.equals(priorClass)) {
      Kluster kluster = (Kluster) prior;
      clusters.add(new SoftCluster(kluster.getCenter(), kluster.getId(), kluster.getMeasure()));
    } else if (Canopy.class.equals(priorClass)) {
      Canopy canopy = (Canopy) prior;
      clusters.add(new SoftCluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
    } else {
      throw new IllegalStateException("Bad value class: " + priorClass);
    }
  }
}
示例4: configureWithClusterInfo
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Fills {@code clusters} with Klusters built from whatever Cluster type is
 * stored as the prior.
 *
 * <p>Each value read from the files accepted by {@code PathFilters.partFilter()}
 * is unwrapped first if it is a {@code ClusterWritable}. A {@code Kluster} is
 * added as is; a {@code Canopy} is converted into a new {@code Kluster} from
 * its center, id and measure. Matching is done on the exact runtime class.
 *
 * @param conf
 *          the Configuration
 * @param clusterPath
 *          the path to the prior Clusters
 * @param clusters
 *          a {@code Collection<Cluster>} to put values into
 * @throws IllegalStateException
 *           if a stored value is of any other class
 */
public static void configureWithClusterInfo(Configuration conf, Path clusterPath, Collection<Cluster> clusters) {
  for (Writable stored : new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST,
      PathFilters.partFilter(), conf)) {
    // Peel off the ClusterWritable wrapper, when there is one.
    Writable prior = stored;
    if (ClusterWritable.class.equals(prior.getClass())) {
      prior = ((ClusterWritable) prior).getValue();
    }
    Class<? extends Writable> priorClass = prior.getClass();
    log.debug("Read 1 Cluster from {}", clusterPath);

    if (Canopy.class.equals(priorClass)) {
      Canopy canopy = (Canopy) prior;
      clusters.add(new Kluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
    } else if (Kluster.class.equals(priorClass)) {
      clusters.add((Kluster) prior);
    } else {
      throw new IllegalStateException("Bad value class: " + priorClass);
    }
  }
}
示例5: readClusters
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Reads the clusters stored under every entry of {@code output} that is
 * accepted by {@code ClustersFilter}.
 *
 * @param conf
 *          the Configuration used for file-system access and reading
 * @param output
 *          the directory whose accepted entries are read
 * @return one list of clusters per accepted entry, in listing order
 * @throws IOException
 *           if the file system cannot be obtained or listed
 */
public static List<List<Cluster>> readClusters(Configuration conf, Path output)
    throws IOException {
  List<List<Cluster>> clustersPerEntry = Lists.newArrayList();
  FileSystem fileSystem = FileSystem.get(output.toUri(), conf);
  FileStatus[] entries = fileSystem.listStatus(output, new ClustersFilter());
  for (int i = 0; i < entries.length; i++) {
    List<Cluster> entryClusters = Lists.newArrayList();
    Iterable<ClusterWritable> stored = new SequenceFileDirValueIterable<ClusterWritable>(
        entries[i].getPath(), PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (ClusterWritable wrapper : stored) {
      entryClusters.add(wrapper.getValue());
    }
    clustersPerEntry.add(entryClusters);
  }
  return clustersPerEntry;
}
示例6: crossTestIterationOfMapReducePPCASequentialPPCA
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Runs the sequential and the map-reduce PPCA from the same starting values
 * and checks that the resulting ss, the trace of C and the reported error
 * agree within {@code EPSILON}.
 */
@Test
public void crossTestIterationOfMapReducePPCASequentialPPCA() throws Exception {
  // Both runs start from the same C (the map-reduce run gets its own copy) and the same ss.
  final Matrix startC = PCACommon.randomMatrix(D, d);
  final double startSs = PCACommon.randSS();
  final InitialValues seqInit = new InitialValues(startC, startSs);
  final InitialValues mrInit = new InitialValues(startC.clone(), startSs);

  // 1. run sequential on an in-memory copy of the input
  final Matrix inMemoryY = new DenseMatrix(N, D);
  int nextRow = 0;
  for (VectorWritable stored : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, null, conf)) {
    inMemoryY.assignRow(nextRow++, stored.get());
  }
  final double seqError = ppcaDriver.runSequential(conf, inMemoryY, seqInit, 1);

  // 2. run mapreduce on the distributed form of the same input
  final DistributedRowMatrix distributedY = new DistributedRowMatrix(input, tmp, N, D);
  distributedY.setConf(conf);
  final double mrError = ppcaDriver.runMapReduce(conf, distributedY, mrInit, output, N, D, d, 1, 1, 1, 1);

  Assert.assertEquals("ss value is different in sequential and mapreduce PCA",
      seqInit.ss, mrInit.ss, EPSILON);

  final double seqTrace = PCACommon.trace(seqInit.C);
  final double mrTrace = PCACommon.trace(mrInit.C);
  Assert.assertEquals("C value is different in sequential and mapreduce PCA",
      seqTrace, mrTrace, EPSILON);

  final String errorMessage =
      "The PPCA error between sequntial and mapreduce methods is too different: "
          + seqError + "!= " + mrError;
  Assert.assertEquals(errorMessage, seqError, mrError, EPSILON);
}
示例7: buildClustersSeq
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Build a directory of Canopy clusters from the input vectors and other
 * arguments. Run sequential execution.
 *
 * <p>All vectors under {@code input} are added to an in-memory collection of
 * canopies. Every canopy whose number of observations is strictly greater than
 * {@code clusterFilter} is then written to {@code part-r-00000} inside the
 * returned directory.
 *
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance metric
 * @param t2
 *          the double T2 distance metric
 * @param clusterFilter
 *          the int size threshold; only canopies with more observations than
 *          this value are written
 * @return the canopy output directory Path
 * @throws IOException
 *           if reading the input, writing a canopy, or closing the output
 *           file fails
 */
private static Path buildClustersSeq(Path input, Path output,
    DistanceMeasure measure, double t1, double t2, int clusterFilter)
    throws IOException {
  CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
  Collection<Canopy> canopies = Lists.newArrayList();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
    clusterer.addPointToCanopies(vw.get(), canopies);
  }

  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  Path path = new Path(canopyOutputDir, "part-r-00000");
  // Created before the writer so that nothing can fail between opening the writer and the try.
  ClusterWritable clusterWritable = new ClusterWritable();
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
      Text.class, ClusterWritable.class);
  boolean threw = true;
  try {
    for (Canopy canopy : canopies) {
      canopy.computeParameters();
      if (log.isDebugEnabled()) {
        log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}",
            new Object[] { canopy.getIdentifier(),
                AbstractCluster.formatVector(canopy.getCenter(), null),
                canopy.getNumObservations(),
                AbstractCluster.formatVector(canopy.getRadius(), null) });
      }
      if (canopy.getNumObservations() > clusterFilter) {
        clusterWritable.setValue(canopy);
        writer.append(new Text(canopy.getIdentifier()), clusterWritable);
      }
    }
    threw = false;
  } finally {
    // A failed close on a writer can mean lost output, so it must not be
    // swallowed on the success path. It is only suppressed when another
    // exception is already propagating, so that the original one is kept.
    Closeables.close(writer, threw);
  }
  return canopyOutputDir;
}
示例8: readFromSeqFiles
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Loads this classifier's models, model class name and policy from the
 * sequence files under {@code path}.
 *
 * <p>The files are read with a freshly created {@code Configuration}; the
 * {@code conf} argument is only passed to each cluster's {@code configure}.
 * The model class name is taken from the first cluster read, so at least one
 * cluster is expected under {@code path} (an empty result presumably fails on
 * {@code get(0)}; confirm against callers).
 *
 * @param conf
 *          the Configuration handed to every cluster that is read
 * @param path
 *          the directory holding the stored clusters and the policy
 * @throws IOException
 *           declared by this method; presumably raised while reading the
 *           policy
 */
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  final Configuration readConf = new Configuration();
  final List<Cluster> loaded = Lists.newArrayList();
  final Iterable<ClusterWritable> stored = new SequenceFileDirValueIterable<ClusterWritable>(
      path, PathType.LIST, PathFilters.logsCRCFilter(), readConf);
  for (ClusterWritable wrapper : stored) {
    final Cluster model = wrapper.getValue();
    model.configure(conf);
    loaded.add(model);
  }
  this.models = loaded;
  modelClass = loaded.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
示例9: getCanopies
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Reads the MeanShiftCanopy instances stored under the path that the
 * configuration holds for {@code MeanShiftCanopyDriver.STATE_IN_KEY}.
 *
 * @param conf
 *          the Configuration that supplies the state path and is used for
 *          reading
 * @return the canopies in the order they were read
 */
public static List<MeanShiftCanopy> getCanopies(Configuration conf) {
  final Path stateDir = new Path(conf.get(MeanShiftCanopyDriver.STATE_IN_KEY));
  final List<MeanShiftCanopy> result = Lists.newArrayList();
  final Iterable<ClusterWritable> stored = new SequenceFileDirValueIterable<ClusterWritable>(
      stateDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);
  for (ClusterWritable wrapper : stored) {
    // Every stored value is cast to MeanShiftCanopy; any other type fails the cast.
    result.add((MeanShiftCanopy) wrapper.getValue());
  }
  return result;
}
示例10: iterateSeq
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Iterate over data using a prior-trained ClusterClassifier, for a number of iterations using a sequential
 * implementation.
 *
 * <p>Each iteration trains the classifier on every input vector, closes it to compute the posterior
 * models, updates the policy and writes the classifier to {@code outPath/<CLUSTERS_DIR><iteration>}.
 * Iteration stops early once {@code isConverged} reports true. The directory written last is then
 * renamed by appending {@code Cluster.FINAL_ITERATION_SUFFIX}.
 *
 * @param conf
 *          the Configuration
 * @param inPath
 *          a Path to input VectorWritables
 * @param priorPath
 *          a Path to the prior classifier
 * @param outPath
 *          a Path of output directory
 * @param numIterations
 *          the int number of iterations to perform; must be at least 1
 * @throws IllegalArgumentException
 *           if {@code numIterations} is less than 1
 * @throws IOException
 *           if reading the prior, writing the classifier, or accessing the file system fails
 */
public static void iterateSeq(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
    throws IOException {
  // Without at least one iteration no clusters directory is written, and the
  // final rename used to fail with a NullPointerException.
  if (numIterations < 1) {
    throw new IllegalArgumentException("numIterations must be at least 1, but was " + numIterations);
  }
  ClusterClassifier classifier = new ClusterClassifier();
  classifier.readFromSeqFiles(conf, priorPath);
  // Every clusters directory is a child of outPath, so one lookup serves the
  // convergence checks and the final rename.
  FileSystem fs = FileSystem.get(outPath.toUri(), conf);
  Path clustersOut = null;
  int iteration = 1;
  while (iteration <= numIterations) {
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(inPath, PathType.LIST,
        PathFilters.logsCRCFilter(), conf)) {
      Vector vector = vw.get();
      // classification yields probabilities
      Vector probabilities = classifier.classify(vector);
      // policy selects weights for models given those probabilities
      Vector weights = classifier.getPolicy().select(probabilities);
      // training causes all models to observe data
      for (Iterator<Vector.Element> it = weights.iterateNonZero(); it.hasNext();) {
        int index = it.next().index();
        classifier.train(index, vector, weights.get(index));
      }
    }
    // compute the posterior models
    classifier.close();
    // update the policy
    classifier.getPolicy().update(classifier);
    // output the classifier
    clustersOut = new Path(outPath, Cluster.CLUSTERS_DIR + iteration);
    classifier.writeToSeqFiles(clustersOut);
    iteration++;
    if (isConverged(clustersOut, conf, fs)) {
      break;
    }
  }
  // iteration was already advanced past the directory written last, hence the "- 1".
  Path finalClustersIn = new Path(outPath, Cluster.CLUSTERS_DIR + (iteration - 1) + Cluster.FINAL_ITERATION_SUFFIX);
  fs.rename(clustersOut, finalClustersIn);
}
示例11: selectCluster
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable; //导入依赖的package包/类
/**
 * Classifies the input vectors into their respective clusters and writes the result to
 * {@code part-m-0} under {@code output}.
 *
 * <p>A vector is written only when {@code shouldClassify} accepts its per-cluster pdf for the given
 * threshold. The output writer is closed even when classification or writing fails.
 *
 * @param input
 *          the path containing the input vectors
 * @param clusterModels
 *          the clusters
 * @param clusterClassifier
 *          used to classify the vectors into different clusters
 * @param output
 *          the path to store classified data
 * @param clusterClassificationThreshold
 *          the threshold passed to {@code shouldClassify} and {@code classifyAndWrite}
 * @param emitMostLikely
 *          passed through to {@code classifyAndWrite}
 * @throws IOException
 *           if the output file cannot be created, written or closed
 */
private static void selectCluster(Path input, List<Cluster> clusterModels, ClusterClassifier clusterClassifier,
    Path output, Double clusterClassificationThreshold, boolean emitMostLikely) throws IOException {
  Configuration conf = new Configuration();
  // NOTE(review): the writer's file system is resolved from the input path even
  // though it writes under output; confirm both are always on the same file system.
  SequenceFile.Writer writer = new SequenceFile.Writer(input.getFileSystem(conf), conf, new Path(output,
      "part-m-" + 0), IntWritable.class, WeightedVectorWritable.class);
  try {
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST,
        PathFilters.logsCRCFilter(), conf)) {
      Vector pdfPerCluster = clusterClassifier.classify(vw.get());
      if (shouldClassify(pdfPerCluster, clusterClassificationThreshold)) {
        classifyAndWrite(clusterModels, clusterClassificationThreshold, emitMostLikely, writer, vw, pdfPerCluster);
      }
    }
  } finally {
    // Release the writer on failure as well as on success.
    writer.close();
  }
}