本文整理汇总了Java中org.apache.mahout.math.VectorWritable类的典型用法代码示例。如果您正苦于以下问题:Java VectorWritable类的具体用法?Java VectorWritable怎么用?Java VectorWritable使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
VectorWritable类属于org.apache.mahout.math包,在下文中一共展示了VectorWritable类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runSequential
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * PPCA: sequential PPCA based on the paper from Tipping and Bishop.
 *
 * <p>Copies every vector found under {@code input}, in iteration order, into
 * the rows of an in-memory {@code nRows x nCols} dense matrix and delegates to
 * the matrix-based {@code runSequential} overload, seeded with a random
 * {@code nCols x nPCs} matrix and a random {@code ss} value.
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error, or 0 when nothing is listed under {@code input}
 * @throws Exception
 *           if listing or reading the input, or the delegated computation, fails
 */
double runSequential(Configuration conf, Path input, Path output,
    final int nRows, final int nCols, final int nPCs) throws Exception {
  Matrix y = new DenseMatrix(nRows, nCols);
  FileSystem inputFs = FileSystem.get(input.toUri(), conf);
  if (inputFs.listStatus(input).length == 0) {
    System.err.println("No file under " + input);
    return 0;
  }

  SequenceFileDirValueIterable<VectorWritable> rows =
      new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null, conf);
  int nextRow = 0;
  for (VectorWritable rowVector : rows) {
    y.assignRow(nextRow++, rowVector.get());
  }

  // Arguments are evaluated left to right, so the random matrix is still drawn
  // before the random ss value.
  InitialValues start =
      new InitialValues(PCACommon.randomMatrix(nCols, nPCs), PCACommon.randSS());
  // NOTE(review): 100 is presumably the iteration cap of the overload - confirm.
  return runSequential(conf, y, start, 100);
}
示例2: runSequential_JacobVersion
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * PPCA: sequential PPCA based on the matlab implementation of Jacob Verbeek.
 *
 * <p>Loads the vectors found under {@code input}, in iteration order, into the
 * rows of an in-memory {@code nRows x nCols} dense matrix and delegates to the
 * matrix-based {@code runSequential_JacobVersion} overload, seeded with a
 * random {@code nCols x nPCs} matrix and a random {@code ss} value.
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error, or 0 when nothing is listed under {@code input}
 * @throws Exception
 *           if listing or reading the input, or the delegated computation, fails
 */
double runSequential_JacobVersion(Configuration conf, Path input,
    Path output, final int nRows, final int nCols, final int nPCs) throws Exception {
  Matrix y = new DenseMatrix(nRows, nCols);
  FileSystem inputFs = FileSystem.get(input.toUri(), conf);
  if (inputFs.listStatus(input).length == 0) {
    System.err.println("No file under " + input);
    return 0;
  }

  int nextRow = 0;
  SequenceFileDirValueIterable<VectorWritable> rows =
      new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null, conf);
  for (VectorWritable rowVector : rows) {
    y.assignRow(nextRow, rowVector.get());
    nextRow += 1;
  }

  // Left-to-right argument evaluation keeps the original draw order:
  // random matrix first, then the random ss value.
  InitialValues seed =
      new InitialValues(PCACommon.randomMatrix(nCols, nPCs), PCACommon.randSS());
  // NOTE(review): 100 is presumably the iteration cap of the overload - confirm.
  return runSequential_JacobVersion(conf, y, seed, 100);
}
示例3: toDistributedVector
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Writes {@code vector} as the single record of a sequence file named
 * {@code "Vector-" + label} inside {@code outputDir}.
 *
 * <p>The record key is {@code IntWritable(0)} and the value is the vector
 * wrapped in a {@code VectorWritable}. If the target file already exists it is
 * deleted first and a warning is logged.
 *
 * @param vector    the vector to persist
 * @param outputDir directory that will contain the file
 * @param label     suffix appended to {@code "Vector-"} to form the file name
 * @param conf      configuration used to resolve the file system and create the writer
 * @return the path of the file that was written
 * @throws IOException if the file system cannot be accessed or the write fails
 */
static Path toDistributedVector(Vector vector, Path outputDir, String label,
    Configuration conf) throws IOException {
  Path outputFile = new Path(outputDir, "Vector-" + label);
  FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
  if (fs.exists(outputFile)) {
    log.warn("----------- OVERWRITE " + outputFile + " already exists");
    fs.delete(outputFile, false);
  }
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile,
      IntWritable.class, VectorWritable.class);
  try {
    VectorWritable vectorWritable = new VectorWritable();
    vectorWritable.set(vector);
    writer.append(new IntWritable(0), vectorWritable);
  } finally {
    // Close even when append() throws so the underlying output stream is not leaked.
    writer.close();
  }
  return outputFile;
}
示例4: verifyYtX
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Verifies the reducer output captured by {@code writer} against the expected
 * {@code ytx} values.
 *
 * <p>Asserts that exactly {@code cols} keys were written, that each key has a
 * single value, that each value is a vector of size {@code xsize}, and that
 * every component matches {@code ytx[key][c]} within {@code EPSILON}.
 *
 * @param writer the record writer holding the reducer's output
 */
private void verifyYtX(
    DummyRecordWriter<IntWritable, VectorWritable> writer) {
  Assert.assertEquals("The reducer should output " + cols + " keys!", cols,
      writer.getKeys().size());

  for (IntWritable key : writer.getKeys()) {
    List<VectorWritable> valuesForKey = writer.getValue(key);
    assertEquals("reducer produces more than one values per key!", 1,
        valuesForKey.size());

    Vector actualRow = valuesForKey.get(0).get();
    assertEquals("reducer vector size must match the x size!", xsize,
        actualRow.size());

    // The key's int value is the row index into the expected matrix.
    int rowIndex = key.get();
    for (int c = 0; c < xsize; c++) {
      Assert.assertEquals("The ytx[" + rowIndex + "][" + c + "] is incorrect: ",
          ytx[rowIndex][c], actualRow.get(c), EPSILON);
    }
  }
}
示例5: nextVector
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Returns the next vector from the underlying iterator as a
 * {@code MySparseVector}, or {@code null} when the iterator is exhausted.
 *
 * <p>All elements of the Mahout vector are visited; only those whose value is
 * not zero are copied, as parallel lists of indices and values. The key of the
 * entry is not used.
 *
 * @return the converted sparse vector, or {@code null} if there is no next entry
 * @throws IOException declared by the overridden method
 */
@Override
public MySparseVector nextVector() throws IOException {
  if (!vecIterator.hasNext()) {
    return null;
  }
  Pair<Text, VectorWritable> entry = vecIterator.next();
  VectorWritable mahoutVector = entry.getSecond();
  // Parameterized lists instead of raw ArrayList to avoid unchecked conversions.
  ArrayList<Integer> indices = new ArrayList<Integer>();
  ArrayList<Double> values = new ArrayList<Double>();
  for (Element e : mahoutVector.get().all()) {
    double value = e.get();
    if (value == 0) {
      continue; // keep the result sparse: zero entries are not stored
    }
    indices.add(e.index());
    values.add(value);
  }
  return new MySparseVector(indices, values);
}
示例6: reduce
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Assembles the entries received for one row into a single sparse vector and
 * writes it keyed by the row number.
 *
 * <p>The vector's cardinality is read from the
 * {@code EigencutsKeys.AFFINITY_DIMENSIONS} configuration entry (defaulting to
 * {@code Integer.MAX_VALUE}). An entry on the diagonal (column equals the row
 * number) is added to whatever is already stored there; any other entry simply
 * overwrites its column.
 *
 * @param row     text form of the integer row number
 * @param entries the entries belonging to this row
 * @param context the reducer context used for configuration and output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void reduce(Text row, Iterable<VertexWritable> entries,
    Context context) throws IOException, InterruptedException {
  int dimensions =
      context.getConfiguration().getInt(EigencutsKeys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE);
  RandomAccessSparseVector rowVector = new RandomAccessSparseVector(dimensions, 100);
  int rownum = Integer.parseInt(row.toString());

  for (VertexWritable entry : entries) {
    int col = entry.getCol();
    // Diagonal entries accumulate; off-diagonal entries replace the stored value.
    if (col != rownum) {
      rowVector.setQuick(col, entry.getValue());
    } else {
      rowVector.setQuick(col, rowVector.getQuick(col) + entry.getValue());
    }
  }

  context.write(new IntWritable(rownum), new VectorWritable(rowVector));
}
示例7: createModelDistribution
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Creates an instance of AbstractVectorModelDistribution from the given
 * command line arguments.
 *
 * <p>The distribution class is instantiated from {@code modelFactory}; its
 * prototype is a {@code Vector} instantiated from {@code modelPrototype} via a
 * single-{@code int} constructor called with {@code prototypeSize}. When the
 * distribution is a {@code DistanceMeasureClusterDistribution}, a distance
 * measure is instantiated from {@code distanceMeasure}, configured with
 * {@code conf}, and attached to it.
 *
 * @param conf configuration passed to the distance measure, if one is created
 * @return the configured model distribution
 */
public ModelDistribution<VectorWritable> createModelDistribution(Configuration conf) {
  AbstractVectorModelDistribution distribution =
      ClassUtils.instantiateAs(modelFactory, AbstractVectorModelDistribution.class);

  Class<?>[] ctorParamTypes = {int.class};
  Object[] ctorArgs = {prototypeSize};
  Vector prototype =
      ClassUtils.instantiateAs(modelPrototype, Vector.class, ctorParamTypes, ctorArgs);
  distribution.setModelPrototype(new VectorWritable(prototype));

  if (!(distribution instanceof DistanceMeasureClusterDistribution)) {
    return distribution;
  }

  DistanceMeasure measure = ClassUtils.instantiateAs(distanceMeasure, DistanceMeasure.class);
  measure.configure(conf);
  ((DistanceMeasureClusterDistribution) distribution).setMeasure(measure);
  return distribution;
}
示例8: runIteration
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Configures and synchronously runs the map-reduce job for one iteration.
 *
 * <p>The job reads sequence files from {@code corpusInput}, uses
 * {@code CachingCVB0Mapper} as mapper and {@code VectorSumReducer} as both
 * combiner and reducer, and writes {@code IntWritable}/{@code VectorWritable}
 * sequence files to {@code modelOutput}. Any existing {@code modelOutput} is
 * deleted before the job is started.
 *
 * @param conf            base configuration for the job
 * @param corpusInput     input path of the corpus
 * @param modelInput      path of the model passed to {@code setModelPaths}
 * @param modelOutput     output path for this iteration
 * @param iterationNumber number of this iteration (used in the job name and error message)
 * @param maxIterations   total number of iterations (used in the job name)
 * @param numReduceTasks  number of reduce tasks for the job
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException if a job class cannot be found
 * @throws InterruptedException   if the job is interrupted or does not complete successfully
 */
public static void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
    int iterationNumber, int maxIterations, int numReduceTasks)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = String.format("Iteration %d of %d, input path: %s",
      iterationNumber, maxIterations, modelInput);
  log.info("About to run: " + jobName);

  Job job = new Job(conf, jobName);
  job.setJarByClass(CVB0Driver.class);

  // Processing classes.
  job.setMapperClass(CachingCVB0Mapper.class);
  job.setCombinerClass(VectorSumReducer.class);
  job.setReducerClass(VectorSumReducer.class);
  job.setNumReduceTasks(numReduceTasks);

  // Input and output formats and types.
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(VectorWritable.class);

  FileInputFormat.addInputPath(job, corpusInput);
  FileOutputFormat.setOutputPath(job, modelOutput);
  setModelPaths(job, modelInput);

  // Clear any previous output before the job starts.
  HadoopUtil.delete(conf, modelOutput);

  boolean succeeded = job.waitForCompletion(true);
  if (succeeded) {
    return;
  }
  throw new InterruptedException(String.format("Failed to complete iteration %d stage 1",
      iterationNumber));
}
示例9: reduce
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Adds every incoming point to the canopies, then writes each canopy that
 * passes the cluster filter.
 *
 * <p>After all points are added, parameters are computed for every canopy;
 * only canopies whose number of observations exceeds {@code clusterFilter} are
 * written, keyed by their identifier and wrapped in a {@code ClusterWritable}.
 *
 * @param arg0    the reduce key (not used)
 * @param values  the points to add to the canopies
 * @param context the reducer context used for output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values,
    Context context) throws IOException, InterruptedException {
  for (VectorWritable value : values) {
    canopyClusterer.addPointToCanopies(value.get(), canopies);
  }

  for (Canopy canopy : canopies) {
    ClusterWritable wrapped = new ClusterWritable();
    canopy.computeParameters();
    boolean passesFilter = canopy.getNumObservations() > clusterFilter;
    if (passesFilter) {
      wrapped.setValue(canopy);
      Text canopyId = new Text(canopy.getIdentifier());
      context.write(canopyId, wrapped);
    }
  }
}
示例10: map
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Mapper which classifies the vectors to respective clusters.
 *
 * <p>Nothing is emitted when there are no cluster models or when
 * {@code shouldClassify} rejects the classification result. Otherwise either
 * the single most likely cluster is written with weight {@code 1.0}
 * (when {@code emitMostLikely} is set) or the vector is handed to
 * {@code writeAllAboveThreshold}.
 *
 * @param key     the input key (not used)
 * @param vw      the vector to classify
 * @param context the mapper context used for output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable vw, Context context)
    throws IOException, InterruptedException {
  if (clusterModels.isEmpty()) {
    return;
  }

  Vector pdfPerCluster = clusterClassifier.classify(vw.get());
  if (!shouldClassify(pdfPerCluster)) {
    return;
  }

  if (!emitMostLikely) {
    writeAllAboveThreshold(vw, context, pdfPerCluster);
    return;
  }
  write(vw, context, pdfPerCluster.maxValueIndex(), 1.0);
}
示例11: writeTopicModel
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Configures and submits (without waiting for completion) a map-only job that
 * runs {@code CVB0TopicTermVectorNormalizerMapper} over {@code modelInput} and
 * writes {@code IntWritable}/{@code VectorWritable} sequence files to
 * {@code output}.
 *
 * @param conf       base configuration for the job
 * @param modelInput input path of the model
 * @param output     output path of the job
 * @return the submitted job
 * @throws IOException            if job setup or submission fails with an I/O error
 * @throws InterruptedException   if submission is interrupted
 * @throws ClassNotFoundException if a job class cannot be found
 */
private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName =
      String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
  log.info("About to run: " + jobName);

  Job job = new Job(conf, jobName);
  job.setJarByClass(CVB0Driver.class);

  // Map-only job: the mapper's output is the final output.
  job.setMapperClass(CVB0TopicTermVectorNormalizerMapper.class);
  job.setNumReduceTasks(0);

  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(VectorWritable.class);

  FileInputFormat.addInputPath(job, modelInput);
  FileOutputFormat.setOutputPath(job, output);

  job.submit();
  return job;
}
示例12: configure
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Configures this object from {@code jobConf} and, when a weights file is set,
 * loads the weights vector from it.
 *
 * <p>If {@code parameters} has not been initialized yet, the parameters are
 * configured through {@code ParameteredGeneralizations}. When
 * {@code weightsFile} holds a value, a {@code VectorWritable} of the class held
 * by {@code vectorClass} is instantiated, populated from the file, and its
 * vector is stored in {@code this.weights}.
 *
 * @param jobConf the job configuration
 * @throws IllegalStateException if the weights file is missing or cannot be read
 *         (wraps the underlying {@code IOException})
 */
@Override
public void configure(Configuration jobConf) {
  if (parameters == null) {
    ParameteredGeneralizations.configureParameters(this, jobConf);
  }
  try {
    if (weightsFile.get() == null) {
      return; // no weights file configured, nothing to load
    }

    FileSystem fs = FileSystem.get(weightsFile.get().toUri(), jobConf);
    VectorWritable loaded =
        ClassUtils.instantiateAs((Class<? extends VectorWritable>) vectorClass.get(), VectorWritable.class);
    if (!fs.exists(weightsFile.get())) {
      throw new FileNotFoundException(weightsFile.get().toString());
    }

    DataInputStream in = fs.open(weightsFile.get());
    try {
      loaded.readFields(in);
    } finally {
      Closeables.closeQuietly(in);
    }
    this.weights = loaded.get();
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
}
示例13: reduce
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Collects the matrix entries of one row into a sparse vector and writes it
 * under the same row key.
 *
 * <p>The vector's cardinality is read from the
 * {@code EigencutsKeys.AFFINITY_DIMENSIONS} configuration entry (defaulting to
 * {@code Integer.MAX_VALUE}). Entries are accumulated in a
 * {@code RandomAccessSparseVector}, which is converted to a
 * {@code SequentialAccessSparseVector} before being written. Each entry is
 * logged at debug level when debug logging is enabled.
 *
 * @param row     the row index
 * @param values  the entries belonging to this row
 * @param context the reducer context used for configuration and output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void reduce(IntWritable row, Iterable<DistributedRowMatrix.MatrixEntryWritable> values, Context context)
    throws IOException, InterruptedException {
  RandomAccessSparseVector accumulator = new RandomAccessSparseVector(
      context.getConfiguration().getInt(EigencutsKeys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE), 100);

  for (DistributedRowMatrix.MatrixEntryWritable entry : values) {
    accumulator.setQuick(entry.getCol(), entry.getVal());
    if (!log.isDebugEnabled()) {
      continue;
    }
    log.debug("(DEBUG - REDUCE) Row[{}], Column[{}], Value[{}]",
        new Object[] {row.get(), entry.getCol(), entry.getVal()});
  }

  context.write(row, new VectorWritable(new SequentialAccessSparseVector(accumulator)));
}
示例14: reduce
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Sums all vectors received for a key, optionally normalizes the sum, and
 * writes the result under the same key.
 *
 * <p>The first vector is cloned and every following vector is added to that
 * clone. Unless {@code normPower} equals
 * {@code PartialVectorMerger.NO_NORMALIZING}, the sum is normalized with
 * {@code normPower}, using log-normalization when {@code logNormalize} is set.
 *
 * @param key     the key shared by all values
 * @param values  the vectors to sum
 * @param context the reducer context used for output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context context) throws IOException,
    InterruptedException {
  // NOTE(review): if values were ever empty, sum would remain null below.
  Vector sum = null;
  for (VectorWritable value : values) {
    if (sum != null) {
      sum.assign(value.get(), Functions.PLUS);
    } else {
      // Clone so the accumulation does not modify the vector held by the input value.
      sum = value.get().clone();
    }
  }

  if (normPower != PartialVectorMerger.NO_NORMALIZING) {
    sum = logNormalize ? sum.logNormalize(normPower) : sum.normalize(normPower);
  }

  context.write(key, new VectorWritable(sum));
}
示例15: writeDocTopicInference
import org.apache.mahout.math.VectorWritable; //导入依赖的package包/类
/**
 * Configures and submits (without waiting for completion) a map-only job that
 * runs {@code CVB0DocInferenceMapper} over {@code corpus} and writes
 * {@code IntWritable}/{@code VectorWritable} sequence files to {@code output}.
 *
 * <p>When {@code modelInput} is non-null and exists, its part files are
 * registered as distributed-cache files on the job's configuration.
 *
 * @param conf       base configuration for the job
 * @param corpus     input path of the corpus
 * @param modelInput path of the model directory; may be {@code null}
 * @param output     output path of the job
 * @return the submitted job
 * @throws IOException            if file-system access, job setup or submission fails
 * @throws ClassNotFoundException if a job class cannot be found
 * @throws InterruptedException   if submission is interrupted
 */
private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
  log.info("About to run: " + jobName);
  Job job = new Job(conf, jobName);
  job.setMapperClass(CVB0DocInferenceMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(VectorWritable.class);
  FileSystem fs = FileSystem.get(corpus.toUri(), conf);
  if (modelInput != null && fs.exists(modelInput)) {
    FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
    URI[] modelUris = new URI[statuses.length];
    for (int i = 0; i < statuses.length; i++) {
      modelUris[i] = statuses[i].getPath().toUri();
    }
    // Job takes a copy of the Configuration it is constructed with, so settings applied
    // to the caller's conf after construction never reach the submitted job. Register
    // the cache files on the job's own configuration instead.
    DistributedCache.setCacheFiles(modelUris, job.getConfiguration());
  }
  FileInputFormat.addInputPath(job, corpus);
  FileOutputFormat.setOutputPath(job, output);
  job.setJarByClass(CVB0Driver.class);
  job.submit();
  return job;
}