本文整理汇总了Java中org.apache.mahout.common.iterator.sequencefile.PathType.GLOB属性的典型用法代码示例。如果您正苦于以下问题:Java PathType.GLOB属性的具体用法?Java PathType.GLOB怎么用?Java PathType.GLOB使用的例子?那么恭喜您, 这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类org.apache.mahout.common.iterator.sequencefile.PathType
的用法示例。
在下文中一共展示了PathType.GLOB属性的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: process
/**
 * Reads the clustered points emitted by the clustering algorithms and routes each
 * point into the output file of the cluster it was assigned to.
 *
 * @throws IOException if the post-process directory cannot be created or a point cannot be written
 */
public void process() throws IOException {
  createPostProcessDirectory();
  // Walk every part file under clusteredPoints; the record key names the owning cluster.
  SequenceFileDirIterable<Writable,WeightedVectorWritable> clusteredRecords =
      new SequenceFileDirIterable<Writable,WeightedVectorWritable>(clusteredPoints,
                                                                   PathType.GLOB,
                                                                   PathFilters.partFilter(),
                                                                   null,
                                                                   false,
                                                                   conf);
  for (Pair<?,WeightedVectorWritable> record : clusteredRecords) {
    putVectorInRespectiveCluster(record.getFirst().toString().trim(), record.getSecond());
  }
  // Flush and release the per-cluster writers accumulated above.
  IOUtils.close(writersForClusters.values());
  writersForClusters.clear();
}
示例2: getFileStatus
/**
 * Lists the {@link FileStatus}es under {@code path}, either by expanding it as a glob
 * pattern or by listing it as a directory, optionally filtered and ordered.
 *
 * @param path     glob pattern or directory path to inspect
 * @param pathType {@link PathType#GLOB} to expand {@code path} as a glob, otherwise list it
 * @param filter   filter applied to candidate paths; may be {@code null} for no filtering
 * @param ordering sort order for the result; may be {@code null} to leave it unsorted
 * @param conf     Hadoop configuration used to resolve the file system for {@code path}
 * @return the matching statuses, never {@code null}; empty when nothing matches
 * @throws IOException if the file system cannot be contacted
 */
public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter, Comparator<FileStatus> ordering, Configuration conf) throws IOException {
  FileStatus[] statuses;
  FileSystem fs = path.getFileSystem(conf);
  if (filter == null) {
    statuses = pathType == PathType.GLOB ? fs.globStatus(path) : listStatus(fs, path);
  } else {
    statuses = pathType == PathType.GLOB ? fs.globStatus(path, filter) : listStatus(fs, path, filter);
  }
  if (statuses == null) {
    // FileSystem.globStatus returns null when the pattern matches no path; normalize to
    // an empty array so the sort below and downstream callers never hit an NPE.
    statuses = new FileStatus[0];
  }
  if (ordering != null) {
    Arrays.sort(statuses, ordering);
  }
  return statuses;
}
示例3: createDictionaryChunks
/**
 * Reads the feature-frequency list produced at the end of the Word Count job and
 * assigns a sequential integer id to every term, spilling the (term, id) pairs into
 * dictionary chunk files of roughly {@code chunkSizeInMegabytes} each. Uses constant
 * memory and runs at disk-read speed.
 *
 * @param wordCountPath        directory holding the word-count part files
 * @param dictionaryPathBase   directory under which dictionary chunks are written
 * @param baseConf             Hadoop configuration to clone for this job
 * @param chunkSizeInMegabytes soft upper bound on the size of each chunk file
 * @param maxTermDimension     single-element out-parameter receiving the term count
 * @return paths of all dictionary chunk files written
 * @throws IOException if reading the counts or writing a chunk fails
 */
private static List<Path> createDictionaryChunks(Path wordCountPath,
                                                 Path dictionaryPathBase,
                                                 Configuration baseConf,
                                                 int chunkSizeInMegabytes,
                                                 int[] maxTermDimension) throws IOException {
  List<Path> chunkPaths = Lists.newArrayList();
  Configuration conf = new Configuration(baseConf);
  FileSystem fs = FileSystem.get(wordCountPath.toUri(), conf);
  long maxChunkBytes = chunkSizeInMegabytes * 1024L * 1024L;
  int chunkIndex = 0;
  Path chunkPath = new Path(dictionaryPathBase, DICTIONARY_FILE + chunkIndex);
  chunkPaths.add(chunkPath);
  SequenceFile.Writer dictWriter = new SequenceFile.Writer(fs, conf, chunkPath, Text.class, IntWritable.class);
  try {
    long bytesInChunk = 0;
    Path filesPattern = new Path(wordCountPath, OUTPUT_FILES_PATTERN);
    int termId = 0;
    for (Pair<Writable,Writable> record
        : new SequenceFileDirIterable<Writable,Writable>(filesPattern, PathType.GLOB, null, null, true, conf)) {
      // Roll over to a fresh chunk once the soft size limit has been exceeded.
      if (bytesInChunk > maxChunkBytes) {
        Closeables.closeQuietly(dictWriter);
        chunkIndex++;
        chunkPath = new Path(dictionaryPathBase, DICTIONARY_FILE + chunkIndex);
        chunkPaths.add(chunkPath);
        dictWriter = new SequenceFile.Writer(fs, conf, chunkPath, Text.class, IntWritable.class);
        bytesInChunk = 0;
      }
      Writable term = record.getFirst();
      // Estimated footprint: record overhead + UTF-16 term bytes + one serialized int.
      bytesInChunk += DICTIONARY_BYTE_OVERHEAD + term.toString().length() * 2 + Integer.SIZE / 8;
      dictWriter.append(term, new IntWritable(termId++));
    }
    // Number of terms seen equals the dimensionality of the term vector space.
    maxTermDimension[0] = termId;
  } finally {
    Closeables.closeQuietly(dictWriter);
  }
  return chunkPaths;
}
示例4: createDictionaryChunks
/**
 * Reads the document-frequency counts produced at the end of the DF Count job and
 * splits them into frequency chunk files of roughly {@code chunkSizeInMegabytes}
 * each. Uses constant memory and runs at disk-read speed.
 *
 * @param featureCountPath     directory holding the DF-count part files
 * @param dictionaryPathBase   directory under which frequency chunks are written
 * @param baseConf             Hadoop configuration to clone for this job
 * @param chunkSizeInMegabytes soft upper bound on the size of each chunk file
 * @return a pair of ({featureCount, vectorCount}, chunk paths); featureCount is the
 *         number of distinct features, vectorCount the total vector count read from
 *         the sentinel record (or Long.MAX_VALUE if no sentinel was present)
 * @throws IOException if reading the counts or writing a chunk fails
 */
private static Pair<Long[], List<Path>> createDictionaryChunks(Path featureCountPath,
                                              Path dictionaryPathBase,
                                              Configuration baseConf,
                                              int chunkSizeInMegabytes) throws IOException {
  List<Path> chunkPaths = Lists.newArrayList();
  Configuration conf = new Configuration(baseConf);
  FileSystem fs = FileSystem.get(featureCountPath.toUri(), conf);
  long chunkSizeLimit = chunkSizeInMegabytes * 1024L * 1024L;
  int chunkIndex = 0;
  Path chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
  chunkPaths.add(chunkPath);
  SequenceFile.Writer freqWriter =
    new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);
  try {
    long currentChunkSize = 0;
    long featureCount = 0;
    // Defaults to "unbounded" until the -1 sentinel record supplies the real count.
    long vectorCount = Long.MAX_VALUE;
    Path filesPattern = new Path(featureCountPath, OUTPUT_FILES_PATTERN);
    for (Pair<IntWritable,LongWritable> record
         : new SequenceFileDirIterable<IntWritable,LongWritable>(filesPattern,
                                                                 PathType.GLOB,
                                                                 null,
                                                                 null,
                                                                 true,
                                                                 conf)) {
      // Roll over to a fresh chunk once the soft size limit has been exceeded.
      if (currentChunkSize > chunkSizeLimit) {
        Closeables.closeQuietly(freqWriter);
        chunkIndex++;
        chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
        chunkPaths.add(chunkPath);
        freqWriter = new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);
        currentChunkSize = 0;
      }
      // Fixed per-record footprint: overhead + serialized int key + long value.
      int fieldSize = SEQUENCEFILE_BYTE_OVERHEAD + Integer.SIZE / 8 + Long.SIZE / 8;
      currentChunkSize += fieldSize;
      IntWritable key = record.getFirst();
      LongWritable value = record.getSecond();
      if (key.get() >= 0) {
        // Regular feature-id record: persist it into the current chunk.
        freqWriter.append(key, value);
      } else if (key.get() == -1) {
        // Sentinel record carrying the total vector count rather than a feature.
        vectorCount = value.get();
      }
      // Track the highest feature id seen so far.
      featureCount = Math.max(key.get(), featureCount);
    }
    // Convert the max zero-based feature id into a count of features.
    featureCount++;
    Long[] counts = {featureCount, vectorCount};
    return new Pair<Long[], List<Path>>(counts, chunkPaths);
  } finally {
    Closeables.closeQuietly(freqWriter);
  }
}