This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat.getReaders. If you are wondering exactly what MapFileOutputFormat.getReaders does, how to use it, or where to find examples of it, the selected code examples below may help. You can also read further about the enclosing class, org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat.
The following presents six code examples of MapFileOutputFormat.getReaders, ordered by popularity by default.
Example 1: getNumPages
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the class the method depends on
private int getNumPages(Configuration conf, Path titlesDir)
    throws Exception {
  int numPages = 0;
  IntWritable pageNumber = new IntWritable();
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
  // The page count is the largest page number stored in any partition,
  // which is each partition's final (last) key since MapFiles are sorted.
  for (int i = 0; i < readers.length; i++) {
    readers[i].finalKey(pageNumber);
    if (pageNumber.get() > numPages) {
      numPages = pageNumber.get();
    }
  }
  for (MapFile.Reader reader : readers) {
    reader.close();
  }
  return numPages;
}
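Note that the readers here are closed only on the success path; if finalKey throws, they leak. A minimal variant with a try/finally guard (a sketch reusing the same conf and titlesDir, not the author's original code) could read:
MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
int numPages = 0;
try {
  IntWritable pageNumber = new IntWritable();
  for (MapFile.Reader reader : readers) {
    reader.finalKey(pageNumber); // largest key in this partition
    numPages = Math.max(numPages, pageNumber.get());
  }
} finally {
  for (MapFile.Reader reader : readers) {
    reader.close(); // close even if finalKey fails
  }
}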
Example 2: run
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the class the method depends on
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    JobBuilder.printUsage(this, "<path> <key>");
    return -1;
  }
  Path path = new Path(args[0]);
  IntWritable key = new IntWritable(Integer.parseInt(args[1]));
  Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
  Partitioner<IntWritable, Text> partitioner =
      new HashPartitioner<IntWritable, Text>();
  Text val = new Text();
  // Look the key up in the partition that the partitioner maps it to.
  Writable entry =
      MapFileOutputFormat.getEntry(readers, partitioner, key, val);
  if (entry == null) {
    System.err.println("Key not found: " + key);
    return -1;
  }
  NcdcRecordParser parser = new NcdcRecordParser();
  parser.parse(val.toString());
  System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
  return 0;
}
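For context, MapFileOutputFormat.getEntry uses the partitioner to pick the single reader whose partition can contain the key and then delegates to that reader; it behaves roughly like the following sketch (the real implementation lives inside MapFileOutputFormat):
int part = partitioner.getPartition(key, val, readers.length);
Writable entry = readers[part].get(key, val);
Example 5 below performs this routing by hand.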
Example 3: cleanup
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the class the method depends on
@Override
protected void cleanup(Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Path titlesDir = new Path(conf.get("pagerank.titles_dir"));
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
  Partitioner<IntWritable, Text> partitioner =
      new HashPartitioner<IntWritable, Text>();
  IntWritable page = new IntWritable();
  Text title = new Text();
  float[] pageRanks = new float[topN.size()];
  String[] titles = new String[topN.size()];
  // The order of the entries is reversed. The priority queue is in
  // non-decreasing order and we want the highest PageRank first.
  for (int i = pageRanks.length - 1; i >= 0; i--) {
    Map.Entry<Float, Integer> entry = topN.poll();
    // Get the title of the page from the title index.
    page.set(entry.getValue());
    MapFileOutputFormat.getEntry(readers, partitioner, page, title);
    pageRanks[i] = entry.getKey();
    titles[i] = title.toString();
  }
  for (MapFile.Reader reader : readers) {
    reader.close();
  }
  for (int i = 0; i < pageRanks.length; i++) {
    context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
  }
}
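The topN field itself is not part of this snippet. A declaration consistent with the comment above, a min-heap of (PageRank, page id) entries in which poll() returns the smallest retained entry first, might look like this (hypothetical; the code that fills and bounds the queue is not shown):
// Hypothetical field assumed by the cleanup method above.
private PriorityQueue<Map.Entry<Float, Integer>> topN =
    new PriorityQueue<Map.Entry<Float, Integer>>(16,
        new Comparator<Map.Entry<Float, Integer>>() {
          @Override
          public int compare(Map.Entry<Float, Integer> a,
              Map.Entry<Float, Integer> b) {
            return a.getKey().compareTo(b.getKey());
          }
        });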
Example 4: cleanup
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the class the method depends on
@Override
protected void cleanup(Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Path titlesDir = new Path(conf.get("inlinks.titles_dir"));
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
  Partitioner<IntWritable, Text> partitioner =
      new HashPartitioner<IntWritable, Text>();
  IntWritable page = new IntWritable();
  Text title = new Text();
  int[] inLinks = new int[topN.size()];
  String[] titles = new String[topN.size()];
  // Fill the arrays from the back: the queue yields entries in
  // non-decreasing order of in-link count, and we want the highest first.
  for (int i = inLinks.length - 1; i >= 0; i--) {
    Map.Entry<Integer, Integer> entry = topN.poll();
    page.set(entry.getValue());
    MapFileOutputFormat.getEntry(readers, partitioner, page, title);
    inLinks[i] = entry.getKey();
    titles[i] = title.toString();
  }
  for (MapFile.Reader reader : readers) {
    reader.close();
  }
  for (int i = 0; i < inLinks.length; i++) {
    context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
  }
}
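This cleanup mirrors example 3: the arrays are again filled from the back, with the integer in-link count taking the place of the float PageRank score.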
Example 5: run
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the class the method depends on
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    JobBuilder.printUsage(this, "<path> <key>");
    return -1;
  }
  Path path = new Path(args[0]);
  IntWritable key = new IntWritable(Integer.parseInt(args[1]));
  Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
  Partitioner<IntWritable, Text> partitioner =
      new HashPartitioner<IntWritable, Text>();
  Text val = new Text();
  // Select the reader for the partition that holds the key.
  Reader reader =
      readers[partitioner.getPartition(key, val, readers.length)];
  Writable entry = reader.get(key, val);
  if (entry == null) {
    System.err.println("Key not found: " + key);
    return -1;
  }
  NcdcRecordParser parser = new NcdcRecordParser();
  IntWritable nextKey = new IntWritable();
  do {
    parser.parse(val.toString());
    System.out.printf("%s\t%s\n", parser.getStationId(),
        parser.getYear());
  } while (reader.next(nextKey, val) && key.equals(nextKey));
  return 0;
}
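Unlike example 2, this variant selects the reader by hand so it can keep iterating: a MapFile may contain multiple entries with the same key, and after get() positions the reader at the first match, the do/while keeps printing records via next() until the key changes or the partition is exhausted.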
Example 6: map
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the class the method depends on
@Override
public void map(ShortArrayWritable inKey, MatrixBlockWritable inValue,
    Context context) throws IOException, InterruptedException {
  // This task gets each block M_{i,j}, loads the corresponding stripe j
  // of the vector v_{k-1} and produces the partial result of the stripe i
  // of the vector v_k.
  Configuration conf = context.getConfiguration();
  int iter = Integer.parseInt(conf.get("pagerank.iteration"));
  int numPages = Integer.parseInt(conf.get("pagerank.num_pages"));
  short blockSize = Short.parseShort(conf.get("pagerank.block_size"));
  Writable[] blockIndexes = inKey.get();
  short i = ((ShortWritable) blockIndexes[0]).get();
  short j = ((ShortWritable) blockIndexes[1]).get();
  // The last stripe may be shorter than blockSize.
  int vjSize = (j > numPages / blockSize) ? (numPages % blockSize) : blockSize;
  FloatWritable[] vj = new FloatWritable[vjSize];
  if (iter == 1) {
    // Initial PageRank vector with 1/n for all pages.
    for (int k = 0; k < vj.length; k++) {
      vj[k] = new FloatWritable(1.0f / numPages);
    }
  } else {
    // Load the stripe j of the vector v_{k-1} from the MapFiles.
    Path outputDir = MapFileOutputFormat.getOutputPath(context).getParent();
    Path vjDir = new Path(outputDir, "v" + (iter - 1));
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(vjDir, conf);
    Partitioner<ShortWritable, FloatArrayWritable> partitioner =
        new HashPartitioner<ShortWritable, FloatArrayWritable>();
    ShortWritable key = new ShortWritable(j);
    FloatArrayWritable value = new FloatArrayWritable();
    MapFileOutputFormat.getEntry(readers, partitioner, key, value);
    Writable[] writables = value.get();
    for (int k = 0; k < vj.length; k++) {
      vj[k] = (FloatWritable) writables[k];
    }
    for (MapFile.Reader reader : readers) {
      reader.close();
    }
  }
  // Initialize the partial result i of the vector v_k.
  int viSize = (i > numPages / blockSize) ? (numPages % blockSize) : blockSize;
  FloatWritable[] vi = new FloatWritable[viSize];
  for (int k = 0; k < vi.length; k++) {
    vi[k] = new FloatWritable(0);
  }
  // Multiply M_{i,j} by the stripe j of the vector v_{k-1} to obtain the
  // partial result i of the vector v_k.
  Writable[][] blockColumns = inValue.get();
  for (int k = 0; k < blockColumns.length; k++) {
    Writable[] blockColumn = blockColumns[k];
    if (blockColumn.length > 0) {
      // Each non-empty column stores the source page's out-degree first,
      // followed by the rows that hold non-zero entries.
      int vDegree = ((ShortWritable) blockColumn[0]).get();
      for (int columnIndex = 1; columnIndex < blockColumn.length; columnIndex++) {
        int l = ((ShortWritable) blockColumn[columnIndex]).get();
        vi[l].set(vi[l].get() + (1.0f / vDegree) * vj[k].get());
      }
    }
  }
  context.write(new ShortWritable(i), new FloatArrayWritable(vi));
}
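To make the stripe-size arithmetic concrete (numbers invented for this note): with numPages = 10 and blockSize = 4, numPages / blockSize is 2 and numPages % blockSize is 2, so stripes 1 and 2 each hold the full blockSize of 4 pages, while stripe 3, the only index greater than 2, holds the 2 leftover pages.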