

Java MapFileOutputFormat.getReaders Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat.getReaders. If you are wondering what MapFileOutputFormat.getReaders does, how to call it, or want to see it used in context, the curated code examples below should help. You can also explore further usage examples of its enclosing class, org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat.


The sections below present 6 code examples of the MapFileOutputFormat.getReaders method, sorted by popularity.
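Before the examples, here is a minimal, self-contained sketch of the lookup pattern most of them share: open one MapFile.Reader per partition with getReaders, route the key through the same partitioner the writing job used, and close every reader when done. The class name, the command-line arguments, and the IntWritable/Text key-value types are illustrative assumptions, not taken from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class MapFileLookupSketch {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		// One MapFile.Reader per partition in the job's output directory.
		MapFile.Reader[] readers = MapFileOutputFormat.getReaders(new Path(args[0]), conf);
		try {
			// getEntry hashes the key to the partition that holds it and
			// performs the lookup in that partition's MapFile.
			Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
			IntWritable key = new IntWritable(Integer.parseInt(args[1]));
			Text value = new Text();
			Writable entry = MapFileOutputFormat.getEntry(readers, partitioner, key, value);
			System.out.println(entry == null ? "key not found" : value.toString());
		} finally {
			for (MapFile.Reader reader : readers) {
				reader.close();
			}
		}
	}
}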

Example 1: getNumPages

import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the package/class this method depends on
private int getNumPages(Configuration conf, Path titlesDir)
		throws Exception {

	int numPages = 0;

	IntWritable pageNumber = new IntWritable();
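	// Each partition's MapFile is sorted by key, so finalKey() returns its
	// largest page number; the maximum across partitions is the page count.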
	MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
	for (int i = 0; i < readers.length; i++) {
		readers[i].finalKey(pageNumber);
		if (pageNumber.get() > numPages) {
			numPages = pageNumber.get();
		}
	}
	for (MapFile.Reader reader : readers) {
		reader.close();
	}

	return numPages;
}
 
Developer ID: yasserglez, Project: pagerank-hadoop, Lines: 20, Source: PageRank.java

Example 2: run

import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
	if (args.length != 2) {
		JobBuilder.printUsage(this, "<path> <key>");
		return -1;
	}
	Path path = new Path(args[0]);
	IntWritable key = new IntWritable(Integer.parseInt(args[1]));

	Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
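	// getEntry hashes the key with the same partitioner the writing job
	// used, so the lookup goes straight to the partition that holds it.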
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	Text val = new Text();
	Writable entry = MapFileOutputFormat.getEntry(readers, partitioner,
			key, val);
	if (entry == null) {
		System.err.println("Key not found: " + key);
		return -1;
	}
	NcdcRecordParser parser = new NcdcRecordParser();
	parser.parse(val.toString());
	System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
	return 0;
}
 
Developer ID: lhfei, Project: hadoop-in-action, Lines: 24, Source: LookupRecordByTemperature.java

Example 3: cleanup

import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the package/class this method depends on
@Override
protected void cleanup(Context context) throws IOException,
		InterruptedException {

	Configuration conf = context.getConfiguration();
	Path titlesDir = new Path(conf.get("pagerank.titles_dir"));

	MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	IntWritable page = new IntWritable();
	Text title = new Text();

	float[] pageRanks = new float[topN.size()];
	String[] titles = new String[topN.size()];

	// The order of the entries is reversed. The priority queue is in
	// non-decreasing order and we want the highest PageRank first.
	for (int i = pageRanks.length - 1; i >= 0; i--) {
		Map.Entry<Float, Integer> entry = topN.poll();
		// Get the title of the page from the title index.
		page.set(entry.getValue());
		MapFileOutputFormat.getEntry(readers, partitioner, page, title);
		pageRanks[i] = entry.getKey();
		titles[i] = title.toString();
	}

	for (MapFile.Reader reader : readers) {
		reader.close();
	}

	for (int i = 0; i < pageRanks.length; i++) {
		context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
	}
}
 
Developer ID: yasserglez, Project: pagerank-hadoop, Lines: 35, Source: PageRankTopNReducer.java

Example 4: cleanup

import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the package/class this method depends on
@Override
protected void cleanup(Context context) throws IOException,
		InterruptedException {

	Configuration conf = context.getConfiguration();
	Path titlesDir = new Path(conf.get("inlinks.titles_dir"));

	MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	IntWritable page = new IntWritable();
	Text title = new Text();

	int[] inLinks = new int[topN.size()];
	String[] titles = new String[topN.size()];

	for (int i = inLinks.length - 1; i >= 0; i--) {
		Map.Entry<Integer, Integer> entry = topN.poll();
		page.set(entry.getValue());
		MapFileOutputFormat.getEntry(readers, partitioner, page, title);
		inLinks[i] = entry.getKey();
		titles[i] = title.toString();
	}

	for (MapFile.Reader reader : readers) {
		reader.close();
	}

	for (int i = 0; i < inLinks.length; i++) {
		context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
	}
}
 
Developer ID: yasserglez, Project: pagerank-hadoop, Lines: 32, Source: InLinksTopNReducer.java

Example 5: run

import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
	if (args.length != 2) {
		JobBuilder.printUsage(this, "<path> <key>");
		return -1;
	}
	Path path = new Path(args[0]);
	IntWritable key = new IntWritable(Integer.parseInt(args[1]));

	Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	Text val = new Text();

	Reader reader = readers[partitioner.getPartition(key, val,
			readers.length)];
	Writable entry = reader.get(key, val);
	if (entry == null) {
		System.err.println("Key not found: " + key);
		return -1;
	}
	NcdcRecordParser parser = new NcdcRecordParser();
	IntWritable nextKey = new IntWritable();
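	// get() positioned the reader at the first matching record; next()
	// then walks forward while subsequent records still carry the same key.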
	do {
		parser.parse(val.toString());
		System.out.printf("%s\t%s\n", parser.getStationId(),
				parser.getYear());
	} while (reader.next(nextKey, val) && key.equals(nextKey));
	return 0;
}
 
Developer ID: lhfei, Project: hadoop-in-action, Lines: 30, Source: LookupRecordsByTemperature.java

Example 6: map

import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; // import the package/class this method depends on
@Override
public void map(ShortArrayWritable inKey, MatrixBlockWritable inValue,
		Context context) throws IOException, InterruptedException {

	// This task gets each block M_{i,j}, loads the corresponding stripe j
	// of the vector v_{k-1} and produces the partial result of the stripe i
	// of the vector v_k.

	Configuration conf = context.getConfiguration();
	int iter = Integer.parseInt(conf.get("pagerank.iteration"));
	int numPages = Integer.parseInt(conf.get("pagerank.num_pages"));
	short blockSize = Short.parseShort(conf.get("pagerank.block_size"));

	Writable[] blockIndexes = inKey.get();
	short i = ((ShortWritable) blockIndexes[0]).get();
	short j = ((ShortWritable) blockIndexes[1]).get();

	int vjSize = (j > numPages / blockSize) ? (numPages % blockSize) : blockSize;
	FloatWritable[] vj = new FloatWritable[vjSize];

	if (iter == 1) {
		// Initial PageRank vector with 1/n for all pages.
		for (int k = 0; k < vj.length; k++) {
			vj[k] = new FloatWritable(1.0f / numPages);
		}
	} else {
		// Load the stripe j of the vector v_{k-1} from the MapFiles.
		Path outputDir = MapFileOutputFormat.getOutputPath(context).getParent();
		Path vjDir = new Path(outputDir, "v" + (iter - 1));
		MapFile.Reader[] readers = MapFileOutputFormat.getReaders(vjDir, conf);
		Partitioner<ShortWritable, FloatArrayWritable> partitioner =
				new HashPartitioner<ShortWritable, FloatArrayWritable>();
		ShortWritable key = new ShortWritable(j);
		FloatArrayWritable value = new FloatArrayWritable();
		MapFileOutputFormat.getEntry(readers, partitioner, key, value);
		Writable[] writables = value.get();
		for (int k = 0; k < vj.length; k++) {
			vj[k] = (FloatWritable) writables[k];
		}
		for (MapFile.Reader reader : readers) {
			reader.close();
		}
	}

	// Initialize the partial result i of the vector v_k.
	int viSize = (i > numPages / blockSize) ? (numPages % blockSize) : blockSize;
	FloatWritable[] vi = new FloatWritable[viSize];
	for (int k = 0; k < vi.length; k++) {
		vi[k] = new FloatWritable(0);
	}

	// Multiply M_{i,j} by the stripe j of the vector v_{k-1} to obtain the
	// partial result i of the vector v_k.
	Writable[][] blockColumns = inValue.get();
	for (int k = 0; k < blockColumns.length; k++) {
		Writable[] blockColumn = blockColumns[k];
		if (blockColumn.length > 0) {
			int vDegree = ((ShortWritable) blockColumn[0]).get();
			for (int columnIndex = 1; columnIndex < blockColumn.length; columnIndex++) {
				int l = ((ShortWritable) blockColumn[columnIndex]).get();
			vi[l].set(vi[l].get() + (1.0f / vDegree) * vj[k].get());
			}
		}
	}

	context.write(new ShortWritable(i), new FloatArrayWritable(vi));
}
 
Developer ID: yasserglez, Project: pagerank-hadoop, Lines: 68, Source: PageRankIterationMapper.java
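For completeness, here is a minimal driver sketch of the writing side (an assumed example, not taken from any of the projects above): a job whose output format is MapFileOutputFormat produces one MapFile per partition, which is exactly the layout getReaders expects to open. Keys arrive at each reducer in sorted order, so MapFile's sorted-key requirement is satisfied automatically.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;

public class MapFileWriterSketch {
	public static void main(String[] args) throws Exception {
		Job job = Job.getInstance(new Configuration(), "write mapfiles");
		job.setJarByClass(MapFileWriterSketch.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		// Write one MapFile per partition: the layout that
		// MapFileOutputFormat.getReaders(path, conf) later opens.
		job.setOutputFormatClass(MapFileOutputFormat.class);
		MapFileOutputFormat.setOutputPath(job, new Path(args[1]));
		// With the default identity mapper and reducer, text-file byte
		// offsets become the (already sorted) MapFile keys.
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(Text.class);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}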


Note: the org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat.getReaders examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors, and distribution or use should follow each project's license. Please do not republish without permission.