This article collects typical usages of the Java method org.apache.hadoop.mapreduce.Partitioner.getPartition. If you have been wondering what Partitioner.getPartition does and how to use it, the curated code samples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.Partitioner.
The following presents 7 code examples of the Partitioner.getPartition method, sorted roughly by popularity.
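Before the examples, a quick sketch of the getPartition contract itself: a Partitioner maps each (key, value) record to one of numPartitions reducer buckets and must return an index in [0, numPartitions). The ModPartitioner class below is a made-up illustration rather than part of any example; it simply mirrors what the default HashPartitioner does for an IntWritable key. In a job it would be enabled with job.setPartitionerClass(ModPartitioner.class).

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Illustrative only: routes each record by its integer key, like HashPartitioner would.
public class ModPartitioner extends Partitioner<IntWritable, Text> {
  @Override
  public int getPartition(IntWritable key, Text value, int numPartitions) {
    // Mask the sign bit so the result always falls in [0, numPartitions).
    return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}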
Example 1: assertData
import org.apache.hadoop.mapreduce.Partitioner; // import the class that provides the method
private void assertData(int totalShardCount) throws IOException {
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  for (int i = 0; i < totalShardCount; i++) {
    HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
    DirectoryReader reader = DirectoryReader.open(directory);
    int numDocs = reader.numDocs();
    for (int d = 0; d < numDocs; d++) {
      Document document = reader.document(d);
      IndexableField field = document.getField("id");
      Integer id = (Integer) field.numericValue();
      int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
      assertEquals(i, partition);
    }
    reader.close();
  }
}
Example 2: createShard
import org.apache.hadoop.mapreduce.Partitioner; // import the class that provides the method
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);
  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
Example 3: getEntry
import org.apache.hadoop.mapreduce.Partitioner; // import the class that provides the method
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
                      Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Example 4: getEntry
import org.apache.hadoop.mapreduce.Partitioner; // import the class that provides the method
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
                      Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int readerLength = readers.length;
  int part;
  if (readerLength <= 1) {
    part = 0;
  } else {
    part = partitioner.getPartition(key, value, readers.length);
  }
  return readers[part].get(key, value);
}
Example 5: validate
import org.apache.hadoop.mapreduce.Partitioner; // import the class that provides the method
/**
 * Validates that each non-empty partition hfile was written with the expected partitioning
 * function. For every non-empty partition it reads the first row key and computes its partition
 * with the client-supplied partitioning function; if the computed partition number differs from
 * the file's actual partition index, an exception is thrown. If all partition hfiles are empty,
 * an exception is also thrown.
 *
 * @param parts full absolute paths of all partitions
 * @param partitionerType type of partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards)
    throws IOException {
  boolean hasNonEmptyPartition = false;
  HColumnDescriptor columnDescriptor = new HColumnDescriptor();
  // Disable block cache to ensure it reads the actual file content.
  columnDescriptor.setBlockCacheEnabled(false);
  for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
    Path fileToBeValidated = parts.get(shardIndex);
    HFile.Reader reader = null;
    try {
      FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
      CacheConfig cc = new CacheConfig(conf, columnDescriptor);
      reader = HFile.createReader(fs, fileToBeValidated, cc);
      Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
      byte[] rowKey = reader.getFirstRowKey();
      if (rowKey == null) {
        LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
        reader.close();
        continue;
      }
      hasNonEmptyPartition = true;
      BytesWritable key = new BytesWritable(rowKey);
      int partition = partitioner.getPartition(key, null, numShards);
      if (partition != shardIndex) {
        throw new IllegalArgumentException(
            String.format("wrong partition type %s for key %s in partition %d, expected %d",
                partitionerType.toString(), new String(key.getBytes()), shardIndex, partition)
        );
      }
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
  }
  if (!hasNonEmptyPartition) {
    throw new IllegalArgumentException("all partitions are empty");
  }
}
Example 6: generateHFiles
import org.apache.hadoop.mapreduce.Partitioner; // import the class that provides the method
/**
 * Generates hfiles for testing purposes.
 *
 * @param sourceFileSystem source file system
 * @param conf configuration for hfile
 * @param outputFolder output folder for generated hfiles
 * @param partitionerType partitioner type
 * @param numOfPartitions number of partitions
 * @param numOfKeys number of keys
 * @return list of generated hfiles
 * @throws IOException if hfile creation goes wrong
 */
public static List<Path> generateHFiles(FileSystem sourceFileSystem, Configuration conf,
                                        File outputFolder, PartitionerType partitionerType,
                                        int numOfPartitions, int numOfKeys)
    throws IOException {
  StoreFile.Writer[] writers = new StoreFile.Writer[numOfPartitions];
  for (int i = 0; i < numOfPartitions; i++) {
    writers[i] = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), sourceFileSystem, 4096)
        .withFilePath(new Path(String.format("%s/%s", outputFolder.getAbsoluteFile(),
            TerrapinUtil.formatPartitionName(i))))
        .withCompression(Compression.Algorithm.NONE)
        .build();
  }
  Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
  for (int i = 0; i < numOfKeys; i++) {
    byte[] key = String.format("%06d", i).getBytes();
    byte[] value;
    if (i <= 1) {
      value = "".getBytes();
    } else {
      value = ("v" + (i + 1)).getBytes();
    }
    KeyValue kv = new KeyValue(key, Bytes.toBytes("cf"), Bytes.toBytes(""), value);
    int partition = partitioner.getPartition(new BytesWritable(key), new BytesWritable(value),
        numOfPartitions);
    writers[partition].append(kv);
  }
  for (int i = 0; i < numOfPartitions; i++) {
    writers[i].close();
  }
  return Lists.transform(Lists.newArrayList(writers), new Function<StoreFile.Writer, Path>() {
    @Override
    public Path apply(StoreFile.Writer writer) {
      return writer.getPath();
    }
  });
}
Example 7: run
import org.apache.hadoop.mapreduce.Partitioner; // import the class that provides the method
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    JobBuilder.printUsage(this, "<path> <key>");
    return -1;
  }
  Path path = new Path(args[0]);
  IntWritable key = new IntWritable(Integer.parseInt(args[1]));
  Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
  Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
  Text val = new Text();
  Reader reader = readers[partitioner.getPartition(key, val, readers.length)];
  Writable entry = reader.get(key, val);
  if (entry == null) {
    System.err.println("Key not found: " + key);
    return -1;
  }
  NcdcRecordParser parser = new NcdcRecordParser();
  IntWritable nextKey = new IntWritable();
  do {
    parser.parse(val.toString());
    System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
  } while (reader.next(nextKey, val) && key.equals(nextKey));
  return 0;
}
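Example 7's run method belongs to a Tool implementation, so it is normally launched through ToolRunner so that generic Hadoop options are parsed before run is called. A minimal sketch of such a driver entry point is shown below; the class name LookupTool is made up for illustration and stands in for whatever class contains the run method above.

// Hypothetical driver for the Tool in Example 7; LookupTool is an illustrative name only.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new LookupTool(), args);
  System.exit(exitCode);
}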