This page collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo. If you are unsure what OneFileInfo does, how to use it, or what it looks like in context, the examples selected below should help.
OneFileInfo is a static nested class of org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat. Two code examples are shown below.
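Before the full tests, here is a minimal sketch of the call pattern they share: build a OneBlockInfo array describing each block's offset, length, replica hosts, and rack paths, then let OneFileInfo.populateBlockInfo index the blocks by node and by rack. The signatures mirror the test code below; the host names and sizes are made up for illustration, and details of OneFileInfo may vary across Hadoop versions.
// A single 100-byte block of hdfs://file, replicated on hosts h1 and h2.
// The empty racks array means the default rack topology is used.
OneBlockInfo[] blocks = {
    new OneBlockInfo(new Path("hdfs://file"), 0, 100,
        new String[] { "h1", "h2" }, new String[0])
};
Map<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
Map<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
Map<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

// After this call, blockToNodes maps each block to its replica hosts and
// nodeToBlocks maps each host ("h1", "h2") to the blocks stored on it.
OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
    nodeToBlocks, rackToNodes);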
Example 1: testNodeDistribution
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo; // import the featured class

@Test
public void testNodeDistribution() throws IOException, InterruptedException {
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 60;
  long totLength = 0;
  long blockSize = 100;
  int numNodes = 10;
  long minSizeNode = 50;
  long minSizeRack = 50;
  int maxSplitSize = 200; // 2 blocks per split.

  // Node names h0..h9; block replica hosts below are drawn from this set.
  String[] locations = new String[numNodes];
  for (int i = 0; i < numNodes; i++) {
    locations[i] = "h" + i;
  }
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
  int hostCountBase = 0;
  // Generate block list. Replication 3 per block.
  for (int i = 0; i < numBlocks; i++) {
    int localHostCount = hostCountBase;
    String[] blockHosts = new String[3];
    for (int j = 0; j < 3; j++) {
      int hostNum = localHostCount % numNodes;
      blockHosts[j] = "h" + hostNum;
      localHostCount++;
    }
    hostCountBase++;
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks =
      new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes =
      new HashMap<OneBlockInfo, String[]>();
  Map<String, Set<OneBlockInfo>> nodeToBlocks =
      new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);

  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSplitSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSplitSize);
  assertEquals(expectedSplitCount, splits.size());

  // Ensure 90+% of the splits have node-local blocks.
  // 100% locality may not always be achieved.
  int numLocalSplits = 0;
  for (InputSplit inputSplit : splits) {
    assertEquals(maxSplitSize, inputSplit.getLength());
    if (inputSplit.getLocations().length == 1) {
      numLocalSplits++;
    }
  }
  assertTrue(numLocalSplits >= 0.9 * splits.size());
}
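In this test, totLength is 60 blocks × 100 bytes = 6,000 bytes, so with maxSplitSize = 200 the test expects 30 splits of exactly two blocks each. Because every block is replicated on 3 of the 10 hosts in a rotating pattern, most splits can be placed node-local, and the final assertion leaves 10% slack since perfect locality is not guaranteed.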
Example 2: testNodeInputSplit
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo; // import the featured class

@Test
public void testNodeInputSplit() throws IOException, InterruptedException {
  // Regression test for MAPREDUCE-4892. There are 2 nodes with all blocks on
  // both nodes. The grouping ensures that both nodes get splits instead of
  // just the first node.
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 12;
  long totLength = 0;
  long blockSize = 100;
  long maxSize = 200;
  long minSizeNode = 50;
  long minSizeRack = 50;
  String[] locations = { "h1", "h2" };
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
  for (int i = 0; i < numBlocks; ++i) {
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, locations,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes =
      new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks =
      new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes =
      new HashMap<OneBlockInfo, String[]>();
  HashMap<String, Set<OneBlockInfo>> nodeToBlocks =
      new HashMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);

  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSize);
  assertEquals(expectedSplitCount, splits.size());

  // With every block replicated on both nodes, the splits should be
  // distributed evenly: 3 node-local splits on each host.
  HashMultiset<String> nodeSplits = HashMultiset.create();
  for (int i = 0; i < expectedSplitCount; ++i) {
    InputSplit inSplit = splits.get(i);
    assertEquals(maxSize, inSplit.getLength());
    assertEquals(1, inSplit.getLocations().length);
    nodeSplits.add(inSplit.getLocations()[0]);
  }
  assertEquals(3, nodeSplits.count(locations[0]));
  assertEquals(3, nodeSplits.count(locations[1]));
}
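Here totLength is 12 × 100 = 1,200 bytes and maxSize = 200, giving 6 expected splits. Since every block is replicated on both h1 and h2, the MAPREDUCE-4892 fix makes the grouping spread the splits across both nodes rather than assigning them all to the first node, which is why the test asserts exactly 3 node-local splits per host.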