This page collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo. If you are unsure what OneFileInfo does, how to use it, or what it looks like in context, the examples selected below should help.
OneFileInfo is a static nested class of org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat. Two code examples are shown below.
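Before the full tests, here is a minimal sketch of the call pattern they share: build a OneBlockInfo array describing each block's offset, length, replica hosts, and rack paths, then let OneFileInfo.populateBlockInfo index the blocks by node and by rack. The signatures mirror the test code below; the host names and sizes are made up for illustration, and details of OneFileInfo may vary across Hadoop versions.
// A single 100-byte block of hdfs://file, replicated on hosts h1 and h2.
// The empty racks array means the default rack topology is used.
OneBlockInfo[] blocks = {
    new OneBlockInfo(new Path("hdfs://file"), 0, 100,
        new String[] { "h1", "h2" }, new String[0])
};
Map<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
Map<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
Map<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

// After this call, blockToNodes maps each block to its replica hosts and
// nodeToBlocks maps each host ("h1", "h2") to the blocks stored on it.
OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
    nodeToBlocks, rackToNodes);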
Example 1: testNodeDistribution
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo; // import the featured class

@Test
public void testNodeDistribution() throws IOException, InterruptedException {
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 60;
  long totLength = 0;
  long blockSize = 100;
  int numNodes = 10;
  long minSizeNode = 50;
  long minSizeRack = 50;
  int maxSplitSize = 200; // 2 blocks per split.

  // Node names h0..h9; block replica hosts below are drawn from this set.
  String[] locations = new String[numNodes];
  for (int i = 0; i < numNodes; i++) {
    locations[i] = "h" + i;
  }
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
  int hostCountBase = 0;
  // Generate block list. Replication 3 per block.
  for (int i = 0; i < numBlocks; i++) {
    int localHostCount = hostCountBase;
    String[] blockHosts = new String[3];
    for (int j = 0; j < 3; j++) {
      int hostNum = localHostCount % numNodes;
      blockHosts[j] = "h" + hostNum;
      localHostCount++;
    }
    hostCountBase++;
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks =
      new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes =
      new HashMap<OneBlockInfo, String[]>();
  Map<String, Set<OneBlockInfo>> nodeToBlocks =
      new TreeMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);

  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSplitSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSplitSize);
  assertEquals(expectedSplitCount, splits.size());

  // Ensure 90+% of the splits have node-local blocks.
  // 100% locality may not always be achieved.
  int numLocalSplits = 0;
  for (InputSplit inputSplit : splits) {
    assertEquals(maxSplitSize, inputSplit.getLength());
    if (inputSplit.getLocations().length == 1) {
      numLocalSplits++;
    }
  }
  assertTrue(numLocalSplits >= 0.9 * splits.size());
}
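In this test, totLength is 60 blocks × 100 bytes = 6,000 bytes, so with maxSplitSize = 200 the test expects 30 splits of exactly two blocks each. Because every block is replicated on 3 of the 10 hosts in a rotating pattern, most splits can be placed node-local, and the final assertion leaves 10% slack since perfect locality is not guaranteed.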
Example 2: testNodeInputSplit
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo; // import the featured class

@Test
public void testNodeInputSplit() throws IOException, InterruptedException {
  // Regression test for MAPREDUCE-4892. There are 2 nodes with all blocks on
  // both nodes. The grouping ensures that both nodes get splits instead of
  // just the first node.
  DummyInputFormat inFormat = new DummyInputFormat();
  int numBlocks = 12;
  long totLength = 0;
  long blockSize = 100;
  long maxSize = 200;
  long minSizeNode = 50;
  long minSizeRack = 50;
  String[] locations = { "h1", "h2" };
  String[] racks = new String[0];
  Path path = new Path("hdfs://file");

  OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
  for (int i = 0; i < numBlocks; ++i) {
    blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, locations,
        racks);
    totLength += blockSize;
  }

  List<InputSplit> splits = new ArrayList<InputSplit>();
  HashMap<String, Set<String>> rackToNodes =
      new HashMap<String, Set<String>>();
  HashMap<String, List<OneBlockInfo>> rackToBlocks =
      new HashMap<String, List<OneBlockInfo>>();
  HashMap<OneBlockInfo, String[]> blockToNodes =
      new HashMap<OneBlockInfo, String[]>();
  HashMap<String, Set<OneBlockInfo>> nodeToBlocks =
      new HashMap<String, Set<OneBlockInfo>>();

  OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes,
      nodeToBlocks, rackToNodes);

  inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
      maxSize, minSizeNode, minSizeRack, splits);

  int expectedSplitCount = (int) (totLength / maxSize);
  assertEquals(expectedSplitCount, splits.size());

  // With every block replicated on both nodes, the splits should be
  // distributed evenly: 3 node-local splits on each host.
  HashMultiset<String> nodeSplits = HashMultiset.create();
  for (int i = 0; i < expectedSplitCount; ++i) {
    InputSplit inSplit = splits.get(i);
    assertEquals(maxSize, inSplit.getLength());
    assertEquals(1, inSplit.getLocations().length);
    nodeSplits.add(inSplit.getLocations()[0]);
  }
  assertEquals(3, nodeSplits.count(locations[0]));
  assertEquals(3, nodeSplits.count(locations[1]));
}
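Here totLength is 12 × 100 = 1,200 bytes and maxSize = 200, giving 6 expected splits. Since every block is replicated on both h1 and h2, the MAPREDUCE-4892 fix makes the grouping spread the splits across both nodes rather than assigning them all to the first node, which is why the test asserts exactly 3 node-local splits per host.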