This page collects typical usage examples of the Java method org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm. If you are wondering how to use RegionSplitter.SplitAlgorithm in Java, or what it is good for, the curated code examples below may help. You can also read further about the enclosing class org.apache.hadoop.hbase.util.RegionSplitter.
Below are 12 code examples of the RegionSplitter.SplitAlgorithm method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Java code examples.
Example 1: setInput
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
/**
 * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
 * @param conf the job to configure
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir a temporary directory to restore the snapshot into. Current user should
 *   have write permissions to this directory, and this should not be a subdirectory of rootdir.
 *   After the job is finished, restoreDir can be deleted.
 * @param numSplitsPerRegion how many input splits to generate per region
 * @param splitAlgo SplitAlgorithm to be used when generating InputSplits
 * @throws IOException if an error occurs
 */
public static void setInput(Configuration conf, String snapshotName, Path restoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion)
    throws IOException {
  conf.set(SNAPSHOT_NAME_KEY, snapshotName);
  if (numSplitsPerRegion < 1) {
    throw new IllegalArgumentException("numSplits must be >= 1, " +
        "illegal numSplits : " + numSplitsPerRegion);
  }
  if (splitAlgo == null && numSplitsPerRegion > 1) {
    throw new IllegalArgumentException("Split algo can't be null when numSplits > 1");
  }
  if (splitAlgo != null) {
    conf.set(SPLIT_ALGO, splitAlgo.getClass().getName());
  }
  conf.setInt(NUM_SPLITS_PER_REGION, numSplitsPerRegion);
  Path rootDir = FSUtils.getRootDir(conf);
  FileSystem fs = rootDir.getFileSystem(conf);
  restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
  // TODO: restore from record readers to parallelize.
  RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
  conf.set(RESTORE_DIR_KEY, restoreDir.toString());
}
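A minimal sketch of how a caller might invoke this, assuming the method lives in TableSnapshotInputFormatImpl as in stock HBase; the snapshot name and restore path are hypothetical, and UniformSplit is one of the stock RegionSplitter algorithms:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
import org.apache.hadoop.hbase.util.RegionSplitter;

public class SnapshotInputSetup {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // UniformSplit spreads split points evenly over the raw byte keyspace.
    RegionSplitter.SplitAlgorithm splitAlgo = new RegionSplitter.UniformSplit();
    // Restore the snapshot into a scratch directory outside hbase.rootdir and
    // ask for 4 input splits per region.
    TableSnapshotInputFormatImpl.setInput(conf, "my_snapshot",
        new Path("/tmp/snapshot-restore"), splitAlgo, 4);
  }
}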
Example 2: testSplit3_10
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
@Test
public void testSplit3_10() throws Exception {
  int numRegions = 3;
  int cardinality = 10;
  RegionSplitter.SplitAlgorithm splitAlgorithm = new DecimalStringSplit(cardinality);
  byte[][] splits = splitAlgorithm.split(numRegions);
  assertEquals(numRegions - 1, splits.length);
  int digits = 2;
  assertEquals(String.format("%0" + digits + "d", 3), Bytes.toString(splits[0]));
  assertEquals(String.format("%0" + digits + "d", 6), Bytes.toString(splits[numRegions - 2]));
}
Example 3: testSplit10_10
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
@Test
public void testSplit10_10() throws Exception {
  int numRegions = 10;
  int cardinality = 10;
  RegionSplitter.SplitAlgorithm splitAlgorithm = new DecimalStringSplit(cardinality);
  byte[][] splits = splitAlgorithm.split(numRegions);
  assertEquals(numRegions - 1, splits.length);
  int digits = 2;
  assertEquals(String.format("%0" + digits + "d", 1), Bytes.toString(splits[0]));
  assertEquals(String.format("%0" + digits + "d", 9), Bytes.toString(splits[numRegions - 2]));
}
Example 4: testSplit300_1000
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
@Test
public void testSplit300_1000() throws Exception {
  int numRegions = 300;
  int cardinality = 1000;
  RegionSplitter.SplitAlgorithm splitAlgorithm = new DecimalStringSplit(cardinality);
  byte[][] splits = splitAlgorithm.split(numRegions);
  assertEquals(numRegions - 1, splits.length);
  int digits = 4;
  assertEquals(String.format("%0" + digits + "d", 3), Bytes.toString(splits[0]));
  assertEquals(String.format("%0" + digits + "d", 6), Bytes.toString(splits[1]));
  assertEquals(String.format("%0" + digits + "d", 10), Bytes.toString(splits[2]));
  assertEquals(String.format("%0" + digits + "d", 996), Bytes.toString(splits[numRegions - 2]));
}
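The expected values in these three tests follow a simple pattern: the i-th split point is cardinality * (i + 1) / numRegions in integer arithmetic, rendered as a zero-padded decimal string. DecimalStringSplit here is the project's own class, so its real implementation may differ; this is only a sketch of the arithmetic the assertions imply:
// Illustration only: compute the split points the tests above expect,
// i.e. cardinality * (i + 1) / numRegions, zero-padded to a fixed width.
static String[] expectedDecimalSplitPoints(int numRegions, int cardinality, int digits) {
  String[] points = new String[numRegions - 1];
  for (int i = 0; i < numRegions - 1; i++) {
    long value = (long) cardinality * (i + 1) / numRegions;  // integer division
    points[i] = String.format("%0" + digits + "d", value);
  }
  return points;
}
// expectedDecimalSplitPoints(300, 1000, 4) -> ["0003", "0006", "0010", ..., "0996"]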
Example 5: createSplitter
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  return new RegionSplitter.HexStringSplit();
}
Example 6: split
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
public List<byte[]> split(int numRegions, int cardinality) {
  List<byte[]> splitPointList = new ArrayList<>();
  RegionSplitter.SplitAlgorithm splitter = createSplitter(cardinality);
  Collections.addAll(splitPointList, splitter.split(numRegions));
  return splitPointList;
}
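To get a feel for what such a splitter returns, the stock HexStringSplit from Example 5 divides the hexadecimal keyspace into equal slices. The following is only an illustrative snippet; the class name is made up and the printed values are the typical ones for four regions:
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.RegionSplitter;

public class HexSplitDemo {
  public static void main(String[] args) {
    // Ask the stock HexStringSplit algorithm for the boundaries of 4 regions.
    RegionSplitter.SplitAlgorithm splitter = new RegionSplitter.HexStringSplit();
    byte[][] boundaries = splitter.split(4);  // returns numRegions - 1 split keys
    for (byte[] boundary : boundaries) {
      // Prints three evenly spaced hex keys, e.g. 40000000, 80000000, c0000000.
      System.out.println(Bytes.toString(boundary));
    }
  }
}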
Example 7: createSplitter
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
@Override
public RegionSplitter.SplitAlgorithm createSplitter(int cardinality) {
  return new DecimalStringSplit(cardinality);
}
Example 8: getSplits
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
public static List<InputSplit> getSplits(Configuration conf) throws IOException {
  String snapshotName = getSnapshotName(conf);
  Path rootDir = FSUtils.getRootDir(conf);
  FileSystem fs = rootDir.getFileSystem(conf);
  SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs);
  List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
  // TODO: mapred does not support scan as input API. Work around for now.
  Scan scan = extractScanFromConf(conf);
  // the temp dir where the snapshot is restored
  Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
  RegionSplitter.SplitAlgorithm splitAlgo = getSplitAlgo(conf);
  int numSplits = conf.getInt(NUM_SPLITS_PER_REGION, 1);
  return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits);
}
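getSplitAlgo(conf) is a helper in the same class. Given that Example 1 stores splitAlgo.getClass().getName() under SPLIT_ALGO, it presumably re-instantiates that class reflectively; the following is only a hedged sketch of that pattern, not the actual HBase implementation:
// Sketch: read the configured split algorithm back out of the Configuration.
// SPLIT_ALGO is the key Example 1 writes; the reflection details are assumed.
public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException {
  String className = conf.get(SPLIT_ALGO);
  if (className == null) {
    return null;  // nothing configured; callers fall back to one split per region
  }
  try {
    return conf.getClassByName(className)
        .asSubclass(RegionSplitter.SplitAlgorithm.class)
        .getDeclaredConstructor()
        .newInstance();
  } catch (ReflectiveOperationException e) {
    throw new IOException("Cannot create split algorithm " + className, e);
  }
}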
Example 9: initTableSnapshotMapJob
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
/**
 * Sets up the job for reading from a table snapshot. It bypasses HBase servers
 * and reads directly from snapshot files.
 *
 * @param snapshotName The name of the snapshot (of a table) to read from.
 * @param columns The columns to scan.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param jobConf The current job to adjust. Make sure the passed job is
 *   carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should
 *   have write permissions to this directory, and this should not be a subdirectory of rootdir.
 *   After the job is finished, the restore directory can be deleted.
 * @param splitAlgo the split algorithm used to generate input splits from each region
 * @param numSplitsPerRegion how many input splits to generate per region
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapJob(String snapshotName, String columns,
    Class<? extends TableMap> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, JobConf jobConf,
    boolean addDependencyJars, Path tmpRestoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo,
    int numSplitsPerRegion)
    throws IOException {
  TableSnapshotInputFormat.setInput(jobConf, snapshotName, tmpRestoreDir, splitAlgo,
      numSplitsPerRegion);
  initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, jobConf,
      addDependencyJars, TableSnapshotInputFormat.class);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(jobConf);
}
Example 10: initTableSnapshotMapperJob
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
/**
 * Sets up the job for reading from a table snapshot. It bypasses HBase servers
 * and reads directly from snapshot files.
 *
 * @param snapshotName The name of the snapshot (of a table) to read from.
 * @param scan The scan instance with the columns, time range etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *   carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should
 *   have write permissions to this directory, and this should not be a subdirectory of rootdir.
 *   After the job is finished, the restore directory can be deleted.
 * @param splitAlgo the split algorithm used to generate input splits from each region
 * @param numSplitsPerRegion how many input splits to generate per region
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, Job job,
    boolean addDependencyJars, Path tmpRestoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo,
    int numSplitsPerRegion)
    throws IOException {
  TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir, splitAlgo,
      numSplitsPerRegion);
  initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
      outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
  resetCacheConfig(job.getConfiguration());
}
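A hedged sketch of a driver that wires this up with the new mapreduce API; the job name, snapshot name, restore path, and MySnapshotMapper are hypothetical, while the method itself is the org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil overload shown above:
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.mapreduce.Job;

public class SnapshotScanDriver {
  public static Job createJob() throws IOException {
    Job job = Job.getInstance(HBaseConfiguration.create(), "scan-snapshot");
    Scan scan = new Scan();                      // add columns, time range, filters as needed
    TableMapReduceUtil.initTableSnapshotMapperJob(
        "my_snapshot",                           // snapshot to read from
        scan,
        MySnapshotMapper.class,                  // hypothetical TableMapper implementation
        ImmutableBytesWritable.class,            // output key class
        Result.class,                            // output value class
        job,
        true,                                    // addDependencyJars
        new Path("/tmp/snapshot-restore"),       // scratch dir outside hbase.rootdir
        new RegionSplitter.UniformSplit(),       // split algorithm for per-region splits
        2);                                      // two input splits per region
    return job;
  }
}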
Example 11: setInput
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
/**
 * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
 * @param job the job to configure
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir a temporary directory to restore the snapshot into. Current user should
 *   have write permissions to this directory, and this should not be a subdirectory of rootdir.
 *   After the job is finished, restoreDir can be deleted.
 * @param splitAlgo the split algorithm used to generate input splits from each region
 * @param numSplitsPerRegion how many input splits to generate per region
 * @throws IOException if an error occurs
 */
public static void setInput(JobConf job, String snapshotName, Path restoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir, splitAlgo,
      numSplitsPerRegion);
}
Example 12: setInput
import org.apache.hadoop.hbase.util.RegionSplitter; // import the package/class the method depends on
/**
 * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
 * @param job the job to configure
 * @param snapshotName the name of the snapshot to read from
 * @param restoreDir a temporary directory to restore the snapshot into. Current user should
 *   have write permissions to this directory, and this should not be a subdirectory of rootdir.
 *   After the job is finished, restoreDir can be deleted.
 * @param splitAlgo the split algorithm used to generate input splits from each region
 * @param numSplitsPerRegion how many input splits to generate per region
 * @throws IOException if an error occurs
 */
public static void setInput(Job job, String snapshotName, Path restoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormatImpl.setInput(job.getConfiguration(), snapshotName, restoreDir,
      splitAlgo, numSplitsPerRegion);
}