This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.JobSubmissionFiles.getJobSplitMetaFile. If you are unsure how to use JobSubmissionFiles.getJobSplitMetaFile, the curated examples below may help. You can also explore the containing class, org.apache.hadoop.mapreduce.JobSubmissionFiles, for further usage examples.
The following presents 8 code examples of the JobSubmissionFiles.getJobSplitMetaFile method, sorted by popularity by default.
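Before the examples, here is a minimal sketch of what the method does: given a job submission (staging) directory, getJobSplitMetaFile returns the Path of the split meta-info file inside it, and its companion getJobSplitFile returns the Path of the split data file. The staging path below is hypothetical, used only for illustration.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;

public class JobSplitMetaFileDemo {
  public static void main(String[] args) {
    // Hypothetical staging directory of a submitted job (for illustration only).
    Path jobSubmitDir = new Path("/tmp/staging/job_0001");
    // Resolves the split meta-info file under the job submission directory.
    Path splitMetaFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
    // Companion method resolving the split data file itself.
    Path splitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir);
    System.out.println("split meta file: " + splitMetaFile);
    System.out.println("split file: " + splitFile);
  }
}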
Example 1: writeNewSplits
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
/**
* Generate new-api mapreduce InputFormat splits
* @param jobContext JobContext required by InputFormat
* @param inputSplitDir Directory in which to generate splits information
*
* @return InputSplitInfo containing the split files' information and the
* location hints for each split generated, to be used to determine the parallelism of
* the map stage.
*
* @throws IOException
* @throws InterruptedException
* @throws ClassNotFoundException
*/
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
Path inputSplitDir) throws IOException, InterruptedException,
ClassNotFoundException {
org.apache.hadoop.mapreduce.InputSplit[] splits =
generateNewSplits(jobContext, null, 0);
Configuration conf = jobContext.getConfiguration();
JobSplitWriter.createSplitFiles(inputSplitDir, conf,
inputSplitDir.getFileSystem(conf), splits);
List<TaskLocationHint> locationHints =
new ArrayList<TaskLocationHint>(splits.length);
for (int i = 0; i < splits.length; ++i) {
locationHints.add(
new TaskLocationHint(new HashSet<String>(
Arrays.asList(splits[i].getLocations())), null));
}
return new InputSplitInfoDisk(
JobSubmissionFiles.getJobSplitFile(inputSplitDir),
JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
splits.length, locationHints, jobContext.getCredentials());
}
Example 2: writeOldSplits
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
/**
* Generate old-api mapred InputFormat splits
* @param jobConf JobConf required by InputFormat class
* @param inputSplitDir Directory in which to generate splits information
*
* @return InputSplitInfo containing the split files' information and the
* number of splits generated, to be used to determine the parallelism of
* the map stage.
*
* @throws IOException
*/
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
Path inputSplitDir) throws IOException {
org.apache.hadoop.mapred.InputSplit[] splits =
generateOldSplits(jobConf, null, 0);
JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
inputSplitDir.getFileSystem(jobConf), splits);
List<TaskLocationHint> locationHints =
new ArrayList<TaskLocationHint>(splits.length);
for (int i = 0; i < splits.length; ++i) {
locationHints.add(
new TaskLocationHint(new HashSet<String>(
Arrays.asList(splits[i].getLocations())), null));
}
return new InputSplitInfoDisk(
JobSubmissionFiles.getJobSplitFile(inputSplitDir),
JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
splits.length, locationHints, jobConf.getCredentials());
}
Example 3: writeOldSplits
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
/**
* Generate old-api mapred InputFormat splits
* @param jobConf JobConf required by InputFormat class
* @param inputSplitDir Directory in which to generate splits information
*
* @return InputSplitInfo containing the split files' information and the
* number of splits generated, to be used to determine the parallelism of
* the map stage.
*
* @throws IOException
*/
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
Path inputSplitDir) throws IOException {
org.apache.hadoop.mapred.InputSplit[] splits =
generateOldSplits(jobConf, false, true, 0);
JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
inputSplitDir.getFileSystem(jobConf), splits);
List<TaskLocationHint> locationHints =
new ArrayList<TaskLocationHint>(splits.length);
for (int i = 0; i < splits.length; ++i) {
locationHints.add(
TaskLocationHint.createTaskLocationHint(new HashSet<String>(
Arrays.asList(splits[i].getLocations())), null)
);
}
return new InputSplitInfoDisk(
JobSubmissionFiles.getJobSplitFile(inputSplitDir),
JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
splits.length, locationHints, jobConf.getCredentials());
}
Example 4: readSplitMetaInfo
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir)
throws IOException {
long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
FileStatus fStatus = fs.getFileStatus(metaSplitFile);
if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
throw new IOException("Split metadata size exceeded " +
maxMetaInfoSize +". Aborting job " + jobId);
}
FSDataInputStream in = fs.open(metaSplitFile);
byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
in.readFully(header);
if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
throw new IOException("Invalid header on split file");
}
int vers = WritableUtils.readVInt(in);
if (vers != JobSplit.META_SPLIT_VERSION) {
in.close();
throw new IOException("Unsupported split version " + vers);
}
int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo =
new JobSplit.TaskSplitMetaInfo[numSplits];
for (int i = 0; i < numSplits; i++) {
JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
splitMetaInfo.readFields(in);
JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
jobSplitFile,
splitMetaInfo.getStartOffset());
allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
splitMetaInfo.getLocations(),
splitMetaInfo.getInputDataLength());
}
in.close();
return allSplitMetaInfo;
}
Example 5: readSplitMetaInfo
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir)
throws IOException {
long maxMetaInfoSize = conf.getLong("mapreduce.jobtracker.split.metainfo.maxsize",
10000000L);
Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
FileStatus fStatus = fs.getFileStatus(metaSplitFile);
if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
throw new IOException("Split metadata size exceeded " +
maxMetaInfoSize +". Aborting job " + jobId);
}
FSDataInputStream in = fs.open(metaSplitFile);
byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
in.readFully(header);
if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
throw new IOException("Invalid header on split file");
}
int vers = WritableUtils.readVInt(in);
if (vers != JobSplit.META_SPLIT_VERSION) {
in.close();
throw new IOException("Unsupported split version " + vers);
}
int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo =
new JobSplit.TaskSplitMetaInfo[numSplits];
for (int i = 0; i < numSplits; i++) {
JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
splitMetaInfo.readFields(in);
JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString(),
splitMetaInfo.getStartOffset());
allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
splitMetaInfo.getLocations(),
splitMetaInfo.getInputDataLength());
}
in.close();
return allSplitMetaInfo;
}
Example 6: readSplitMetaInfo
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir)
throws IOException {
long maxMetaInfoSize = conf.getLong(JTConfig.JT_MAX_JOB_SPLIT_METAINFO_SIZE,
10000000L);
Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
FileStatus fStatus = fs.getFileStatus(metaSplitFile);
if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
throw new IOException("Split metadata size exceeded " +
maxMetaInfoSize +". Aborting job " + jobId);
}
FSDataInputStream in = fs.open(metaSplitFile);
byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
in.readFully(header);
if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
throw new IOException("Invalid header on split file");
}
int vers = WritableUtils.readVInt(in);
if (vers != JobSplit.META_SPLIT_VERSION) {
in.close();
throw new IOException("Unsupported split version " + vers);
}
int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo =
new JobSplit.TaskSplitMetaInfo[numSplits];
for (int i = 0; i < numSplits; i++) {
JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
splitMetaInfo.readFields(in);
JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString(),
splitMetaInfo.getStartOffset());
allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
splitMetaInfo.getLocations(),
splitMetaInfo.getInputDataLength());
}
in.close();
return allSplitMetaInfo;
}
Example 7: writeNewSplits
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
/**
* Generate new-api mapreduce InputFormat splits
* @param jobContext JobContext required by InputFormat
* @param inputSplitDir Directory in which to generate splits information
*
* @return InputSplitInfo containing the split files' information and the
* location hints for each split generated, to be used to determine the parallelism of
* the map stage.
*
* @throws IOException
* @throws InterruptedException
* @throws ClassNotFoundException
*/
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
Path inputSplitDir) throws IOException, InterruptedException,
ClassNotFoundException {
org.apache.hadoop.mapreduce.InputSplit[] splits =
generateNewSplits(jobContext, false, true, 0);
Configuration conf = jobContext.getConfiguration();
JobSplitWriter.createSplitFiles(inputSplitDir, conf,
inputSplitDir.getFileSystem(conf), splits);
List<TaskLocationHint> locationHints =
new ArrayList<TaskLocationHint>(splits.length);
for (int i = 0; i < splits.length; ++i) {
locationHints.add(
TaskLocationHint.createTaskLocationHint(new HashSet<String>(
Arrays.asList(splits[i].getLocations())), null)
);
}
return new InputSplitInfoDisk(
JobSubmissionFiles.getJobSplitFile(inputSplitDir),
JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
splits.length, locationHints, jobContext.getCredentials());
}
Example 8: readSplitMetaInfo
import org.apache.hadoop.mapreduce.JobSubmissionFiles; // import the package/class that this method depends on
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir)
throws IOException {
long maxMetaInfoSize = conf.getLong("mapreduce.jobtracker.split.metainfo.maxsize",
10000000L);
Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
FileStatus fStatus = fs.getFileStatus(metaSplitFile);
if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
throw new IOException("Split metadata size exceeded " +
maxMetaInfoSize +". Aborting job " + jobId);
}
FSDataInputStream in = fs.open(metaSplitFile);
byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
in.readFully(header);
if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
throw new IOException("Invalid header on split file");
}
int vers = WritableUtils.readVInt(in);
if (vers != JobSplit.META_SPLIT_VERSION) {
in.close();
throw new IOException("Unsupported split version " + vers);
}
int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo =
new JobSplit.TaskSplitMetaInfo[numSplits];
final int maxLocations =
conf.getInt(JobSplitWriter.MAX_SPLIT_LOCATIONS, Integer.MAX_VALUE);
for (int i = 0; i < numSplits; i++) {
JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
splitMetaInfo.readFields(in);
final int numLocations = splitMetaInfo.getLocations().length;
if (numLocations > maxLocations) {
throw new IOException("Max block location exceeded for split: #" + i +
" splitsize: " + numLocations + " maxsize: " + maxLocations);
}
JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString(),
splitMetaInfo.getStartOffset());
allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
splitMetaInfo.getLocations(),
splitMetaInfo.getInputDataLength());
}
in.close();
return allSplitMetaInfo;
}