This article collects typical usage examples of the Java class org.apache.tez.mapreduce.hadoop.MRHelpers. If you are unsure what MRHelpers does, how to use it, or want to see it in real code, the curated class code examples below should help.
The MRHelpers class belongs to the org.apache.tez.mapreduce.hadoop package. 15 code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
Example 1: initialize
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
public List<Event> initialize(TezRootInputInitializerContext rootInputContext) throws Exception {
  MRInputUserPayloadProto userPayloadProto = MRHelpers
      .parseMRInputPayload(rootInputContext.getUserPayload());
  Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
      .getConfigurationBytes());
  try {
    ReflectionUtils.getClazz(RELOCALIZATION_TEST_CLASS_NAME);
    LOG.info("Class found");
    FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(new Path("/tmp/relocalizationfilefound"));
  } catch (TezUncheckedException e) {
    LOG.info("Class not found");
  }
  return super.initialize(rootInputContext);
}
Example 2: createUserPayload
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
/**
 * Helper API to generate the user payload for the MRInput and
 * MRInputAMSplitGenerator (if used). The InputFormat will be invoked by Tez
 * at DAG runtime to generate the input splits.
 *
 * @param conf
 *          Configuration for the InputFormat
 * @param inputFormatClassName
 *          Name of the class of the InputFormat
 * @param useNewApi
 *          use the new mapreduce API or the old mapred API
 * @param groupSplitsInAM
 *          do grouping of splits in the AM. If true then splits generated by
 *          the InputFormat will be grouped in the AM based on available
 *          resources, locality etc. This option may be set to true only when
 *          using MRInputAMSplitGenerator as the initializer class in
 *          {@link Vertex#addInput(String, org.apache.tez.dag.api.InputDescriptor, Class)}
 * @return the user payload to be set on the InputDescriptor of MRInput
 * @throws IOException
 */
public static byte[] createUserPayload(Configuration conf,
    String inputFormatClassName, boolean useNewApi, boolean groupSplitsInAM)
    throws IOException {
  Configuration inputConf = new JobConf(conf);
  String wrappedInputFormatClassName = null;
  String configInputFormatClassName = null;
  if (groupSplitsInAM) {
    wrappedInputFormatClassName = inputFormatClassName;
    configInputFormatClassName = TezGroupedSplitsInputFormat.class.getName();
  } else {
    wrappedInputFormatClassName = null;
    configInputFormatClassName = inputFormatClassName;
  }
  inputConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
      configInputFormatClassName);
  inputConf.setBoolean("mapred.mapper.new-api", useNewApi);
  MRHelpers.translateVertexConfToTez(inputConf);
  MRHelpers.doJobClientMagic(inputConf);
  if (groupSplitsInAM) {
    return MRHelpers.createMRInputPayloadWithGrouping(inputConf,
        wrappedInputFormatClassName);
  } else {
    return MRHelpers.createMRInputPayload(inputConf, null);
  }
}
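For reference, a minimal wiring sketch in the style of Example 7 below; tezConf, vertex, and the input path are assumptions for illustration:
// Hypothetical usage sketch -- mirrors the wiring shown in Example 7:
Configuration inputConf = new Configuration(tezConf);
inputConf.set(FileInputFormat.INPUT_DIR, "/path/to/input"); // assumed input path
byte[] payload = MRInput.createUserPayload(inputConf,
    TextInputFormat.class.getName(), true /* useNewApi */, true /* groupSplitsInAM */);
InputDescriptor id = new InputDescriptor(MRInput.class.getName()).setUserPayload(payload);
vertex.addInput("MRInput", id, MRInputAMSplitGenerator.class);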
Example 3: initialize
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
@Override
public void initialize(OutputCommitterContext context) throws IOException {
  byte[] userPayload = context.getUserPayload();
  if (userPayload == null) {
    jobConf = new JobConf();
  } else {
    jobConf = new JobConf(MRHelpers.createConfFromUserPayload(userPayload));
  }
  // Read all credentials into the credentials instance stored in JobConf.
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
      context.getDAGAttemptNumber());
  this.context = context;
  committer = getOutputCommitter(this.context);
  jobContext = getJobContextFromVertexContext(context);
  initialized = true;
}
Example 4: createUserPayload
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
/**
 * Creates the user payload to be set on the OutputDescriptor for MROutput
 */
private UserPayload createUserPayload() {
  // set which API is being used, always
  conf.setBoolean(MRJobConfig.NEW_API_REDUCER_CONFIG, useNewApi);
  conf.setBoolean(MRJobConfig.NEW_API_MAPPER_CONFIG, useNewApi);
  if (outputFormatProvided) {
    if (useNewApi) {
      conf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormat.getName());
    } else {
      conf.set("mapred.output.format.class", outputFormat.getName());
    }
  }
  MRHelpers.translateMRConfToTez(conf);
  try {
    return TezUtils.createUserPayloadFromConf(conf);
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }
}
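The returned payload would then be attached to an OutputDescriptor. A plausible sketch using the create()-style descriptor API from the same Tez generation as UserPayload; the vertex, sink name, and committer choice are assumptions:
// Hypothetical wiring sketch -- descriptor factory methods assumed available:
UserPayload payload = createUserPayload();
OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
    .setUserPayload(payload);
DataSinkDescriptor sink = DataSinkDescriptor.create(od,
    OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);
vertex.addDataSink("MROutput", sink);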
Example 5: createCustomDataSource
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private DataSourceDescriptor createCustomDataSource() throws IOException {
  setupBasicConf(conf);
  MRHelpers.translateMRConfToTez(conf);
  Collection<URI> uris = maybeGetURIsForCredentials();
  UserPayload payload = MRInputHelpersInternal.createMRInputPayload(
      conf, groupSplitsInAM, sortSplitsInAM);
  DataSourceDescriptor ds = DataSourceDescriptor
      .create(InputDescriptor.create(inputClassName).setUserPayload(payload),
          customInitializerDescriptor, null);
  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }
  if (uris != null) {
    ds.addURIsForCredentials(uris);
  }
  return ds;
}
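A DataSourceDescriptor built this way is attached to the vertex that reads it. A minimal sketch, assuming the Vertex factory API from the same Tez generation; MyProcessor and the names are placeholders:
// Hypothetical usage sketch -- MyProcessor is a placeholder class name:
Vertex mapVertex = Vertex.create("map",
    ProcessorDescriptor.create(MyProcessor.class.getName()));
mapVertex.addDataSource("MRInput", createCustomDataSource());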
Example 6: createGeneratorDataSource
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private DataSourceDescriptor createGeneratorDataSource() throws IOException {
  setupBasicConf(conf);
  MRHelpers.translateMRConfToTez(conf);
  Collection<URI> uris = maybeGetURIsForCredentials();
  UserPayload payload = MRInputHelpersInternal.createMRInputPayload(
      conf, groupSplitsInAM, sortSplitsInAM);
  DataSourceDescriptor ds = DataSourceDescriptor.create(
      InputDescriptor.create(inputClassName).setUserPayload(payload),
      InputInitializerDescriptor.create(MRInputAMSplitGenerator.class.getName()), null);
  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }
  if (uris != null) {
    ds.addURIsForCredentials(uris);
  }
  return ds;
}
Example 7: createDAG
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  Configuration inputConf = new Configuration(tezConf);
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  InputDescriptor id = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(MRInput.createUserPayload(inputConf,
          TextInputFormat.class.getName(), true, true));

  Configuration outputConf = new Configuration(tezConf);
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(MROutput.createUserPayload(
          outputConf, TextOutputFormat.class.getName(), true));

  Vertex tokenizerVertex = new Vertex("tokenizer", new ProcessorDescriptor(
      TokenProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf));
  tokenizerVertex.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex summerVertex = new Vertex("summer",
      new ProcessorDescriptor(
          SumProcessor.class.getName()), 1, MRHelpers.getReduceResource(tezConf));
  summerVertex.addOutput("MROutput", od, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  DAG dag = new DAG("WordCount");
  dag.addVertex(tokenizerVertex)
      .addVertex(summerVertex)
      .addEdge(
          new Edge(tokenizerVertex, summerVertex, edgeConf.createDefaultEdgeProperty()));
  return dag;
}
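Once built, the DAG would typically be submitted through a TezClient. A minimal submission sketch, assuming the standard TezClient lifecycle (create, start, submitDAG, stop) and a running cluster:
// Hypothetical submission sketch -- assumes a valid tezConf and cluster:
TezClient tezClient = TezClient.create("WordCount", tezConf);
tezClient.start();
try {
  DAGClient dagClient = tezClient.submitDAG(dag);
  dagClient.waitForCompletion(); // block until the DAG finishes
} finally {
  tezClient.stop();
}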
Example 8: createDag
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {
  long largeOutSizePerTask = largeOutSize / numTasks;
  long smallOutSizePerTask = smallOutSize / numTasks;

  DAG dag = new DAG("IntersectDataGen");

  byte[] streamOutputPayload = createPayloadForOutput(largeOutPath, tezConf);
  byte[] hashOutputPayload = createPayloadForOutput(smallOutPath, tezConf);
  byte[] expectedOutputPayload = createPayloadForOutput(expectedOutputPath, tezConf);

  Vertex genDataVertex = new Vertex("datagen", new ProcessorDescriptor(
      GenDataProcessor.class.getName()).setUserPayload(GenDataProcessor.createConfiguration(
      largeOutSizePerTask, smallOutSizePerTask)), numTasks, MRHelpers.getMapResource(tezConf));
  genDataVertex.addOutput(STREAM_OUTPUT_NAME,
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(streamOutputPayload),
      MROutputCommitter.class);
  genDataVertex.addOutput(HASH_OUTPUT_NAME,
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(hashOutputPayload),
      MROutputCommitter.class);
  genDataVertex.addOutput(EXPECTED_OUTPUT_NAME,
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(expectedOutputPayload),
      MROutputCommitter.class);

  dag.addVertex(genDataVertex);
  return dag;
}
Example 9: createUserPayload
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
/**
 * Creates the user payload to be set on the OutputDescriptor for MROutput
 * @param conf Configuration for the OutputFormat
 * @param outputFormatName Name of the class of the OutputFormat
 * @param useNewApi Use the new mapreduce API or the old mapred API
 * @return the user payload to be set on the OutputDescriptor of MROutput
 * @throws IOException
 */
public static byte[] createUserPayload(Configuration conf,
    String outputFormatName, boolean useNewApi) throws IOException {
  Configuration outputConf = new JobConf(conf);
  outputConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatName);
  outputConf.setBoolean("mapred.mapper.new-api", useNewApi);
  MRHelpers.translateVertexConfToTez(outputConf);
  MRHelpers.doJobClientMagic(outputConf);
  return TezUtils.createUserPayloadFromConf(outputConf);
}
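For reference, a minimal wiring sketch mirroring Example 7; tezConf, vertex, and the output path are assumptions for illustration:
// Hypothetical usage sketch -- mirrors the wiring shown in Example 7:
Configuration outputConf = new Configuration(tezConf);
outputConf.set(FileOutputFormat.OUTDIR, "/path/to/output"); // assumed output path
byte[] payload = MROutput.createUserPayload(outputConf,
    TextOutputFormat.class.getName(), true /* useNewApi */);
OutputDescriptor od = new OutputDescriptor(MROutput.class.getName()).setUserPayload(payload);
vertex.addOutput("MROutput", od, MROutputCommitter.class);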
Example 10: setupMapReduceEnv
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private void setupMapReduceEnv(Configuration jobConf,
    Map<String, String> environment, boolean isMap) throws IOException {
  if (isMap) {
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.MAP_JAVA_OPTS, ""),
        "map",
        MRJobConfig.MAP_JAVA_OPTS,
        MRJobConfig.MAP_ENV);
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS, ""),
        "map",
        MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS,
        MRJobConfig.MAPRED_ADMIN_USER_ENV);
  } else {
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.REDUCE_JAVA_OPTS, ""),
        "reduce",
        MRJobConfig.REDUCE_JAVA_OPTS,
        MRJobConfig.REDUCE_ENV);
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS, ""),
        "reduce",
        MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS,
        MRJobConfig.MAPRED_ADMIN_USER_ENV);
  }
  MRHelpers.updateEnvironmentForMRTasks(jobConf, environment, isMap);
}
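A minimal calling sketch; jobConf and the two environment maps are assumptions for illustration:
// Hypothetical call sites -- the populated maps would feed container launch contexts:
Map<String, String> mapEnv = new HashMap<String, String>();
Map<String, String> reduceEnv = new HashMap<String, String>();
setupMapReduceEnv(jobConf, mapEnv, true);     // map-task environment
setupMapReduceEnv(jobConf, reduceEnv, false); // reduce-task environment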
Example 11: initialize
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
public List<Event> initialize() throws IOException {
  getContext().requestInitialMemory(0L, null); // mandatory call
  MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload =
      MRHelpers.parseMRInputPayload(getContext().getUserPayload());
  Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
      "Split information not expected in " + this.getClass().getName());
  Configuration conf = MRHelpers.createConfFromByteString(mrUserPayload.getConfigurationBytes());
  this.jobConf = new JobConf(conf);
  // Add tokens to the jobConf - in case they are accessed within the RR / IF
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

  TaskAttemptID taskAttemptId = new TaskAttemptID(
      new TaskID(
          Long.toString(getContext().getApplicationId().getClusterTimestamp()),
          getContext().getApplicationId().getId(), TaskType.MAP,
          getContext().getTaskIndex()),
      getContext().getTaskAttemptNumber());

  jobConf.set(MRJobConfig.TASK_ATTEMPT_ID,
      taskAttemptId.toString());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
      getContext().getDAGAttemptNumber());

  this.inputRecordCounter = getContext().getCounters().findCounter(
      TaskCounter.INPUT_RECORDS_PROCESSED);

  useNewApi = this.jobConf.getUseNewMapper();
  return null;
}
Example 12: setupMapReduceEnv
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private void setupMapReduceEnv(Configuration jobConf,
    Map<String, String> environment, boolean isMap) throws IOException {
  if (isMap) {
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.MAP_JAVA_OPTS, ""),
        "map",
        MRJobConfig.MAP_JAVA_OPTS,
        MRJobConfig.MAP_ENV);
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS, ""),
        "map",
        MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS,
        MRJobConfig.MAPRED_ADMIN_USER_ENV);
  } else {
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.REDUCE_JAVA_OPTS, ""),
        "reduce",
        MRJobConfig.REDUCE_JAVA_OPTS,
        MRJobConfig.REDUCE_ENV);
    warnForJavaLibPath(
        jobConf.get(MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS, ""),
        "reduce",
        MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS,
        MRJobConfig.MAPRED_ADMIN_USER_ENV);
  }
  MRHelpers.updateEnvBasedOnMRTaskEnv(jobConf, environment, isMap);
}
Example 13: createDistributorDataSource
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private DataSourceDescriptor createDistributorDataSource() throws IOException {
  InputSplitInfo inputSplitInfo;
  setupBasicConf(conf);
  try {
    inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(conf, false, true, 0);
  } catch (Exception e) {
    throw new TezUncheckedException(e);
  }
  MRHelpers.translateMRConfToTez(conf);
  UserPayload payload = MRInputHelpersInternal.createMRInputPayload(conf,
      inputSplitInfo.getSplitsProto());
  Credentials credentials = null;
  if (getCredentialsForSourceFilesystem && inputSplitInfo.getCredentials() != null) {
    credentials = inputSplitInfo.getCredentials();
  }
  DataSourceDescriptor ds = DataSourceDescriptor.create(
      InputDescriptor.create(inputClassName).setUserPayload(payload),
      InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
      inputSplitInfo.getNumTasks(), credentials,
      VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), null);
  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }
  return ds;
}
Example 14: createDag
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
    throws IOException {
  DAG dag = new DAG("IntersectValidate");

  // Configuration for src1
  Configuration lhsInputConf = new Configuration(tezConf);
  lhsInputConf.set(FileInputFormat.INPUT_DIR, lhs.toUri().toString());
  byte[] streamInputPayload = MRInput.createUserPayload(lhsInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for src2
  Configuration rhsInputConf = new Configuration(tezConf);
  rhsInputConf.set(FileInputFormat.INPUT_DIR, rhs.toUri().toString());
  byte[] hashInputPayload = MRInput.createUserPayload(rhsInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for intermediate output - shared by Vertex1 and Vertex2
  // This should only be setting selective keys from the underlying conf. Fix after there's a
  // better mechanism to configure the IOs.
  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), NullWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  // Change the way resources are setup - no MRHelpers
  Vertex lhsVertex = new Vertex(LHS_INPUT_NAME, new ProcessorDescriptor(
      ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("lhs", new InputDescriptor(
          MRInput.class.getName()).setUserPayload(streamInputPayload),
      MRInputAMSplitGenerator.class);
  Vertex rhsVertex = new Vertex(RHS_INPUT_NAME, new ProcessorDescriptor(
      ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("rhs", new InputDescriptor(
          MRInput.class.getName()).setUserPayload(hashInputPayload),
      MRInputAMSplitGenerator.class);
  Vertex intersectValidateVertex = new Vertex("intersectvalidate",
      new ProcessorDescriptor(IntersectValidateProcessor.class.getName()),
      numPartitions, MRHelpers.getReduceResource(tezConf));

  Edge e1 = new Edge(lhsVertex, intersectValidateVertex, edgeConf.createDefaultEdgeProperty());
  Edge e2 = new Edge(rhsVertex, intersectValidateVertex, edgeConf.createDefaultEdgeProperty());
  dag.addVertex(lhsVertex).addVertex(rhsVertex).addVertex(intersectValidateVertex).addEdge(e1)
      .addEdge(e2);
  return dag;
}
Example 15: createDag
import org.apache.tez.mapreduce.hadoop.MRHelpers; // import the required package/class
private DAG createDag(TezConfiguration tezConf, Path streamPath, Path hashPath, Path outPath,
    int numPartitions) throws IOException {
  DAG dag = new DAG("IntersectExample");

  // Configuration for src1
  Configuration streamInputConf = new Configuration(tezConf);
  streamInputConf.set(FileInputFormat.INPUT_DIR, streamPath.toUri().toString());
  byte[] streamInputPayload = MRInput.createUserPayload(streamInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for src2
  Configuration hashInputConf = new Configuration(tezConf);
  hashInputConf.set(FileInputFormat.INPUT_DIR, hashPath.toUri().toString());
  byte[] hashInputPayload = MRInput.createUserPayload(hashInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for intermediate output - shared by Vertex1 and Vertex2
  // This should only be setting selective keys from the underlying conf. Fix after there's a
  // better mechanism to configure the IOs.
  UnorderedPartitionedKVEdgeConfigurer edgeConf =
      UnorderedPartitionedKVEdgeConfigurer
          .newBuilder(Text.class.getName(), NullWritable.class.getName(),
              HashPartitioner.class.getName(), null).build();

  Configuration finalOutputConf = new Configuration(tezConf);
  finalOutputConf.set(FileOutputFormat.OUTDIR, outPath.toUri().toString());
  byte[] finalOutputPayload = MROutput.createUserPayload(finalOutputConf,
      TextOutputFormat.class.getName(), true);

  // Change the way resources are setup - no MRHelpers
  Vertex streamFileVertex = new Vertex("partitioner1",
      new ProcessorDescriptor(ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("streamfile",
      new InputDescriptor(MRInput.class.getName())
          .setUserPayload(streamInputPayload), MRInputAMSplitGenerator.class);
  Vertex hashFileVertex = new Vertex("partitioner2", new ProcessorDescriptor(
      ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("hashfile",
      new InputDescriptor(MRInput.class.getName())
          .setUserPayload(hashInputPayload), MRInputAMSplitGenerator.class);
  Vertex intersectVertex = new Vertex("intersect", new ProcessorDescriptor(
      IntersectProcessor.class.getName()), numPartitions,
      MRHelpers.getReduceResource(tezConf)).addOutput("finalOutput",
      new OutputDescriptor(MROutput.class.getName())
          .setUserPayload(finalOutputPayload), MROutputCommitter.class);

  Edge e1 = new Edge(streamFileVertex, intersectVertex, edgeConf.createDefaultEdgeProperty());
  Edge e2 = new Edge(hashFileVertex, intersectVertex, edgeConf.createDefaultEdgeProperty());
  dag.addVertex(streamFileVertex).addVertex(hashFileVertex).addVertex(intersectVertex)
      .addEdge(e1).addEdge(e2);
  return dag;
}