本文整理汇总了 Java 中 org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig 类的典型用法代码示例。如果您正苦于以下问题:Java OrderedPartitionedKVEdgeConfig 类的具体用法?Java OrderedPartitionedKVEdgeConfig 怎么用?Java OrderedPartitionedKVEdgeConfig 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
OrderedPartitionedKVEdgeConfig 类属于 org.apache.tez.runtime.library.conf 包,在下文中一共展示了 OrderedPartitionedKVEdgeConfig 类的 10 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: TezDAGBuilder
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
*
* @param executionName
* @param tezClient
* @param executionConfig
*/
public TezDAGBuilder(String executionName, ExecutionContextAwareTezClient tezClient, Properties executionConfig) {
this.dag = DAG.create(executionName + "_" + System.currentTimeMillis());
this.tezClient = tezClient;
// TODO need to figure out when and why would the Edge be different and
// how to configure it
this.edgeConf = OrderedPartitionedKVEdgeConfig.newBuilder("io.dstream.tez.io.KeyWritable",
"io.dstream.tez.io.ValueWritable", TezDelegatingPartitioner.class.getName(), null).build();
this.dagExecutor = new TezDagExecutor(this.tezClient, this.dag);
}
示例2: testVertexFailuresMaxPercent
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Submits a two-vertex DAG ("Parent" -> "Child", two tasks each, both running
 * {@code FailingAttemptProcessor}) with {@code TEZ_VERTEX_FAILURES_MAXPERCENT} set to "50.0f"
 * and {@code TEZ_AM_TASK_MAX_FAILED_ATTEMPTS} set to 1, then asserts that the DAG ends in the
 * {@code SUCCEEDED} state. The client is always stopped, even when the assertion fails.
 */
@Test(timeout = 60000)
public void testVertexFailuresMaxPercent() throws TezException, InterruptedException, IOException {
  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  tezConf.set(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, "50.0f");
  tezConf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);

  TezClient tezClient = TezClient.create("TestVertexFailuresMaxPercent", tezConf);
  tezClient.start();
  try {
    DAG dag = DAG.create("TestVertexFailuresMaxPercent");
    String processorClass = FailingAttemptProcessor.class.getName();
    Vertex parent = Vertex.create("Parent", ProcessorDescriptor.create(processorClass), 2);
    Vertex child = Vertex.create("Child", ProcessorDescriptor.create(processorClass), 2);

    // Text keys / IntWritable values, hash partitioned; settings are picked up from tezConf.
    OrderedPartitionedKVEdgeConfig shuffleConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
        .setFromConfiguration(tezConf)
        .build();

    dag.addVertex(parent);
    dag.addVertex(child);
    dag.addEdge(Edge.create(parent, child, shuffleConf.createDefaultEdgeProperty()));

    DAGClient dagClient = tezClient.submitDAG(dag);
    dagClient.waitForCompletion();
    DAGStatus.State finalState = dagClient.getDAGStatus(null).getState();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, finalState);
  } finally {
    tezClient.stop();
  }
}
示例3: createDAG
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Builds the "topk" DAG: a tokenizer vertex reading text from {@code inputPath}, a sum vertex,
 * and a single-task writer vertex writing text to {@code outputPath}.
 *
 * @param tezConf configuration copied for the input and output descriptors
 * @param inputPath location read through {@code TextInputFormat}
 * @param outputPath location written through {@code TextOutputFormat}
 * @param columnIndex decimal integer string; passed as the tokenizer's payload
 * @param top decimal integer string; passed as the payload of the sum and writer vertices
 * @param numPartitions decimal integer string; parallelism of the sum vertex
 * @return the assembled DAG
 * @throws IOException if building a descriptor or payload fails with an I/O error
 * @throws NumberFormatException if any of the numeric strings cannot be parsed
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    String columnIndex, String top, String numPartitions) throws IOException {
  DataSourceDescriptor dataSource = MRInput
      .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
      .build();
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
      .build();

  ProcessorDescriptor tokenizerProcessor = ProcessorDescriptor
      .create(TokenProcessor.class.getName())
      .setUserPayload(createPayload(Integer.valueOf(columnIndex)));
  Vertex tokenizerVertex = Vertex.create(TOKENIZER, tokenizerProcessor)
      .addDataSource(INPUT, dataSource);

  int topK = Integer.parseInt(top);
  ProcessorDescriptor sumProcessor = ProcessorDescriptor
      .create(SumProcessor.class.getName())
      .setUserPayload(createPayload(topK));
  Vertex sumVertex = Vertex.create(SUM, sumProcessor, Integer.parseInt(numPartitions));

  // The writer runs as exactly one task because it needs the global picture of the data set;
  // several writer tasks would each emit the top K of their own partition, producing multiple
  // top-K lists.
  ProcessorDescriptor writerProcessor = ProcessorDescriptor
      .create(Writer.class.getName())
      .setUserPayload(createPayload(topK));
  Vertex writerVertex = Vertex.create(WRITER, writerProcessor, 1)
      .addDataSink(OUTPUT, dataSink);

  OrderedPartitionedKVEdgeConfig tokenSumEdge = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
      .build();
  UnorderedKVEdgeConfig sumWriterEdge = UnorderedKVEdgeConfig
      .newBuilder(IntWritable.class.getName(), Text.class.getName())
      .build();

  Edge tokenizerToSum =
      Edge.create(tokenizerVertex, sumVertex, tokenSumEdge.createDefaultEdgeProperty());
  Edge sumToWriter =
      Edge.create(sumVertex, writerVertex, sumWriterEdge.createDefaultBroadcastEdgeProperty());

  DAG dag = DAG.create("topk");
  return dag
      .addVertex(tokenizerVertex)
      .addVertex(sumVertex)
      .addVertex(writerVertex)
      .addEdge(tokenizerToSum)
      .addEdge(sumToWriter);
}
示例4: createDag
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Builds the join-validation DAG: two text-reading vertices (data sources "lhs" and "rhs")
 * that both feed a single "joinvalidate" vertex over ordered, partitioned key-value edges.
 *
 * @param tezConf configuration copied for each input and applied to the edge configuration
 * @param lhs path read by the left-hand vertex
 * @param rhs path read by the right-hand vertex
 * @param numPartitions parallelism of the "joinvalidate" vertex
 * @return the assembled DAG
 * @throws IOException if building the DAG components fails with an I/O error
 */
@VisibleForTesting
DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
    throws IOException {
  DAG dag = DAG.create(getDagName());
  if (getDefaultExecutionContext() != null) {
    dag.setExecutionContext(getDefaultExecutionContext());
  }

  // Intermediate-output configuration shared by both input vertices. Ideally this would copy
  // only selected keys from the underlying conf; revisit once there is a better mechanism for
  // configuring the IOs. setFromConfiguration is optional and lets command-line parameters
  // override the config options.
  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), NullWritable.class.getName(), HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  ProcessorDescriptor lhsProcessor =
      ProcessorDescriptor.create(ForwardingProcessor.class.getName());
  Vertex lhsVertex = Vertex.create(LHS_INPUT_NAME, lhsProcessor)
      .addDataSource("lhs", MRInput
          .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
              lhs.toUri().toString())
          .groupSplits(!isDisableSplitGrouping())
          .generateSplitsInAM(!isGenerateSplitInClient())
          .build());
  setVertexExecutionContext(lhsVertex, getLhsExecutionContext());

  ProcessorDescriptor rhsProcessor =
      ProcessorDescriptor.create(ForwardingProcessor.class.getName());
  Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, rhsProcessor)
      .addDataSource("rhs", MRInput
          .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
              rhs.toUri().toString())
          .groupSplits(!isDisableSplitGrouping())
          .generateSplitsInAM(!isGenerateSplitInClient())
          .build());
  setVertexExecutionContext(rhsVertex, getRhsExecutionContext());

  ProcessorDescriptor validateProcessor =
      ProcessorDescriptor.create(JoinValidateProcessor.class.getName());
  Vertex joinValidateVertex = Vertex.create("joinvalidate", validateProcessor, numPartitions);
  setVertexExecutionContext(joinValidateVertex, getValidateExecutionContext());

  Edge lhsEdge = Edge.create(lhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
  Edge rhsEdge = Edge.create(rhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());

  dag.addVertex(lhsVertex);
  dag.addVertex(rhsVertex);
  dag.addVertex(joinValidateVertex);
  dag.addEdge(lhsEdge);
  dag.addEdge(rhsEdge);
  return dag;
}
示例5: createDAG
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Builds the "WordCount" DAG: a tokenizer vertex that reads text from {@code inputPath} and a
 * summation vertex that writes text to {@code outputPath}, joined by an ordered, partitioned
 * key-value edge.
 *
 * @param tezConf configuration copied for the input/output descriptors and applied to the edge
 * @param inputPath location read through {@code TextInputFormat}
 * @param outputPath location written through {@code TextOutputFormat}
 * @param numPartitions parallelism of the summation vertex
 * @return the assembled DAG
 * @throws IOException if building a descriptor fails with an I/O error
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    int numPartitions) throws IOException {
  // Input descriptor: MRInput reading text data from the input path via TextInputFormat.
  DataSourceDescriptor dataSource = MRInput
      .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
      .groupSplits(!isDisableSplitGrouping())
      .generateSplitsInAM(!isGenerateSplitInClient())
      .build();

  // Output descriptor: MROutput writing text data to the output path via TextOutputFormat.
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
      .build();

  // Tokenizer vertex: reads from the data source and tokenizes with TokenProcessor. No
  // parallelism is given here; the number of tasks is decided from the information provided
  // by the data source descriptor.
  ProcessorDescriptor tokenProcessor = ProcessorDescriptor.create(TokenProcessor.class.getName());
  Vertex tokenizerVertex = Vertex.create(TOKENIZER, tokenProcessor)
      .addDataSource(INPUT, dataSource);

  // Edge between the producer (tokenizer) and the consumer (summation). To sum in parallel,
  // the tokenized data is partitioned by word so that a given word always lands in the same
  // partition, and counts are grouped per word. OrderedPartitionedKVEdgeConfig is a helper
  // that sets up a matching Tez input/output pair doing this partitioning and grouping; the
  // same edge could be configured by hand without it. The key type is Text (the word), the
  // value type is IntWritable (the count), and a hash-based partitioner is used.
  // setFromConfiguration is optional and lets command-line parameters override the config
  // options.
  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  // Summation vertex: reads the tokenized data and sums it with SumProcessor. Its task count
  // is the number of partitions used to distribute the summation, made configurable through
  // numPartitions.
  ProcessorDescriptor sumProcessor = ProcessorDescriptor.create(SumProcessor.class.getName());
  Vertex summationVertex = Vertex.create(SUMMATION, sumProcessor, numPartitions)
      .addDataSink(OUTPUT, dataSink);

  // The jar containing this class is assumed to be part of the Tez jars; otherwise it would
  // have to be added to the vertices as a local file.
  // Create the DAG, add the vertices, and connect producer to consumer through the edge.
  DAG dag = DAG.create("WordCount");
  dag.addVertex(tokenizerVertex);
  dag.addVertex(summationVertex);
  dag.addEdge(Edge.create(tokenizerVertex, summationVertex, edgeConf.createDefaultEdgeProperty()));
  return dag;
}
示例6: createDAG
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Builds a three-stage DAG named {@code dagName}: tokenizer -> summation -> sorter. The
 * tokenizer reads text from {@code inputPath}; the single-task sorter writes text to
 * {@code outputPath}.
 *
 * @param tezConf configuration copied for the input/output descriptors and applied to both edges
 * @param inputPath location read through {@code TextInputFormat}
 * @param outputPath location written through {@code TextOutputFormat}
 * @param numPartitions parallelism of the summation vertex
 * @param disableSplitGrouping when true, split grouping is turned off on the input
 * @param isGenerateSplitInClient when true, splits are not generated in the AM
 * @param dagName name given to the DAG
 * @return the assembled DAG
 * @throws IOException if building a descriptor fails with an I/O error
 */
public static DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    int numPartitions, boolean disableSplitGrouping, boolean isGenerateSplitInClient, String dagName) throws IOException {
  DataSourceDescriptor dataSource = MRInput
      .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
      .groupSplits(!disableSplitGrouping)
      .generateSplitsInAM(!isGenerateSplitInClient)
      .build();
  DataSinkDescriptor dataSink = MROutput
      .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
      .build();

  ProcessorDescriptor tokenProcessor = ProcessorDescriptor.create(TokenProcessor.class.getName());
  Vertex tokenizerVertex = Vertex.create(TOKENIZER, tokenProcessor);
  tokenizerVertex.addDataSource(INPUT, dataSource);

  // Text key / IntWritable value: brings the counts for each word into the same partition.
  // setFromConfiguration is optional and lets command-line parameters override the options.
  OrderedPartitionedKVEdgeConfig summationEdgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  // This vertex reads intermediate data from an input edge and writes intermediate data to
  // an output edge.
  ProcessorDescriptor sumProcessor = ProcessorDescriptor.create(SumProcessor.class.getName());
  Vertex summationVertex = Vertex.create(SUMMATION, sumProcessor, numPartitions);

  // IntWritable key / Text value: brings all words with the same count into the same
  // partition, ordered by count with words grouped per count. setFromConfiguration is
  // optional and lets command-line parameters override the options.
  OrderedPartitionedKVEdgeConfig sorterEdgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(IntWritable.class.getName(), Text.class.getName(), HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  // A single task gathers all the data in one place for a global sort order: effectively one
  // partition, so NoOpSorter can produce the globally ordered output.
  ProcessorDescriptor sortProcessor = ProcessorDescriptor.create(NoOpSorter.class.getName());
  Vertex sorterVertex = Vertex.create(SORTER, sortProcessor, 1);
  sorterVertex.addDataSink(OUTPUT, dataSink);

  // The jar containing this class is assumed to be part of the Tez jars, so it is not added.
  DAG dag = DAG.create(dagName);
  dag.addVertex(tokenizerVertex);
  dag.addVertex(summationVertex);
  dag.addVertex(sorterVertex);
  dag.addEdge(Edge.create(tokenizerVertex, summationVertex,
      summationEdgeConf.createDefaultEdgeProperty()));
  dag.addEdge(Edge.create(summationVertex, sorterVertex,
      sorterEdgeConf.createDefaultEdgeProperty()));
  return dag;
}
示例7: createDAG
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Builds the "UnionExample" DAG: three map vertices running {@code TokenProcessor} over the
 * same input, and one single-task "checker" vertex running {@code UnionProcessor}. "map1" and
 * "map2" form the vertex group "union", which feeds the checker through a group input edge;
 * "map3" feeds the checker through a plain edge.
 *
 * <p>Outputs: the checker writes "union" to {@code outputPath} and "all-parts" to
 * {@code outputPath + "-all-parts"}; the vertex group writes "parts" to
 * {@code outputPath + "-parts"}.
 *
 * @param fs not used in this method
 * @param tezConf base configuration copied for every input and output
 * @param localResources not used in this method
 * @param stagingDir not used in this method
 * @param inputPath input directory read by all three map vertices
 * @param outputPath base output location
 * @return the assembled DAG
 * @throws IOException if building a descriptor fails with an I/O error
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG dag = DAG.create("UnionExample");

  // -1 is passed as the parallelism of every map vertex.
  final int numMaps = -1;

  // Input configured through the old mapred API keys, with split generation in the AM off.
  Configuration inputConf = new Configuration(tezConf);
  inputConf.setBoolean("mapred.mapper.new-api", false);
  inputConf.set("mapred.input.format.class", TextInputFormat.class.getName());
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(inputConf, null)
      .generateSplitsInAM(false)
      .build();

  String tokenProcessorClass = TokenProcessor.class.getName();
  Vertex mapVertex1 = Vertex.create("map1", ProcessorDescriptor.create(tokenProcessorClass), numMaps)
      .addDataSource("MRInput", dataSource);
  Vertex mapVertex2 = Vertex.create("map2", ProcessorDescriptor.create(tokenProcessorClass), numMaps)
      .addDataSource("MRInput", dataSource);
  Vertex mapVertex3 = Vertex.create("map3", ProcessorDescriptor.create(tokenProcessorClass), numMaps)
      .addDataSource("MRInput", dataSource);

  Vertex checkerVertex = Vertex.create("checker",
      ProcessorDescriptor.create(UnionProcessor.class.getName()), 1);

  // "union" sink on the checker, configured through the old mapred API keys.
  Configuration outputConf = new Configuration(tezConf);
  outputConf.setBoolean("mapred.reducer.new-api", false);
  outputConf.set("mapred.output.format.class", TextOutputFormat.class.getName());
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  DataSinkDescriptor unionSink = MROutput.createConfigBuilder(outputConf, null).build();
  checkerVertex.addDataSink("union", unionSink);

  // "all-parts" sink on the checker.
  Configuration allPartsConf = new Configuration(tezConf);
  DataSinkDescriptor allPartsSink = MROutput
      .createConfigBuilder(allPartsConf, TextOutputFormat.class, outputPath + "-all-parts")
      .build();
  checkerVertex.addDataSink("all-parts", allPartsSink);

  // "parts" sink attached to the vertex group made of map1 and map2.
  Configuration partsConf = new Configuration(tezConf);
  DataSinkDescriptor partsSink = MROutput
      .createConfigBuilder(partsConf, TextOutputFormat.class, outputPath + "-parts")
      .build();
  VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
  unionVertex.addDataSink("parts", partsSink);

  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
      .build();

  dag.addVertex(mapVertex1);
  dag.addVertex(mapVertex2);
  dag.addVertex(mapVertex3);
  dag.addVertex(checkerVertex);
  dag.addEdge(Edge.create(mapVertex3, checkerVertex, edgeConf.createDefaultEdgeProperty()));
  dag.addEdge(GroupInputEdge.create(unionVertex, checkerVertex,
      edgeConf.createDefaultEdgeProperty(),
      InputDescriptor.create(ConcatenatedMergedKeyValuesInput.class.getName())));
  return dag;
}
示例8: run
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Runs the "pipelinedShuffleTest" DAG: two single-task "mapper" vertices running
 * {@code DataGenerator} feed one single-task "reducer" vertex running
 * {@code SimpleReduceProcessor} over ordered, partitioned Text/Text edges.
 *
 * <p>The DAG counters are printed once the DAG completes. If the DAG did not succeed, its
 * diagnostics are printed and -1 is returned; otherwise the method asserts that
 * {@code SHUFFLE_CHUNK_COUNT} is greater than 10 and returns 0. The Tez client started here
 * is always stopped before returning or propagating an exception.
 *
 * @param args not read by this method
 * @return 0 when the DAG succeeded, -1 otherwise
 * @throws Exception if client start-up, DAG submission, or status polling fails
 */
@Override
public int run(String[] args) throws Exception {
  this.tezConf = new TezConfiguration(getConf());
  String dagName = "pipelinedShuffleTest";
  DAG dag = DAG.create(dagName);

  Vertex m1_Vertex = Vertex.create("mapper1",
      ProcessorDescriptor.create(DataGenerator.class.getName()), 1);
  Vertex m2_Vertex = Vertex.create("mapper2",
      ProcessorDescriptor.create(DataGenerator.class.getName()), 1);
  Vertex reducerVertex = Vertex.create("reducer",
      ProcessorDescriptor.create(SimpleReduceProcessor.class.getName()), 1);

  // Both mapper->reducer edges use the same settings, so the configuration is built once and
  // each edge gets its own edge property from it.
  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), Text.class.getName(), HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();
  Edge mapper1_to_reducer =
      Edge.create(m1_Vertex, reducerVertex, edgeConf.createDefaultEdgeProperty());
  Edge mapper2_to_reducer =
      Edge.create(m2_Vertex, reducerVertex, edgeConf.createDefaultEdgeProperty());

  dag.addVertex(m1_Vertex);
  dag.addVertex(m2_Vertex);
  dag.addVertex(reducerVertex);
  dag.addEdge(mapper1_to_reducer).addEdge(mapper2_to_reducer);

  TezClient client = TezClient.create(dagName, tezConf);
  client.start();
  try {
    client.waitTillReady();
    DAGClient dagClient = client.submitDAG(dag);

    Set<StatusGetOpts> getOpts = Sets.newHashSet();
    getOpts.add(StatusGetOpts.GET_COUNTERS);
    DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(getOpts);

    TezCounters counters = dagStatus.getDAGCounters();
    System.out.println(counters);

    // Check the final state before touching the counters, so a failed DAG reports its
    // diagnostics instead of dying inside the counter assertion.
    if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
      System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
      return -1;
    }

    // Ensure that more than 10 shuffle chunks (spills) were recorded for this job.
    assertTrue(counters.findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT).getValue() > 10);
    return 0;
  } finally {
    // Stop the client on every path; previously it was started and never stopped.
    client.stop();
  }
}
示例9: createDAG
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Builds a linear DAG with one vertex per entry of {@code stageConfs}, each vertex connected
 * to the next by an ordered, partitioned key-value edge configured from the upstream stage's
 * configuration.
 *
 * <p>The DAG name is read from {@code stageConfs[0]} under {@code MRJobConfig.JOB_NAME},
 * defaulting to {@code YarnConfiguration.DEFAULT_APPLICATION_NAME}. Only stage 0 receives
 * location hints; later stages are created with a null hint list.
 *
 * @param fs file system passed to the input-split location lookup
 * @param jobId job id passed to the input-split location lookup
 * @param stageConfs per-stage configurations; must contain at least one element
 * @param jobSubmitDir submit directory passed to the input-split location lookup
 * @param ts not used in this method
 * @param jobLocalResources local resources passed to every created vertex
 * @return the assembled DAG
 * @throws IOException if a helper called here fails with an I/O error
 */
private DAG createDAG(FileSystem fs, JobID jobId, Configuration[] stageConfs,
    String jobSubmitDir, Credentials ts,
    Map<String, LocalResource> jobLocalResources) throws IOException {
  String jobName = stageConfs[0].get(MRJobConfig.JOB_NAME,
      YarnConfiguration.DEFAULT_APPLICATION_NAME);
  DAG dag = DAG.create(jobName);
  LOG.info("Number of stages: " + stageConfs.length);

  List<TaskLocationHint> mapInputLocations =
      getMapLocationHintsFromInputSplits(jobId, fs, stageConfs[0], jobSubmitDir);
  List<TaskLocationHint> reduceInputLocations = null;

  // First pass: create every vertex before anything is added to the DAG.
  Vertex[] vertices = new Vertex[stageConfs.length];
  for (int stage = 0; stage < stageConfs.length; stage++) {
    List<TaskLocationHint> locationHints =
        (stage == 0) ? mapInputLocations : reduceInputLocations;
    vertices[stage] = createVertexForStage(stageConfs[stage], jobLocalResources,
        locationHints, stage, stageConfs.length);
  }

  // Second pass: register each vertex and link it to its predecessor.
  for (int stage = 0; stage < vertices.length; stage++) {
    dag.addVertex(vertices[stage]);
    if (stage == 0) {
      continue; // the first stage has no incoming edge
    }

    // The edge is configured from the upstream stage's conf, because MapReduce compression
    // and similar properties are expressed w.r.t. outputs (MAP_OUTPUT_COMPRESS for example).
    Configuration upstreamConf = stageConfs[stage - 1];

    // NOTE(review): this null guard only protects the copy below; upstreamConf is
    // dereferenced unconditionally right after it, so a null entry still fails.
    Map<String, String> partitionerConf = null;
    if (upstreamConf != null) {
      partitionerConf = Maps.newHashMap();
      for (Map.Entry<String, String> property : upstreamConf) {
        partitionerConf.put(property.getKey(), property.getValue());
      }
    }

    String keyClass = upstreamConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
    String valueClass = upstreamConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(keyClass, valueClass, MRPartitioner.class.getName(), partitionerConf)
        .setFromConfigurationUnfiltered(upstreamConf)
        .configureInput().useLegacyInput().done()
        .build();

    dag.addEdge(Edge.create(vertices[stage - 1], vertices[stage],
        edgeConf.createDefaultEdgeProperty()));
  }
  return dag;
}
示例10: runWordCount
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; //导入依赖的package包/类
/**
 * Builds and runs a two-vertex word-count DAG (tokenizer -> single-task summation) and returns
 * the string form of a {@code TezDAGID} built from the client's application id and the id 1.
 *
 * <p>Input is read from {@code inputLoc}; output goes to a fresh
 * {@code /tmp/outPath_<currentTimeMillis>} path. A caller context is attached to the DAG
 * before submission, using the client's application id, or a placeholder id when the client
 * reports none. The Tez client is stopped on every exit path, including failures.
 *
 * @param tokenizerProcessor class name of the processor for the tokenizer vertex
 * @param summationProcessor class name of the processor for the summation vertex
 * @param dagName name given to the DAG
 * @param withTimeline passed to {@code getTezClient} to select the client
 * @return the string form of the DAG id
 * @throws Exception if building, submitting, or waiting for the DAG fails
 */
private String runWordCount(String tokenizerProcessor, String summationProcessor,
    String dagName, boolean withTimeline)
    throws Exception {
  // HDFS path
  Path outputLoc = new Path("/tmp/outPath_" + System.currentTimeMillis());

  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(conf,
      TextInputFormat.class, inputLoc.toString()).build();
  DataSinkDescriptor dataSink =
      MROutput.createConfigBuilder(conf, TextOutputFormat.class, outputLoc.toString()).build();

  Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
      tokenizerProcessor)).addDataSource(INPUT, dataSource);

  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName()).build();

  Vertex summationVertex = Vertex.create(SUMMATION,
      ProcessorDescriptor.create(summationProcessor), 1).addDataSink(OUTPUT, dataSink);

  // Create the DAG, add the vertices, and connect producer to consumer through the edge.
  DAG dag = DAG.create(dagName);
  dag.addVertex(tokenizerVertex).addVertex(summationVertex).addEdge(
      Edge.create(tokenizerVertex, summationVertex, edgeConf.createDefaultEdgeProperty()));

  TezClient tezClient = getTezClient(withTimeline);
  try {
    // Update the caller context.
    CallerContext callerContext =
        CallerContext.create("TezExamples", "Tez WordCount Example Job");
    ApplicationId appId = tezClient.getAppMasterApplicationId();
    if (appId == null) {
      // No application id is available from the client; use a placeholder one.
      appId = ApplicationId.newInstance(1001L, 1);
    }
    callerContext.setCallerIdAndType(appId.toString(), "TezApplication");
    dag.setCallerContext(callerContext);

    DAGClient client = tezClient.submitDAG(dag);
    client.waitForCompletionWithStatusUpdates(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    TezDAGID tezDAGID = TezDAGID.getInstance(tezClient.getAppMasterApplicationId(), 1);
    return tezDAGID.toString();
  } finally {
    // tezClient has already been dereferenced above, so a null check here would be dead
    // code; stopping in finally also releases the client when submission or waiting throws.
    tezClient.stop();
  }
}