This article collects typical usage examples of the Java method org.apache.flink.api.java.DataSet.map. If you are unsure how DataSet.map is used in practice, or are looking for concrete examples, the curated code samples below may help. You can also explore the enclosing class org.apache.flink.api.java.DataSet for further details.
The following presents 15 code examples of the DataSet.map method.
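Before the collected examples, here is a minimal, self-contained sketch of the basic DataSet.map pattern. It is not taken from the examples below; the class name DataSetMapSketch and the inline MapFunction are purely illustrative. The sketch shows the core idea: map applies a MapFunction to every element of a DataSet, producing exactly one output element per input, and a sink plus env.execute() is needed to run the job.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class DataSetMapSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // a small input DataSet of integers
        DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);

        // map transforms each element into one output element
        DataSet<String> labelled = numbers.map(new MapFunction<Integer, String>() {
            @Override
            public String map(Integer value) throws Exception {
                return "value-" + value;
            }
        });

        // a sink is required before execute(); here the output is simply discarded
        labelled.output(new DiscardingOutputFormat<String>());
        env.execute("DataSet.map sketch");
    }
}

The examples that follow use the same pattern with different element types (tuples, POJOs, Avro types) and combine map with joins, iterations, and group reductions.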
Example 1: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<String, Integer, String>> input = env.readCsvFile(inputPath)
        .fieldDelimiter("|")
        .types(String.class, Integer.class, String.class);

    // output the data with AvroOutputFormat for specific user type
    DataSet<User> specificUser = input.map(new ConvertToUser());
    AvroOutputFormat<User> avroOutputFormat = new AvroOutputFormat<User>(User.class);
    avroOutputFormat.setCodec(AvroOutputFormat.Codec.SNAPPY); // FLINK-4771: use a codec
    avroOutputFormat.setSchema(User.SCHEMA$); // FLINK-3304: Ensure the OF is properly serializing the schema
    specificUser.write(avroOutputFormat, outputPath1);

    // output the data with AvroOutputFormat for reflect user type
    DataSet<ReflectiveUser> reflectiveUser = specificUser.map(new ConvertToReflective());
    reflectiveUser.write(new AvroOutputFormat<ReflectiveUser>(ReflectiveUser.class), outputPath2);

    env.execute();
}
Example 2: testFaultyAccumulator
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testFaultyAccumulator() throws Exception {
    env.getConfig().disableSysoutLogging();

    // Test Exception forwarding with faulty Accumulator implementation
    DataSet<Long> input = env.generateSequence(0, 10000);
    DataSet<Long> map = input.map(new FaultyAccumulatorUsingMapper());
    map.output(new DiscardingOutputFormat<Long>());

    try {
        env.execute();
        fail("Should have failed.");
    } catch (JobExecutionException e) {
        Assert.assertTrue("Root cause should be:",
            e.getCause() instanceof CustomException);
    }
}
Example 3: run
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Override
public DataSet<Tuple3<K, K, K>> run(Graph<K, VV, EV> input) throws Exception {
    DataSet<Edge<K, EV>> edges = input.getEdges();

    // annotate edges with degrees
    DataSet<EdgeWithDegrees<K>> edgesWithDegrees = edges.flatMap(new EdgeDuplicator<>())
        .groupBy(0).sortGroup(1, Order.ASCENDING).reduceGroup(new DegreeCounter<>())
        .groupBy(EdgeWithDegrees.V1, EdgeWithDegrees.V2).reduce(new DegreeJoiner<>());

    // project edges by degrees
    DataSet<Edge<K, NullValue>> edgesByDegree = edgesWithDegrees.map(new EdgeByDegreeProjector<>());
    // project edges by vertex id
    DataSet<Edge<K, NullValue>> edgesById = edgesByDegree.map(new EdgeByIdProjector<>());

    DataSet<Tuple3<K, K, K>> triangles = edgesByDegree
        // build triads
        .groupBy(EdgeWithDegrees.V1).sortGroup(EdgeWithDegrees.V2, Order.ASCENDING)
        .reduceGroup(new TriadBuilder<>())
        // filter triads
        .join(edgesById, JoinHint.REPARTITION_HASH_SECOND).where(Triad.V2, Triad.V3).equalTo(0, 1).with(new TriadFilter<>());

    return triangles;
}
Example 4: testExecuteAfterGetExecutionPlan
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testExecuteAfterGetExecutionPlan() {
    ExecutionEnvironment env = new LocalEnvironment();
    env.getConfig().disableSysoutLogging();

    DataSet<Integer> baseSet = env.fromElements(1, 2);

    DataSet<Integer> result = baseSet.map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) throws Exception {
            return value * 2;
        }
    });
    result.output(new DiscardingOutputFormat<Integer>());

    try {
        env.getExecutionPlan();
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail("Cannot run both #getExecutionPlan and #execute.");
    }
}
Example 5: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> longs = env.generateSequence(0, 100000);

    DataSet<Tuple1<Long>> longT1 = longs.map(new TupleWrapper());
    DataSet<Tuple1<Long>> longT2 = longT1.project(0);
    DataSet<Tuple1<Long>> longT3 = longs.map(new TupleWrapper());

    longT2.join(longT3).where(0).equalTo(0).projectFirst(0)
        .join(longT1).where(0).equalTo(0).projectFirst(0)
        .writeAsText(resultPath);

    env.execute();
}
Example 6: testCreatePlanAfterGetExecutionPlan
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testCreatePlanAfterGetExecutionPlan() {
    ExecutionEnvironment env = new LocalEnvironment();

    DataSet<Integer> baseSet = env.fromElements(1, 2);

    DataSet<Integer> result = baseSet.map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) throws Exception {
            return value * 2;
        }
    });
    result.output(new DiscardingOutputFormat<Integer>());

    try {
        env.getExecutionPlan();
        env.createProgramPlan();
    } catch (Exception e) {
        e.printStackTrace();
        fail("Cannot run both #getExecutionPlan and #execute. Message: " + e.getMessage());
    }
}
Example 7: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
        .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
        .groupBy(0).aggregate(Aggregations.MIN, 1)
        .join(iteration.getSolutionSet()).where(0).equalTo(0)
        .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 8: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
        .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
        .groupBy(0).aggregate(Aggregations.MIN, 1)
        .join(iteration.getSolutionSet()).where(0).equalTo(0)
        .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 9: test
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void test() throws Exception {
    /*
     * Test mapper if UDF returns input object - increment first field of a tuple
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> inputObjMapDs = ds.map(new Mapper7());

    List<Tuple3<Integer, Long, String>> result = inputObjMapDs.collect();

    String expected = "2,1,Hi\n" +
        "3,2,Hello\n" +
        "4,2,Hello world\n" +
        "5,3,Hello world, how are you?\n" +
        "6,3,I am fine.\n" +
        "7,3,Luke Skywalker\n" +
        "8,4,Comment#1\n" +
        "9,4,Comment#2\n" +
        "10,4,Comment#3\n" +
        "11,4,Comment#4\n" +
        "12,5,Comment#5\n" +
        "13,5,Comment#6\n" +
        "14,5,Comment#7\n" +
        "15,5,Comment#8\n" +
        "16,5,Comment#9\n" +
        "17,6,Comment#10\n" +
        "18,6,Comment#11\n" +
        "19,6,Comment#12\n" +
        "20,6,Comment#13\n" +
        "21,6,Comment#14\n" +
        "22,6,Comment#15\n";

    compareResultAsTuples(result, expected);
}
Example 10: testTypeConversionMapperTupleToBasic
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testTypeConversionMapperTupleToBasic() throws Exception {
    /*
     * Test type conversion mapper (Tuple -> Basic)
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<String> typeConversionMapDs = ds.map(new Mapper4());

    List<String> result = typeConversionMapDs.collect();

    String expected = "Hi\n" + "Hello\n" + "Hello world\n" +
        "Hello world, how are you?\n" +
        "I am fine.\n" + "Luke Skywalker\n" +
        "Comment#1\n" + "Comment#2\n" +
        "Comment#3\n" + "Comment#4\n" +
        "Comment#5\n" + "Comment#6\n" +
        "Comment#7\n" + "Comment#8\n" +
        "Comment#9\n" + "Comment#10\n" +
        "Comment#11\n" + "Comment#12\n" +
        "Comment#13\n" + "Comment#14\n" +
        "Comment#15\n";

    compareResultAsText(result, expected);
}
Example 11: testIdentityMapWithTuple
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testIdentityMapWithTuple() throws Exception {
    /*
     * Test identity map with a tuple
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> identityMapDs = ds.map(new Mapper2());

    List<Tuple3<Integer, Long, String>> result = identityMapDs.collect();

    String expected = "1,1,Hi\n" +
        "2,2,Hello\n" +
        "3,2,Hello world\n" +
        "4,3,Hello world, how are you?\n" +
        "5,3,I am fine.\n" +
        "6,3,Luke Skywalker\n" +
        "7,4,Comment#1\n" +
        "8,4,Comment#2\n" +
        "9,4,Comment#3\n" +
        "10,4,Comment#4\n" +
        "11,5,Comment#5\n" +
        "12,5,Comment#6\n" +
        "13,5,Comment#7\n" +
        "14,5,Comment#8\n" +
        "15,5,Comment#9\n" +
        "16,6,Comment#10\n" +
        "17,6,Comment#11\n" +
        "18,6,Comment#12\n" +
        "19,6,Comment#13\n" +
        "20,6,Comment#14\n" +
        "21,6,Comment#15\n";

    compareResultAsTuples(result, expected);
}
Example 12: testMapperOnTupleIncrementIntegerFieldReorderSecondAndThirdFields
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testMapperOnTupleIncrementIntegerFieldReorderSecondAndThirdFields() throws Exception {
    /*
     * Test mapper on tuple - Increment Integer field, reorder second and third fields
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, String, Long>> tupleMapDs = ds.map(new Mapper5());

    List<Tuple3<Integer, String, Long>> result = tupleMapDs.collect();

    String expected = "2,Hi,1\n" +
        "3,Hello,2\n" +
        "4,Hello world,2\n" +
        "5,Hello world, how are you?,3\n" +
        "6,I am fine.,3\n" +
        "7,Luke Skywalker,3\n" +
        "8,Comment#1,4\n" +
        "9,Comment#2,4\n" +
        "10,Comment#3,4\n" +
        "11,Comment#4,4\n" +
        "12,Comment#5,5\n" +
        "13,Comment#6,5\n" +
        "14,Comment#7,5\n" +
        "15,Comment#8,5\n" +
        "16,Comment#9,5\n" +
        "17,Comment#10,6\n" +
        "18,Comment#11,6\n" +
        "19,Comment#12,6\n" +
        "20,Comment#13,6\n" +
        "21,Comment#14,6\n" +
        "22,Comment#15,6\n";

    compareResultAsTuples(result, expected);
}
Example 13: testTypeConversionMapperCustomToTuple
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testTypeConversionMapperCustomToTuple() throws Exception {
    /*
     * Test type conversion mapper (Custom -> Tuple)
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> typeConversionMapDs = ds.map(new Mapper3());

    List<Tuple3<Integer, Long, String>> result = typeConversionMapDs.collect();

    String expected = "1,0,Hi\n" +
        "2,1,Hello\n" +
        "2,2,Hello world\n" +
        "3,3,Hello world, how are you?\n" +
        "3,4,I am fine.\n" +
        "3,5,Luke Skywalker\n" +
        "4,6,Comment#1\n" +
        "4,7,Comment#2\n" +
        "4,8,Comment#3\n" +
        "4,9,Comment#4\n" +
        "5,10,Comment#5\n" +
        "5,11,Comment#6\n" +
        "5,12,Comment#7\n" +
        "5,13,Comment#8\n" +
        "5,14,Comment#9\n" +
        "6,15,Comment#10\n" +
        "6,16,Comment#11\n" +
        "6,17,Comment#12\n" +
        "6,18,Comment#13\n" +
        "6,19,Comment#14\n" +
        "6,20,Comment#15\n";

    compareResultAsTuples(result, expected);
}
Example 14: main
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    DataSet<Integer> input = env.fromElements(1, 2, 3, 4, 5);

    DataSet<CustomType> customTypes = input.map(new MapFunction<Integer, CustomType>() {
        private static final long serialVersionUID = -5878758010124912128L;

        @Override
        public CustomType map(Integer integer) throws Exception {
            return new CustomType(integer);
        }
    }).rebalance();

    DataSet<Integer> result = customTypes.map(new MapFunction<CustomType, Integer>() {
        private static final long serialVersionUID = -7950126399899584991L;

        @Override
        public Integer map(CustomType value) throws Exception {
            return value.value;
        }
    });

    result.output(new DiscardingOutputFormat<Integer>());
    env.execute();
}
Example 15: testPartialReduceWithDifferentInputOutputType
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testPartialReduceWithDifferentInputOutputType() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> dsWrapped = ds
        // wrap values as Kv pairs with the grouping key as key
        .map(new Tuple3KvWrapper());

    List<Tuple2<Integer, Long>> result = dsWrapped
        .groupBy(0)
        // reduce partially
        .combineGroup(new Tuple3toTuple2GroupReduce())
        .groupBy(0)
        // reduce fully to check result
        .reduceGroup(new Tuple2toTuple2GroupReduce())
        // unwrap
        .map(new MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>>() {
            @Override
            public Tuple2<Integer, Long> map(Tuple2<Long, Tuple2<Integer, Long>> value) throws Exception {
                return value.f1;
            }
        }).collect();

    String expected = "1,3\n" +
        "5,20\n" +
        "15,58\n" +
        "34,52\n" +
        "65,70\n" +
        "111,96\n";

    compareResultAsTuples(result, expected);
}