This article collects typical usage examples of the Java class org.apache.flink.api.java.DataSet. If you are wondering what the DataSet class is for or how to use it, the curated examples below may help.
The DataSet class belongs to the org.apache.flink.api.java package. 15 code examples of the class are shown below, ordered by popularity by default.
Example 1: transformation
import org.apache.flink.api.java.DataSet; // import the required package/class
/**
* Data transformation.
* Groups by trackId, sums the number of occurrences, sorts the output,
* and keeps the top elements defined by the user.
* @param input the dataset of (trackId, count, TagEvent) tuples to transform
* @return the top chart entries as a DataSet of ChartsResult
*/
@Override
public DataSet<ChartsResult> transformation(DataSet<?> input) {
log.info("Transformation Phase. Computing the tags");
return input
.groupBy(0) // Grouping by trackId
.sum(1) // Sum the occurrences of each grouped item
.sortPartition(1, Order.DESCENDING).setParallelism(1) // Sort by count
.first(pipelineConf.args.getLimit())
.map(t -> {
Tuple3<Long, Integer, TagEvent> tuple = (Tuple3<Long, Integer, TagEvent>) t;
return new ChartsResult(tuple.f0, tuple.f1, tuple.f2);
})
.returns(new TypeHint<ChartsResult>(){});
}
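The ChartsResult type is not included in this snippet. A minimal POJO sketch, assuming it simply wraps the three tuple fields (the field names here are hypothetical):

// Hypothetical sketch of ChartsResult; the real class may differ.
public class ChartsResult {
    public Long trackId;      // assumed name for the grouping key
    public Integer count;     // assumed name for the summed occurrences
    public TagEvent tagEvent; // the original event carried through the pipeline

    public ChartsResult() {} // Flink POJOs need a public no-arg constructor

    public ChartsResult(Long trackId, Integer count, TagEvent tagEvent) {
        this.trackId = trackId;
        this.count = count;
        this.tagEvent = tagEvent;
    }
}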
Example 2: main
import org.apache.flink.api.java.DataSet; // import the required package/class
public static void main(String[] args) throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
DataSet<Record> csvInput = env
.readCsvFile("D://NOTBACKEDUP//dataflow//flink-table//src//main//resources//data//olympic-athletes.csv")
.pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total");
// register the DataSet athletes as table "athletes" with fields derived
// from the dataset
Table athletes = tableEnv.fromDataSet(csvInput);
tableEnv.registerTable("athletes", athletes);
// run a SQL query on the Table and retrieve the result as a new Table
Table groupedByCountry = tableEnv.sql("SELECT country, SUM(total) AS frequency FROM athletes GROUP BY country");
DataSet<Result> result = tableEnv.toDataSet(groupedByCountry, Result.class);
result.print();
Table groupedByGame = athletes.groupBy("game").select("game, total.sum as frequency");
DataSet<GameResult> gameResult = tableEnv.toDataSet(groupedByGame, GameResult.class);
gameResult.print();
}
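The Record, Result, and GameResult POJOs are not shown in the snippet. Minimal sketches matching the field lists used above, assuming public fields as required by Flink's POJO rules (the aggregate field types are an assumption):

// Hypothetical sketches of the POJOs referenced above; real classes may differ.
public class Record {
    public String playerName;
    public String country;
    public Integer year;
    public String game;
    public Integer gold;
    public Integer silver;
    public Integer bronze;
    public Integer total;
}

public class Result {
    public String country;
    public Integer frequency; // holds SUM(total) from the SQL query
}

public class GameResult {
    public String game;
    public Integer frequency; // holds total.sum from the Table API query
}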
Example 3: main
import org.apache.flink.api.java.DataSet; // import the required package/class
public static void main(String... args) throws Exception {
File txtFile = new File("/tmp/test/file.txt");
File csvFile = new File("/tmp/test/file.csv");
File binFile = new File("/tmp/test/file.bin");
writeToFile(txtFile, "txt");
writeToFile(csvFile, "csv");
writeToFile(binFile, "bin");
final ExecutionEnvironment env =
ExecutionEnvironment.getExecutionEnvironment();
final TextInputFormat format = new TextInputFormat(new Path("/tmp/test"));
GlobFilePathFilter filesFilter = new GlobFilePathFilter(
Collections.singletonList("**"),
Arrays.asList("**/file.bin")
);
System.out.println(Arrays.toString(GlobFilePathFilter.class.getDeclaredFields())); // debug output: inspect the filter's declared fields
format.setFilesFilter(filesFilter);
DataSet<String> result = env.readFile(format, "/tmp");
result.writeAsText("/temp/out");
env.execute("GlobFilePathFilter-Test");
}
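The writeToFile helper is not shown. A minimal sketch (the method name comes from the snippet; the body is an assumption) that creates the parent directory and writes the given content:

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

private static void writeToFile(File file, String content) throws IOException {
    file.getParentFile().mkdirs(); // make sure /tmp/test exists
    try (FileWriter writer = new FileWriter(file)) {
        writer.write(content);
    }
}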
Example 4: transformation
import org.apache.flink.api.java.DataSet; // import the required package/class
/**
* Data transformation.
* Groups by state and trackId, sums the number of occurrences, sorts the
* output by state and count, and keeps the top elements per state as
* defined by the user.
* @param input the dataset of (trackId, count, state, TagEvent) tuples to transform
* @return the top chart entries per state as a DataSet of ChartsResult
*/
@Override
public DataSet<ChartsResult> transformation(DataSet<?> input) {
final int limit = pipelineConf.getArgs().getLimit();
log.info("Transformation Phase. Computing the tags");
SortPartitionOperator<Tuple4<Long, Integer, String, TagEvent>> grouped = (SortPartitionOperator<Tuple4<Long, Integer, String, TagEvent>>) input
.groupBy(2, 0) // Grouping by state & trackId
.sum(1) // Sum the occurrences of each grouped item
.sortPartition(2, Order.ASCENDING).setParallelism(1) // Sort by state
.sortPartition(1, Order.DESCENDING).setParallelism(1); // Sort by count
return grouped.reduceGroup(new ReduceLimit(limit, 2)); // Reduce each group, applying the limit specified by the user
}
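ReduceLimit is not part of this snippet. A plausible sketch, assuming it walks the pre-sorted partition and emits at most limit results per distinct value of the given group field (here field 2, the state):

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.util.Collector;

// Hypothetical sketch of ReduceLimit; the real implementation may differ.
// Relies on the input already being sorted by state and count, as arranged
// by the sortPartition calls above.
public class ReduceLimit implements GroupReduceFunction<Tuple4<Long, Integer, String, TagEvent>, ChartsResult> {

    private final int limit;
    private final int groupField;

    public ReduceLimit(int limit, int groupField) {
        this.limit = limit;
        this.groupField = groupField;
    }

    @Override
    public void reduce(Iterable<Tuple4<Long, Integer, String, TagEvent>> values,
                       Collector<ChartsResult> out) {
        Object currentKey = null;
        int emitted = 0;
        for (Tuple4<Long, Integer, String, TagEvent> t : values) {
            Object key = t.getField(groupField);
            if (!key.equals(currentKey)) { // a new group starts
                currentKey = key;
                emitted = 0;
            }
            if (emitted++ < limit) {
                out.collect(new ChartsResult(t.f0, t.f1, t.f3));
            }
        }
    }
}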
Example 5: cleansingTest
import org.apache.flink.api.java.DataSet; // import the required package/class
/**
* Test to validate the cleansing method.
* We generate a DataSet with 10 TagEvents and modify 3 items to force bad data.
* The assertion checks that only the proper number of items is obtained after
* the cleansing process.
* @throws Exception if the test pipeline fails
*/
@Test
public void cleansingTest() throws Exception {
String[] args = {"-c", "state_chart", "-l", "3"};
argsParser = ArgsParser.builder(args);
PipelineChartsConf pipelineConf = new PipelineChartsConf(config, argsParser);
StateChartsPipeline pipeline = new StateChartsPipeline(pipelineConf);
List<TagEvent> mockCollection = TagEventUtils.getMockData(10);
mockCollection.set(0, new TagEvent(0L, "xxx", "yy", "ZZ", "Locality", "United States"));
mockCollection.set(2, new TagEvent(0L, "xxx", "yy", "ZZ", "Locality", "UK"));
mockCollection.set(4, new TagEvent(99L, "xxx", "yy", "", "", ""));
DataSet<TagEvent> mockDataset = pipeline.getEnv().fromCollection(mockCollection);
DataSet<Tuple4<Long, Integer, String, TagEvent>> clean = pipeline.cleansing(mockDataset);
assertEquals(7, clean.count());
}
Example 6: cleansingTest
import org.apache.flink.api.java.DataSet; // import the required package/class
/**
* Test to validate the cleansing method.
* We generate a DataSet with 10 TagEvents and modify 2 items to force bad data.
* The assertion checks that only the proper number of items is obtained after
* the cleansing process.
* @throws Exception if the test pipeline fails
*/
@Test
public void cleansingTest() throws Exception {
String[] args = {"-c", "chart", "-l", "3"};
argsParser = ArgsParser.builder(args);
PipelineChartsConf pipelineConf = new PipelineChartsConf(config, argsParser);
SimpleChartsPipeline pipeline = new SimpleChartsPipeline(pipelineConf);
List<TagEvent> mockCollection = TagEventUtils.getMockData(10);
mockCollection.set(0, new TagEvent(0L, "xxx", "yy", "zz"));
mockCollection.set(4, new TagEvent(99L, "xxx", "yy", ""));
DataSet<TagEvent> mockDataset = pipeline.getEnv().fromCollection(mockCollection);
DataSet<Tuple3<Long, Integer, TagEvent>> clean = pipeline.cleansing(mockDataset);
assertEquals(9, clean.count());
}
Example 7: main
import org.apache.flink.api.java.DataSet; // import the required package/class
public static void main(String[] args) throws Exception {
// set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// get input data
DataSet<String> text = env.fromElements(
"To be, or not to be,--that is the question:--",
"Whether 'tis nobler in the mind to suffer",
"The slings and arrows of outrageous fortune",
"Or to take arms against a sea of troubles,"
);
DataSet<Tuple2<String, Integer>> counts =
// split up the lines in pairs (2-tuples) containing: (word,1)
text.flatMap(new LineSplitter())
// group by the tuple field "0" and sum up tuple field "1"
.groupBy(0)
.sum(1);
// execute and print result
counts.print();
}
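The LineSplitter referenced above is not included in the snippet; the canonical Flink WordCount tokenizer looks roughly like this:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Splits each line into lowercase words and emits a (word, 1) pair per word.
public static final class LineSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        for (String token : value.toLowerCase().split("\\W+")) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}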
Example 8: testSelectingMultipleFieldsUsingExpressionLanguage
import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testSelectingMultipleFieldsUsingExpressionLanguage() throws Exception {
/*
* selecting multiple fields using expression language
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joinDs =
ds1.join(ds2).where("nestedPojo.longNumber", "number", "str").equalTo("f6", "f0", "f1");
env.setParallelism(1);
List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> result = joinDs.collect();
String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
"2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
"3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
compareResultAsTuples(result, expected);
}
Example 9: testCoGroupKeyMixing2
import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testCoGroupKeyMixing2() {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
DataSet<CustomType> ds2 = env.fromCollection(customTypeData);
// should work
try {
ds1.coGroup(ds2)
.where(3)
.equalTo(
new KeySelector<CustomType, Long>() {
@Override
public Long getKey(CustomType value) {
return value.myLong;
}
}
);
} catch (Exception e) {
Assert.fail();
}
}
Example 10: testAsFromTupleByName
import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testAsFromTupleByName() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());
Table table = tableEnv.fromDataSet(CollectionDataSets.get3TupleDataSet(env), "f2");
DataSet<Row> ds = tableEnv.toDataSet(table, Row.class);
List<Row> results = ds.collect();
String expected = "Hi\n" + "Hello\n" + "Hello world\n" +
"Hello world, how are you?\n" + "I am fine.\n" + "Luke Skywalker\n" +
"Comment#1\n" + "Comment#2\n" + "Comment#3\n" + "Comment#4\n" +
"Comment#5\n" + "Comment#6\n" + "Comment#7\n" +
"Comment#8\n" + "Comment#9\n" + "Comment#10\n" +
"Comment#11\n" + "Comment#12\n" + "Comment#13\n" +
"Comment#14\n" + "Comment#15\n";
compareResultAsText(results, expected);
}
Example 11: testUnaryFunctionMovingForwardedAnnotation
import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testUnaryFunctionMovingForwardedAnnotation() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
@SuppressWarnings("unchecked")
DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
input.map(new ShufflingMapper<Long>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
Plan plan = env.createProgramPlan();
GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();
SingleInputSemanticProperties semantics = mapper.getSemanticProperties();
FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
FieldSet fw2 = semantics.getForwardingTargetFields(0, 1);
FieldSet fw3 = semantics.getForwardingTargetFields(0, 2);
assertNotNull(fw1);
assertNotNull(fw2);
assertNotNull(fw3);
assertTrue(fw1.contains(2));
assertTrue(fw2.contains(0));
assertTrue(fw3.contains(1));
}
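ShufflingMapper is not shown here. A sketch consistent with the assertions above (input field 0 forwarded to output field 2, 1 to 0, 2 to 1), assuming it rotates the tuple fields and declares the matching forwarded-fields annotation:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.tuple.Tuple3;

// Hypothetical sketch: rotates (f0, f1, f2) to (f1, f2, f0) and declares the
// corresponding semantic annotation that the test inspects.
@ForwardedFields({"0->2", "1->0", "2->1"})
public static class ShufflingMapper<T> implements MapFunction<Tuple3<T, T, T>, Tuple3<T, T, T>> {
    @Override
    public Tuple3<T, T, T> map(Tuple3<T, T, T> value) {
        return new Tuple3<>(value.f1, value.f2, value.f0);
    }
}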
Example 12: testProgram
import org.apache.flink.api.java.DataSet; // import the required package/class
@SuppressWarnings("unchecked")
@Override
protected void testProgram() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Integer, String>> left = env.fromElements(
new Tuple2<Integer, String>(1, "hello"),
new Tuple2<Integer, String>(2, "what's"),
new Tuple2<Integer, String>(2, "up")
);
DataSet<Tuple2<Integer, String>> right = env.fromElements(
new Tuple2<Integer, String>(1, "not"),
new Tuple2<Integer, String>(1, "much"),
new Tuple2<Integer, String>(2, "really")
);
DataSet<Tuple2<Integer,String>> joined = left.join(right).where(0).equalTo(0)
.with((t,s,out) -> out.collect(new Tuple2<Integer,String>(t.f0, t.f1 + " " + s.f1)));
joined.writeAsCsv(resultPath);
env.execute();
}
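One caveat worth noting: with plain javac, type erasure on the three-argument flat-join lambda can keep Flink from inferring the output type. In that case an explicit type hint is usually required; a sketch (not part of the original test, and assuming imports of TypeHint and Collector):

DataSet<Tuple2<Integer, String>> joined = left.join(right).where(0).equalTo(0)
        .with((Tuple2<Integer, String> t, Tuple2<Integer, String> s,
               Collector<Tuple2<Integer, String>> out) ->
                out.collect(new Tuple2<>(t.f0, t.f1 + " " + s.f1)))
        .returns(new TypeHint<Tuple2<Integer, String>>() {});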
Example 13: testWithtuple1Value
import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testWithtuple1Value() throws Exception {
/*
* Test mapVertices() and change the value type to a Tuple1
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
TestGraphUtils.getLongLongEdgeData(env), env);
DataSet<Vertex<Long, Tuple1<Long>>> mappedVertices = graph.mapVertices(new ToTuple1Mapper()).getVertices();
List<Vertex<Long, Tuple1<Long>>> result = mappedVertices.collect();
expectedResult = "1,(1)\n" +
"2,(2)\n" +
"3,(3)\n" +
"4,(4)\n" +
"5,(5)\n";
compareResultAsTuples(result, expectedResult);
}
Example 14: testInDegrees
import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testInDegrees() throws Exception {
/*
* Test inDegrees()
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
TestGraphUtils.getLongLongEdgeData(env), env);
DataSet<Tuple2<Long, LongValue>> data = graph.inDegrees();
List<Tuple2<Long, LongValue>> result = data.collect();
expectedResult = "1,1\n" +
"2,1\n" +
"3,2\n" +
"4,1\n" +
"5,2\n";
compareResultAsTuples(result, expectedResult);
}
Example 15: testKeySelection
import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testKeySelection() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
Path in = new Path(inFile.getAbsoluteFile().toURI());
AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
DataSet<User> usersDS = env.createInput(users);
DataSet<Tuple2<String, Integer>> res = usersDS.groupBy("name").reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
@Override
public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
for (User u : values) {
out.collect(new Tuple2<String, Integer>(u.getName().toString(), 1));
}
}
});
res.writeAsText(resultPath);
env.execute("Avro Key selection");
expected = "(Alyssa,1)\n(Charlie,1)\n";
}