本文整理汇总了Java中org.apache.flink.api.java.aggregation.Aggregations类的典型用法代码示例。如果您正苦于以下问题:Java Aggregations类的具体用法?Java Aggregations怎么用?Java Aggregations使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
Aggregations类属于org.apache.flink.api.java.aggregation包,在下文中一共展示了Aggregations类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: ScalaAggregateOperator
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Creates an aggregation over a non-grouped data set.
 *
 * @param input the data set to aggregate; must not be null and must have a tuple type
 * @param function the aggregation to apply; must not be null
 * @param field the zero-based position of the tuple field to aggregate
 * @throws InvalidProgramException if the input type is not a tuple type
 * @throws IllegalArgumentException if {@code field} lies outside the tuple's arity
 */
public ScalaAggregateOperator(org.apache.flink.api.java.DataSet<IN> input, Aggregations function, int field) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) input.getType();
    final boolean fieldInRange = field >= 0 && field < tupleType.getArity();
    if (!fieldInRange) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // The aggregation function is specialized for the runtime class of the selected field.
    final AggregationFunction<?> aggregator =
            function.getFactory().createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    // First aggregation operator directly after a regular data set, hence no grouping.
    this.aggregationFunctions.add(aggregator);
    this.fields.add(field);
    this.grouping = null;
}
示例2: and
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Appends an additional aggregation to this operator.
 *
 * @param function the aggregation to apply; must not be null
 * @param field the zero-based position of the tuple field to aggregate
 * @return this operator, so that calls can be chained
 * @throws IllegalArgumentException if {@code field} lies outside the tuple's arity
 */
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) getType();
    final boolean fieldInRange = field >= 0 && field < tupleType.getArity();
    if (!fieldInRange) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // The aggregation function is specialized for the runtime class of the selected field.
    final AggregationFunction<?> aggregator =
            function.getFactory().createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggregator);
    this.fields.add(field);
    return this;
}
示例3: testFullAggregate
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Full aggregate: SUM over field 0 combined with MAX over field 1 on the
 * non-grouped data set.
 */
@Test
public void testFullAggregate() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
    final DataSet<Tuple2<Integer, Long>> sumAndMax = input
            .aggregate(Aggregations.SUM, 0)
            .and(Aggregations.MAX, 1)
            .project(0, 1);

    final List<Tuple2<Integer, Long>> actual = sumAndMax.collect();

    // Single output row: "<sum of field 0>,<max of field 1>".
    compareResultAsTuples(actual, "231,6\n");
}
示例4: testFullAggregateOfMutableValueTypes
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Full aggregate of mutable value types: SUM over field 0 combined with MAX over
 * field 1 on the non-grouped data set.
 */
@Test
public void testFullAggregateOfMutableValueTypes() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<IntValue, LongValue, StringValue>> input =
            ValueCollectionDataSets.get3TupleDataSet(env);
    final DataSet<Tuple2<IntValue, LongValue>> sumAndMax = input
            .aggregate(Aggregations.SUM, 0)
            .and(Aggregations.MAX, 1)
            .project(0, 1);

    final List<Tuple2<IntValue, LongValue>> actual = sumAndMax.collect();

    // Single output row: "<sum of field 0>,<max of field 1>".
    compareResultAsTuples(actual, "231,6\n");
}
示例5: testGroupedAggregate
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Grouped aggregate: groups by field 1 and sums field 0 within each group.
 * The projection places the grouping key first and the sum second.
 */
@Test
public void testGroupedAggregate() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
    final DataSet<Tuple2<Long, Integer>> sumPerKey = input
            .groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    final List<Tuple2<Long, Integer>> actual = sumPerKey.collect();

    // One "<key>,<sum>" row per group.
    final String expectedRows = "1,1\n2,5\n3,15\n4,34\n5,65\n6,111\n";
    compareResultAsTuples(actual, expectedRows);
}
示例6: testGroupedAggregateOfMutableValueTypes
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Grouped aggregate of mutable value types: groups by field 1 and sums field 0
 * within each group. The projection places the grouping key first and the sum second.
 */
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    // project(1, 0) emits (field 1, field 0), i.e. (LongValue key, IntValue sum), so the
    // declared tuple type must list LongValue first. The generic return type of project()
    // would otherwise silently accept a mismatched declaration.
    DataSet<Tuple2<LongValue, IntValue>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<LongValue, IntValue>> result = aggregateDs.collect();

    // One "<key>,<sum>" row per group.
    String expected = "1,1\n" +
            "2,5\n" +
            "3,15\n" +
            "4,34\n" +
            "5,65\n" +
            "6,111\n";

    compareResultAsTuples(result, expected);
}
示例7: testNestedAggregate
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Nested aggregate: MIN over field 0 per group (grouped by field 1), followed by
 * a second MIN over field 0 applied to the output of the first aggregation.
 */
@Test
public void testNestedAggregate() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
    final DataSet<Tuple1<Integer>> minOfMins = input
            .groupBy(1)
            .aggregate(Aggregations.MIN, 0)
            .aggregate(Aggregations.MIN, 0)
            .project(0);

    final List<Tuple1<Integer>> actual = minOfMins.collect();

    // A single row holding the overall minimum is expected.
    compareResultAsTuples(actual, "1\n");
}
示例8: testNestedAggregateOfMutableValueTypes
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Nested aggregate of mutable value types: MIN over field 0 per group (grouped by
 * field 1), followed by a second MIN over field 0 applied to the output of the first.
 */
@Test
public void testNestedAggregateOfMutableValueTypes() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<IntValue, LongValue, StringValue>> input =
            ValueCollectionDataSets.get3TupleDataSet(env);
    final DataSet<Tuple1<IntValue>> minOfMins = input
            .groupBy(1)
            .aggregate(Aggregations.MIN, 0)
            .aggregate(Aggregations.MIN, 0)
            .project(0);

    final List<Tuple1<IntValue>> actual = minOfMins.collect();

    // A single row holding the overall minimum is expected.
    compareResultAsTuples(actual, "1\n");
}
示例9: testProgram
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Builds and runs the "Connected Components Example" job as a delta iteration,
 * reading vertices and edges from CSV files and writing the result as CSV.
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Inputs: one vertex id per record, and space-delimited edge pairs.
    final DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath).types(Long.class);
    final DataSet<Tuple2<Long, Long>> edgeList = env.readCsvFile(edgesPath)
            .fieldDelimiter(" ")
            .types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // Every vertex starts out with a component id equal to its own vertex id.
    final DataSet<Tuple2<Long, Long>> initialAssignment = vertexIds.map(new DuplicateValue<Long>());

    // The initial assignment serves as both solution set and workset
    // (arguments: workset, 100, key position 0).
    final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
            initialAssignment.iterateDelta(initialAssignment, 100, 0);

    // Step logic: join with the edges, select the minimum neighbor, and update
    // if the component of the candidate is smaller.
    final DataSet<Tuple2<Long, Long>> updates = deltaIteration.getWorkset()
            .join(edgeList).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1)
            .join(deltaIteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // The delta and the next workset are the same data set.
    final DataSet<Tuple2<Long, Long>> components = deltaIteration.closeWith(updates, updates);
    components.writeAsCsv(resultPath, "\n", " ");

    env.execute("Connected Components Example");
}
示例10: AggregateOperator
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Creates an aggregation over a non-grouped data set.
 *
 * @param input the data set to aggregate; must not be null and must have a tuple type
 * @param function the aggregation to apply; must not be null
 * @param field the zero-based position of the tuple field to aggregate
 * @param aggregateLocationName stored on this operator as {@code aggregateLocationName}
 * @throws InvalidProgramException if the input type is not a tuple type
 * @throws IllegalArgumentException if {@code field} lies outside the tuple's arity
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) input.getType();
    final boolean fieldInRange = field >= 0 && field < tupleType.getArity();
    if (!fieldInRange) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // The aggregation function is specialized for the runtime class of the selected field.
    final AggregationFunction<?> aggregator =
            function.getFactory().createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    // First aggregation operator directly after a regular data set, hence no grouping.
    this.aggregationFunctions.add(aggregator);
    this.fields.add(field);
    this.grouping = null;
}
示例11: and
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Appends an additional aggregation to this operator.
 *
 * @param function the aggregation to apply; must not be null
 * @param field the zero-based position of the tuple field to aggregate
 * @return this operator, so that calls can be chained
 * @throws IllegalArgumentException if {@code field} lies outside the tuple's arity
 */
public AggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) getType();
    final boolean fieldInRange = field >= 0 && field < tupleType.getArity();
    if (!fieldInRange) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // The aggregation function is specialized for the runtime class of the selected field.
    final AggregationFunction<?> aggregator =
            function.getFactory().createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggregator);
    this.fields.add(field);
    return this;
}
示例12: testAggregationTypes
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Verifies that aggregations can be declared on supported field types and that
 * SUM on a String field is rejected with {@code UnsupportedAggregationTypeException}.
 */
@Test
public void testAggregationTypes() {
    try {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

        // should work: multiple aggregates
        tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

        // should work: nested aggregates
        tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

        // should not work: sum on string (field 2 of the tuple is a String)
        try {
            tupleDs.aggregate(Aggregations.SUM, 2);
            // Reaching this line means the unsupported aggregation was accepted.
            Assert.fail("Expected UnsupportedAggregationTypeException for SUM on a String field.");
        } catch (UnsupportedAggregationTypeException expected) {
            // we're good here
        }
    }
    catch (Exception e) {
        // Any other exception is a test failure; report it before failing.
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
示例13: main
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Word count that reads its input through a Hadoop input format and writes the
 * counts through a Hadoop output format.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the result path;
 *             with fewer than two arguments a usage message is printed and nothing runs
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String source = args[0];
    final String target = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Hadoop input side: text lines as (LongWritable, Text) pairs.
    Job hadoopJob = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> inputFormat =
            new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, hadoopJob);
    TextInputFormat.addInputPath(hadoopJob, new Path(source));

    // Flink data set backed by the Hadoop input format.
    DataSet<Tuple2<LongWritable, Text>> lines = env.createInput(inputFormat);

    // Tokenize each line, converting from Writable "Text" to String for easier handling.
    DataSet<Tuple2<String, Integer>> tokens = lines.flatMap(new Tokenizer());

    // Sum field 1 per distinct word (field 0).
    DataSet<Tuple2<String, Integer>> counts = tokens.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert back to Writable types for the Hadoop output format.
    DataSet<Tuple2<Text, IntWritable>> writableCounts = counts.map(new HadoopDatatypeMapper());

    // Hadoop output side.
    HadoopOutputFormat<Text, IntWritable> outputFormat =
            new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), hadoopJob);
    // The separator is set under both property names.
    outputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    outputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(hadoopJob, new Path(target));

    // Attach the sink and run the job.
    writableCounts.output(outputFormat);
    env.execute("Word Count");
}
示例14: main
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Runs the job named "TPC-DS Query 55 Example with Parquet input" and prints the
 * elapsed wall-clock time in milliseconds.
 *
 * @param args handed to {@code parseParameters}; the program returns without doing
 *             any work when that call returns {@code false}
 */
public static void main(String[] args) throws Exception {
    // The timer starts before argument parsing, so parsing time is included.
    final long startedAt = System.currentTimeMillis();

    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Load and map the three inputs.
    DataSet<DataDim> dateRows = getDataDimDataSet(env).map(new MapDataDim());
    DataSet<Item> itemRows = getItemDataSet(env).map(new MapItem());
    DataSet<StoreSales> salesRows = getStoreSalesDataSet(env).map(new MapStoreSales());

    // Join the date rows with the sales rows, join that with the item rows, then sum
    // field 2 per (field 1, field 0) group.
    // NOTE(review): depending on the Flink version, print() may itself trigger
    // execution - confirm that the execute() call below is still required.
    dateRows
            .join(salesRows).where(0).equalTo(0).with(new DataDimAndStoreSales())
            .join(itemRows).where(1).equalTo(0).with(new DataDimAndStoreSalesAndItems())
            .groupBy(1, 0)
            .aggregate(Aggregations.SUM, 2)
            .print();

    env.execute("TPC-DS Query 55 Example with Parquet input");

    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    System.out.println("Execution time: " + elapsedMillis);
}
示例15: main
import org.apache.flink.api.java.aggregation.Aggregations; //导入依赖的package包/类
/**
 * Reads tab-separated measurements, takes the maximum of {@code TEMPERATURE_FIELD}
 * per {@code YEAR_FIELD} group, and writes the (year, temperature) pairs as CSV,
 * overwriting any existing output.
 *
 * @param args not used
 */
public static void main(String[] args) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Input records carry four fields typed (Short, Short, Integer, Double).
    final DataSet<Tuple4<Short, Short, Integer, Double>> readings = env
            .readCsvFile(Config.pathTo("temperatures.tsv"))
            .fieldDelimiter('\t')
            .types(Short.class, Short.class, Integer.class, Double.class);

    // Keep only the grouping key and the aggregated field.
    final DataSet<Tuple2<Short, Integer>> yearlyMaxima = readings
            .groupBy(YEAR_FIELD)
            .aggregate(Aggregations.MAX, TEMPERATURE_FIELD)
            .project(YEAR_FIELD, TEMPERATURE_FIELD);

    yearlyMaxima.writeAsCsv(Config.outputPathTo("maximumTemperatures"), FileSystem.WriteMode.OVERWRITE);

    env.execute();
}