本文整理汇总了Java中org.apache.flink.api.java.DataSet.groupBy方法的典型用法代码示例。如果您正苦于以下问题：Java DataSet.groupBy方法的具体用法？Java DataSet.groupBy怎么用？Java DataSet.groupBy使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.flink.api.java.DataSet的用法示例。
在下文中一共展示了DataSet.groupBy方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testForkingReduceOnKeyedDataset
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Groups the keyed input by tuple position 0, applies two different group reducers to that
 * one grouping, and checks the union of both reducer outputs.
 *
 * <p>The expected text contains the rows {@code k1,6,true} and {@code k2,4,true} twice,
 * matching the two reducer outputs that are unioned before collecting.
 *
 * @throws Exception if building or collecting the result fails
 */
@Test
public void testForkingReduceOnKeyedDataset() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(4);

    // creates the input data and distributes them evenly among the available downstream tasks
    final DataSet<Tuple3<String, Integer, Boolean>> keyedInput = createKeyedInput(environment);
    final UnsortedGrouping<Tuple3<String, Integer, Boolean>> byFirstField = keyedInput.groupBy(0);

    // Fork: both reducers consume the very same grouping.
    final DataSet<Tuple3<String, Integer, Boolean>> combReduced =
            byFirstField.reduceGroup(new KeyedCombReducer());
    final DataSet<Tuple3<String, Integer, Boolean>> groupCombReduced =
            byFirstField.reduceGroup(new KeyedGroupCombReducer());

    final List<Tuple3<String, Integer, Boolean>> unioned =
            combReduced.union(groupCombReduced).collect();

    // The same two rows are expected once per unioned branch.
    final String rowsPerBranch = "k1,6,true\n" + "k2,4,true\n";
    compareResultAsTuples(unioned, rowsPerBranch + rowsPerBranch);
}
示例2: testForkingReduceOnKeyedDatasetWithSelection
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Same forking scenario as the position-keyed variant, but the grouping key is produced by a
 * {@code KeySelectorX} instance instead of a tuple field position.
 *
 * <p>Two group reducers are applied to the one grouping; their outputs are unioned, collected
 * and compared against the rows {@code k1,6,true} and {@code k2,4,true}, each listed twice.
 *
 * @throws Exception if building or collecting the result fails
 */
@Test
public void testForkingReduceOnKeyedDatasetWithSelection() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(4);

    // creates the input data and distributes them evenly among the available downstream tasks
    final DataSet<Tuple3<String, Integer, Boolean>> keyedInput = createKeyedInput(environment);
    final UnsortedGrouping<Tuple3<String, Integer, Boolean>> bySelector =
            keyedInput.groupBy(new KeySelectorX());

    // Fork: both reducers consume the very same grouping.
    final DataSet<Tuple3<String, Integer, Boolean>> combReduced =
            bySelector.reduceGroup(new KeyedCombReducer());
    final DataSet<Tuple3<String, Integer, Boolean>> groupCombReduced =
            bySelector.reduceGroup(new KeyedGroupCombReducer());

    final List<Tuple3<String, Integer, Boolean>> unioned =
            combReduced.union(groupCombReduced).collect();

    // The same two rows are expected once per unioned branch.
    final String rowsPerBranch = "k1,6,true\n" + "k2,4,true\n";
    compareResultAsTuples(unioned, rowsPerBranch + rowsPerBranch);
}
示例3: testGroupByKeyExpressions1
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Verifies that a {@code CustomType} data set can be grouped by the field expression
 * {@code "myInt"}.
 *
 * <p>Only the {@code groupBy} call is exercised; the test does not execute or collect
 * anything. Any exception thrown by the call fails the test.
 */
@Test
public void testGroupByKeyExpressions1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    this.customTypeData.add(new CustomType());
    DataSet<CustomType> ds = env.fromCollection(customTypeData);
    // should work
    try {
        ds.groupBy("myInt");
    } catch (Exception e) {
        // Surface the cause; a bare fail() would hide why the grouping was rejected.
        Assert.fail("groupBy(\"myInt\") should be accepted but threw: " + e);
    }
}
示例4: testGroupByKeyExpressions1Nested
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Verifies that a {@code CustomType} data set can be grouped by the nested field expression
 * {@code "nested.myInt"}.
 *
 * <p>Only the {@code groupBy} call is exercised; the test does not execute or collect
 * anything. Any exception thrown by the call fails the test.
 */
@Test
public void testGroupByKeyExpressions1Nested() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    this.customTypeData.add(new CustomType());
    DataSet<CustomType> ds = env.fromCollection(customTypeData);
    // should work
    try {
        ds.groupBy("nested.myInt");
    } catch (Exception e) {
        // Surface the cause; a bare fail() would hide why the grouping was rejected.
        Assert.fail("groupBy(\"nested.myInt\") should be accepted but threw: " + e);
    }
}
示例5: testGroupByKeySelector1
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Verifies that a {@code CustomType} data set can be grouped with a {@code KeySelector}
 * that returns the element's {@code myLong} field as a {@code Long} key.
 *
 * <p>Creating the data set and calling {@code groupBy} both happen inside the guarded
 * section; any exception from either step fails the test.
 */
@Test
@SuppressWarnings("serial")
public void testGroupByKeySelector1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    this.customTypeData.add(new CustomType());
    try {
        DataSet<CustomType> customDs = env.fromCollection(customTypeData);
        // should work
        customDs.groupBy(
                new KeySelector<GroupingTest.CustomType, Long>() {
                    @Override
                    public Long getKey(CustomType value) {
                        return value.myLong;
                    }
                });
    } catch (Exception e) {
        // Surface the cause; a bare fail() would hide why the grouping was rejected.
        Assert.fail("groupBy with a Long-returning KeySelector should be accepted but threw: " + e);
    }
}
示例6: testGroupByKeySelector2
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Verifies that a {@code CustomType} data set can be grouped with a {@code KeySelector}
 * that returns a {@code Tuple2<Integer, Long>} built from the element's {@code myInt} and
 * {@code myLong} fields.
 *
 * <p>Creating the data set and calling {@code groupBy} both happen inside the guarded
 * section; any exception from either step fails the test.
 */
@Test
@SuppressWarnings("serial")
public void testGroupByKeySelector2() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    this.customTypeData.add(new CustomType());
    try {
        DataSet<CustomType> customDs = env.fromCollection(customTypeData);
        // should work
        customDs.groupBy(
                new KeySelector<GroupingTest.CustomType, Tuple2<Integer, Long>>() {
                    @Override
                    public Tuple2<Integer, Long> getKey(CustomType value) {
                        return new Tuple2<Integer, Long>(value.myInt, value.myLong);
                    }
                });
    } catch (Exception e) {
        // Surface the cause; a bare fail() would hide why the grouping was rejected.
        Assert.fail("groupBy with a Tuple2-returning KeySelector should be accepted but threw: " + e);
    }
}
示例7: testGroupByKeySelector3
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Groups a {@code CustomType} data set with a {@code KeySelector} that returns the element
 * itself as the key.
 *
 * <p>As written, the test passes only when neither data set creation nor {@code groupBy}
 * throws; any exception fails the test.
 */
@Test
@SuppressWarnings("serial")
public void testGroupByKeySelector3() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    this.customTypeData.add(new CustomType());
    try {
        DataSet<CustomType> customDs = env.fromCollection(customTypeData);
        // NOTE(review): the comment here previously read "should not work", but the catch
        // block below fails the test on any exception, so the test requires this call to
        // succeed. Confirm which behaviour is intended.
        customDs.groupBy(
                new KeySelector<GroupingTest.CustomType, CustomType>() {
                    @Override
                    public CustomType getKey(CustomType value) {
                        return value;
                    }
                });
    } catch (Exception e) {
        // Surface the cause; a bare fail() would hide why the grouping was rejected.
        Assert.fail("groupBy with a CustomType-returning KeySelector threw: " + e);
    }
}
示例8: testGroupAtomicTypeWithInvalid1
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Expects an {@code InvalidProgramException} when an {@code Integer} data set is grouped by
 * the expressions {@code "*"} and {@code "invalidField"} together.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid1() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Integer> atomicInts = environment.fromElements(0, 1, 2, 3);

    // The second expression is what makes this key specification invalid for the test.
    atomicInts.groupBy("*", "invalidField");
}
示例9: testGroupByKeyFields1
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Verifies that a {@code Tuple5} data set can be grouped by field position 0.
 *
 * <p>Only the {@code groupBy} call is exercised; the test does not execute or collect
 * anything. Any exception thrown by the call fails the test.
 */
@Test
public void testGroupByKeyFields1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);
    // should work
    try {
        tupleDs.groupBy(0);
    } catch (Exception e) {
        // Surface the cause; a bare fail() would hide why the grouping was rejected.
        Assert.fail("groupBy(0) on a Tuple5 data set should be accepted but threw: " + e);
    }
}
示例10: testGroupByKeyFields2
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Expects an {@code InvalidProgramException} when a {@code Long} data set is grouped by
 * field position 0.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupByKeyFields2() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Long> basicTypeInput =
            environment.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO);

    // Must be rejected: a positional key is requested on a basic (non-tuple) type.
    basicTypeInput.groupBy(0);
}
示例11: testGroupByKeyFields4
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Expects an {@code IndexOutOfBoundsException} when a {@code Tuple5} data set is grouped by
 * field position 5.
 */
@Test(expected = IndexOutOfBoundsException.class)
public void testGroupByKeyFields4() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple5<Integer, Long, String, Long, Integer>> fiveFieldTuples =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    // Must be rejected: position 5 lies outside the tuple's bounds.
    fiveFieldTuples.groupBy(5);
}
示例12: testGroupByKeyFields5
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Expects an {@code IndexOutOfBoundsException} when a {@code Tuple5} data set is grouped by
 * the negative field position -1.
 */
@Test(expected = IndexOutOfBoundsException.class)
public void testGroupByKeyFields5() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple5<Integer, Long, String, Long, Integer>> fiveFieldTuples =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    // Must be rejected: a negative field position is never valid.
    fiveFieldTuples.groupBy(-1);
}
示例13: testGroupByKeyFieldsOnPrimitiveArray
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Verifies that a data set of {@code Tuple2<byte[], byte[]>} can be grouped by field
 * position 0, i.e. with a primitive byte array as the key field.
 *
 * <p>The test has no explicit assertion; it passes when neither data set creation nor
 * {@code groupBy} throws.
 */
@Test
public void testGroupByKeyFieldsOnPrimitiveArray() {
    // Explicit type arguments instead of the raw Tuple2 type, so the element is checked
    // against the collection's element type at compile time (no unchecked warning).
    this.byteArrayData.add(new Tuple2<byte[], byte[]>(new byte[]{0}, new byte[]{1}));
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<byte[], byte[]>> tupleDs = env.fromCollection(byteArrayData);
    tupleDs.groupBy(0);
}
示例14: testGroupingAtomicType
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Verifies that an {@code Integer} data set can be grouped by the wildcard expression
 * {@code "*"}.
 *
 * <p>The test has no explicit assertion; it passes when the {@code groupBy} call does not
 * throw.
 */
@Test
public void testGroupingAtomicType() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Integer> atomicInts = environment.fromElements(0, 1, 1, 2, 0, 0);

    // Wildcard key on an atomic type.
    atomicInts.groupBy("*");
}
示例15: testGroupAtomicTypeWithInvalid3
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
/**
 * Expects an {@code InvalidProgramException} when a data set of {@code ArrayList<Integer>}
 * is grouped by the wildcard expression {@code "*"}.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid3() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<ArrayList<Integer>> listElements =
            environment.fromElements(new ArrayList<Integer>());

    // Must be rejected: the wildcard key is requested on a list-typed element.
    listElements.groupBy("*");
}