本文整理汇总了Java中org.apache.flink.api.java.operators.DistinctOperator类的典型用法代码示例。如果您正苦于以下问题：Java DistinctOperator类的具体用法？Java DistinctOperator怎么用？Java DistinctOperator使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
DistinctOperator类属于org.apache.flink.api.java.operators包，在下文中一共展示了DistinctOperator类的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testCombinable
import org.apache.flink.api.java.operators.DistinctOperator; //导入依赖的package包/类
/**
 * Verifies that the operator produced by a key-selector based {@code distinct()} is marked
 * as combinable in the generated program plan.
 *
 * @throws Exception if the program or its plan cannot be built; propagated unchanged so the test
 *         runner reports the original exception type and stack trace instead of a bare message
 */
@Test
public void testCombinable() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> input = env.fromElements("1", "2", "1", "3");

    // Identity key selector: every element is its own key.
    DistinctOperator<String> op = input.distinct(new KeySelector<String, String>() {
        @Override
        public String getKey(String value) {
            return value;
        }
    });

    // NOTE(review): this presumes print() only registers a sink and does not trigger execution;
    // confirm against the Flink version in use.
    op.print();

    Plan p = env.createProgramPlan();

    // The input of the first data sink is cast to the reduce operator that distinct() is expected to create.
    GroupReduceOperatorBase<?, ?, ?> reduceOp =
            (GroupReduceOperatorBase<?, ?, ?>) p.getDataSinks().iterator().next().getInput();
    Assert.assertTrue(reduceOp.isCombinable());
}
示例2: testDistinctWithFieldPositionKeyCombinable
import org.apache.flink.api.java.operators.DistinctOperator; //导入依赖的package包/类
/**
 * Compiles a {@code distinct(1)} program and checks the optimized plan: a combiner node sits
 * between the source and the reducer, the reducer and combiner use the sorted (partial) reduce
 * strategies keyed on field 1, and each node carries the expected parallelism.
 *
 * @throws Exception if the plan cannot be created or compiled; propagated unchanged so the test
 *         runner reports the original exception type and stack trace instead of a bare message
 */
@Test
public void testDistinctWithFieldPositionKeyCombinable() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);

    // The source gets an explicit parallelism of 6, different from the environment's 8.
    DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
            .types(String.class, Double.class)
            .name("source").setParallelism(6);

    DistinctOperator<Tuple2<String, Double>> reduced = data
            .distinct(1).name("reducer");

    reduced.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);

    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

    // get the original nodes by the names assigned above
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");

    // get the combiner: it is the node feeding the reducer's input
    SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

    // check wiring: source -> combiner -> reducer -> sink
    assertEquals(sourceNode, combineNode.getInput().getSource());
    assertEquals(reduceNode, sinkNode.getInput().getSource());

    // check the driver strategies: full sorted reduce on the reducer, partial on the combiner
    assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
    assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

    // check the keys: field position 1 everywhere
    assertEquals(new FieldList(1), reduceNode.getKeys(0));
    assertEquals(new FieldList(1), combineNode.getKeys(0));
    assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());

    // check parallelism: the combiner matches the source (6), reducer and sink use the default (8)
    assertEquals(6, sourceNode.getParallelism());
    assertEquals(6, combineNode.getParallelism());
    assertEquals(8, reduceNode.getParallelism());
    assertEquals(8, sinkNode.getParallelism());
}
示例3: distinct
import org.apache.flink.api.java.operators.DistinctOperator; //导入依赖的package包/类
/**
 * Builds the distinct version of this {@link DataSet}, deciding distinctness on keys produced by a
 * {@link KeySelector} function.
 *
 * <p>The KeySelector function is invoked for every element of the DataSet and extracts one key value;
 * whether two items are distinct is decided on that key.
 *
 * @param keyExtractor The KeySelector function that extracts, from each element of the DataSet, the key
 *                     value on which distinctness is decided.
 * @return A DistinctOperator that represents the distinct DataSet.
 */
public <K> DistinctOperator<T> distinct(KeySelector<T, K> keyExtractor) {
    // Key type as determined by the type extractor for this selector and the DataSet's element type.
    final TypeInformation<K> selectorKeyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType());

    // Key descriptor bundling the selector, the element type and the extracted key type.
    final Keys.SelectorFunctionKeys<T, K> distinctKeys =
            new Keys.SelectorFunctionKeys<>(keyExtractor, getType(), selectorKeyType);

    return new DistinctOperator<>(this, distinctKeys, Utils.getCallLocationName());
}
示例4: distinct
import org.apache.flink.api.java.operators.DistinctOperator; //导入依赖的package包/类
/**
 * Builds the distinct version of this {@link DataSet}, deciding distinctness on keys produced by a
 * {@link KeySelector} function.
 *
 * <p>The KeySelector function is invoked for every element of the DataSet and extracts one key value;
 * whether two items are distinct is decided on that key.
 *
 * @param keyExtractor The KeySelector function that extracts, from each element of the DataSet, the key
 *                     value on which distinctness is decided.
 * @return A DistinctOperator that represents the distinct DataSet.
 */
public <K> DistinctOperator<T> distinct(KeySelector<T, K> keyExtractor) {
    // Key type as determined by the type extractor for this selector and the DataSet's element type.
    final TypeInformation<K> selectorKeyType = TypeExtractor.getKeySelectorTypes(keyExtractor, type);

    // Key descriptor bundling the selector, the element type and the extracted key type.
    final Keys.SelectorFunctionKeys<T, K> distinctKeys =
            new Keys.SelectorFunctionKeys<T, K>(keyExtractor, getType(), selectorKeyType);

    return new DistinctOperator<T>(this, distinctKeys);
}