This article collects typical usage examples of the Java method org.apache.flink.api.java.DataSet.getType. If you are wondering what DataSet.getType does, how to call it, or where to find concrete examples, the curated snippets below should help. You can also read more about the enclosing class org.apache.flink.api.java.DataSet.
Below, 14 code examples of DataSet.getType are shown, sorted by popularity by default.
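Before the examples, here is a minimal sketch of what DataSet.getType() exposes. It assumes the usual Flink imports and an enclosing main method; the element values and the Tuple2 element type are illustrative, not taken from the examples below.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// A small literal DataSet; the values are purely illustrative.
DataSet<Tuple2<String, Integer>> words = env.fromElements(Tuple2.of("flink", 1), Tuple2.of("dataset", 2));
// getType() returns the TypeInformation Flink extracted for the element type.
TypeInformation<Tuple2<String, Integer>> type = words.getType();
System.out.println(type.isTupleType()); // true
System.out.println(type.getArity());    // 2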
Example 1: testTypeExtraction
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void testTypeExtraction() {
    try {
        InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

        TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<MyAvroType> input = env.createInput(format);
        TypeInformation<?> typeInfoDataSet = input.getType();

        Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
        Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);

        Assert.assertEquals(MyAvroType.class, typeInfoDirect.getTypeClass());
        Assert.assertEquals(MyAvroType.class, typeInfoDataSet.getTypeClass());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 2: summarize
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
/**
 * Summarize a DataSet of Tuples by collecting single pass statistics for all columns.
 *
 * <p>Example usage:
 * <pre>
 * {@code
 * DataSet<Tuple3<Double, String, Boolean>> input = // [...]
 * Tuple3<NumericColumnSummary, StringColumnSummary, BooleanColumnSummary> summary = DataSetUtils.summarize(input);
 *
 * summary.f0.getStandardDeviation()
 * summary.f1.getMaxLength()
 * }
 * </pre>
 * @return the summary as a Tuple the same width as input rows
 */
public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> input) throws Exception {
    if (!input.getType().isTupleType()) {
        throw new IllegalArgumentException("summarize() is only implemented for DataSet's of Tuples");
    }
    final TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();
    DataSet<TupleSummaryAggregator<R>> result = input.mapPartition(new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {
        @Override
        public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
            TupleSummaryAggregator<R> aggregator = SummaryAggregatorFactory.create(inType);
            for (Tuple value : values) {
                aggregator.aggregate(value);
            }
            out.collect(aggregator);
        }
    }).reduce(new ReduceFunction<TupleSummaryAggregator<R>>() {
        @Override
        public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> agg1, TupleSummaryAggregator<R> agg2) throws Exception {
            agg1.combine(agg2);
            return agg1;
        }
    });
    return result.collect().get(0).result();
}
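A minimal usage sketch of the helper above, assuming the usual imports and an enclosing method that throws Exception; the input values are illustrative.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Double, String>> input = env.fromElements(
        Tuple2.of(1.0, "a"), Tuple2.of(2.0, "bb"), Tuple2.of(3.0, "ccc"));
// One summary column per tuple field: numeric statistics for f0, string statistics for f1.
Tuple2<NumericColumnSummary<Double>, StringColumnSummary> summary = DataSetUtils.summarize(input);
System.out.println(summary.f0.getMean());      // 2.0
System.out.println(summary.f1.getMaxLength()); // 3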
Example 3: AggregateOperator
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
/**
 * Non-grouped aggregation.
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // this is the first aggregation operator after a regular data set (non grouped aggregation)
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = null;
}
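For context, a sketch of how this constructor is normally reached through the public DataSet API, assuming the usual imports; the tuple contents are illustrative.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<String, Integer>> counts = env.fromElements(Tuple2.of("a", 3), Tuple2.of("b", 4));
// aggregate(...) on a non-grouped DataSet builds an AggregateOperator internally.
counts.aggregate(Aggregations.SUM, 1).print(); // sums field 1 across the whole DataSet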
Example 4: PartitionOperator
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner,
        TypeInformation<P> partitionerTypeInfo, DataDistribution distribution, String partitionLocationName) {
    super(input, input.getType());

    Preconditions.checkNotNull(pMethod);
    Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
    Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partitioning requires a partitioner.");
    Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only necessary for range partitioning.");

    if (distribution != null) {
        Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(), "The distribution must provide at least as many fields as flat key fields are specified.");
        Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
                "The types of the flat key fields must be equal to the types of the fields of the distribution.");
    }

    if (customPartitioner != null) {
        pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
    }

    this.pMethod = pMethod;
    this.pKeys = pKeys;
    this.partitionLocationName = partitionLocationName;
    this.customPartitioner = customPartitioner;
    this.distribution = distribution;
}
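A sketch of reaching this constructor via partitionCustom(), assuming the usual imports; the modulo partitioner and the data are illustrative assumptions.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Integer, String>> data = env.fromElements(
        Tuple2.of(1, "a"), Tuple2.of(2, "b"), Tuple2.of(3, "c"));
// Partitioner is a single-method interface, so a lambda works; field 0 keys are Integers.
Partitioner<Integer> modPartitioner = (key, numPartitions) -> key % numPartitions;
// partitionCustom(...) creates a PartitionOperator with PartitionMethod.CUSTOM.
data.partitionCustom(modPartitioner, 0).print();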
Example 5: sampleWithSize
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
/**
 * Generate a sample of the DataSet that contains a fixed number of elements.
 *
 * <p><strong>NOTE:</strong> Sampling with a fixed size is not as efficient as sampling with a fraction;
 * use sampling with a fraction unless you need the exact sample size.
 *
 * @param withReplacement Whether an element can be selected more than once.
 * @param numSamples The expected sample size.
 * @param seed Random number generator seed.
 * @return The sampled DataSet
 */
public static <T> DataSet<T> sampleWithSize(
        DataSet<T> input,
        final boolean withReplacement,
        final int numSamples,
        final long seed) {
    SampleInPartition<T> sampleInPartition = new SampleInPartition<>(withReplacement, numSamples, seed);
    MapPartitionOperator<T, IntermediateSampleData<T>> mapPartitionOperator = input.mapPartition(sampleInPartition);

    // There is no previous group, so the parallelism of GroupReduceOperator is always 1.
    String callLocation = Utils.getCallLocationName();
    SampleInCoordinator<T> sampleInCoordinator = new SampleInCoordinator<>(withReplacement, numSamples, seed);
    return new GroupReduceOperator<>(mapPartitionOperator, input.getType(), sampleInCoordinator, callLocation);
}
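A usage sketch of the utility above, assuming the usual imports; the seed and sample size are illustrative.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Long> numbers = env.generateSequence(1, 1000);
// Draw exactly 10 elements without replacement; the fixed seed makes the sample reproducible.
DataSet<Long> sample = DataSetUtils.sampleWithSize(numbers, false, 10, 42L);
sample.print();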
Example 6: createCoGroupOperation
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
private <IN1, IN2, OUT> void createCoGroupOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
    DataSet<IN1> op1 = sets.getDataSet(info.parentID);
    DataSet<IN2> op2 = sets.getDataSet(info.otherID);
    Keys.ExpressionKeys<IN1> key1 = new Keys.ExpressionKeys<>(info.keys1, op1.getType());
    Keys.ExpressionKeys<IN2> key2 = new Keys.ExpressionKeys<>(info.keys2, op2.getType());
    PythonCoGroup<IN1, IN2, OUT> pcg = new PythonCoGroup<>(operatorConfig, info.envID, info.setID, type);
    sets.add(info.setID, new CoGroupRawOperator<>(op1, op2, key1, key2, pcg, type, info.name).setParallelism(info.parallelism));
}
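The snippet above comes from Flink's Python API bridge. For comparison, a sketch of the equivalent plain Java coGroup, assuming the usual imports and an ExecutionEnvironment env; the inputs and key positions are illustrative.

DataSet<Tuple2<Integer, String>> left = env.fromElements(Tuple2.of(1, "a"));
DataSet<Tuple2<Integer, Double>> right = env.fromElements(Tuple2.of(1, 0.5));
left.coGroup(right)
    .where(0)   // key of the left input, resolved against left.getType()
    .equalTo(0) // key of the right input, resolved against right.getType()
    .with(new CoGroupFunction<Tuple2<Integer, String>, Tuple2<Integer, Double>, String>() {
        @Override
        public void coGroup(Iterable<Tuple2<Integer, String>> first,
                Iterable<Tuple2<Integer, Double>> second, Collector<String> out) {
            out.collect("joined group"); // illustrative output only
        }
    })
    .print();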
Example 7: UnionOperator
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
/**
 * Create an operator that produces the union of the two given data sets.
 *
 * @param input1 The first data set to be unioned.
 * @param input2 The second data set to be unioned.
 */
public UnionOperator(DataSet<T> input1, DataSet<T> input2, String unionLocationName) {
    super(input1, input2, input1.getType());

    if (!input1.getType().equals(input2.getType())) {
        throw new InvalidProgramException("Cannot union inputs of different types. Input1="
                + input1.getType() + ", input2=" + input2.getType());
    }

    this.unionLocationName = unionLocationName;
}
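A sketch of the type check in action through the public union() API, assuming the usual imports; the literal values are illustrative.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> first = env.fromElements("a", "b");
DataSet<String> second = env.fromElements("c");
// Both inputs have equal TypeInformation, so the constructor's check passes.
first.union(second).print(); // a, b, c (order not guaranteed)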
Example 8: ReduceOperator
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
/**
 * This is the reduce-all case (in contrast to the reduce-per-group case).
 *
 * @param input The input data set.
 * @param function The reduce function.
 */
public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function, String defaultName) {
    super(input, input.getType());

    this.function = function;
    this.grouper = null;
    this.defaultName = defaultName;
    this.hint = null;
}
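A sketch of the reduce-all case from the user-facing API, assuming the usual imports; data and function are illustrative.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Integer> values = env.fromElements(1, 2, 3, 4);
// A non-grouped reduce folds the entire DataSet down to a single element.
values.reduce((a, b) -> a + b).print(); // 10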
Example 9: DistinctOperator
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
public DistinctOperator(DataSet<T> input, Keys<T> keys, String distinctLocationName) {
    super(input, input.getType());

    this.distinctLocationName = distinctLocationName;

    // if keys is null, distinct is computed on all fields
    if (keys == null) {
        keys = new Keys.ExpressionKeys<>(input.getType());
    }

    this.keys = keys;
}
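A sketch of the keys == null path via the no-argument distinct(), assuming the usual imports; the data is illustrative.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<String, Integer>> pairs = env.fromElements(
        Tuple2.of("a", 1), Tuple2.of("a", 1), Tuple2.of("b", 2));
// distinct() without arguments passes keys == null, so all fields form the key.
pairs.distinct().print(); // (a,1), (b,2)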
Example 10: DeltaIteration
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
public DeltaIteration(ExecutionEnvironment context, TypeInformation<ST> type, DataSet<ST> solutionSet, DataSet<WT> workset, Keys<ST> keys, int maxIterations) {
    initialSolutionSet = solutionSet;
    initialWorkset = workset;
    solutionSetPlaceholder = new SolutionSetPlaceHolder<>(context, solutionSet.getType(), this);
    worksetPlaceholder = new WorksetPlaceHolder<>(context, workset.getType());
    this.keys = keys;
    this.maxIterations = maxIterations;
}
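A sketch of how a DeltaIteration is usually created from the DataSet API, assuming the usual imports; the data and the identity step are illustrative, and a real job would transform the workset.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Double>> initialState = env.fromElements(Tuple2.of(0L, 1.0));
DataSet<Tuple2<Long, Double>> initialWorkset = env.fromElements(Tuple2.of(0L, 1.0));
// iterateDelta(...) keys the solution set on field 0 and allows at most 10 supersteps.
DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration =
        initialState.iterateDelta(initialWorkset, 10, 0);
DataSet<Tuple2<Long, Double>> delta = iteration.getWorkset(); // identity step
iteration.closeWith(delta, delta).print();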
Example 11: partitionByRange
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
/**
 * Range-partitions a DataSet on the specified tuple field positions.
 */
public static <T> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, int... fields) {
    return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE, new Keys.ExpressionKeys<>(fields, input.getType(), false), distribution, Utils.getCallLocationName());
}
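When no custom DataDistribution is needed, the same operation is available directly on DataSet; a sketch with illustrative data, assuming the usual imports.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Integer, String>> records = env.fromElements(
        Tuple2.of(3, "c"), Tuple2.of(1, "a"), Tuple2.of(2, "b"));
// Range-partition on field 0; Flink samples the input to construct the ranges.
records.partitionByRange(0).print();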
Example 12: DefaultJoin
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
public DefaultJoin(DataSet<I1> input1, DataSet<I2> input2,
        Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, String joinLocationName, JoinType type) {
    super(input1, input2, keys1, keys2,
            new DefaultFlatJoinFunction<I1, I2>(),
            new TupleTypeInfo<Tuple2<I1, I2>>(input1.getType(), input2.getType()), hint, joinLocationName, type);
}
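As the TupleTypeInfo above indicates, a default join (no with() function) emits Tuple2<I1, I2> pairs; a sketch with illustrative data, assuming the usual imports.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Integer, String>> users = env.fromElements(Tuple2.of(1, "alice"));
DataSet<Tuple2<Integer, String>> orders = env.fromElements(Tuple2.of(1, "book"));
// Without a join function, each result element pairs one left element with one right element.
DataSet<Tuple2<Tuple2<Integer, String>, Tuple2<Integer, String>>> joined =
        users.join(orders).where(0).equalTo(0);
joined.print(); // ((1,alice),(1,book))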
Example 13: JoinProjection
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
public JoinProjection(DataSet<I1> ds1, DataSet<I2> ds2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, int[] firstFieldIndexes, int[] secondFieldIndexes) {
    this.ds1 = ds1;
    this.ds2 = ds2;
    this.keys1 = keys1;
    this.keys2 = keys2;
    this.hint = hint;

    boolean isFirstTuple;
    boolean isSecondTuple;

    if (ds1.getType() instanceof TupleTypeInfo) {
        numFieldsDs1 = ds1.getType().getArity();
        isFirstTuple = true;
    } else {
        numFieldsDs1 = 1;
        isFirstTuple = false;
    }
    if (ds2.getType() instanceof TupleTypeInfo) {
        numFieldsDs2 = ds2.getType().getArity();
        isSecondTuple = true;
    } else {
        numFieldsDs2 = 1;
        isSecondTuple = false;
    }

    boolean isTuple;
    boolean firstInput;

    if (firstFieldIndexes != null && secondFieldIndexes == null) {
        // index array for first input is provided
        firstInput = true;
        isTuple = isFirstTuple;
        this.fieldIndexes = firstFieldIndexes;

        if (this.fieldIndexes.length == 0) {
            // no indexes provided, treat tuple as regular object
            isTuple = false;
        }
    } else if (firstFieldIndexes == null && secondFieldIndexes != null) {
        // index array for second input is provided
        firstInput = false;
        isTuple = isSecondTuple;
        this.fieldIndexes = secondFieldIndexes;

        if (this.fieldIndexes.length == 0) {
            // no indexes provided, treat tuple as regular object
            isTuple = false;
        }
    } else if (firstFieldIndexes == null && secondFieldIndexes == null) {
        throw new IllegalArgumentException("You must provide at least one field index array.");
    } else {
        throw new IllegalArgumentException("You must provide at most one field index array.");
    }

    if (!isTuple && this.fieldIndexes.length != 0) {
        // field index provided for non-Tuple input
        throw new IllegalArgumentException("Input is not a Tuple. Call projectFirst() (or projectSecond()) without arguments to include it.");
    } else if (this.fieldIndexes.length > 22) {
        throw new IllegalArgumentException("You may select only up to twenty-two (22) fields.");
    }

    if (isTuple) {
        this.isFieldInFirst = new boolean[this.fieldIndexes.length];

        // check field indexes and adapt to position in tuple
        int maxFieldIndex = firstInput ? numFieldsDs1 : numFieldsDs2;
        for (int i = 0; i < this.fieldIndexes.length; i++) {
            Preconditions.checkElementIndex(this.fieldIndexes[i], maxFieldIndex);
            if (firstInput) {
                this.isFieldInFirst[i] = true;
            } else {
                this.isFieldInFirst[i] = false;
            }
        }
    } else {
        this.isFieldInFirst = new boolean[]{firstInput};
        this.fieldIndexes = new int[]{-1};
    }
}
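A sketch of the projection API that this constructor backs, assuming the usual imports; the data and field choices are illustrative.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Integer, String>> left = env.fromElements(Tuple2.of(1, "alice"));
DataSet<Tuple2<Integer, Double>> right = env.fromElements(Tuple2.of(1, 9.99));
// projectFirst/projectSecond select tuple fields from either input for the output tuple.
left.join(right).where(0).equalTo(0)
    .projectFirst(1)  // field 1 of the left input becomes output field 0
    .projectSecond(1) // field 1 of the right input becomes output field 1
    .print();         // (alice,9.99)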
Example 14: translateDistinctKeySelector
import org.apache.flink.api.java.DataSet; // import the package/class this method depends on
@Test
public void translateDistinctKeySelector() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

        initialData.distinct(new KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue>() {
            public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) {
                return value.f1;
            }
        }).setParallelism(4).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

        Plan p = env.createProgramPlan();

        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        MapOperatorBase<?, ?, ?> keyRemover = (MapOperatorBase<?, ?, ?>) sink.getInput();
        PlanUnwrappingReduceOperator<?, ?> reducer = (PlanUnwrappingReduceOperator<?, ?>) keyRemover.getInput();
        MapOperatorBase<?, ?, ?> keyExtractor = (MapOperatorBase<?, ?, ?>) reducer.getInput();

        // check the parallelisms
        assertEquals(1, keyExtractor.getParallelism());
        assertEquals(4, reducer.getParallelism());

        // check types
        TypeInformation<?> keyValueInfo = new TupleTypeInfo<Tuple2<StringValue, Tuple3<Double, StringValue, LongValue>>>(
                new ValueTypeInfo<StringValue>(StringValue.class),
                initialData.getType());

        assertEquals(initialData.getType(), keyExtractor.getOperatorInfo().getInputType());
        assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType());

        assertEquals(keyValueInfo, reducer.getOperatorInfo().getInputType());
        assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType());

        assertEquals(keyValueInfo, keyRemover.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), keyRemover.getOperatorInfo().getOutputType());

        // check keys
        assertEquals(KeyExtractingMapper.class, keyExtractor.getUserCodeWrapper().getUserCodeClass());

        assertTrue(keyExtractor.getInput() instanceof GenericDataSourceBase<?, ?>);
    }
    catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}