

Java DataSet.getType Method Code Examples

This article collects typical usage examples of the Java method org.apache.flink.api.java.DataSet.getType. If you are wondering what DataSet.getType does, how to call it, or what it looks like in real code, the curated examples below may help. You can also look at further usage examples of the enclosing class, org.apache.flink.api.java.DataSet.


The following presents 15 code examples of the DataSet.getType method, sorted by popularity by default.
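
Before diving into the collected examples, here is a minimal, self-contained sketch of what DataSet.getType() returns. It is not taken from the projects below; the class name GetTypeSketch and the sample data are made up for illustration, and it assumes Flink's Java DataSet API (flink-java) is on the classpath.

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class GetTypeSketch {
	public static void main(String[] args) {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// A small tuple data set; getType() exposes the TypeInformation that Flink inferred for it.
		DataSet<Tuple2<String, Integer>> words = env.fromElements(
				Tuple2.of("flink", 1),
				Tuple2.of("dataset", 2));

		TypeInformation<Tuple2<String, Integer>> typeInfo = words.getType();
		System.out.println(typeInfo.isTupleType()); // true
		System.out.println(typeInfo.getArity());    // 2
	}
}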

Example 1: testTypeExtraction

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
@Test
public void testTypeExtraction() {
	try {
		InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

		TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<MyAvroType> input = env.createInput(format);
		TypeInformation<?> typeInfoDataSet = input.getType();

		Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
		Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);

		Assert.assertEquals(MyAvroType.class, typeInfoDirect.getTypeClass());
		Assert.assertEquals(MyAvroType.class, typeInfoDataSet.getTypeClass());
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Developer: axbaretto, Project: flink, Lines: 22, Source: AvroInputFormatTypeExtractionTest.java

Example 2: summarize

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
/**
 * Summarize a DataSet of Tuples by collecting single pass statistics for all columns.
 *
 * <p>Example usage:
 * <pre>
 * {@code
 * DataSet<Tuple3<Double, String, Boolean>> input = // [...]
 * Tuple3<NumericColumnSummary, StringColumnSummary, BooleanColumnSummary> summary = DataSetUtils.summarize(input);
 *
 * summary.f0.getStandardDeviation()
 * summary.f1.getMaxLength()
 * }
 * </pre>
 * @return the summary as a Tuple the same width as input rows
 */
public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> input) throws Exception {
	if (!input.getType().isTupleType()) {
		throw new IllegalArgumentException("summarize() is only implemented for DataSet's of Tuples");
	}
	final TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();
	DataSet<TupleSummaryAggregator<R>> result = input.mapPartition(new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {
		@Override
		public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
			TupleSummaryAggregator<R> aggregator = SummaryAggregatorFactory.create(inType);
			for (Tuple value : values) {
				aggregator.aggregate(value);
			}
			out.collect(aggregator);
		}
	}).reduce(new ReduceFunction<TupleSummaryAggregator<R>>() {
		@Override
		public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> agg1, TupleSummaryAggregator<R> agg2) throws Exception {
			agg1.combine(agg2);
			return agg1;
		}
	});
	return result.collect().get(0).result();
}
 
Developer: axbaretto, Project: flink, Lines: 39, Source: DataSetUtils.java
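
As a quick usage illustration of the summarize() method above, here is a minimal runnable sketch. It is not part of the Flink sources; the class name SummarizeSketch and the sample rows are made up, and it assumes flink-java on the classpath.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.summarize.BooleanColumnSummary;
import org.apache.flink.api.java.summarize.NumericColumnSummary;
import org.apache.flink.api.java.summarize.StringColumnSummary;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.DataSetUtils;

public class SummarizeSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Small tuple data set; summarize() requires input.getType().isTupleType() to be true.
		DataSet<Tuple3<Double, String, Boolean>> input = env.fromElements(
				Tuple3.of(1.0, "a", true),
				Tuple3.of(2.5, "bbb", false));

		// One summary column per tuple field, same arity as the input rows.
		Tuple3<NumericColumnSummary<Double>, StringColumnSummary, BooleanColumnSummary> summary =
				DataSetUtils.summarize(input);

		System.out.println(summary.f0.getStandardDeviation()); // statistics over field 0
		System.out.println(summary.f1.getMaxLength());         // max string length in field 1
	}
}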

Example 3: AggregateOperator

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
/**
 * Non-grouped aggregation.
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field, String aggregateLocationName) {
	super(Preconditions.checkNotNull(input), input.getType());
	Preconditions.checkNotNull(function);

	this.aggregateLocationName = aggregateLocationName;

	if (!input.getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// this is the first aggregation operator after a regular data set (non grouped aggregation)
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = null;
}
 
Developer: axbaretto, Project: flink, Lines: 28, Source: AggregateOperator.java
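
For context, a short, hypothetical sketch of the user-facing call that reaches the AggregateOperator constructor above (the class name AggregateSketch and the data are made up):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;

public class AggregateSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Non-grouped aggregation over a tuple data set; aggregate() goes through the
		// constructor shown above, which checks input.getType().isTupleType().
		DataSet<Tuple2<String, Integer>> scores = env.fromElements(
				Tuple2.of("a", 3),
				Tuple2.of("b", 7));

		// Sums field 1 over all records; non-aggregated fields keep a value from one of the records.
		scores.aggregate(Aggregations.SUM, 1).print();
	}
}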

Example 4: PartitionOperator

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner,
		TypeInformation<P> partitionerTypeInfo, DataDistribution distribution, String partitionLocationName) {
	super(input, input.getType());

	Preconditions.checkNotNull(pMethod);
	Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
	Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partitioning requires a partitioner.");
	Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only necessary for range partitioning.");

	if (distribution != null) {
		Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(), "The distribution must provide at least as many fields as flat key fields are specified.");
		Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
				"The types of the flat key fields must be equal to the types of the fields of the distribution.");
	}

	if (customPartitioner != null) {
		pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
	}

	this.pMethod = pMethod;
	this.pKeys = pKeys;
	this.partitionLocationName = partitionLocationName;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
}
 
Developer: axbaretto, Project: flink, Lines: 26, Source: PartitionOperator.java

Example 5: testTypeExtraction

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
@Test
public void testTypeExtraction() {
	try {
		InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

		TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<MyAvroType> input = env.createInput(format);
		TypeInformation<?> typeInfoDataSet = input.getType();


		Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
		Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);

		Assert.assertEquals(MyAvroType.class, typeInfoDirect.getTypeClass());
		Assert.assertEquals(MyAvroType.class, typeInfoDataSet.getTypeClass());
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Developer: axbaretto, Project: flink, Lines: 23, Source: AvroInputFormatTypeExtractionTest.java

Example 6: sampleWithSize

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
/**
 * Generate a sample of the DataSet that contains a fixed number of elements.
 *
 * <p><strong>NOTE:</strong> Sampling with a fixed size is not as efficient as sampling with a fraction;
 * use fraction-based sampling unless you need an exact sample size.
 *
 * @param withReplacement Whether element can be selected more than once.
 * @param numSamples       The expected sample size.
 * @param seed            Random number generator seed.
 * @return The sampled DataSet
 */
public static <T> DataSet<T> sampleWithSize(
	DataSet<T> input,
	final boolean withReplacement,
	final int numSamples,
	final long seed) {

	SampleInPartition<T> sampleInPartition = new SampleInPartition<>(withReplacement, numSamples, seed);
	MapPartitionOperator mapPartitionOperator = input.mapPartition(sampleInPartition);

	// There is no previous group, so the parallelism of GroupReduceOperator is always 1.
	String callLocation = Utils.getCallLocationName();
	SampleInCoordinator<T> sampleInCoordinator = new SampleInCoordinator<>(withReplacement, numSamples, seed);
	return new GroupReduceOperator<>(mapPartitionOperator, input.getType(), sampleInCoordinator, callLocation);
}
 
Developer: axbaretto, Project: flink, Lines: 26, Source: DataSetUtils.java
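
A minimal usage sketch of the sampleWithSize() method above (the class name SampleWithSizeSketch, the sequence bounds, and the seed are arbitrary choices, not taken from the Flink sources):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.utils.DataSetUtils;

public class SampleWithSizeSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Long> numbers = env.generateSequence(1, 1000);

		// Draw exactly 10 elements without replacement; the element type is carried
		// over to the result via input.getType(), as in the snippet above.
		DataSet<Long> sample = DataSetUtils.sampleWithSize(numbers, false, 10, 4711L);

		sample.print();
	}
}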

Example 7: createCoGroupOperation

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
private <IN1, IN2, OUT> void createCoGroupOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN1> op1 = sets.getDataSet(info.parentID);
	DataSet<IN2> op2 = sets.getDataSet(info.otherID);
	Keys.ExpressionKeys<IN1> key1 = new Keys.ExpressionKeys<>(info.keys1, op1.getType());
	Keys.ExpressionKeys<IN2> key2 = new Keys.ExpressionKeys<>(info.keys2, op2.getType());
	PythonCoGroup<IN1, IN2, OUT> pcg = new PythonCoGroup<>(operatorConfig, info.envID, info.setID, type);
	sets.add(info.setID, new CoGroupRawOperator<>(op1, op2, key1, key2, pcg, type, info.name).setParallelism(info.parallelism));
}
 
Developer: axbaretto, Project: flink, Lines: 9, Source: PythonPlanBinder.java

Example 8: UnionOperator

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
/**
 * Create an operator that produces the union of the two given data sets.
 *
 * @param input1 The first data set to be unioned.
 * @param input2 The second data set to be unioned.
 */
public UnionOperator(DataSet<T> input1, DataSet<T> input2, String unionLocationName) {
	super(input1, input2, input1.getType());

	if (!input1.getType().equals(input2.getType())) {
		throw new InvalidProgramException("Cannot union inputs of different types. Input1="
				+ input1.getType() + ", input2=" + input2.getType());
	}

	this.unionLocationName = unionLocationName;
}
 
Developer: axbaretto, Project: flink, Lines: 17, Source: UnionOperator.java
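
For illustration, a small hypothetical sketch of the user-facing union() call that constructs the UnionOperator above (class name and data are made up):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class UnionSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Long> first = env.generateSequence(1, 5);
		DataSet<Long> second = env.generateSequence(6, 10);

		// union() ends up in the constructor above; it requires that first.getType()
		// equals second.getType(), otherwise an InvalidProgramException is thrown.
		DataSet<Long> all = first.union(second);

		all.print();
	}
}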

Example 9: ReduceOperator

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
/**
 * Constructor for the reduce-all case (in contrast to the reduce-per-group case).
 *
 * @param input The input data set to be reduced.
 * @param function The reduce function to apply.
 * @param defaultName The default name used for this operator.
 */
public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function, String defaultName) {
	super(input, input.getType());

	this.function = function;
	this.grouper = null;
	this.defaultName = defaultName;
	this.hint = null;
}
 
Developer: axbaretto, Project: flink, Lines: 15, Source: ReduceOperator.java
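
And a brief, hypothetical sketch of the reduce-all call path that uses the ReduceOperator constructor above (class name and data are made up):

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class ReduceAllSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Long> numbers = env.generateSequence(1, 100);

		// Reduce-all (no grouping): this call path uses the constructor above,
		// which reuses input.getType() as the result type.
		DataSet<Long> sum = numbers.reduce(new ReduceFunction<Long>() {
			@Override
			public Long reduce(Long a, Long b) {
				return a + b;
			}
		});

		sum.print(); // 5050
	}
}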

Example 10: DistinctOperator

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
public DistinctOperator(DataSet<T> input, Keys<T> keys, String distinctLocationName) {
	super(input, input.getType());

	this.distinctLocationName = distinctLocationName;

	// if keys is null distinction is done on all fields
	if (keys == null) {
		keys = new Keys.ExpressionKeys<>(input.getType());
	}

	this.keys = keys;
}
 
Developer: axbaretto, Project: flink, Lines: 13, Source: DistinctOperator.java

Example 11: DeltaIteration

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
public DeltaIteration(ExecutionEnvironment context, TypeInformation<ST> type, DataSet<ST> solutionSet, DataSet<WT> workset, Keys<ST> keys, int maxIterations) {
	initialSolutionSet = solutionSet;
	initialWorkset = workset;
	solutionSetPlaceholder = new SolutionSetPlaceHolder<>(context, solutionSet.getType(), this);
	worksetPlaceholder = new WorksetPlaceHolder<>(context, workset.getType());
	this.keys = keys;
	this.maxIterations = maxIterations;
}
 
Developer: axbaretto, Project: flink, Lines: 9, Source: DeltaIteration.java

Example 12: partitionByRange

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
/**
 * Range-partitions a DataSet on the specified tuple field positions.
 */
public static <T> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, int... fields) {
	return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE, new Keys.ExpressionKeys<>(fields, input.getType(), false), distribution, Utils.getCallLocationName());
}
 
Developer: axbaretto, Project: flink, Lines: 7, Source: DataSetUtils.java

Example 13: DefaultJoin

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
public DefaultJoin(DataSet<I1> input1, DataSet<I2> input2,
		Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, String joinLocationName, JoinType type) {
	super(input1, input2, keys1, keys2,
		new DefaultFlatJoinFunction<I1, I2>(),
		new TupleTypeInfo<Tuple2<I1, I2>>(input1.getType(), input2.getType()), hint, joinLocationName, type);
}
 
Developer: axbaretto, Project: flink, Lines: 7, Source: JoinOperator.java

Example 14: JoinProjection

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
public JoinProjection(DataSet<I1> ds1, DataSet<I2> ds2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, int[] firstFieldIndexes, int[] secondFieldIndexes) {
	this.ds1 = ds1;
	this.ds2 = ds2;
	this.keys1 = keys1;
	this.keys2 = keys2;
	this.hint = hint;

	boolean isFirstTuple;
	boolean isSecondTuple;

	if (ds1.getType() instanceof TupleTypeInfo) {
		numFieldsDs1 = ds1.getType().getArity();
		isFirstTuple = true;
	} else {
		numFieldsDs1 = 1;
		isFirstTuple = false;
	}
	if (ds2.getType() instanceof TupleTypeInfo) {
		numFieldsDs2 = ds2.getType().getArity();
		isSecondTuple = true;
	} else {
		numFieldsDs2 = 1;
		isSecondTuple = false;
	}

	boolean isTuple;
	boolean firstInput;

	if (firstFieldIndexes != null && secondFieldIndexes == null) {
		// index array for first input is provided
		firstInput = true;
		isTuple = isFirstTuple;
		this.fieldIndexes = firstFieldIndexes;

		if (this.fieldIndexes.length == 0) {
			// no indexes provided, treat tuple as regular object
			isTuple = false;
		}
	} else if (firstFieldIndexes == null && secondFieldIndexes != null) {
		// index array for second input is provided
		firstInput = false;
		isTuple = isSecondTuple;
		this.fieldIndexes = secondFieldIndexes;

		if (this.fieldIndexes.length == 0) {
			// no indexes provided, treat tuple as regular object
			isTuple = false;
		}
	} else if (firstFieldIndexes == null && secondFieldIndexes == null) {
		throw new IllegalArgumentException("You must provide at least one field index array.");
	} else {
		throw new IllegalArgumentException("You must provide at most one field index array.");
	}

	if (!isTuple && this.fieldIndexes.length != 0) {
		// field index provided for non-Tuple input
		throw new IllegalArgumentException("Input is not a Tuple. Call projectFirst() (or projectSecond()) without arguments to include it.");
	} else if (this.fieldIndexes.length > 22) {
		throw new IllegalArgumentException("You may select only up to twenty-two (22) fields.");
	}

	if (isTuple) {
		this.isFieldInFirst = new boolean[this.fieldIndexes.length];

		// check field indexes and adapt to position in tuple
		int maxFieldIndex = firstInput ? numFieldsDs1 : numFieldsDs2;
		for (int i = 0; i < this.fieldIndexes.length; i++) {
			Preconditions.checkElementIndex(this.fieldIndexes[i], maxFieldIndex);

			if (firstInput) {
				this.isFieldInFirst[i] = true;
			} else {
				this.isFieldInFirst[i] = false;
			}
		}
	} else {
		this.isFieldInFirst = new boolean[]{firstInput};
		this.fieldIndexes = new int[]{-1};
	}

}
 
Developer: axbaretto, Project: flink, Lines: 82, Source: JoinOperator.java

Example 15: translateDistinctKeySelector

import org.apache.flink.api.java.DataSet; // import the package/class on which this method depends
@Test
public void translateDistinctKeySelector() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

		initialData.distinct(new KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue>() {
			public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) {
				return value.f1;
			}
		}).setParallelism(4).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		MapOperatorBase<?, ?, ?> keyRemover = (MapOperatorBase<?, ?, ?>) sink.getInput();
		PlanUnwrappingReduceOperator<?, ?> reducer = (PlanUnwrappingReduceOperator<?, ?>) keyRemover.getInput();
		MapOperatorBase<?, ?, ?> keyExtractor = (MapOperatorBase<?, ?, ?>) reducer.getInput();

		// check the parallelisms
		assertEquals(1, keyExtractor.getParallelism());
		assertEquals(4, reducer.getParallelism());

		// check types
		TypeInformation<?> keyValueInfo = new TupleTypeInfo<Tuple2<StringValue, Tuple3<Double, StringValue, LongValue>>>(
				new ValueTypeInfo<StringValue>(StringValue.class),
				initialData.getType());

		assertEquals(initialData.getType(), keyExtractor.getOperatorInfo().getInputType());
		assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType());

		assertEquals(keyValueInfo, reducer.getOperatorInfo().getInputType());
		assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType());

		assertEquals(keyValueInfo, keyRemover.getOperatorInfo().getInputType());
		assertEquals(initialData.getType(), keyRemover.getOperatorInfo().getOutputType());

		// check keys
		assertEquals(KeyExtractingMapper.class, keyExtractor.getUserCodeWrapper().getUserCodeClass());

		assertTrue(keyExtractor.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}
 
Developer: axbaretto, Project: flink, Lines: 52, Source: DistinctTranslationTest.java


Note: The org.apache.flink.api.java.DataSet.getType method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project. Do not republish without permission.