

Java DataSource Class Code Examples

This article collects and summarizes typical usage examples of the Java class org.apache.flink.api.java.operators.DataSource. If you are asking yourself how the DataSource class is used, what it is for, or what working examples look like, the curated examples below should help.


The DataSource class belongs to the org.apache.flink.api.java.operators package. The sections below present 14 code examples of the DataSource class, ordered by popularity.

Example 1: testAggregationWithTwoCount

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testAggregationWithTwoCount() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSource<Tuple2<Float, String>> input =
		env.fromElements(
			new Tuple2<>(1f, "Hello"),
			new Tuple2<>(2f, "Ciao"));

	Table table =
		tableEnv.fromDataSet(input);

	Table result =
		table.select("f0.count, f1.count");

	DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
	List<Row> results = ds.collect();
	String expected = "2,2";
	compareResultAsText(results, expected);
}
 
Developer: axbaretto, Project: flink, Lines of code: 22, Source: AggregationsITCase.java
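To run the same aggregation outside the test harness (which supplies config() and compareResultAsText), a minimal standalone sketch could look like the following, assuming the pre-1.9 batch Table API used by the example above:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

DataSource<Tuple2<Float, String>> input = env.fromElements(
		new Tuple2<>(1f, "Hello"),
		new Tuple2<>(2f, "Ciao"));

// Both counts aggregate over the same two rows, so each column yields 2.
Table result = tableEnv.fromDataSet(input).select("f0.count, f1.count");
tableEnv.toDataSet(result, Row.class).print();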

Example 2: generate

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
	Preconditions.checkState(vertexCount >= 0);

	// Vertices
	DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

	// Edges
	DataSource<Edge<LongValue, NullValue>> edges = env
		.fromCollection(Collections.<Edge<LongValue, NullValue>>emptyList(), TypeInformation.of(new TypeHint<Edge<LongValue, NullValue>>(){}))
			.setParallelism(parallelism)
			.name("Empty edge set");

	// Graph
	return Graph.fromDataSet(vertices, edges, env);
}
 
Developer: axbaretto, Project: flink, Lines of code: 17, Source: EmptyGraph.java
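A brief usage sketch, assuming Gelly's EmptyGraph(ExecutionEnvironment, long) constructor, to which this generate() method belongs (numberOfVertices() and numberOfEdges() declare throws Exception, so call them from a method that does the same):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
Graph<LongValue, NullValue, NullValue> graph = new EmptyGraph(env, 5).generate();
System.out.println(graph.numberOfVertices()); // 5
System.out.println(graph.numberOfEdges());    // 0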

Example 3: main

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
public static void main(String[] args) throws Exception {

        // parse parameters
        ParameterTool params = ParameterTool.fromArgs(args);
        // path to ratings.csv file
        String ratingsCsvPath = params.getRequired("input");

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSource<String> file = env.readTextFile(ratingsCsvPath);
        file.flatMap(new ExtractRating())
            .groupBy(0)
            // .reduceGroup(new SumRatingCount())
            .sum(1)
            .print();
    }
 
Developer: mushketyk, Project: flink-examples, Lines of code: 17, Source: RatingsDistribution.java
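ExtractRating is not shown in the snippet. A plausible, hypothetical implementation that emits one (rating, 1) pair per line of a MovieLens-style ratings.csv (userId,movieId,rating,timestamp) would be:

// Hypothetical helper, not part of the original snippet.
public static class ExtractRating implements FlatMapFunction<String, Tuple2<String, Integer>> {
	@Override
	public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
		String[] fields = line.split(",");
		if (fields.length >= 3 && !"rating".equals(fields[2])) { // skip the header row
			out.collect(new Tuple2<>(fields[2], 1));
		}
	}
}

With this shape, groupBy(0).sum(1) yields the number of ratings per rating value.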

Example 4: testRangePartitionerOnSequenceData

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testRangePartitionerOnSequenceData() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSource<Long> dataSource = env.generateSequence(0, 10000);
	KeySelector<Long, Long> keyExtractor = new ObjectSelfKeySelector();

	MapPartitionFunction<Long, Tuple2<Long, Long>> minMaxSelector = new MinMaxSelector<>(new LongComparator(true));

	Comparator<Tuple2<Long, Long>> tuple2Comparator = new Tuple2Comparator(new LongComparator(true));

	List<Tuple2<Long, Long>> collected = dataSource.partitionByRange(keyExtractor).mapPartition(minMaxSelector).collect();
	Collections.sort(collected, tuple2Comparator);

	long previousMax = -1;
	for (Tuple2<Long, Long> tuple2 : collected) {
		if (previousMax == -1) {
			previousMax = tuple2.f1;
		} else {
			long currentMin = tuple2.f0;
			assertTrue(tuple2.f0 < tuple2.f1);
			assertEquals(previousMax + 1, currentMin);
			previousMax = tuple2.f1;
		}
	}
}
 
Developer: axbaretto, Project: flink, Lines of code: 26, Source: PartitionITCase.java
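The helpers ObjectSelfKeySelector and MinMaxSelector are defined elsewhere in the test class. Hypothetical reconstructions consistent with their use above (the identity key, plus one (min, max) pair per partition) would be:

// Hypothetical reconstructions, not part of the original snippet.
public static class ObjectSelfKeySelector implements KeySelector<Long, Long> {
	@Override
	public Long getKey(Long value) {
		return value;
	}
}

public static class MinMaxSelector<T> implements MapPartitionFunction<T, Tuple2<T, T>> {
	private final Comparator<T> comparator;

	public MinMaxSelector(Comparator<T> comparator) {
		this.comparator = comparator;
	}

	@Override
	public void mapPartition(Iterable<T> values, Collector<Tuple2<T, T>> out) {
		// Track the smallest and largest element seen in this partition.
		T min = null;
		T max = null;
		for (T value : values) {
			if (min == null || comparator.compare(value, min) < 0) {
				min = value;
			}
			if (max == null || comparator.compare(value, max) > 0) {
				max = value;
			}
		}
		if (min != null) {
			out.collect(new Tuple2<>(min, max));
		}
	}
}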


Example 5: readFile

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
	if (inputFormat == null) {
		throw new IllegalArgumentException("InputFormat must not be null.");
	}
	if (filePath == null) {
		throw new IllegalArgumentException("The file path must not be null.");
	}

	inputFormat.setFilePath(new Path(filePath));
	try {
		return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
	}
	catch (Exception e) {
		throw new InvalidProgramException("The type returned by the input format could not be automatically determined. " +
				"Please specify the TypeInformation of the produced type explicitly by using the " +
				"'createInput(InputFormat, TypeInformation)' method instead.");
	}
}
 
Developer: axbaretto, Project: flink, Lines of code: 19, Source: ExecutionEnvironment.java
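A minimal usage sketch (TextInputFormat is Flink's line-oriented input format; the path is a placeholder):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// readFile sets the path on the format and infers the produced type (String here).
DataSource<String> lines = env.readFile(new TextInputFormat(new Path("/tmp/input.txt")), "/tmp/input.txt");
lines.print();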

Example 6: fromElements

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
/**
 * Creates a new data set that contains the given elements. The elements must all be of the same type,
 * for example, all {@link String} or all {@link Integer}. The sequence of elements must not be empty.
 *
 * <p>The framework will try and determine the exact type from the collection elements.
 * In case of generic elements, it may be necessary to manually supply the type information
 * via {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(X... data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	if (data.length == 0) {
		throw new IllegalArgumentException("The number of elements must not be zero.");
	}

	TypeInformation<X> typeInfo;
	try {
		typeInfo = TypeExtractor.getForObject(data[0]);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + data[0].getClass().getName()
				+ "; please specify the TypeInformation manually via "
				+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)");
	}

	return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
 
Developer: axbaretto, Project: flink, Lines of code: 36, Source: ExecutionEnvironment.java
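A brief usage sketch; note that the resulting source is non-parallel:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSource<String> words = env.fromElements("flink", "data", "source");
System.out.println(words.getParallelism()); // 1, per the Javadoc above
words.print();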

Example 7: tupleType

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
/**
 * Configures the reader to read the CSV data and parse it to the given type. The type must be a subclass of
 * {@link Tuple}. The type information for the fields is obtained from the type class. The type
 * consequently needs to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple.
 * @return The DataSet representing the parsed CSV data.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
	Preconditions.checkNotNull(targetType, "The target type class must not be null.");
	if (!Tuple.class.isAssignableFrom(targetType)) {
		throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
	}

	@SuppressWarnings("unchecked")
	TupleTypeInfo<T> typeInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
	CsvInputFormat<T> inputFormat = new TupleCsvInputFormat<T>(path, this.lineDelimiter, this.fieldDelimiter, typeInfo, this.includedMask);

	Class<?>[] classes = new Class<?>[typeInfo.getArity()];
	for (int i = 0; i < typeInfo.getArity(); i++) {
		classes[i] = typeInfo.getTypeAt(i).getTypeClass();
	}

	configureInputFormat(inputFormat);
	return new DataSource<T>(executionContext, inputFormat, typeInfo, Utils.getCallLocationName());
}
 
Developer: axbaretto, Project: flink, Lines of code: 27, Source: CsvReader.java
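A usage sketch (the Person class and the CSV path are hypothetical; a concrete Tuple subclass pins the generic field types, as this method requires):

// Hypothetical Tuple subclass fixing the field types to (Integer, String).
public static class Person extends Tuple2<Integer, String> {}

DataSource<Person> people = ExecutionEnvironment.getExecutionEnvironment()
		.readCsvFile("/tmp/people.csv") // hypothetical path
		.tupleType(Person.class);
people.print();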

Example 8: testFieldTypes

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testFieldTypes() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<Item> items = reader.tupleType(Item.class);

	TypeInformation<?> info = items.getType();
	if (!info.isTupleType()) {
		Assert.fail();
	} else {
		TupleTypeInfo<?> tinfo = (TupleTypeInfo<?>) info;
		Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
		Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
		Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
		Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

	}

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) items.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
 
Developer: axbaretto, Project: flink, Lines of code: 21, Source: CSVReaderTest.java
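Item (and the SubItem used in the next example) are defined elsewhere in the test class; hypothetical reconstructions consistent with the assertions would be:

// Hypothetical reconstructions implied by the asserted field types.
public static class Item extends Tuple4<Integer, String, Double, String> {}
public static class SubItem extends Item {}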

Example 9: testSubClass

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testSubClass() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
	TypeInformation<?> info = sitems.getType();

	Assert.assertEquals(true, info.isTupleType());
	Assert.assertEquals(SubItem.class, info.getTypeClass());

	@SuppressWarnings("unchecked")
	TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

	Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
 
Developer: axbaretto, Project: flink, Lines of code: 21, Source: CSVReaderTest.java

Example 10: testSubClassWithPartialsInHierarchie

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testSubClassWithPartialsInHierarchie() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<FinalItem> sitems = reader.tupleType(FinalItem.class);
	TypeInformation<?> info = sitems.getType();

	Assert.assertEquals(true, info.isTupleType());
	Assert.assertEquals(FinalItem.class, info.getTypeClass());

	@SuppressWarnings("unchecked")
	TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

	Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
	Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(3).getClass());
	Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(4).getClass());
	Assert.assertEquals(StringValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(3)).getTypeClass());
	Assert.assertEquals(LongValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(4)).getTypeClass());

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[] {Integer.class, String.class, Double.class, StringValue.class, LongValue.class}, inputFormat.getFieldTypes());
}
 
Developer: axbaretto, Project: flink, Lines of code: 24, Source: CSVReaderTest.java
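The FinalItem hierarchy is likewise not shown; a hypothetical shape matching the five asserted field types (the last two being StringValue and LongValue, hence the ValueTypeInfo assertions) is:

// Hypothetical: a partially generic parent whose remaining type parameters
// are pinned by the final subclass.
public static class PartialItem<A, B> extends Tuple5<Integer, String, Double, A, B> {}
public static class FinalItem extends PartialItem<StringValue, LongValue> {}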

Example 11: testNumericAutocastInArithmetic

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testNumericAutocastInArithmetic() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSource<Tuple8<Byte, Short, Integer, Long, Float, Double, Long, Double>> input =
			env.fromElements(new Tuple8<>((byte) 1, (short) 1, 1, 1L, 1.0f, 1.0d, 1L, 1001.1));

	Table table =
			tableEnv.fromDataSet(input);

	Table result = table.select("f0 + 1, f1 +" +
			" 1, f2 + 1L, f3 + 1.0f, f4 + 1.0d, f5 + 1, f6 + 1.0d, f7 + f0");

	DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
	List<Row> results = ds.collect();
	String expected = "2,2,2,2.0,2.0,2.0,2.0,1002.1";
	compareResultAsText(results, expected);
}
 
Developer: axbaretto, Project: flink, Lines of code: 20, Source: CastingITCase.java
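To see where the expected string comes from, here is the per-column widening (a worked annotation, assuming the Table API's usual numeric promotion rules):

// f0 + 1    : Byte  + Integer -> Integer => 2
// f1 + 1    : Short + Integer -> Integer => 2
// f2 + 1L   : Integer + Long  -> Long    => 2
// f3 + 1.0f : Long  + Float   -> Float   => 2.0
// f4 + 1.0d : Float + Double  -> Double  => 2.0
// f5 + 1    : Double + Integer -> Double => 2.0
// f6 + 1.0d : Long  + Double  -> Double  => 2.0
// f7 + f0   : Double + Byte   -> Double  => 1002.1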

Example 12: testNumericAutocastInComparison

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testNumericAutocastInComparison() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSource<Tuple6<Byte, Short, Integer, Long, Float, Double>> input =
			env.fromElements(
					new Tuple6<>((byte) 1, (short) 1, 1, 1L, 1.0f, 1.0d),
					new Tuple6<>((byte) 2, (short) 2, 2, 2L, 2.0f, 2.0d));

	Table table =
			tableEnv.fromDataSet(input, "a,b,c,d,e,f");

	Table result = table
			.filter("a > 1 && b > 1 && c > 1L && d > 1.0f && e > 1.0d && f > 1");

	DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
	List<Row> results = ds.collect();
	String expected = "2,2,2,2,2.0,2.0";
	compareResultAsText(results, expected);
}
 
Developer: axbaretto, Project: flink, Lines of code: 22, Source: CastingITCase.java

Example 13: testCastFromString

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testCastFromString() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSource<Tuple3<String, String, String>> input =
			env.fromElements(new Tuple3<>("1", "true", "2.0"));

	Table table =
			tableEnv.fromDataSet(input);

	Table result = table.select(
			"f0.cast(BYTE), f0.cast(SHORT), f0.cast(INT), f0.cast(LONG), f2.cast(DOUBLE), f2.cast(FLOAT), f1.cast(BOOL)");

	DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
	List<Row> results = ds.collect();
	String expected = "1,1,1,1,2.0,2.0,true\n";
	compareResultAsText(results, expected);
}
 
Developer: axbaretto, Project: flink, Lines of code: 20, Source: CastingITCase.java

Example 14: testWorkingAggregationDataTypes

import org.apache.flink.api.java.operators.DataSource; // import the required package/class
@Test
public void testWorkingAggregationDataTypes() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	DataSource<Tuple7<Byte, Short, Integer, Long, Float, Double, String>> input =
			env.fromElements(
					new Tuple7<>((byte) 1, (short) 1, 1, 1L, 1.0f, 1.0d, "Hello"),
					new Tuple7<>((byte) 2, (short) 2, 2, 2L, 2.0f, 2.0d, "Ciao"));

	Table table = tableEnv.fromDataSet(input);

	Table result =
			table.select("f0.avg, f1.avg, f2.avg, f3.avg, f4.avg, f5.avg, f6.count");

	DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
	List<Row> results = ds.collect();
	String expected = "1,1,1,1,1.5,1.5,2";
	compareResultAsText(results, expected);
}
 
Developer: axbaretto, Project: flink, Lines of code: 21, Source: AggregationsITCase.java
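The expected string follows from avg preserving each field's type (a worked annotation; integral averages truncate):

// avg over {1, 2}: the Byte/Short/Integer/Long columns keep their integral
// types, so (1 + 2) / 2 truncates to 1; the Float/Double columns give 1.5;
// f6.count over the two rows is 2 -> "1,1,1,1,1.5,1.5,2".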


Note: The org.apache.flink.api.java.operators.DataSource examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.