This article collects typical usage examples of the Java class org.apache.flink.api.java.operators.DataSource. If you have been wondering what the DataSource class is for and how to use it, the curated code examples below should help.
The DataSource class belongs to the org.apache.flink.api.java.operators package. 14 code examples of the class are shown below, ordered by popularity.
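Before the collected examples, here is a minimal, self-contained sketch of the basic pattern: a DataSource is the DataSet subtype returned by the source methods of ExecutionEnvironment (fromElements, readTextFile, and so on), and it can be transformed like any other DataSet. The class name DataSourceQuickStart is made up for illustration.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;

public class DataSourceQuickStart {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // fromElements returns a DataSource, the DataSet produced by an input operation.
        DataSource<String> words = env.fromElements("To", "be", "or", "not");

        // A DataSource additionally carries source-specific settings such as a name.
        DataSet<String> upper = words.name("word source")
            .map(new MapFunction<String, String>() {
                @Override
                public String map(String value) {
                    return value.toUpperCase();
                }
            });

        upper.print();
    }
}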
Example 1: testAggregationWithTwoCount
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testAggregationWithTwoCount() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

    DataSource<Tuple2<Float, String>> input =
        env.fromElements(
            new Tuple2<>(1f, "Hello"),
            new Tuple2<>(2f, "Ciao"));

    Table table =
        tableEnv.fromDataSet(input);

    Table result =
        table.select("f0.count, f1.count");

    DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    List<Row> results = ds.collect();
    String expected = "2,2";
    compareResultAsText(results, expected);
}
Example 2: generate
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
    Preconditions.checkState(vertexCount >= 0);

    // Vertices
    DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

    // Edges
    DataSource<Edge<LongValue, NullValue>> edges = env
        .fromCollection(Collections.<Edge<LongValue, NullValue>>emptyList(), TypeInformation.of(new TypeHint<Edge<LongValue, NullValue>>(){}))
        .setParallelism(parallelism)
        .name("Empty edge set");

    // Graph
    return Graph.fromDataSet(vertices, edges, env);
}
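The generate() method above appears to come from Gelly's EmptyGraph generator, which builds a graph with the requested number of vertices and no edges; the edge DataSource stays an empty collection. A hedged usage sketch under that assumption:

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.generator.EmptyGraph;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;

public class EmptyGraphSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // 100 vertices, zero edges.
        Graph<LongValue, NullValue, NullValue> graph = new EmptyGraph(env, 100).generate();

        System.out.println(graph.numberOfVertices()); // prints 100
    }
}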
Example 3: main
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
public static void main(String[] args) throws Exception {
    // parse parameters
    ParameterTool params = ParameterTool.fromArgs(args);

    // path to the ratings.csv file
    String ratingsCsvPath = params.getRequired("input");

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSource<String> file = env.readTextFile(ratingsCsvPath);

    file.flatMap(new ExtractRating())
        .groupBy(0)
        // .reduceGroup(new SumRatingCount())
        .sum(1)
        .print();
}
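ExtractRating is referenced but not shown in this snippet. Given the groupBy(0).sum(1) that follows, it presumably emits (rating, 1) pairs; a hypothetical implementation, assuming the MovieLens ratings.csv layout userId,movieId,rating,timestamp, might look like this:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical: emits one (rating, 1) pair per data line, skipping the CSV header.
public static class ExtractRating implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
        if (line.startsWith("userId")) {
            return; // header line
        }
        String[] fields = line.split(",");
        if (fields.length >= 3) {
            out.collect(new Tuple2<>(fields[2], 1));
        }
    }
}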
Example 4: testRangePartitionerOnSequenceData
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testRangePartitionerOnSequenceData() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> dataSource = env.generateSequence(0, 10000);
    KeySelector<Long, Long> keyExtractor = new ObjectSelfKeySelector();

    MapPartitionFunction<Long, Tuple2<Long, Long>> minMaxSelector = new MinMaxSelector<>(new LongComparator(true));

    Comparator<Tuple2<Long, Long>> tuple2Comparator = new Tuple2Comparator(new LongComparator(true));

    List<Tuple2<Long, Long>> collected = dataSource.partitionByRange(keyExtractor).mapPartition(minMaxSelector).collect();
    Collections.sort(collected, tuple2Comparator);

    long previousMax = -1;
    for (Tuple2<Long, Long> tuple2 : collected) {
        if (previousMax == -1) {
            previousMax = tuple2.f1;
        } else {
            long currentMin = tuple2.f0;

            assertTrue(tuple2.f0 < tuple2.f1);
            assertEquals(previousMax + 1, currentMin);
            previousMax = tuple2.f1;
        }
    }
}
Example 5: readFile
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }
    if (filePath == null) {
        throw new IllegalArgumentException("The file path must not be null.");
    }

    inputFormat.setFilePath(new Path(filePath));
    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    }
    catch (Exception e) {
        throw new InvalidProgramException("The type returned by the input format could not be automatically determined. " +
            "Please specify the TypeInformation of the produced type explicitly by using the " +
            "'createInput(InputFormat, TypeInformation)' method instead.", e);
    }
}
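This readFile is apparently the ExecutionEnvironment method: it wires a FileInputFormat to a path and lets the TypeExtractor infer the produced type. A minimal usage sketch, assuming a local text file (the path is a placeholder):

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.core.fs.Path;

public class ReadFileSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // TextInputFormat produces one String per line of the file.
        TextInputFormat format = new TextInputFormat(new Path("/tmp/input.txt"));

        // readFile re-sets the path on the format and infers the type (String here).
        DataSource<String> lines = env.readFile(format, "/tmp/input.txt");
        lines.print();
    }
}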
Example 6: fromElements
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
/**
 * Creates a new data set that contains the given elements. The elements must all be of the same type,
 * for example, all {@link String} or all {@link Integer}. The sequence of elements must not be empty.
 *
 * <p>The framework will try to determine the exact type from the elements.
 * In case of generic elements, it may be necessary to manually supply the type information
 * via {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(X... data) {
    if (data == null) {
        throw new IllegalArgumentException("The data must not be null.");
    }
    if (data.length == 0) {
        throw new IllegalArgumentException("The number of elements must not be zero.");
    }

    TypeInformation<X> typeInfo;
    try {
        typeInfo = TypeExtractor.getForObject(data[0]);
    }
    catch (Exception e) {
        throw new RuntimeException("Could not create TypeInformation for type " + data[0].getClass().getName()
            + "; please specify the TypeInformation manually via "
            + "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
    }

    return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
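Per the javadoc above, type extraction can fail for generic elements; the collection variant with explicit TypeInformation is the escape hatch. A minimal sketch covering both cases:

import java.util.Arrays;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;

public class FromElementsSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Plain case: the type (String) is extracted from the first element.
        DataSource<String> names = env.fromElements("Alice", "Bob");

        // Generic case: supply the TypeInformation explicitly via fromCollection.
        DataSource<Tuple2<String, Integer>> pairs = env.fromCollection(
            Arrays.asList(new Tuple2<>("Alice", 1), new Tuple2<>("Bob", 2)),
            TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

        names.print();
        pairs.print();
    }
}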
Example 7: tupleType
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
/**
 * Configures the reader to read the CSV data and parse it to the given type. The type must be a subclass of
 * {@link Tuple}. The type information for the fields is obtained from the type class. The type
 * consequently needs to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple.
 * @return The DataSet representing the parsed CSV data.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
    Preconditions.checkNotNull(targetType, "The target type class must not be null.");
    if (!Tuple.class.isAssignableFrom(targetType)) {
        throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
    }

    @SuppressWarnings("unchecked")
    TupleTypeInfo<T> typeInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
    CsvInputFormat<T> inputFormat = new TupleCsvInputFormat<T>(path, this.lineDelimiter, this.fieldDelimiter, typeInfo, this.includedMask);

    Class<?>[] classes = new Class<?>[typeInfo.getArity()];
    for (int i = 0; i < typeInfo.getArity(); i++) {
        classes[i] = typeInfo.getTypeAt(i).getTypeClass();
    }

    configureInputFormat(inputFormat);
    return new DataSource<T>(executionContext, inputFormat, typeInfo, Utils.getCallLocationName());
}
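A minimal usage sketch for tupleType, reached through ExecutionEnvironment#readCsvFile. The file path, column layout, and the CsvRecord class are assumptions for illustration (not the Item type used in the tests below):

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple3;

public class TupleTypeSketch {

    // A Tuple subclass that pins down all generic field types, as tupleType requires.
    public static class CsvRecord extends Tuple3<Integer, String, Double> {}

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Hypothetical file with lines such as "1,apple,2.5".
        DataSource<CsvRecord> records = env.readCsvFile("/tmp/records.csv")
            .tupleType(CsvRecord.class);

        records.print();
    }
}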
Example 8: testFieldTypes
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testFieldTypes() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<Item> items = reader.tupleType(Item.class);
    TypeInformation<?> info = items.getType();

    if (!info.isTupleType()) {
        Assert.fail();
    } else {
        TupleTypeInfo<?> tinfo = (TupleTypeInfo<?>) info;
        Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
        Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
        Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
        Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));
    }

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) items.getInputFormat();
    Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
Example 9: testSubClass
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testSubClass() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertTrue(info.isTupleType());
    Assert.assertEquals(SubItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
Example 10: testSubClassWithPartialsInHierarchie
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testSubClassWithPartialsInHierarchie() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<FinalItem> sitems = reader.tupleType(FinalItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertTrue(info.isTupleType());
    Assert.assertEquals(FinalItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(3).getClass());
    Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(4).getClass());
    Assert.assertEquals(StringValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(3)).getTypeClass());
    Assert.assertEquals(LongValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(4)).getTypeClass());

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(new Class<?>[] {Integer.class, String.class, Double.class, StringValue.class, LongValue.class}, inputFormat.getFieldTypes());
}
Example 11: testNumericAutocastInArithmetic
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testNumericAutocastInArithmetic() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

    DataSource<Tuple8<Byte, Short, Integer, Long, Float, Double, Long, Double>> input =
        env.fromElements(new Tuple8<>((byte) 1, (short) 1, 1, 1L, 1.0f, 1.0d, 1L, 1001.1));

    Table table =
        tableEnv.fromDataSet(input);

    Table result = table.select("f0 + 1, f1 + 1, f2 + 1L, f3 + 1.0f, f4 + 1.0d, f5 + 1, f6 + 1.0d, f7 + f0");

    DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    List<Row> results = ds.collect();
    String expected = "2,2,2,2.0,2.0,2.0,2.0,1002.1";
    compareResultAsText(results, expected);
}
Example 12: testNumericAutocastInComparison
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testNumericAutocastInComparison() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

    DataSource<Tuple6<Byte, Short, Integer, Long, Float, Double>> input =
        env.fromElements(
            new Tuple6<>((byte) 1, (short) 1, 1, 1L, 1.0f, 1.0d),
            new Tuple6<>((byte) 2, (short) 2, 2, 2L, 2.0f, 2.0d));

    Table table =
        tableEnv.fromDataSet(input, "a,b,c,d,e,f");

    Table result = table
        .filter("a > 1 && b > 1 && c > 1L && d > 1.0f && e > 1.0d && f > 1");

    DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    List<Row> results = ds.collect();
    String expected = "2,2,2,2,2.0,2.0";
    compareResultAsText(results, expected);
}
Example 13: testCastFromString
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testCastFromString() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

    DataSource<Tuple3<String, String, String>> input =
        env.fromElements(new Tuple3<>("1", "true", "2.0"));

    Table table =
        tableEnv.fromDataSet(input);

    Table result = table.select(
        "f0.cast(BYTE), f0.cast(SHORT), f0.cast(INT), f0.cast(LONG), f2.cast(DOUBLE), f2.cast(FLOAT), f1.cast(BOOL)");

    DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    List<Row> results = ds.collect();
    String expected = "1,1,1,1,2.0,2.0,true\n";
    compareResultAsText(results, expected);
}
Example 14: testWorkingAggregationDataTypes
import org.apache.flink.api.java.operators.DataSource; // import the dependent package/class
@Test
public void testWorkingAggregationDataTypes() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

    DataSource<Tuple7<Byte, Short, Integer, Long, Float, Double, String>> input =
        env.fromElements(
            new Tuple7<>((byte) 1, (short) 1, 1, 1L, 1.0f, 1.0d, "Hello"),
            new Tuple7<>((byte) 2, (short) 2, 2, 2L, 2.0f, 2.0d, "Ciao"));

    Table table = tableEnv.fromDataSet(input);

    Table result =
        table.select("f0.avg, f1.avg, f2.avg, f3.avg, f4.avg, f5.avg, f6.count");

    DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    List<Row> results = ds.collect();
    String expected = "1,1,1,1,1.5,1.5,2";
    compareResultAsText(results, expected);
}