

Java DataSet Class Code Examples

This article collects typical usage examples of the Java class org.apache.flink.api.java.DataSet. If you have been wondering what the DataSet class is for, how to use it, or what working examples look like, the curated code examples below may help.


The DataSet class belongs to the org.apache.flink.api.java package. The following 15 code examples show the class in use, ordered by popularity by default.

Example 1: transformation

import org.apache.flink.api.java.DataSet; // import the required package/class
/**
 * Data transformation.
 * The method groups by trackId, sums the number of occurrences, sorts the output,
 * and takes the top elements defined by the user.
 * @param input the cleansed input DataSet
 * @return the top chart results
 */
@Override
public DataSet<ChartsResult> transformation(DataSet<?> input) {
    log.info("Transformation Phase. Computing the tags");
    return input
            .groupBy(0) // Grouping by trackId
            .sum(1) // Sum the occurrences of each grouped item
            .sortPartition(1, Order.DESCENDING).setParallelism(1) // Sort by count
            .first(pipelineConf.args.getLimit())
            .map(t -> {
                    Tuple3<Long, Integer, TagEvent> tuple = (Tuple3<Long, Integer, TagEvent>) t;
                    return new ChartsResult(tuple.f0, tuple.f1, tuple.f2);
            })
            .returns(new TypeHint<ChartsResult>(){});
}
 
Developer: aaitor, Project: flink-charts, Lines: 22, Source: SimpleChartsPipeline.java
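The ChartsResult type is not shown in this snippet. Based on the constructor call above, a minimal POJO sketch might look like the following; the field names are assumptions, and the real class lives in the flink-charts project.

import java.io.Serializable;

// Hypothetical sketch of ChartsResult; field names are guessed from the call
// new ChartsResult(tuple.f0, tuple.f1, tuple.f2), i.e. (trackId, count, event).
// TagEvent is the project's own event type.
public class ChartsResult implements Serializable {
    public Long trackId;      // grouped key (tuple field f0)
    public Integer count;     // summed occurrences (tuple field f1)
    public TagEvent tagEvent; // original event carried through (tuple field f2)

    public ChartsResult() {} // no-arg constructor required by Flink's POJO rules

    public ChartsResult(Long trackId, Integer count, TagEvent tagEvent) {
        this.trackId = trackId;
        this.count = count;
        this.tagEvent = tagEvent;
    }
}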

Example 2: main

import org.apache.flink.api.java.DataSet; // import the required package/class
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

	DataSet<Record> csvInput = env
			.readCsvFile("D://NOTBACKEDUP//dataflow//flink-table//src//main//resources//data//olympic-athletes.csv")
			.pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total");
	// register the DataSet athletes as table "athletes" with fields derived
	// from the dataset
	Table athletes = tableEnv.fromDataSet(csvInput);
	tableEnv.registerTable("athletes", athletes);
	// run a SQL query on the Table and retrieve the result as a new Table
	Table groupedByCountry = tableEnv.sql("SELECT country, SUM(total) as frequency FROM athletes group by country");

	DataSet<Result> result = tableEnv.toDataSet(groupedByCountry, Result.class);

	result.print();

	Table groupedByGame = athletes.groupBy("game").select("game, total.sum as frequency");

	DataSet<GameResult> gameResult = tableEnv.toDataSet(groupedByGame, GameResult.class);

	gameResult.print();

}
 
Developer: PacktPublishing, Project: Mastering-Apache-Flink, Lines: 26, Source: BatchJob.java
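The Record POJO consumed by pojoType(...) is not shown. Its field names come directly from the pojoType(...) call above; the field types below are assumptions, as is the whole sketch.

// Hypothetical sketch of the Record POJO; field names match the pojoType(...)
// call, types are guesses. Flink POJO rules: public fields (or getters/setters)
// and a public no-arg constructor.
public class Record {
    public String playerName;
    public String country;
    public Integer year;
    public String game;
    public Integer gold;
    public Integer silver;
    public Integer bronze;
    public Integer total;

    public Record() {}
}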

Example 3: main

import org.apache.flink.api.java.DataSet; // import the required package/class
public static void main(String... args) throws  Exception {
    File txtFile = new File("/tmp/test/file.txt");
    File csvFile = new File("/tmp/test/file.csv");
    File binFile = new File("/tmp/test/file.bin");

    writeToFile(txtFile, "txt");
    writeToFile(csvFile, "csv");
    writeToFile(binFile, "bin");

    final ExecutionEnvironment env =
            ExecutionEnvironment.getExecutionEnvironment();
    final TextInputFormat format = new TextInputFormat(new Path("/tmp/test"));

    GlobFilePathFilter filesFilter = new GlobFilePathFilter(
            Collections.singletonList("**"),
            Arrays.asList("**/file.bin")
    );
    // Debug output: lists the declared fields of GlobFilePathFilter
    System.out.println(Arrays.toString(GlobFilePathFilter.class.getDeclaredFields()));
    format.setFilesFilter(filesFilter);

    DataSet<String> result = env.readFile(format, "/tmp");
    result.writeAsText("/temp/out");
    env.execute("GlobFilePathFilter-Test");
}
 
Developer: mushketyk, Project: flink-examples, Lines: 25, Source: GlobExample.java
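The writeToFile helper is not part of the snippet. A minimal sketch, under the assumption that it simply creates the file with some placeholder content (the real implementation in flink-examples may differ):

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

// Hypothetical helper matching the writeToFile(File, String) calls above.
private static void writeToFile(File file, String content) throws IOException {
    file.getParentFile().mkdirs(); // make sure /tmp/test exists
    try (FileWriter writer = new FileWriter(file)) {
        writer.write(content);
    }
}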

Example 4: transformation

import org.apache.flink.api.java.DataSet; // import the required package/class
/**
 * Data transformation.
 * The method groups by state and trackId, sums the number of occurrences,
 * sorts the output, and takes the top elements per state as defined by the user.
 * @param input the cleansed input DataSet
 * @return the top chart results per state
 */
@Override
public DataSet<ChartsResult> transformation(DataSet<?> input) {
    final int limit = pipelineConf.getArgs().getLimit();

    log.info("Transformation Phase. Computing the tags");
    SortPartitionOperator<Tuple4<Long, Integer, String, TagEvent>> grouped =
            (SortPartitionOperator<Tuple4<Long, Integer, String, TagEvent>>) input
            .groupBy(2, 0) // Grouping by state & trackId
            .sum(1) // Sum the occurrences of each grouped item
            .sortPartition(2, Order.ASCENDING).setParallelism(1) // Sort by state
            .sortPartition(1, Order.DESCENDING).setParallelism(1); // Sort by count
    return grouped.reduceGroup(new ReduceLimit(limit, 2)); // Reduce groups, applying the limit specified by the user
}
 
Developer: aaitor, Project: flink-charts, Lines: 20, Source: StateChartsPipeline.java
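ReduceLimit is defined elsewhere in the flink-charts project and is not shown here. Since the partition arriving at it is sorted by state (field 2) and then by count, a plausible sketch is a GroupReduceFunction that streams through the sorted tuples and emits at most limit results per distinct state. The class name and constructor arguments are real; everything inside this sketch is an assumption.

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.util.Collector;

// Hypothetical sketch of ReduceLimit: the input is sorted by state (field 2),
// so a new state value starts a fresh counter and only the first `limit`
// tuples per state are emitted.
public class ReduceLimit implements
        GroupReduceFunction<Tuple4<Long, Integer, String, TagEvent>, ChartsResult> {

    private final int limit;
    private final int keyField; // tuple position holding the state, 2 in the call above

    public ReduceLimit(int limit, int keyField) {
        this.limit = limit;
        this.keyField = keyField;
    }

    @Override
    public void reduce(Iterable<Tuple4<Long, Integer, String, TagEvent>> values,
                       Collector<ChartsResult> out) {
        String currentKey = null;
        int emitted = 0;
        for (Tuple4<Long, Integer, String, TagEvent> t : values) {
            String key = t.getField(keyField);
            if (!key.equals(currentKey)) { // a new state starts a fresh counter
                currentKey = key;
                emitted = 0;
            }
            if (emitted < limit) {
                out.collect(new ChartsResult(t.f0, t.f1, t.f3));
                emitted++;
            }
        }
    }
}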

Example 5: cleansingTest

import org.apache.flink.api.java.DataSet; // import the required package/class
/**
 * Test to validate the cleansing method.
 * We generate a DataSet with 10 TagEvents and modify 3 items to force bad data.
 * The assertion checks that only the expected number of items remains after
 * the cleansing process.
 * @throws Exception
 */
@Test
public void cleansingTest() throws Exception {
    String[] args = {"-c", "state_chart", "-l", "3"};
    argsParser = ArgsParser.builder(args);

    PipelineChartsConf pipelineConf = new PipelineChartsConf(config, argsParser);
    StateChartsPipeline pipeline = new StateChartsPipeline(pipelineConf);

    List<TagEvent> mockCollection = TagEventUtils.getMockData(10);
    mockCollection.set(0, new TagEvent(0L, "xxx", "yy", "ZZ", "Locality", "United States"));
    mockCollection.set(2, new TagEvent(0L, "xxx", "yy", "ZZ", "Locality", "UK"));
    mockCollection.set(4, new TagEvent(99L, "xxx", "yy", "", "", ""));

    DataSet<TagEvent> mockDataset = pipeline.getEnv().fromCollection(mockCollection);

    DataSet<Tuple4<Long, Integer, String, TagEvent>> clean = pipeline.cleansing(mockDataset);
    assertEquals(7, clean.count());
}
 
Developer: aaitor, Project: flink-charts, Lines: 27, Source: StateChartsPipelineTest.java
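TagEvent itself is not shown in these tests. A rough sketch consistent with the six-argument constructor above and the four-argument variant in Example 6 could look like the following; every field name here is a guess based on the pipeline's locality/state/country domain.

// Hypothetical sketch of TagEvent; the real field names in flink-charts may
// differ. The four-argument constructor used in Example 6 is assumed to omit
// the locality/country fields.
public class TagEvent {
    public Long trackId;
    public String tagId;   // assumption: some tag identifier
    public String tagName; // assumption
    public String state;
    public String locality;
    public String country;

    public TagEvent() {}

    public TagEvent(Long trackId, String tagId, String tagName, String state) {
        this(trackId, tagId, tagName, state, "", "");
    }

    public TagEvent(Long trackId, String tagId, String tagName, String state,
                    String locality, String country) {
        this.trackId = trackId;
        this.tagId = tagId;
        this.tagName = tagName;
        this.state = state;
        this.locality = locality;
        this.country = country;
    }
}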

Example 6: cleansingTest

import org.apache.flink.api.java.DataSet; // import the required package/class
/**
 * Test to validate the cleansing method.
 * We generate a DataSet with 10 TagEvents and modify 2 items to force bad data.
 * The assertion checks that only the expected number of items remains after
 * the cleansing process.
 * @throws Exception
 */
@Test
public void cleansingTest() throws Exception {
    String[] args = {"-c", "chart", "-l", "3"};
    argsParser = ArgsParser.builder(args);

    PipelineChartsConf pipelineConf = new PipelineChartsConf(config, argsParser);
    SimpleChartsPipeline pipeline = new SimpleChartsPipeline(pipelineConf);

    List<TagEvent> mockCollection = TagEventUtils.getMockData(10);
    mockCollection.set(0, new TagEvent(0L, "xxx", "yy", "zz"));
    mockCollection.set(4, new TagEvent(99L, "xxx", "yy", ""));

    DataSet<TagEvent> mockDataset = pipeline.getEnv().fromCollection(mockCollection);

    DataSet<Tuple3<Long, Integer, TagEvent>> clean = pipeline.cleansing(mockDataset);
    assertEquals(9, clean.count());
}
 
Developer: aaitor, Project: flink-charts, Lines: 25, Source: SimpleChartsPipelineTest.java

Example 7: main

import org.apache.flink.api.java.DataSet; // import the required package/class
public static void main(String[] args) throws Exception {

		// set up the execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<String> text = env.fromElements(
				"To be, or not to be,--that is the question:--",
				"Whether 'tis nobler in the mind to suffer",
				"The slings and arrows of outrageous fortune",
				"Or to take arms against a sea of troubles,"
				);

		DataSet<Tuple2<String, Integer>> counts =
				// split up the lines in pairs (2-tuples) containing: (word,1)
				text.flatMap(new LineSplitter())
				// group by the tuple field "0" and sum up tuple field "1"
				.groupBy(0)
				.sum(1);

		// execute and print result
		counts.print();

	}
 
Developer: dineshtrivedi, Project: flink-java-project, Lines: 25, Source: WordCount.java
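LineSplitter is the classic WordCount tokenizer. The project's class is not shown here, but a standard implementation in the style of Flink's own examples looks like this:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Typical WordCount tokenizer: split each line into lowercase words and emit
// a (word, 1) pair per word. The project's actual LineSplitter is presumably
// equivalent.
public class LineSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        for (String word : value.toLowerCase().split("\\W+")) {
            if (word.length() > 0) {
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}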

Example 8: testSelectingMultipleFieldsUsingExpressionLanguage

import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testSelectingMultipleFieldsUsingExpressionLanguage() throws Exception {
	/*
	 * selecting multiple fields using expression language
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
	DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
	DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joinDs =
			ds1.join(ds2).where("nestedPojo.longNumber", "number", "str").equalTo("f6", "f0", "f1");

	env.setParallelism(1);
	List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> result = joinDs.collect();

	String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
			"2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
			"3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";

	compareResultAsTuples(result, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 22, Source: JoinITCase.java
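The POJO type comes from Flink's CollectionDataSets test utilities. The key expressions "nestedPojo.longNumber", "number" and "str" imply a shape roughly like this simplified sketch; the real test class contains additional fields.

// Simplified sketch of the POJO shape implied by the key expressions above;
// Flink's actual CollectionDataSets.POJO has more fields. Public fields and a
// no-arg constructor satisfy Flink's POJO rules.
public class POJO {
    public int number;            // joined against tuple field f0
    public String str;            // joined against tuple field f1
    public NestedPojo nestedPojo; // "nestedPojo.longNumber" joins against f6

    public POJO() {}

    public static class NestedPojo {
        public long longNumber;

        public NestedPojo() {}
    }
}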

Example 9: testCoGroupKeyMixing2

import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testCoGroupKeyMixing2() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<CustomType> ds2 = env.fromCollection(customTypeData);

	// should work
	try {
		ds1.coGroup(ds2)
		.where(3)
		.equalTo(
				new KeySelector<CustomType, Long>() {

						@Override
						public Long getKey(CustomType value) {
							return value.myLong;
						}
					}
				);
	} catch (Exception e) {
		Assert.fail();
	}
}
 
Developer: axbaretto, Project: flink, Lines: 25, Source: CoGroupOperatorTest.java

Example 10: testAsFromTupleByName

import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testAsFromTupleByName() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config());

	Table table = tableEnv.fromDataSet(CollectionDataSets.get3TupleDataSet(env), "f2");

	DataSet<Row> ds = tableEnv.toDataSet(table, Row.class);
	List<Row> results = ds.collect();
	String expected = "Hi\n" + "Hello\n" + "Hello world\n" +
		"Hello world, how are you?\n" + "I am fine.\n" + "Luke Skywalker\n" +
		"Comment#1\n" + "Comment#2\n" + "Comment#3\n" + "Comment#4\n" +
		"Comment#5\n" + "Comment#6\n" + "Comment#7\n" +
		"Comment#8\n" + "Comment#9\n" + "Comment#10\n" +
		"Comment#11\n" + "Comment#12\n" + "Comment#13\n" +
		"Comment#14\n" + "Comment#15\n";
	compareResultAsText(results, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 19, Source: JavaTableEnvironmentITCase.java

Example 11: testUnaryFunctionMovingForwardedAnnotation

import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testUnaryFunctionMovingForwardedAnnotation() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
	input.map(new ShufflingMapper<Long>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 1);
	FieldSet fw3 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertNotNull(fw3);
	assertTrue(fw1.contains(2));
	assertTrue(fw2.contains(0));
	assertTrue(fw3.contains(1));
}
 
Developer: axbaretto, Project: flink, Lines: 25, Source: SemanticPropertiesTranslationTest.java
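ShufflingMapper is part of Flink's test code and is not shown, but the assertions pin down its forwarded-fields annotation: input field 0 is forwarded to output field 2, field 1 to field 0, and field 2 to field 1. A sketch consistent with those assertions:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
import org.apache.flink.api.java.tuple.Tuple3;

// Sketch reconstructed from the assertions above: every input field is
// forwarded, but moved to a different output position.
@ForwardedFields("0->2;1->0;2->1")
public class ShufflingMapper<T> implements MapFunction<Tuple3<T, T, T>, Tuple3<T, T, T>> {
    @Override
    public Tuple3<T, T, T> map(Tuple3<T, T, T> value) {
        // output f0 = input f1, output f1 = input f2, output f2 = input f0
        return new Tuple3<>(value.f1, value.f2, value.f0);
    }
}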

Example 12: testProgram

import org.apache.flink.api.java.DataSet; // import the required package/class
@SuppressWarnings("unchecked")
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Integer, String>> left = env.fromElements(
			new Tuple2<Integer, String>(1, "hello"),
			new Tuple2<Integer, String>(2, "what's"),
			new Tuple2<Integer, String>(2, "up")
			);
	DataSet<Tuple2<Integer, String>> right = env.fromElements(
			new Tuple2<Integer, String>(1, "not"),
			new Tuple2<Integer, String>(1, "much"),
			new Tuple2<Integer, String>(2, "really")
			);
	DataSet<Tuple2<Integer,String>> joined = left.join(right).where(0).equalTo(0)
			.with((t,s,out) -> out.collect(new Tuple2<Integer,String>(t.f0, t.f1 + " " + s.f1)));
	joined.writeAsCsv(resultPath);
	env.execute();
}
 
Developer: axbaretto, Project: flink, Lines: 21, Source: FlatJoinITCase.java

Example 13: testWithtuple1Value

import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testWithtuple1Value() throws Exception {
	/*
	 * Test mapVertices() and change the value type to a Tuple1
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	DataSet<Vertex<Long, Tuple1<Long>>> mappedVertices = graph.mapVertices(new ToTuple1Mapper()).getVertices();
	List<Vertex<Long, Tuple1<Long>>> result = mappedVertices.collect();

	expectedResult = "1,(1)\n" +
		"2,(2)\n" +
		"3,(3)\n" +
		"4,(4)\n" +
		"5,(5)\n";

	compareResultAsTuples(result, expectedResult);
}
 
Developer: axbaretto, Project: flink, Lines: 22, Source: MapVerticesITCase.java
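ToTuple1Mapper simply wraps each vertex value in a Tuple1, as the expected output "1,(1)" suggests. A sketch consistent with Gelly's mapVertices signature:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.graph.Vertex;

// Sketch inferred from the expected output: the Long vertex value is wrapped
// in a Tuple1<Long>, changing the vertex value type.
public class ToTuple1Mapper implements MapFunction<Vertex<Long, Long>, Tuple1<Long>> {
    @Override
    public Tuple1<Long> map(Vertex<Long, Long> vertex) {
        return new Tuple1<>(vertex.getValue());
    }
}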

Example 14: testInDegrees

import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testInDegrees() throws Exception {
	/*
	 * Test inDegrees()
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	DataSet<Tuple2<Long, LongValue>> data = graph.inDegrees();
	List<Tuple2<Long, LongValue>> result = data.collect();

	expectedResult = "1,1\n" +
		"2,1\n" +
		"3,2\n" +
		"4,1\n" +
		"5,2\n";
	compareResultAsTuples(result, expectedResult);
}
 
Developer: axbaretto, Project: flink, Lines: 21, Source: DegreesITCase.java

Example 15: testKeySelection

import org.apache.flink.api.java.DataSet; // import the required package/class
@Test
public void testKeySelection() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();
	Path in = new Path(inFile.getAbsoluteFile().toURI());

	AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
	DataSet<User> usersDS = env.createInput(users);

	DataSet<Tuple2<String, Integer>> res = usersDS.groupBy("name").reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
		@Override
		public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
			for (User u : values) {
				out.collect(new Tuple2<String, Integer>(u.getName().toString(), 1));
			}
		}
	});
	res.writeAsText(resultPath);
	env.execute("Avro Key selection");


	expected = "(Alyssa,1)\n(Charlie,1)\n";
}
 
Developer: axbaretto, Project: flink, Lines: 24, Source: AvroPojoTest.java


Note: The org.apache.flink.api.java.DataSet class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors, and distribution or use should follow each project's license. Do not republish without permission.