

Java DataSet.writeAsCsv Method Code Examples

This article collects typical usage examples of the org.apache.flink.api.java.DataSet.writeAsCsv method in Java. If you are wondering what DataSet.writeAsCsv does, how to call it, or where to find real-world usage, the hand-picked code examples below may help. You can also browse more usage examples of the enclosing class, org.apache.flink.api.java.DataSet.


The following presents 15 code examples of DataSet.writeAsCsv, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
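Before turning to the examples, here is a minimal, self-contained sketch of the two most commonly used writeAsCsv overloads. The class name and output paths are placeholders invented for illustration; writeAsCsv itself is only defined for DataSets of tuples, and the path-only overload falls back to Flink's default row delimiter "\n" and field delimiter ",".

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class WriteAsCsvSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// writeAsCsv is only defined for DataSets of tuples
		DataSet<Tuple2<String, Integer>> data = env.fromElements(
				new Tuple2<>("a", 1),
				new Tuple2<>("b", 2));

		// overload 1: path only; defaults to row delimiter "\n" and field delimiter ","
		data.writeAsCsv("/tmp/out-default");

		// overload 2: explicit row and field delimiters
		data.writeAsCsv("/tmp/out-custom", "\n", "|");

		// file sinks are lazy: nothing is written until execute() runs
		env.execute("writeAsCsv sketch");
	}
}

A further overload, writeAsCsv(path, rowDelimiter, fieldDelimiter, writeMode), additionally controls whether an existing output path may be overwritten. Several of the examples below rely on the same lazy-sink behavior, which is why they call env.execute() explicitly after writeAsCsv.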

Example 1: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

	if (!parseParameters(args)) {
		return;
	}

	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// get input data
	DataSet<String> text = env.readTextFile(textPath);

	DataSet<Tuple2<String, Integer>> counts =
			// split up the lines in pairs (2-tuples) containing: (word,1)
			text.flatMap(new Tokenizer())
			// group by the tuple field "0" and sum up tuple field "1"
			.groupBy(0)
			.sum(1);

	// emit result
	counts.writeAsCsv(outputPath, "\n", " ");
	// execute program
	env.execute("WordCount Example");
}
 
Developer ID: thrill, Project: fst-bench, Lines of code: 25, Source file: JavaWordCount.java

Example 2: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>");
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0])
			.fieldDelimiter("\t").types(Long.class, Long.class);

	DataSet<Tuple2<Long, Long>> result = input.map(
			new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				public Tuple2<Long, Long> map(Tuple2<Long, Long> value){
					return new Tuple2<Long, Long>(value.f0, value.f1 + 1);
				}
			});
	result.writeAsCsv(args[1], "\n", "\t");
	env.execute();
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 22, Source file: ClientTest.java

Example 3: testProgram

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@SuppressWarnings("unchecked")
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Integer, String>> left = env.fromElements(
			new Tuple2<Integer, String>(1, "hello"),
			new Tuple2<Integer, String>(2, "what's"),
			new Tuple2<Integer, String>(2, "up")
			);
	DataSet<Tuple2<Integer, String>> right = env.fromElements(
			new Tuple2<Integer, String>(1, "not"),
			new Tuple2<Integer, String>(1, "much"),
			new Tuple2<Integer, String>(2, "really")
			);
	DataSet<Tuple2<Integer,String>> joined = left.cross(right)
			.with((t,s) -> new Tuple2<Integer, String> (t.f0 + s.f0, t.f1 + " " + s.f1));
	joined.writeAsCsv(resultPath);
	env.execute();
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 21, Source file: CrossITCase.java

Example 4: testProgram

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = get5TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds
			.groupBy(4, 0)
			.reduce((in1, in2) -> {
				Tuple5<Integer, Long, Integer, String, Long> out = new Tuple5<Integer, Long, Integer, String, Long>();
				out.setFields(in1.f0, in1.f1 + in2.f1, 0, "P-)", in1.f4);
				return out;
			});

	reduceDs.writeAsCsv(resultPath);
	env.execute();
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 17, Source file: ReduceITCase.java

Example 5: testProgram

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@SuppressWarnings("unchecked")
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Integer, String>> left = env.fromElements(
			new Tuple2<Integer, String>(1, "hello"),
			new Tuple2<Integer, String>(2, "what's"),
			new Tuple2<Integer, String>(2, "up")
			);
	DataSet<Tuple2<Integer, String>> right = env.fromElements(
			new Tuple2<Integer, String>(1, "not"),
			new Tuple2<Integer, String>(1, "much"),
			new Tuple2<Integer, String>(2, "really")
			);
	DataSet<Tuple2<Integer, String>> joined = left.join(right).where(0).equalTo(0)
			.with((t, s, out) -> out.collect(new Tuple2<Integer, String>(t.f0, t.f1 + " " + s.f1)));
	joined.writeAsCsv(resultPath);
	env.execute();
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 21, Source file: FlatJoinITCase.java

Example 6: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    String inputPath = args[0];
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get input data
    DataSet<String> text = env.readTextFile(inputPath);
    DataSet<Tuple2<String, Long>> counts = text
            .<Tuple2<String, Long>>flatMap((line, out) -> {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    out.collect(new Tuple2<>(tokenizer.nextToken(), 1L));
                }
            })
            .returns(new TypeHint<Tuple2<String, Long>>() {
            })
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // emit result
    counts.writeAsCsv(outputPath);
    // execute program
    long t = System.currentTimeMillis();
    env.execute("Streaming WordCount Example");
    System.out.println("Time=" + (System.currentTimeMillis() - t));
}
 
Developer ID: hazelcast, Project: big-data-benchmark, Lines of code: 29, Source file: FlinkWordCount.java

Example 7: testProgram

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 35, Source file: ConnectedComponentsWithSolutionSetFirstITCase.java

Example 8: testProgram

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
											.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 34, Source file: ConnectedComponentsWithObjectMapITCase.java

Example 9: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		// set up the execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<String> text = getTextDataSet(env);

		DataSet<Tuple2<String, Integer>> counts =
				// normalize and split each line
				text.map(line -> line.toLowerCase().split("\\W+"))
				// convert split line in pairs (2-tuples) containing: (word,1)
				.flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
					// emit the pairs with non-zero-length words
					Arrays.stream(tokens)
					.filter(t -> t.length() > 0)
					.forEach(t -> out.collect(new Tuple2<>(t, 1)));
				})
				// group by the tuple field "0" and sum up tuple field "1"
				.groupBy(0)
				.sum(1);

		// emit result
		if (fileOutput) {
			counts.writeAsCsv(outputPath, "\n", " ");
		} else {
			counts.print();
		}

		// execute program
		env.execute("WordCount Example");
	}
 
Developer ID: axbaretto, Project: flink, Lines of code: 37, Source file: WordCount.java

Example 10: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

		Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

		// Execute the scatter-gather iteration
		Graph<Long, Double, Double> result = graph.runScatterGatherIteration(
				new MinDistanceMessenger(), new VertexDistanceUpdater(), maxIterations);

		// Extract the vertices as the result
		DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

		// emit result
		if (fileOutput) {
			singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute("Single Source Shortest Paths Example");
		} else {
			singleSourceShortestPaths.print();
		}

	}
 
Developer ID: axbaretto, Project: flink, Lines of code: 31, Source file: SingleSourceShortestPaths.java

Example 11: testProgram

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> filterDs = ds.
			filter(value -> value.f2.contains("world"));
	filterDs.writeAsCsv(resultPath);
	env.execute();
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 11, Source file: FilterITCase.java

Example 12: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
public static void main(final String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get the data set
		final DataSet<StringTriple> file = getDataSet(env, params);

		// filter lines with empty fields
		final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

		// Here, we could do further processing with the filtered lines...
		JobExecutionResult result;
		// output the filtered lines
		if (params.has("output")) {
			filteredLines.writeAsCsv(params.get("output"));
			// execute program
			result = env.execute("Accumulator example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			filteredLines.print();
			result = env.getLastJobExecutionResult();
		}

		// get the accumulator result via its registration key
		final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
		System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
	}
 
Developer ID: axbaretto, Project: flink, Lines of code: 33, Source file: EmptyFieldsCountAccumulator.java

Example 13: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		// set up the execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<String> text = getTextDataSet(env);

		DataSet<Tuple2<String, Integer>> counts =
				// split up the lines in pairs (2-tuples) containing: (word,1)
				text.flatMap(new Tokenizer())
				// group by the tuple field "0" and sum up tuple field "1"
				.groupBy(0)
				.aggregate(Aggregations.SUM, 1);

		// emit result
		if (fileOutput) {
			counts.writeAsCsv(outputPath, "\n", " ");
			// execute program
			env.execute("WordCount Example");
		} else {
			counts.print();
		}
	}
 
Developer ID: axbaretto, Project: flink, Lines of code: 29, Source file: WordCount.java

Example 14: testIncrementalSSSPNonSPEdge

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@Test
public void testIncrementalSSSPNonSPEdge() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Vertex<Long, Double>> vertices = IncrementalSSSPData.getDefaultVertexDataSet(env);
	DataSet<Edge<Long, Double>> edges = IncrementalSSSPData.getDefaultEdgeDataSet(env);
	DataSet<Edge<Long, Double>> edgesInSSSP = IncrementalSSSPData.getDefaultEdgesInSSSP(env);
	// the edge to be removed is a non-SP edge
	Edge<Long, Double> edgeToBeRemoved = new Edge<>(3L, 5L, 5.0);

	Graph<Long, Double, Double> graph = Graph.fromDataSet(vertices, edges, env);
	// Assumption: all minimum weight paths are kept
	Graph<Long, Double, Double> ssspGraph = Graph.fromDataSet(vertices, edgesInSSSP, env);
	// remove the edge
	graph.removeEdge(edgeToBeRemoved);
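	// note: Gelly's Graph is immutable, so removeEdge returns a new Graph;
	// the returned value is discarded here, leaving 'graph' unchanged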

	// configure the iteration
	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	if (IncrementalSSSP.isInSSSP(edgeToBeRemoved, edgesInSSSP)) {

		parameters.setDirection(EdgeDirection.IN);
		parameters.setOptDegrees(true);

		// run the scatter gather iteration to propagate info
		Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
				new IncrementalSSSP.InvalidateMessenger(edgeToBeRemoved),
				new IncrementalSSSP.VertexDistanceUpdater(),
				IncrementalSSSPData.NUM_VERTICES, parameters);

		DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

		resultedVertices.writeAsCsv(resultPath, "\n", ",");
		env.execute();
	} else {
		vertices.writeAsCsv(resultPath, "\n", ",");
		env.execute();
	}

	expected = IncrementalSSSPData.VERTICES;
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 41, Source file: IncrementalSSSPITCase.java

Example 15: main

import org.apache.flink.api.java.DataSet; // import the package/class the method depends on
@SuppressWarnings("serial")
public static void main(String [] args) throws Exception {

	if(!parseParameters(args)) {
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Edge<Long, NullValue>> edges = getEdgesDataSet(env);

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new MapFunction<Long, Long>() {
		@Override
		public Long map(Long value) throws Exception {
			return value;
		}
	}, env);

	DataSet<Vertex<Long, Long>> verticesWithMinIds = graph
			.run(new GSAConnectedComponents<Long, Long, NullValue>(maxIterations));

	// emit result
	if (fileOutput) {
		verticesWithMinIds.writeAsCsv(outputPath, "\n", ",");

		// since file sinks are lazy, we trigger the execution explicitly
		env.execute("Connected Components Example");
	} else {
		verticesWithMinIds.print();
	}
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 32, Source file: ConnectedComponents.java


Note: The org.apache.flink.api.java.DataSet.writeAsCsv method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. For distribution and use, refer to the corresponding project's license. Do not reproduce without permission.