This article collects typical usage examples of the Java method org.apache.flink.api.java.DataSet.writeAsCsv. If you are wondering what DataSet.writeAsCsv does, how to call it, or what it looks like in real code, the hand-picked examples below may help. You can also explore further usage examples of the enclosing class, org.apache.flink.api.java.DataSet.
The following shows 15 code examples of DataSet.writeAsCsv, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
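Before the individual examples, here is a minimal, self-contained sketch (not one of the 15 collected examples) of the most common writeAsCsv overloads. writeAsCsv is defined only for DataSets of Flink tuple types, and file sinks are lazy, so nothing is written until env.execute() is called. The class name and output paths below are placeholders chosen for illustration.

// Minimal sketch of the writeAsCsv overloads; paths and class name are placeholders.
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.FileSystem.WriteMode;

public class WriteAsCsvSketch {
    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<String, Integer>> data = env.fromElements(
                new Tuple2<>("a", 1),
                new Tuple2<>("b", 2));

        // path only: uses the default row delimiter "\n" and field delimiter ","
        data.writeAsCsv("/tmp/out-default");

        // explicit row and field delimiters, overwriting any existing files
        data.writeAsCsv("/tmp/out-custom", "\n", "|", WriteMode.OVERWRITE);

        // file sinks are lazy, so the job must be executed explicitly
        env.execute("writeAsCsv sketch");
    }
}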
Example 1: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = env.readTextFile(textPath);

    DataSet<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new Tokenizer())
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // emit result
    counts.writeAsCsv(outputPath, "\n", " ");

    // execute program
    env.execute("WordCount Example");
}
Example 2: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>");
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read a tab-separated file of (Long, Long) pairs
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0])
            .fieldDelimiter("\t").types(Long.class, Long.class);

    // increment the second field of every tuple
    DataSet<Tuple2<Long, Long>> result = input.map(
            new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
                    return new Tuple2<Long, Long>(value.f0, value.f1 + 1);
                }
            });

    // write the result as a tab-separated CSV file
    result.writeAsCsv(args[1], "\n", "\t");
    env.execute();
}
Example 3: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@SuppressWarnings("unchecked")
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Integer, String>> left = env.fromElements(
            new Tuple2<Integer, String>(1, "hello"),
            new Tuple2<Integer, String>(2, "what's"),
            new Tuple2<Integer, String>(2, "up")
    );
    DataSet<Tuple2<Integer, String>> right = env.fromElements(
            new Tuple2<Integer, String>(1, "not"),
            new Tuple2<Integer, String>(1, "much"),
            new Tuple2<Integer, String>(2, "really")
    );

    // build the cross product and combine each pair of tuples
    DataSet<Tuple2<Integer, String>> joined = left.cross(right)
            .with((t, s) -> new Tuple2<Integer, String>(t.f0 + s.f0, t.f1 + " " + s.f1));

    // write with the default row ("\n") and field (",") delimiters
    joined.writeAsCsv(resultPath);
    env.execute();
}
Example 4: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = get5TupleDataSet(env);

    // group on fields 4 and 0 and reduce each group to a single tuple
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds
            .groupBy(4, 0)
            .reduce((in1, in2) -> {
                Tuple5<Integer, Long, Integer, String, Long> out = new Tuple5<Integer, Long, Integer, String, Long>();
                out.setFields(in1.f0, in1.f1 + in2.f1, 0, "P-)", in1.f4);
                return out;
            });

    reduceDs.writeAsCsv(resultPath);
    env.execute();
}
Example 5: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@SuppressWarnings("unchecked")
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Integer, String>> left = env.fromElements(
            new Tuple2<Integer, String>(1, "hello"),
            new Tuple2<Integer, String>(2, "what's"),
            new Tuple2<Integer, String>(2, "up")
    );
    DataSet<Tuple2<Integer, String>> right = env.fromElements(
            new Tuple2<Integer, String>(1, "not"),
            new Tuple2<Integer, String>(1, "much"),
            new Tuple2<Integer, String>(2, "really")
    );

    // join on the first field and concatenate the string fields of matching tuples
    DataSet<Tuple2<Integer, String>> joined = left.join(right).where(0).equalTo(0)
            .with((t, s, out) -> out.collect(new Tuple2<Integer, String>(t.f0, t.f1 + " " + s.f1)));

    joined.writeAsCsv(resultPath);
    env.execute();
}
Example 6: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
public static void main(String[] args) throws Exception {
    String inputPath = args[0];
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = env.readTextFile(inputPath);

    DataSet<Tuple2<String, Long>> counts = text
            .<Tuple2<String, Long>>flatMap((line, out) -> {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    out.collect(new Tuple2<>(tokenizer.nextToken(), 1L));
                }
            })
            .returns(new TypeHint<Tuple2<String, Long>>() {
            })
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // emit result
    counts.writeAsCsv(outputPath);

    // execute program
    long t = System.currentTimeMillis();
    env.execute("Streaming WordCount Example");
    System.out.println("Time=" + (System.currentTimeMillis() - t));
}
Example 7: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 8: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 9: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = getTextDataSet(env);

    DataSet<Tuple2<String, Integer>> counts =
            // normalize and split each line
            text.map(line -> line.toLowerCase().split("\\W+"))
            // convert split line in pairs (2-tuples) containing: (word,1)
            .flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
                // emit the pairs with non-zero-length words
                Arrays.stream(tokens)
                        .filter(t -> t.length() > 0)
                        .forEach(t -> out.collect(new Tuple2<>(t, 1)));
            })
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath, "\n", " ");
    } else {
        counts.print();
    }

    // execute program
    env.execute("WordCount Example");
}
Example 10: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

    // Execute the scatter-gather iteration
    Graph<Long, Double, Double> result = graph.runScatterGatherIteration(
            new MinDistanceMessenger(), new VertexDistanceUpdater(), maxIterations);

    // Extract the vertices as the result
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

    // emit result
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");
        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Single Source Shortest Paths Example");
    } else {
        singleSourceShortestPaths.print();
    }
}
Example 11: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds = get3TupleDataSet(env);

    // keep only the tuples whose third field contains "world"
    DataSet<Tuple3<Integer, Long, String>> filterDs = ds
            .filter(value -> value.f2.contains("world"));

    filterDs.writeAsCsv(resultPath);
    env.execute();
}
Example 12: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get the data set
    final DataSet<StringTriple> file = getDataSet(env, params);

    // filter lines with empty fields
    final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

    // Here, we could do further processing with the filtered lines...
    JobExecutionResult result;

    // output the filtered lines
    if (params.has("output")) {
        filteredLines.writeAsCsv(params.get("output"));
        // execute program
        result = env.execute("Accumulator example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        filteredLines.print();
        result = env.getLastJobExecutionResult();
    }

    // get the accumulator result via its registration key
    final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
    System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
}
Example 13: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = getTextDataSet(env);

    DataSet<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new Tokenizer())
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);

    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath, "\n", " ");
        // execute program
        env.execute("WordCount Example");
    } else {
        counts.print();
    }
}
Example 14: testIncrementalSSSPNonSPEdge
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@Test
public void testIncrementalSSSPNonSPEdge() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Double>> vertices = IncrementalSSSPData.getDefaultVertexDataSet(env);
    DataSet<Edge<Long, Double>> edges = IncrementalSSSPData.getDefaultEdgeDataSet(env);
    DataSet<Edge<Long, Double>> edgesInSSSP = IncrementalSSSPData.getDefaultEdgesInSSSP(env);

    // the edge to be removed is a non-SP edge
    Edge<Long, Double> edgeToBeRemoved = new Edge<>(3L, 5L, 5.0);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(vertices, edges, env);

    // Assumption: all minimum weight paths are kept
    Graph<Long, Double, Double> ssspGraph = Graph.fromDataSet(vertices, edgesInSSSP, env);

    // remove the edge
    graph.removeEdge(edgeToBeRemoved);

    // configure the iteration
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

    if (IncrementalSSSP.isInSSSP(edgeToBeRemoved, edgesInSSSP)) {
        parameters.setDirection(EdgeDirection.IN);
        parameters.setOptDegrees(true);

        // run the scatter gather iteration to propagate info
        Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
                new IncrementalSSSP.InvalidateMessenger(edgeToBeRemoved),
                new IncrementalSSSP.VertexDistanceUpdater(),
                IncrementalSSSPData.NUM_VERTICES, parameters);

        DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

        resultedVertices.writeAsCsv(resultPath, "\n", ",");
        env.execute();
    } else {
        vertices.writeAsCsv(resultPath, "\n", ",");
        env.execute();
    }

    expected = IncrementalSSSPData.VERTICES;
}
Example 15: main
import org.apache.flink.api.java.DataSet; // import the package/class required by this method
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, NullValue>> edges = getEdgesDataSet(env);

    Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    }, env);

    DataSet<Vertex<Long, Long>> verticesWithMinIds = graph
            .run(new GSAConnectedComponents<Long, Long, NullValue>(maxIterations));

    // emit result
    if (fileOutput) {
        verticesWithMinIds.writeAsCsv(outputPath, "\n", ",");
        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Connected Components Example");
    } else {
        verticesWithMinIds.print();
    }
}