本文整理汇总了Java中org.apache.flink.api.java.ExecutionEnvironment.readTextFile方法的典型用法代码示例。如果您正苦于以下问题:Java ExecutionEnvironment.readTextFile方法的具体用法?Java ExecutionEnvironment.readTextFile怎么用?Java ExecutionEnvironment.readTextFile使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.flink.api.java.ExecutionEnvironment
的用法示例。
在下文中一共展示了ExecutionEnvironment.readTextFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
    // Read the command-line arguments; --input is mandatory.
    final ParameterTool parameters = ParameterTool.fromArgs(args);
    final String ratingsFile = parameters.getRequired("input");

    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // Load the ratings CSV line by line, map each line to a keyed pair,
    // then sum the occurrences per key and print the result.
    DataSource<String> lines = environment.readTextFile(ratingsFile);
    lines.flatMap(new ExtractRating())
            .groupBy(0)
            .sum(1)
            .print();
}
示例2: main
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
    // Bail out early when the required paths were not supplied.
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Tokenize every input line into (word, 1) pairs, then aggregate per word.
    DataSet<String> lines = env.readTextFile(textPath);
    DataSet<Tuple2<String, Integer>> wordCounts = lines
            .flatMap(new Tokenizer())
            .groupBy(0)
            .sum(1);

    // Persist the counts as "<word> <count>" rows, one row per line.
    wordCounts.writeAsCsv(outputPath, "\n", " ");
    env.execute("WordCount Example");
}
示例3: testProgram
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Word count over a custom POJO (WC), grouping on the nested
    // key expression "complex.someTest".
    DataSet<String> text = env.readTextFile(textPath);
    DataSet<WC> counts = text
            .flatMap(new Tokenizer())
            .groupBy("complex.someTest")
            .reduce(new ReduceFunction<WC>() {
                private static final long serialVersionUID = 1L;

                @Override // was missing: lets the compiler catch signature drift
                public WC reduce(WC value1, WC value2) {
                    // Keep the shared grouping key, add the partial counts.
                    return new WC(value1.complex.someTest, value1.count + value2.count);
                }
            });

    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
示例4: testProgram
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Word count over a custom POJO (WC), grouping on the "word" field.
    DataSet<String> text = env.readTextFile(textPath);
    DataSet<WC> counts = text
            .flatMap(new Tokenizer())
            .groupBy("word")
            .reduce(new ReduceFunction<WC>() {
                private static final long serialVersionUID = 1L;

                @Override // was missing: lets the compiler catch signature drift
                public WC reduce(WC value1, WC value2) {
                    // Keep the shared word, add the partial counts.
                    return new WC(value1.word, value1.count + value2.count);
                }
            });

    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
示例5: main
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
    // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException
    // when fewer than two arguments are supplied.
    if (args.length < 2) {
        System.err.println("Usage: <inputPath> <outputPath>");
        return;
    }
    String inputPath = args[0];
    // Suffix the output path with a timestamp so reruns do not collide.
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Split each line into (word, 1) pairs; the explicit TypeHint is needed
    // because type erasure hides the lambda's tuple type from Flink.
    DataSet<String> text = env.readTextFile(inputPath);
    DataSet<Tuple2<String, Long>> counts = text
            .<Tuple2<String, Long>>flatMap((line, out) -> {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    out.collect(new Tuple2<>(tokenizer.nextToken(), 1L));
                }
            })
            .returns(new TypeHint<Tuple2<String, Long>>() {
            })
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // emit result
    counts.writeAsCsv(outputPath);

    // execute program and report the wall-clock duration
    long t = System.currentTimeMillis();
    env.execute("Streaming WordCount Example");
    System.out.println("Time=" + (System.currentTimeMillis() - t));
}
示例6: getTextDataSet
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
    // Either read the user-supplied file or fall back to the bundled sample text.
    return fileOutput
            ? env.readTextFile(textPath)
            : getDefaultTextLineDataSet(env);
}
示例7: testProgram
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
// Word count where elements travel as the abstract type WCBase but the
// runtime instances are the WC subclass, which carries the counter object.
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> text = env.readTextFile(textPath);
DataSet<WCBase> counts = text
.flatMap(new Tokenizer())
.groupBy("word")
.reduce(new ReduceFunction<WCBase>() {
private static final long serialVersionUID = 1L;
public WCBase reduce(WCBase value1, WCBase value2) {
// Downcast to the concrete subclass to reach the hidden counter.
WC wc1 = (WC) value1;
WC wc2 = (WC) value2;
int c = wc1.secretCount.getCount() + wc2.secretCount.getCount();
// NOTE(review): mutates the left input in place instead of creating a
// fresh record — presumably relies on Flink permitting object reuse in
// this reduce; confirm before copying this pattern elsewhere.
wc1.secretCount.setCount(c);
return wc1;
}
})
.map(new MapFunction<WCBase, WCBase>() {
@Override
public WCBase map(WCBase value) throws Exception {
// Copy the hidden counter into the public 'count' field so the
// text output reflects the aggregated value.
WC wc = (WC) value;
wc.count = wc.secretCount.getCount();
return wc;
}
});
counts.writeAsText(resultPath);
env.execute("WordCount with custom data types example");
}
示例8: testProgram
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Word count where elements travel as the abstract WCBase type but are
    // WC instances at runtime, carrying the hidden counter field.
    DataSet<String> lines = env.readTextFile(textPath);
    DataSet<WCBase> counts = lines
            .flatMap(new Tokenizer())
            .groupBy("word")
            .reduce(new ReduceFunction<WCBase>() {
                private static final long serialVersionUID = 1L;

                public WCBase reduce(WCBase left, WCBase right) {
                    // Combine the hidden counters into a fresh record.
                    WC a = (WC) left;
                    WC b = (WC) right;
                    return new WC(left.word, a.secretCount + b.secretCount);
                }
            })
            .map(new MapFunction<WCBase, WCBase>() {
                @Override
                public WCBase map(WCBase value) throws Exception {
                    // Expose the hidden counter through the public field.
                    WC record = (WC) value;
                    record.count = record.secretCount;
                    return record;
                }
            });

    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
示例9: testProgram
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Tokenize each line, group on field 0 (the word) and count group sizes,
    // writing the result as "<word> <count>" rows.
    env.readTextFile(dataPath)
            .flatMap(new TokenizeLine())
            .groupBy(0)
            .reduceGroup(new CountWords())
            .writeAsCsv(resultPath, "\n", " ");

    // Keep the JobExecutionResult so the test harness can inspect it.
    this.result = env.execute();
}
示例10: getTextDataSet
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
    // Either read the user-supplied file or fall back to the bundled sample text.
    return fileOutput
            ? env.readTextFile(textPath)
            : WordCountData.getDefaultTextLineDataSet(env);
}
示例11: getTextDataSet
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
    // Either read the user-supplied file or fall back to the default sample text.
    return fileOutput
            ? env.readTextFile(textPath)
            : getDefaultTextLineDataSet(env);
}
示例12: getMismatchesData
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
private static DataSet<String> getMismatchesData(ExecutionEnvironment env) {
    // Read mismatches from the configured file, or use the bundled test data.
    return fileOutput
            ? env.readTextFile(mismatchesInputPath)
            : MusicProfilesData.getMismatches(env);
}
示例13: main
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // Expose the parsed parameters in the Flink web interface.
    env.getConfig().setGlobalJobParameters(params);

    // Use the file given via --input, or fall back to the bundled sample data.
    final DataSet<String> text;
    if (params.has("input")) {
        text = env.readTextFile(params.get("input"));
    } else {
        System.out.println("Executing WordCount example with default input data set.");
        System.out.println("Use --input to specify file input.");
        text = WordCountData.getDefaultTextLineDataSet(env);
    }

    // Split each line into (word, 1) pairs and sum the counts per word.
    DataSet<Tuple2<String, Integer>> counts = text
            .flatMap(new Tokenizer())
            .groupBy(0)
            .sum(1);

    // Write to --output when given; otherwise print (which triggers execution).
    if (params.has("output")) {
        counts.writeAsCsv(params.get("output"), "\n", " ");
        env.execute("WordCount Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        counts.print();
    }
}
示例14: main
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
    // get the execution environment
    final ExecutionEnvironment job = ExecutionEnvironment.getExecutionEnvironment();

    String inputPath;
    String outputPath = null;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        // BUGFIX: the original used params.get("input"), which returns null when
        // --input is absent and only fails much later inside readTextFile().
        // getRequired() throws immediately, landing in the catch block below.
        inputPath = params.getRequired("input");
        if (params.has("output")) {
            outputPath = params.get("output");
        }
        // make parameters available in the web interface
        job.getConfig().setGlobalJobParameters(params);
    } catch (Exception e) {
        // BUGFIX: added the missing space so the hint reads
        // 'FileWordCount --input ...' instead of 'FileWordCount--input ...'.
        System.err.println("No input specified. Please run '" + org.apache.flink.connectors.cassandra.streaming.tuple.wordcount.FileWordCount.class.getSimpleName() +
                " --input <file-path>', where 'input' is the path to a text file");
        return;
    }

    // Prepare the embedded Cassandra instance and its schema.
    DataServiceFacade dataService = new DataServiceFacade(DataEntityType.WORD_COUNT);
    dataService.setUpEmbeddedCassandra();
    dataService.setUpDataModel();

    LOG.info("Example starts!");

    // get input data by reading content from file
    DataSet<String> text = job.readTextFile(inputPath);
    DataSet<Tuple2<String, Long>> result =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
                    // normalize and split the line
                    String[] words = value.toLowerCase().split("\\W+");
                    // emit the pairs
                    for (String word : words) {
                        // Do not accept empty word, since word is defined as primary key in C* table
                        if (!word.isEmpty()) {
                            out.collect(new Tuple2<String, Long>(word, 1L));
                        }
                    }
                }
            })
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // Update the results to the C* sink (parameterized; the original used the raw type).
    CassandraOutputFormat<Tuple2<String, Long>> sink = new CassandraOutputFormat<>(
            "INSERT INTO " + WordCount.CQL_KEYSPACE_NAME + "." + WordCount.CQL_TABLE_NAME + "(word, count) " +
            "values (?, ?);", new ClusterBuilder() {
                @Override
                protected Cluster buildCluster(Cluster.Builder builder) {
                    builder.addContactPoint("127.0.0.1");
                    return builder.build();
                }
            });
    result.output(sink);

    // emit result
    if (outputPath != null) {
        result.writeAsText(outputPath);
    }

    // execute program
    job.execute("[BATCH] FileWordCount w/ C* Sink");

    // Give the embedded Cassandra time to flush before the JVM exits.
    LOG.info("20 sec sleep ...");
    Thread.sleep(20 * 1000);
    LOG.info("20 sec sleep ... DONE");
}
示例15: getTextDataSet
import org.apache.flink.api.java.ExecutionEnvironment; //导入方法依赖的package包/类
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        // No input path configured: fall back to the built-in sample text.
        return env.fromElements(
                "To be, or not to be,--that is the question:--",
                "Whether 'tis nobler in the mind to suffer",
                "The slings and arrows of outrageous fortune",
                "Or to take arms against a sea of troubles,",
                "And by opposing end them?--To die,--to sleep,--",
                "No more; and by a sleep to say we end",
                "The heartache, and the thousand natural shocks",
                "That flesh is heir to,--'tis a consummation",
                "Devoutly to be wish'd. To die,--to sleep;--",
                "To sleep! perchance to dream:--ay, there's the rub;",
                "For in that sleep of death what dreams may come,",
                "When we have shuffled off this mortal coil,",
                "Must give us pause: there's the respect",
                "That makes calamity of so long life;",
                "For who would bear the whips and scorns of time,",
                "The oppressor's wrong, the proud man's contumely,",
                "The pangs of despis'd love, the law's delay,",
                "The insolence of office, and the spurns",
                "That patient merit of the unworthy takes,",
                "When he himself might his quietus make",
                "With a bare bodkin? who would these fardels bear,",
                "To grunt and sweat under a weary life,",
                "But that the dread of something after death,--",
                "The undiscover'd country, from whose bourn",
                "No traveller returns,--puzzles the will,",
                "And makes us rather bear those ills we have",
                "Than fly to others that we know not of?",
                "Thus conscience does make cowards of us all;",
                "And thus the native hue of resolution",
                "Is sicklied o'er with the pale cast of thought;",
                "And enterprises of great pith and moment,",
                "With this regard, their currents turn awry,",
                "And lose the name of action.--Soft you now!",
                "The fair Ophelia!--Nymph, in thy orisons",
                "Be all my sins remember'd."
        );
    }
    // read the text file from given input path
    return env.readTextFile(textPath);
}