本文整理汇总了 Java 中 org.apache.flink.api.java.io.TextInputFormat.setDelimiter 方法的典型用法代码示例。如果您正苦于以下问题:Java TextInputFormat.setDelimiter 方法的具体用法?Java TextInputFormat.setDelimiter 怎么用?Java TextInputFormat.setDelimiter 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.flink.api.java.io.TextInputFormat 的用法示例。
在下文中一共展示了TextInputFormat.setDelimiter方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readWikiDump
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Builds a text source over the dataset named by the configuration.
 *
 * <p>The input format is set to the {@code UTF-8} charset name and uses the literal
 * {@code </page>} as its record delimiter.
 *
 * @param config command configuration; {@code getDataset()} supplies the input location
 * @param env execution environment on which {@code readFile} is invoked
 * @return the data source returned by {@code env.readFile}
 */
public static DataSource<String> readWikiDump(FlinkMlpCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat pageFormat = new TextInputFormat(new Path(config.getDataset()));
    // Charset is set before the delimiter, in the same order as the original code.
    pageFormat.setCharsetName("UTF-8");
    pageFormat.setDelimiter("</page>");

    final DataSource<String> pages = env.readFile(pageFormat, config.getDataset());
    return pages;
}
示例2: main
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads the {@code ex1.html} classpath resource with a {@link TextInputFormat} that uses
 * {@code </ARXIVFILESPLIT>} as record delimiter, and writes the resulting records as text
 * to {@code test} in overwrite mode.
 *
 * @param args command-line arguments; not used by this method
 * @throws IllegalStateException if the {@code ex1.html} resource is not on the classpath
 * @throws Exception if decoding the resource path or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final String resourceName = "com/formulasearchengine/mathosphere/mathpd/ex1.html";
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Locate the input data on the classpath. getResource returns null when the resource
    // is absent, so fail with a descriptive message instead of a bare NullPointerException.
    ClassLoader classLoader = WordCount.class.getClassLoader();
    URL resource = classLoader.getResource(resourceName);
    if (resource == null) {
        throw new IllegalStateException("Classpath resource not found: " + resourceName);
    }

    // The file part of a URL is percent-encoded (e.g. spaces as %20); decode it before
    // handing it to Path.
    final String filename = URLDecoder.decode(resource.getFile(), "UTF-8");
    Path filePath = new Path(filename);

    TextInputFormat inp = new TextInputFormat(filePath);
    inp.setCharsetName("UTF-8");
    inp.setDelimiter("</ARXIVFILESPLIT>");
    final DataSource<String> source = env.readFile(inp, filename);

    // Write the records out and run the job.
    source.writeAsText("test", FileSystem.WriteMode.OVERWRITE);
    env.execute();
}
示例3: readWikiDump
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Builds a text source over the dataset named by the configuration.
 *
 * <p>The input format is set to the {@code UTF-8} charset name and uses the literal
 * {@code </ARXIVFILESPLIT>} as its record delimiter.
 *
 * @param config command configuration; {@code getDataset()} supplies the input location
 * @param env execution environment on which {@code readFile} is invoked
 * @return the data source returned by {@code env.readFile}
 */
public static DataSource<String> readWikiDump(FlinkPdCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat documentFormat = new TextInputFormat(new Path(config.getDataset()));
    // Charset is set before the delimiter, in the same order as the original code.
    documentFormat.setCharsetName("UTF-8");
    documentFormat.setDelimiter("</ARXIVFILESPLIT>");

    final DataSource<String> documents = env.readFile(documentFormat, config.getDataset());
    return documents;
}
示例4: readRefs
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Builds a text source over the reference file named by the configuration.
 *
 * <p>The input format is set to the {@code UTF-8} charset name and uses the literal
 * {@code </ARXIVFILESPLIT>} as its record delimiter.
 *
 * @param config command configuration; {@code getRef()} supplies the input location
 * @param env execution environment on which {@code readFile} is invoked
 * @return the data source returned by {@code env.readFile}
 */
public static DataSource<String> readRefs(FlinkPdCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat referenceFormat = new TextInputFormat(new Path(config.getRef()));
    // Charset is set before the delimiter, in the same order as the original code.
    referenceFormat.setCharsetName("UTF-8");
    referenceFormat.setDelimiter("</ARXIVFILESPLIT>");

    final DataSource<String> references = env.readFile(referenceFormat, config.getRef());
    return references;
}
示例5: main
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads a fixed {@code ex1.html} file with a {@link TextInputFormat} that uses
 * {@code </ARXIVFILESPLIT>} as record delimiter, and writes the resulting records as text
 * to {@code test} in overwrite mode.
 *
 * @param args command-line arguments; not used by this method
 * @throws Exception if decoding the path or executing the job fails
 */
public static void main(String[] args) throws Exception {
    // Set up the execution environment.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // NOTE(review): the input location is a hard-coded absolute path on a developer
    // machine; an earlier variant resolved ex1.html through the class loader instead.
    final String filename =
            URLDecoder.decode("file:/C:/git/flink/readFileTest/target/classes/ex1.html", "UTF-8");

    final TextInputFormat splitFormat = new TextInputFormat(new Path(filename));
    splitFormat.setCharsetName("UTF-8");
    splitFormat.setDelimiter("</ARXIVFILESPLIT>");

    // Write the records out and run the job.
    final DataSource<String> records = env.readFile(splitFormat, filename);
    records.writeAsText("test", FileSystem.WriteMode.OVERWRITE);
    env.execute();
}