本文整理汇总了Java中org.apache.flink.api.java.io.TextInputFormat.setCharsetName方法的典型用法代码示例。如果您正苦于以下问题：Java TextInputFormat.setCharsetName方法的具体用法？Java TextInputFormat.setCharsetName怎么用？Java TextInputFormat.setCharsetName使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.flink.api.java.io.TextInputFormat的用法示例。
在下文中一共展示了TextInputFormat.setCharsetName方法的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readWikiDump
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a text source over the dataset location taken from {@code config}.
 *
 * <p>The input format is configured with the charset name {@code UTF-8} and the record
 * delimiter {@code </page>} before it is handed to {@code env.readFile}.
 *
 * @param config command configuration; only {@code getDataset()} is read here
 * @param env execution environment used to create the source
 * @return the source produced by {@code env.readFile} for the configured dataset
 */
public static DataSource<String> readWikiDump(FlinkMlpCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat pageFormat = new TextInputFormat(new Path(config.getDataset()));
    pageFormat.setCharsetName("UTF-8");
    pageFormat.setDelimiter("</page>");

    final DataSource<String> pages = env.readFile(pageFormat, config.getDataset());
    return pages;
}
示例2: main
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads the {@code ex1.html} classpath resource through a {@link TextInputFormat} configured
 * with charset name {@code UTF-8} and delimiter {@code </ARXIVFILESPLIT>}, then writes the
 * resulting records as text to {@code test} in overwrite mode and executes the job.
 *
 * @param args command-line arguments; not used
 * @throws IllegalStateException if the resource cannot be found on the classpath
 * @throws Exception if decoding the resource location or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final String resourceName = "com/formulasearchengine/mathosphere/mathpd/ex1.html";
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Locate the input data on the classpath.
    ClassLoader classLoader = WordCount.class.getClassLoader();
    URL resource = classLoader.getResource(resourceName);
    if (resource == null) {
        // getResource signals "not found" with null; fail with a descriptive message
        // instead of an anonymous NullPointerException on resource.getFile().
        throw new IllegalStateException("Resource not found on classpath: " + resourceName);
    }

    // The URL's file part may be percent-encoded, so decode it before building the path.
    final String filename = URLDecoder.decode(resource.getFile(), "UTF-8");
    Path filePath = new Path(filename);

    TextInputFormat inp = new TextInputFormat(filePath);
    inp.setCharsetName("UTF-8");
    inp.setDelimiter("</ARXIVFILESPLIT>");

    final DataSource<String> source = env.readFile(inp, filename);
    source.writeAsText("test", FileSystem.WriteMode.OVERWRITE);
    env.execute();
}
示例3: readWikiDump
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a text source over the dataset location taken from {@code config}.
 *
 * <p>The input format is configured with the charset name {@code UTF-8} and the record
 * delimiter {@code </ARXIVFILESPLIT>} before it is handed to {@code env.readFile}.
 *
 * @param config command configuration; only {@code getDataset()} is read here
 * @param env execution environment used to create the source
 * @return the source produced by {@code env.readFile} for the configured dataset
 */
public static DataSource<String> readWikiDump(FlinkPdCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat documentFormat = new TextInputFormat(new Path(config.getDataset()));
    documentFormat.setCharsetName("UTF-8");
    documentFormat.setDelimiter("</ARXIVFILESPLIT>");

    final DataSource<String> documents = env.readFile(documentFormat, config.getDataset());
    return documents;
}
示例4: readRefs
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a text source over the reference location taken from {@code config}.
 *
 * <p>The input format is configured with the charset name {@code UTF-8} and the record
 * delimiter {@code </ARXIVFILESPLIT>} before it is handed to {@code env.readFile}.
 *
 * @param config command configuration; only {@code getRef()} is read here
 * @param env execution environment used to create the source
 * @return the source produced by {@code env.readFile} for the configured reference path
 */
public static DataSource<String> readRefs(FlinkPdCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat refFormat = new TextInputFormat(new Path(config.getRef()));
    refFormat.setCharsetName("UTF-8");
    refFormat.setDelimiter("</ARXIVFILESPLIT>");

    final DataSource<String> refs = env.readFile(refFormat, config.getRef());
    return refs;
}
示例5: readPreprocessedFile
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a text source for the file at {@code pathname}.
 *
 * <p>Only the charset name ({@code UTF-8}) is set on the input format; no delimiter is
 * configured here, so whatever default the format uses applies.
 *
 * @param pathname location of the file to read
 * @param env execution environment used to create the source
 * @return the source produced by {@code env.readFile} for {@code pathname}
 */
public static DataSource<String> readPreprocessedFile(String pathname, ExecutionEnvironment env) {
    final TextInputFormat plainFormat = new TextInputFormat(new Path(pathname));
    plainFormat.setCharsetName("UTF-8");
    return env.readFile(plainFormat, pathname);
}
示例6: main
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads a fixed local {@code ex1.html} file through a {@link TextInputFormat} configured with
 * charset name {@code UTF-8} and delimiter {@code </ARXIVFILESPLIT>}, then writes the resulting
 * records as text to {@code test} in overwrite mode and executes the job.
 *
 * @param args command-line arguments; not used
 * @throws Exception if decoding the file location or executing the job fails
 */
public static void main(String[] args) throws Exception {
    // Execution environment for the job.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Input location: a hard-coded file URL, passed through URL decoding.
    final String filename =
            URLDecoder.decode("file:/C:/git/flink/readFileTest/target/classes/ex1.html", "UTF-8");

    final TextInputFormat splitFormat = new TextInputFormat(new Path(filename));
    splitFormat.setCharsetName("UTF-8");
    splitFormat.setDelimiter("</ARXIVFILESPLIT>");

    // Register the sink, then run the job.
    env.readFile(splitFormat, filename)
            .writeAsText("test", FileSystem.WriteMode.OVERWRITE);
    env.execute();
}
示例7: readTextFile
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Builds a {@link DataSet} of Strings by reading the given file line wise, using the
 * {@link java.nio.charset.Charset} identified by {@code charsetName}.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or
 *     "hdfs://host:port/file/path"). Must not be {@code null}.
 * @param charsetName The name of the character set used to read the file.
 * @return A {@link DataSet} that represents the data read from the given file as text lines.
 */
public DataSource<String> readTextFile(String filePath, String charsetName) {
    Preconditions.checkNotNull(filePath, "The file path may not be null.");

    final TextInputFormat textFormat = new TextInputFormat(new Path(filePath));
    textFormat.setCharsetName(charsetName);

    return new DataSource<>(
            this,
            textFormat,
            BasicTypeInfo.STRING_TYPE_INFO,
            Utils.getCallLocationName());
}
示例8: readTextFile
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads the given file line-by-line and creates a data stream that contains a string with the
 * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be
 * used to read the files.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed,
 * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data,
 * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point.
 *
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path");
 * 		must be neither {@code null} nor empty
 * @param charsetName
 * 		The name of the character set used to read the file
 * @return The data stream that represents the data read from the given file as text lines
 * @throws NullPointerException if {@code filePath} is {@code null}
 * @throws IllegalArgumentException if {@code filePath} is empty
 */
public DataStreamSource<String> readTextFile(String filePath, String charsetName) {
    Preconditions.checkNotNull(filePath, "The file path must not be null.");
    // checkNotNull on the boxed result of isEmpty() can never fail; checkArgument is the
    // precondition that actually rejects an empty path with the intended message.
    Preconditions.checkArgument(!filePath.isEmpty(), "The file path must not be empty.");

    TextInputFormat format = new TextInputFormat(new Path(filePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());
    TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO;
    format.setCharsetName(charsetName);

    // Process the path once; the interval argument of -1 is passed through unchanged.
    return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo);
}
示例9: readTextFile
import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Builds a DataSet of Strings by reading the given file line wise, using the
 * {@link java.nio.charset.Charset} identified by {@code charsetName}.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or
 *     "hdfs://host:port/file/path"). Must not be {@code null}.
 * @param charsetName The name of the character set used to read the file.
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<String> readTextFile(String filePath, String charsetName) {
    Validate.notNull(filePath, "The file path may not be null.");

    final TextInputFormat textFormat = new TextInputFormat(new Path(filePath));
    textFormat.setCharsetName(charsetName);

    final DataSource<String> lines =
            new DataSource<String>(this, textFormat, BasicTypeInfo.STRING_TYPE_INFO);
    return lines;
}