当前位置: 首页>>代码示例>>Java>>正文


Java TextInputFormat.setCharsetName方法代码示例

本文整理汇总了Java中org.apache.flink.api.java.io.TextInputFormat.setCharsetName方法的典型用法代码示例。如果您正苦于以下问题：Java TextInputFormat.setCharsetName方法的具体用法？Java TextInputFormat.setCharsetName怎么用？Java TextInputFormat.setCharsetName使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.flink.api.java.io.TextInputFormat的用法示例。


在下文中一共展示了TextInputFormat.setCharsetName方法的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: readWikiDump

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a source over the configured dataset, read as UTF-8 text and split
 * into records at each closing {@code </page>} tag.
 *
 * @param config supplies the dataset location through {@code getDataset()}
 * @param env    execution environment used to create the source
 * @return the source returned by {@code env.readFile}
 */
public static DataSource<String> readWikiDump(FlinkMlpCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat pageFormat = new TextInputFormat(new Path(config.getDataset()));
    pageFormat.setCharsetName("UTF-8");
    // Records end at the closing page tag instead of at line breaks.
    pageFormat.setDelimiter("</page>");
    return env.readFile(pageFormat, config.getDataset());
}
 
开发者ID:ag-gipp,项目名称:mathosphere,代码行数:8,代码来源:FlinkMlpRelationFinder.java

示例2: main

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads the {@code ex1.html} sample from the classpath as UTF-8 text split on
 * {@code </ARXIVFILESPLIT>} and writes the resulting records to {@code test},
 * overwriting any previous output.
 *
 * @param args command-line arguments; not used
 * @throws IllegalStateException if the sample resource is not found on the classpath
 * @throws Exception if the resource path cannot be decoded or the job fails
 */
public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Locate the sample input on the classpath.
        final String resourceName = "com/formulasearchengine/mathosphere/mathpd/ex1.html";
        final ClassLoader classLoader = WordCount.class.getClassLoader();
        final URL resource = classLoader.getResource(resourceName);
        if (resource == null) {
            // getResource signals a missing resource with null; fail with a clear
            // message instead of a NullPointerException on the next line.
            throw new IllegalStateException("Resource not found on classpath: " + resourceName);
        }

        // Decode any URL escapes in the file part before using it as a path.
        final String filename = URLDecoder.decode(resource.getFile(), "UTF-8");
        final Path filePath = new Path(filename);
        final TextInputFormat inp = new TextInputFormat(filePath);
        inp.setCharsetName("UTF-8");
        // Records end at the split marker instead of at line breaks.
        inp.setDelimiter("</ARXIVFILESPLIT>");
        final DataSource<String> source = env.readFile(inp, filename);

        // Write the records out and run the job.
        source.writeAsText("test", FileSystem.WriteMode.OVERWRITE);
        env.execute();
    }
 
开发者ID:ag-gipp,项目名称:mathosphere,代码行数:37,代码来源:FlinkPd.java

示例3: readWikiDump

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a source over the configured dataset, read as UTF-8 text and split
 * into records at each {@code </ARXIVFILESPLIT>} marker.
 *
 * @param config supplies the dataset location through {@code getDataset()}
 * @param env    execution environment used to create the source
 * @return the source returned by {@code env.readFile}
 */
public static DataSource<String> readWikiDump(FlinkPdCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat splitFormat = new TextInputFormat(new Path(config.getDataset()));
    splitFormat.setCharsetName("UTF-8");
    // Records end at the split marker instead of at line breaks.
    splitFormat.setDelimiter("</ARXIVFILESPLIT>");
    return env.readFile(splitFormat, config.getDataset());
}
 
开发者ID:ag-gipp,项目名称:mathosphere,代码行数:8,代码来源:FlinkPd.java

示例4: readRefs

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a source over the configured reference input, read as UTF-8 text and
 * split into records at each {@code </ARXIVFILESPLIT>} marker.
 *
 * @param config supplies the reference location through {@code getRef()}
 * @param env    execution environment used to create the source
 * @return the source returned by {@code env.readFile}
 */
public static DataSource<String> readRefs(FlinkPdCommandConfig config, ExecutionEnvironment env) {
    final TextInputFormat refFormat = new TextInputFormat(new Path(config.getRef()));
    refFormat.setCharsetName("UTF-8");
    // Records end at the split marker instead of at line breaks.
    refFormat.setDelimiter("</ARXIVFILESPLIT>");
    return env.readFile(refFormat, config.getRef());
}
 
开发者ID:ag-gipp,项目名称:mathosphere,代码行数:8,代码来源:FlinkPd.java

示例5: readPreprocessedFile

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Creates a source over the given file, read as UTF-8 text. No custom delimiter
 * is configured, so the input format's own default record splitting applies.
 *
 * @param pathname location of the file to read
 * @param env      execution environment used to create the source
 * @return the source returned by {@code env.readFile}
 */
public static DataSource<String> readPreprocessedFile(String pathname, ExecutionEnvironment env) {
    final TextInputFormat utf8Format = new TextInputFormat(new Path(pathname));
    utf8Format.setCharsetName("UTF-8");
    return env.readFile(utf8Format, pathname);
}
 
开发者ID:ag-gipp,项目名称:mathosphere,代码行数:8,代码来源:FlinkPd.java

示例6: main

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads a hard-coded local {@code ex1.html} file as UTF-8 text split on
 * {@code </ARXIVFILESPLIT>} and writes the resulting records to {@code test},
 * overwriting any previous output.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the path cannot be decoded or the job fails
 */
public static void main(String[] args) throws Exception {

		// Obtain the execution environment for this job.
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Input location is fixed to a local build-output file.
		final String inputLocation =
				URLDecoder.decode("file:/C:/git/flink/readFileTest/target/classes/ex1.html", "UTF-8");

		final TextInputFormat splitFormat = new TextInputFormat(new Path(inputLocation));
		splitFormat.setCharsetName("UTF-8");
		// Records end at the split marker instead of at line breaks.
		splitFormat.setDelimiter("</ARXIVFILESPLIT>");

		// Send every record to the text sink, then run the job.
		env.readFile(splitFormat, inputLocation)
				.writeAsText("test", FileSystem.WriteMode.OVERWRITE);
		env.execute();

	}
 
开发者ID:ag-gipp,项目名称:mathosphere,代码行数:29,代码来源:WordCount.java

示例7: readTextFile

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Builds a {@link DataSet} of Strings by reading the given file line wise, decoding its
 * bytes with the {@link java.nio.charset.Charset} identified by the given name.
 *
 * @param filePath The file to read, given as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param charsetName The name of the character set applied when reading the file.
 * @return A {@link DataSet} holding the text lines read from the given file.
 */
public DataSource<String> readTextFile(String filePath, String charsetName) {
	Preconditions.checkNotNull(filePath, "The file path may not be null.");

	final Path inputPath = new Path(filePath);
	final TextInputFormat textFormat = new TextInputFormat(inputPath);
	textFormat.setCharsetName(charsetName);

	return new DataSource<>(
			this,
			textFormat,
			BasicTypeInfo.STRING_TYPE_INFO,
			Utils.getCallLocationName());
}
 
开发者ID:axbaretto,项目名称:flink,代码行数:16,代码来源:ExecutionEnvironment.java

示例8: readTextFile

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Reads the given file line-by-line and creates a data stream that contains a string with the
 * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be
 * used to read the files.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed,
 * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data,
 * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point.
 *
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param charsetName
 * 		The name of the character set used to read the file
 * @return The data stream that represents the data read from the given file as text lines
 * @throws NullPointerException
 * 		If {@code filePath} is null
 * @throws IllegalArgumentException
 * 		If {@code filePath} is empty
 */
public DataStreamSource<String> readTextFile(String filePath, String charsetName) {
	Preconditions.checkNotNull(filePath, "The file path must not be null.");
	// checkNotNull on the boxed result of isEmpty() can never fail; checkArgument
	// is what actually rejects an empty path.
	Preconditions.checkArgument(!filePath.isEmpty(), "The file path must not be empty.");

	TextInputFormat format = new TextInputFormat(new Path(filePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());
	TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO;
	format.setCharsetName(charsetName);

	// Process the path once; -1 is passed as the interval argument.
	return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo);
}
 
开发者ID:axbaretto,项目名称:flink,代码行数:29,代码来源:StreamExecutionEnvironment.java

示例9: readTextFile

import org.apache.flink.api.java.io.TextInputFormat; //导入方法依赖的package包/类
/**
 * Builds a DataSet of Strings by reading the given file line wise, decoding its bytes
 * with the {@link java.nio.charset.Charset} identified by the given name.
 *
 * @param filePath The file to read, given as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param charsetName The name of the character set applied when reading the file.
 * @return A DataSet holding the text lines read from the given file.
 */
public DataSource<String> readTextFile(String filePath, String charsetName) {
	Validate.notNull(filePath, "The file path may not be null.");

	final Path inputPath = new Path(filePath);
	final TextInputFormat textFormat = new TextInputFormat(inputPath);
	textFormat.setCharsetName(charsetName);

	return new DataSource<String>(this, textFormat, BasicTypeInfo.STRING_TYPE_INFO);
}
 
开发者ID:citlab,项目名称:vs.msc.ws14,代码行数:16,代码来源:ExecutionEnvironment.java


注:本文中的org.apache.flink.api.java.io.TextInputFormat.setCharsetName方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。