

Java TextInputFormat.setFilesFilter Method Code Examples

This article collects typical usage examples of the Java method org.apache.flink.api.java.io.TextInputFormat.setFilesFilter. If you are unsure what TextInputFormat.setFilesFilter does, or how and when to use it, the curated code examples below should help. You can also explore other usages of org.apache.flink.api.java.io.TextInputFormat itself.


The following shows 7 code examples of the TextInputFormat.setFilesFilter method, ordered by popularity.
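Before the examples, here is a minimal, self-contained sketch of the basic call pattern. The input directory is a placeholder; setFilesFilter installs a FilePathFilter that decides which files under the input path are read.

import org.apache.flink.api.common.io.FilePathFilter;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;

public class SetFilesFilterSketch {
    public static void main(String[] args) {
        // "/data/in" is a placeholder input directory.
        TextInputFormat format = new TextInputFormat(new Path("/data/in"));
        // The default filter skips files whose names start with "." or "_",
        // including Hadoop's in-progress "_COPYING_" files.
        format.setFilesFilter(FilePathFilter.createDefaultFilter());
    }
}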

Example 1: main

import org.apache.flink.api.common.io.GlobFilePathFilter;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextInputFormat; // import the class the method depends on
import org.apache.flink.core.fs.Path;

import java.io.File;
import java.util.Arrays;
import java.util.Collections;

public static void main(String... args) throws Exception {
    File txtFile = new File("/tmp/test/file.txt");
    File csvFile = new File("/tmp/test/file.csv");
    File binFile = new File("/tmp/test/file.bin");

    // writeToFile is a helper defined elsewhere in GlobExample.
    writeToFile(txtFile, "txt");
    writeToFile(csvFile, "csv");
    writeToFile(binFile, "bin");

    final ExecutionEnvironment env =
            ExecutionEnvironment.getExecutionEnvironment();
    final TextInputFormat format = new TextInputFormat(new Path("/tmp/test"));

    // Include everything, then exclude anything matching "**/file.bin".
    GlobFilePathFilter filesFilter = new GlobFilePathFilter(
            Collections.singletonList("**"),
            Arrays.asList("**/file.bin")
    );
    format.setFilesFilter(filesFilter);

    DataSet<String> result = env.readFile(format, "/tmp/test");
    result.writeAsText("/tmp/out");
    env.execute("GlobFilePathFilter-Test");
}
 
Author: mushketyk, Project: flink-examples, Source: GlobExample.java
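GlobFilePathFilter is only one built-in implementation; setFilesFilter accepts any FilePathFilter. As a hypothetical illustration (not taken from the example above), a custom filter overrides filterPath, which returns true for paths that should be excluded:

import org.apache.flink.api.common.io.FilePathFilter;
import org.apache.flink.core.fs.Path;

// Hypothetical filter that excludes temporary files.
public class IgnoreTmpFilter extends FilePathFilter {
    @Override
    public boolean filterPath(Path filePath) {
        // Returning true means "exclude this path". Directories pass
        // through the filter too, so match only a file-name suffix here.
        return filePath.getName().endsWith(".tmp");
    }
}

It would be installed the same way: format.setFilesFilter(new IgnoreTmpFilter());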

Example 2: testProgram

import org.apache.flink.api.java.io.TextInputFormat; // import the class the method depends on
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// set the restart strategy.
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
	env.enableCheckpointing(10);

	// create and start the file creating thread.
	fc = new FileCreator();
	fc.start();

	// create the monitoring source along with the necessary readers.
	TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> inputStream = env.readFile(format, localFsURI,
		FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction sink = new TestingSinkFunction();

	inputStream.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	}).addSink(sink).setParallelism(1);
}
 
Author: axbaretto, Project: flink, Source: ContinuousFileProcessingCheckpointITCase.java

Example 3: testSortingOnModTime

import org.apache.flink.api.java.io.TextInputFormat; // import the class the method depends on
@Test
public void testSortingOnModTime() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final long[] modTimes = new long[NO_OF_FILES];
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];

	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		Thread.sleep(400);

		filesCreated[i] = file.f0;
		modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime();
	}

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// this is just to verify that all splits have been forwarded later.
	FileInputSplit[] splits = format.createInputSplits(1);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

	ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes);

	monitoringFunction.open(new Configuration());
	monitoringFunction.run(context);
	Assert.assertEquals(splits.length, context.getCounter());

	// delete the created files.
	for (int i = 0; i < NO_OF_FILES; i++) {
		hdfs.delete(filesCreated[i], false);
	}
}
 
Author: axbaretto, Project: flink, Source: ContinuousFileProcessingTest.java

Example 4: testProgram

import org.apache.flink.api.java.io.TextInputFormat; // import the class the method depends on
@Override
protected void testProgram() throws Exception {

	/*
	 * This test checks the interplay between the monitor and the reader,
	 * and also the failExternally() functionality. To test the latter, we
	 * set the parallelism to 1 so that the sink, which throws the
	 * SuccessException to signal the end of the test, is chained with the
	 * reader.
	 */

	FileCreator fileCreator = new FileCreator(INTERVAL);
	Thread t = new Thread(fileCreator);
	t.start();

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);

		format.setFilesFilter(FilePathFilter.createDefaultFilter());
		ContinuousFileMonitoringFunction<String> monitoringFunction =
			new ContinuousFileMonitoringFunction<>(format, hdfsURI,
				FileProcessingMode.PROCESS_CONTINUOUSLY,
				env.getParallelism(), INTERVAL);

		TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);
		ContinuousFileReaderOperator<String, ?> reader = new ContinuousFileReaderOperator<>(format);
		TestingSinkFunction sink = new TestingSinkFunction();

		DataStream<FileInputSplit> splits = env.addSource(monitoringFunction);
		splits.transform("FileSplitReader", typeInfo, reader).addSink(sink).setParallelism(1);
		env.execute();

	} catch (Exception e) {
		Throwable th = e;
		int depth = 0;

		for (; depth < 20; depth++) {
			if (th instanceof SuccessException) {
				try {
					postSubmit();
				} catch (Exception e1) {
					e1.printStackTrace();
				}
				return;
			} else if (th.getCause() != null) {
				th = th.getCause();
			} else {
				break;
			}
		}
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Author: axbaretto, Project: flink, Source: ContinuousFileMonitoringFunctionITCase.java

Example 5: testProcessOnce

import org.apache.flink.api.java.io.TextInputFormat; // import the class the method depends on
@Test
public void testProcessOnce() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final OneShotLatch latch = new OneShotLatch();

	// create a single file in the directory
	Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
		createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
	Assert.assertTrue(hdfs.exists(bootstrap.f0));

	// the source is supposed to read only this file.
	final Set<String> filesToBeRead = new TreeSet<>();
	filesToBeRead.add(bootstrap.f0.getName());

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

	final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction);

	final Thread t = new Thread() {
		@Override
		public void run() {
			try {
				monitoringFunction.open(new Configuration());
				monitoringFunction.run(context);

				// we would never arrive here if we were in
				// PROCESS_CONTINUOUSLY mode.

				// this will trigger the latch
				context.close();

			} catch (Exception e) {
				Assert.fail(e.getMessage());
			}
		}
	};
	t.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	// create some additional files that should be processed in the case of PROCESS_CONTINUOUSLY
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		filesCreated[i] = ignoredFile.f0;
	}

	// wait until the monitoring thread exits
	t.join();

	Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

	// finally delete the files created for the test.
	hdfs.delete(bootstrap.f0, false);
	for (org.apache.hadoop.fs.Path path: filesCreated) {
		hdfs.delete(path, false);
	}
}
 
Author: axbaretto, Project: flink, Source: ContinuousFileProcessingTest.java

Example 6: testProcessContinuously

import org.apache.flink.api.java.io.TextInputFormat; // import the class the method depends on
@Test
public void testProcessContinuously() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final OneShotLatch latch = new OneShotLatch();

	// create a single file in the directory
	Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
		createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
	Assert.assertTrue(hdfs.exists(bootstrap.f0));

	final Set<String> filesToBeRead = new TreeSet<>();
	filesToBeRead.add(bootstrap.f0.getName());

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

	final int totalNoOfFilesToBeRead = NO_OF_FILES + 1; // 1 for the bootstrap + NO_OF_FILES
	final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch,
		monitoringFunction, 1, totalNoOfFilesToBeRead);

	final Thread t = new Thread() {

		@Override
		public void run() {
			try {
				monitoringFunction.open(new Configuration());
				monitoringFunction.run(context);
			} catch (Exception e) {
				Assert.fail(e.getMessage());
			}
		}
	};
	t.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	// create some additional files that will be processed in the case of PROCESS_CONTINUOUSLY
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		filesCreated[i] = file.f0;
		filesToBeRead.add(file.f0.getName());
	}

	// wait until the monitoring thread exits
	t.join();

	Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

	// finally delete the files created for the test.
	hdfs.delete(bootstrap.f0, false);
	for (org.apache.hadoop.fs.Path path: filesCreated) {
		hdfs.delete(path, false);
	}
}
 
Author: axbaretto, Project: flink, Source: ContinuousFileProcessingTest.java

Example 7: readTextFile

import org.apache.flink.api.java.io.TextInputFormat; // import the class the method depends on
/**
 * Reads the given file line-by-line and creates a data stream that contains a string with the
 * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be
 * used to read the files.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed,
 * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data,
 * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point.
 *
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param charsetName
 * 		The name of the character set used to read the file
 * @return The data stream that represents the data read from the given file as text lines
 */
public DataStreamSource<String> readTextFile(String filePath, String charsetName) {
	Preconditions.checkNotNull(filePath, "The file path must not be null.");
	Preconditions.checkArgument(!filePath.isEmpty(), "The file path must not be empty.");

	TextInputFormat format = new TextInputFormat(new Path(filePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());
	TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO;
	format.setCharsetName(charsetName);

	return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo);
}
 
Author: axbaretto, Project: flink, Source: StreamExecutionEnvironment.java
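For completeness, a minimal sketch of how an application might call this method; the file URI and charset are placeholders:

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ReadTextFileSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // "file:///tmp/input.txt" is a placeholder; hdfs:// URIs work as well.
        DataStreamSource<String> lines = env.readTextFile("file:///tmp/input.txt", "ISO-8859-1");
        lines.print();
        env.execute("readTextFile-sketch");
    }
}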


Note: The org.apache.flink.api.java.io.TextInputFormat.setFilesFilter examples above were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. Copyright of the source code remains with the original authors; consult each project's license before redistributing or reusing it. Do not republish without permission.