This article collects typical usage examples of the Java method org.apache.flink.api.java.io.TextInputFormat.setFilesFilter. If you have been wondering what TextInputFormat.setFilesFilter does, how to call it, and what it looks like in real code, the curated method examples below may help. You can also explore further usage examples of the enclosing class, org.apache.flink.api.java.io.TextInputFormat.
The sections below present 7 code examples of TextInputFormat.setFilesFilter, sorted by popularity by default.
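Before diving into the examples, a minimal sketch of what the method does: setFilesFilter(FilePathFilter) tells the input format which paths to skip while enumerating files under a directory. The snippet below is illustrative only; the input path is an assumption, and FilePathFilter.createDefaultFilter() is the built-in filter that most of the examples that follow also use.

import org.apache.flink.api.common.io.FilePathFilter;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;

public class FilesFilterSketch {
    public static void main(String[] args) {
        // The input directory here is an illustrative assumption.
        TextInputFormat format = new TextInputFormat(new Path("/tmp/input"));
        // The default filter skips hidden/partial files, e.g. names starting
        // with "." or "_" and in-flight "_COPYING_" files.
        format.setFilesFilter(FilePathFilter.createDefaultFilter());
    }
}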
Example 1: main
import org.apache.flink.api.java.io.TextInputFormat; // import the package/class the method depends on
public static void main(String... args) throws Exception {
    File txtFile = new File("/tmp/test/file.txt");
    File csvFile = new File("/tmp/test/file.csv");
    File binFile = new File("/tmp/test/file.bin");

    writeToFile(txtFile, "txt");
    writeToFile(csvFile, "csv");
    writeToFile(binFile, "bin");

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final TextInputFormat format = new TextInputFormat(new Path("/tmp/test"));

    // include everything, then exclude the binary file by glob pattern.
    GlobFilePathFilter filesFilter = new GlobFilePathFilter(
            Collections.singletonList("**"),
            Arrays.asList("**/file.bin")
    );
    System.out.println(Arrays.toString(GlobFilePathFilter.class.getDeclaredFields()));
    format.setFilesFilter(filesFilter);

    DataSet<String> result = env.readFile(format, "/tmp/test");
    result.writeAsText("/tmp/out");
    env.execute("GlobFilePathFilter-Test");
}
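Example 1 relies on the built-in GlobFilePathFilter. When glob patterns are not flexible enough, setFilesFilter also accepts a hand-rolled filter. The sketch below is illustrative (the class name BinFileFilter and the ".bin" suffix check are invented for this example) and shows the single method a custom FilePathFilter must implement; note that filterPath returns true for paths that should be excluded.

import org.apache.flink.api.common.io.FilePathFilter;
import org.apache.flink.core.fs.Path;

// Hypothetical custom filter: excludes every file with a ".bin" suffix,
// mirroring what the glob pattern "**/file.bin" does in Example 1.
public class BinFileFilter extends FilePathFilter {
    @Override
    public boolean filterPath(Path filePath) {
        // returning true means "filter this path out".
        return filePath.getName().endsWith(".bin");
    }
}

Passing new BinFileFilter() to format.setFilesFilter(...) would then achieve the same exclusion as the glob-based setup above.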
Example 2: testProgram
import org.apache.flink.api.java.io.TextInputFormat; // import the package/class the method depends on
@Override
public void testProgram(StreamExecutionEnvironment env) {

    // set the restart strategy.
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
    env.enableCheckpointing(10);

    // create and start the file creating thread.
    fc = new FileCreator();
    fc.start();

    // create the monitoring source along with the necessary readers.
    TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());

    DataStream<String> inputStream = env.readFile(format, localFsURI,
        FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

    TestingSinkFunction sink = new TestingSinkFunction();

    inputStream.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public void flatMap(String value, Collector<String> out) throws Exception {
            out.collect(value);
        }
    }).addSink(sink).setParallelism(1);
}
Example 3: testSortingOnModTime
import org.apache.flink.api.java.io.TextInputFormat; // import the package/class the method depends on
@Test
public void testSortingOnModTime() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    final long[] modTimes = new long[NO_OF_FILES];
    final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];

    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        Thread.sleep(400);

        filesCreated[i] = file.f0;
        modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime();
    }

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());

    // this is just to verify that all splits have been forwarded later.
    FileInputSplit[] splits = format.createInputSplits(1);

    ContinuousFileMonitoringFunction<String> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(format,
            FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

    ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes);

    monitoringFunction.open(new Configuration());
    monitoringFunction.run(context);

    Assert.assertEquals(splits.length, context.getCounter());

    // delete the created files.
    for (int i = 0; i < NO_OF_FILES; i++) {
        hdfs.delete(filesCreated[i], false);
    }
}
Example 4: testProgram
import org.apache.flink.api.java.io.TextInputFormat; // import the package/class the method depends on
@Override
protected void testProgram() throws Exception {

    /*
     * This test checks the interplay between the monitor and the reader
     * and also the failExternally() functionality. To test the latter we
     * set the parallelism to 1 so that we have the chaining between the sink,
     * which throws the SuccessException to signal the end of the test, and the
     * reader.
     */

    FileCreator fileCreator = new FileCreator(INTERVAL);
    Thread t = new Thread(fileCreator);
    t.start();

    TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
    format.setFilePath(hdfsURI);

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        format.setFilesFilter(FilePathFilter.createDefaultFilter());
        ContinuousFileMonitoringFunction<String> monitoringFunction =
            new ContinuousFileMonitoringFunction<>(format, hdfsURI,
                FileProcessingMode.PROCESS_CONTINUOUSLY,
                env.getParallelism(), INTERVAL);

        TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);
        ContinuousFileReaderOperator<String, ?> reader = new ContinuousFileReaderOperator<>(format);
        TestingSinkFunction sink = new TestingSinkFunction();

        DataStream<FileInputSplit> splits = env.addSource(monitoringFunction);
        splits.transform("FileSplitReader", typeInfo, reader).addSink(sink).setParallelism(1);
        env.execute();

    } catch (Exception e) {
        Throwable th = e;
        int depth = 0;

        for (; depth < 20; depth++) {
            if (th instanceof SuccessException) {
                try {
                    postSubmit();
                } catch (Exception e1) {
                    e1.printStackTrace();
                }
                return;
            } else if (th.getCause() != null) {
                th = th.getCause();
            } else {
                break;
            }
        }
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 5: testProcessOnce
import org.apache.flink.api.java.io.TextInputFormat; // import the package/class the method depends on
@Test
public void testProcessOnce() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    final OneShotLatch latch = new OneShotLatch();

    // create a single file in the directory
    Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
        createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
    Assert.assertTrue(hdfs.exists(bootstrap.f0));

    // the source is supposed to read only this file.
    final Set<String> filesToBeRead = new TreeSet<>();
    filesToBeRead.add(bootstrap.f0.getName());

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());

    final ContinuousFileMonitoringFunction<String> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(format,
            FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

    final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction);

    final Thread t = new Thread() {
        @Override
        public void run() {
            try {
                monitoringFunction.open(new Configuration());
                monitoringFunction.run(context);

                // we would never arrive here if we were in
                // PROCESS_CONTINUOUSLY mode.

                // this will trigger the latch
                context.close();

            } catch (Exception e) {
                Assert.fail(e.getMessage());
            }
        }
    };
    t.start();

    if (!latch.isTriggered()) {
        latch.await();
    }

    // create some additional files that should be processed in the case of PROCESS_CONTINUOUSLY
    final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated[i] = ignoredFile.f0;
    }

    // wait until the monitoring thread exits
    t.join();

    Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

    // finally delete the files created for the test.
    hdfs.delete(bootstrap.f0, false);
    for (org.apache.hadoop.fs.Path path: filesCreated) {
        hdfs.delete(path, false);
    }
}
Example 6: testProcessContinuously
import org.apache.flink.api.java.io.TextInputFormat; // import the package/class the method depends on
@Test
public void testProcessContinuously() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    final OneShotLatch latch = new OneShotLatch();

    // create a single file in the directory
    Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
        createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
    Assert.assertTrue(hdfs.exists(bootstrap.f0));

    final Set<String> filesToBeRead = new TreeSet<>();
    filesToBeRead.add(bootstrap.f0.getName());

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());

    final ContinuousFileMonitoringFunction<String> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(format,
            FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

    final int totalNoOfFilesToBeRead = NO_OF_FILES + 1; // 1 for the bootstrap + NO_OF_FILES
    final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch,
        monitoringFunction, 1, totalNoOfFilesToBeRead);

    final Thread t = new Thread() {
        @Override
        public void run() {
            try {
                monitoringFunction.open(new Configuration());
                monitoringFunction.run(context);
            } catch (Exception e) {
                Assert.fail(e.getMessage());
            }
        }
    };
    t.start();

    if (!latch.isTriggered()) {
        latch.await();
    }

    // create some additional files that will be processed in the case of PROCESS_CONTINUOUSLY
    final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated[i] = file.f0;
        filesToBeRead.add(file.f0.getName());
    }

    // wait until the monitoring thread exits
    t.join();

    Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

    // finally delete the files created for the test.
    hdfs.delete(bootstrap.f0, false);
    for (org.apache.hadoop.fs.Path path: filesCreated) {
        hdfs.delete(path, false);
    }
}
Example 7: readTextFile
import org.apache.flink.api.java.io.TextInputFormat; // import the package/class the method depends on
/**
 * Reads the given file line-by-line and creates a data stream that contains a string with the
 * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be
 * used to read the files.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed,
 * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data,
 * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point.
 *
 * @param filePath
 *         The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param charsetName
 *         The name of the character set used to read the file
 * @return The data stream that represents the data read from the given file as text lines
 */
public DataStreamSource<String> readTextFile(String filePath, String charsetName) {
    Preconditions.checkNotNull(filePath, "The file path must not be null.");
    Preconditions.checkArgument(!filePath.isEmpty(), "The file path must not be empty.");

    TextInputFormat format = new TextInputFormat(new Path(filePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());
    TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO;
    format.setCharsetName(charsetName);

    return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo);
}