This page collects typical usage examples of the Java class org.apache.flink.core.fs.FileInputSplit. If you are wondering what the FileInputSplit class does, how to use it, or what working examples look like, the curated code samples below may help.
The FileInputSplit class belongs to the org.apache.flink.core.fs package. Fifteen code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Java code examples.
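Before the examples, here is a minimal, self-contained sketch of how a FileInputSplit is typically produced and consumed: an input format creates the splits, and records are then read from each split. The file path file:///tmp/data.txt and the choice of TextInputFormat are assumptions for illustration only and are not taken from the examples below.

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

public class FileInputSplitSketch {
    public static void main(String[] args) throws Exception {
        // Assumption: a readable text file exists at this path.
        TextInputFormat format = new TextInputFormat(new Path("file:///tmp/data.txt"));
        format.configure(new Configuration());

        // Let the format split the file; the argument is a hint for the minimum number of splits.
        FileInputSplit[] splits = format.createInputSplits(1);

        for (FileInputSplit split : splits) {
            format.open(split);
            // Read every record of the split, as the test examples below do.
            while (!format.reachedEnd()) {
                String line = format.nextRecord(null);
                if (line != null) {
                    System.out.println(line);
                }
            }
            format.close();
        }
    }
}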
Example 1: testReadCustomDelimiter
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testReadCustomDelimiter() throws IOException {
    final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2";
    final FileInputSplit split = createTempFile(myString);

    final Configuration parameters = new Configuration();
    format.setDelimiter("$$$");
    format.configure(parameters);
    format.open(split);

    String first = format.nextRecord(null);
    assertNotNull(first);
    assertEquals("my key|my val", first);

    String second = format.nextRecord(null);
    assertNotNull(second);
    assertEquals("my key2\n$$ctd.$$|my value2", second);

    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
}
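Several of the tests on this page call a createTempFile helper that is defined elsewhere in the test class and not shown here. For context, a plausible sketch of such a helper is below; the body is an assumption, not the original implementation. It writes the string to a temporary file (using java.io and java.nio.charset) and wraps the entire file in a single split.

// Hypothetical helper (not the original): writes the contents to a temp file
// and returns one FileInputSplit covering the whole file.
private FileInputSplit createTempFile(String contents) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();
    try (OutputStreamWriter out = new OutputStreamWriter(
            new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        out.write(contents);
    }
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0,
        tempFile.length(), new String[] {"localhost"});
}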
Example 2: testCsvWithNullEdge
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testCsvWithNullEdge() throws Exception {
    /*
     * Test fromCsvReader with a vertex path and an edge path, using NullValue as the edge value.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String vertexFileContent = "1,one\n" +
        "2,two\n" +
        "3,three\n";
    final String edgeFileContent = "1,2\n" +
        "3,2\n" +
        "3,1\n";
    final FileInputSplit split = createTempFile(vertexFileContent);
    final FileInputSplit edgeSplit = createTempFile(edgeFileContent);

    Graph<Long, String, NullValue> graph = Graph.fromCsvReader(split.getPath().toString(), edgeSplit.getPath().toString(),
        env).vertexTypes(Long.class, String.class);

    List<Triplet<Long, String, NullValue>> result = graph.getTriplets().collect();

    expectedResult = "1,2,one,two,(null)\n" +
        "3,2,three,two,(null)\n" +
        "3,1,three,one,(null)\n";
    compareResultAsTuples(result, expectedResult);
}
Example 3: getInputSplitsSortedByModTime
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
* Creates the input splits to be forwarded to the downstream tasks of the
* {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before
* being forwarded and only splits belonging to files in the {@code eligibleFiles}
* list will be processed.
* @param eligibleFiles The files to process.
*/
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>();
    if (eligibleFiles.isEmpty()) {
        return splitsByModTime;
    }

    for (FileInputSplit split : format.createInputSplits(readerParallelism)) {
        FileStatus fileStatus = eligibleFiles.get(split.getPath());
        if (fileStatus != null) {
            Long modTime = fileStatus.getModificationTime();
            List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime);
            if (splitsToForward == null) {
                splitsToForward = new ArrayList<>();
                splitsByModTime.put(modTime, splitsToForward);
            }
            splitsToForward.add(new TimestampedFileInputSplit(
                modTime, split.getSplitNumber(), split.getPath(),
                split.getStart(), split.getLength(), split.getHostnames()));
        }
    }
    return splitsByModTime;
}
Example 4: testCreateWithCsvFile
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testCreateWithCsvFile() throws Exception {
    /*
     * Test with two CSV files, one with vertex data and one with edge data.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String fileContent = "1,1\n" +
        "2,2\n" +
        "3,3\n";
    final FileInputSplit split = createTempFile(fileContent);

    final String fileContent2 = "1,2,ot\n" +
        "3,2,tt\n" +
        "3,1,to\n";
    final FileInputSplit split2 = createTempFile(fileContent2);

    Graph<Long, Long, String> graph = Graph.fromCsvReader(split.getPath().toString(), split2.getPath().toString(), env)
        .types(Long.class, Long.class, String.class);

    List<Triplet<Long, Long, String>> result = graph.getTriplets().collect();

    expectedResult = "1,2,1,2,ot\n" +
        "3,2,3,2,tt\n" +
        "3,1,3,1,to\n";
    compareResultAsTuples(result, expectedResult);
}
Example 5: testCsvWithConstantValueMapper
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testCsvWithConstantValueMapper() throws Exception {
    /*
     * Test fromCsvReader with an edge path and a mapper that assigns a constant Double as the vertex value.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String fileContent = "1,2,ot\n" +
        "3,2,tt\n" +
        "3,1,to\n";
    final FileInputSplit split = createTempFile(fileContent);

    Graph<Long, Double, String> graph = Graph.fromCsvReader(split.getPath().toString(),
        new AssignDoubleValueMapper(), env).types(Long.class, Double.class, String.class);

    List<Triplet<Long, Double, String>> result = graph.getTriplets().collect();
    //graph.getTriplets().writeAsCsv(resultPath);

    expectedResult = "1,2,0.1,0.1,ot\n" + "3,1,0.1,0.1,to\n" + "3,2,0.1,0.1,tt\n";
    compareResultAsTuples(result, expectedResult);
}
Example 6: checkJoinWithReplicatedSourceInputBehindMapChangingparallelism
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
 * Tests that the compiler fails for a join program with a replicated data source behind a map with changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .map(new IdMap()).setParallelism(DEFAULT_PARALLELISM + 1)
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 7: getOffsetAndLengthForSplit
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
private Tuple2<Long, Long> getOffsetAndLengthForSplit(FileInputSplit split, List<StripeInformation> stripes) {
    long splitStart = split.getStart();
    long splitEnd = splitStart + split.getLength();
    long readStart = Long.MAX_VALUE;
    long readEnd = Long.MIN_VALUE;

    for (StripeInformation s : stripes) {
        if (splitStart <= s.getOffset() && s.getOffset() < splitEnd) {
            // stripe starts in the split, so it is included
            readStart = Math.min(readStart, s.getOffset());
            readEnd = Math.max(readEnd, s.getOffset() + s.getLength());
        }
    }

    if (readStart < Long.MAX_VALUE) {
        // at least one stripe is included
        return Tuple2.of(readStart, readEnd - readStart);
    } else {
        // no stripe starts in this split, so there is nothing to read
        return Tuple2.of(0L, 0L);
    }
}
Example 8: testNoNestedDirectoryTrue
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
 * Test without a nested directory and with recursive.file.enumeration = true.
 */
@Test
public void testNoNestedDirectoryTrue() {
    try {
        String filePath = TestFileUtils.createTempFile("foo");
        this.format.setFilePath(new Path(filePath));
        this.config.setBoolean("recursive.file.enumeration", true);
        format.configure(this.config);

        FileInputSplit[] splits = format.createInputSplits(1);
        Assert.assertEquals(1, splits.length);
    } catch (Exception ex) {
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
Example 9: testReadDecimalTypeFile
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testReadDecimalTypeFile() throws IOException {
    rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    assertEquals(1, splits.length);
    rowOrcInputFormat.openInputFormat();
    rowOrcInputFormat.open(splits[0]);

    assertFalse(rowOrcInputFormat.reachedEnd());
    Row row = rowOrcInputFormat.nextRecord(null);

    // validate first row
    assertNotNull(row);
    assertEquals(1, row.getArity());
    assertEquals(BigDecimal.valueOf(-1000.5d), row.getField(0));

    // check correct number of rows
    long cnt = 1;
    while (!rowOrcInputFormat.reachedEnd()) {
        assertNotNull(rowOrcInputFormat.nextRecord(null));
        cnt++;
    }
    assertEquals(6000, cnt);
}
Example 10: initReader
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;

    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
            ? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }

    SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
    DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }

    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
Example 11: testReadFileInSplits
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testReadFileInSplits() throws IOException {
    rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
    rowOrcInputFormat.selectFields(0, 1);

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(4);
    assertEquals(4, splits.length);
    rowOrcInputFormat.openInputFormat();

    long cnt = 0;
    // read all splits
    for (FileInputSplit split : splits) {

        // open split
        rowOrcInputFormat.open(split);

        // read and count all rows
        while (!rowOrcInputFormat.reachedEnd()) {
            assertNotNull(rowOrcInputFormat.nextRecord(null));
            cnt++;
        }
    }
    // check that all rows have been read
    assertEquals(1920800, cnt);
}
Example 12: testMultiCharDelimiter
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testMultiCharDelimiter() throws IOException {
    final String myString = "www112xx1123yyy11123zzzzz1123";
    final FileInputSplit split = createTempFile(myString);

    final Configuration parameters = new Configuration();
    format.setDelimiter("1123");
    format.configure(parameters);
    format.open(split);

    String first = format.nextRecord(null);
    assertNotNull(first);
    assertEquals("www112xx", first);

    String second = format.nextRecord(null);
    assertNotNull(second);
    assertEquals("yyy1", second);

    String third = format.nextRecord(null);
    assertNotNull(third);
    assertEquals("zzzzz", third);

    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
}
Example 13: open
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Override
public void open(FileInputSplit split) throws IOException {
    super.open(split);

    // resolve each configured field name to a reflective Field on the POJO class
    pojoFields = new Field[pojoFieldNames.length];

    Map<String, Field> allFields = new HashMap<String, Field>();
    findAllFields(pojoTypeClass, allFields);

    for (int i = 0; i < pojoFieldNames.length; i++) {
        pojoFields[i] = allFields.get(pojoFieldNames[i]);

        if (pojoFields[i] != null) {
            pojoFields[i].setAccessible(true);
        } else {
            throw new RuntimeException("There is no field called \"" + pojoFieldNames[i] + "\" in " + pojoTypeClass.getName());
        }
    }
}
Example 14: checkJoinWithReplicatedSourceInputChangingparallelism
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
 * Tests that the compiler fails for a join program with a replicated data source and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputChangingparallelism() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM + 2)
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 15: testDelimiterOnBufferBoundary
import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testDelimiterOnBufferBoundary() throws IOException {
    String[] records = new String[]{"1234567890<DEL?NO!>1234567890", "1234567890<DEL?NO!>1234567890", "<DEL?NO!>"};
    String delimiter = "<DELIM>";
    String fileContent = StringUtils.join(records, delimiter);

    final FileInputSplit split = createTempFile(fileContent);
    final Configuration parameters = new Configuration();

    format.setBufferSize(12);
    format.setDelimiter(delimiter);
    format.configure(parameters);
    format.open(split);

    for (String record : records) {
        String value = format.nextRecord(null);
        assertEquals(record, value);
    }

    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());

    format.close();
}