Java FileInputSplit Class Code Examples

This article collects typical usage examples of the Java class org.apache.flink.core.fs.FileInputSplit. If you are wondering what FileInputSplit is for or how to use it, the curated class examples below should help.


The FileInputSplit class belongs to the org.apache.flink.core.fs package. Fifteen code examples of the class are shown below, sorted by popularity by default.
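As a quick orientation before the examples: a FileInputSplit describes the byte range of a file that one parallel task should read. A minimal sketch of building one by hand (the path, length, and hostname values below are made up for illustration):

import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

// A split covering the first 1024 bytes of a hypothetical local file,
// with split number 0 and no preferred hosts.
FileInputSplit split = new FileInputSplit(
		0,                                  // split number
		new Path("file:///tmp/input.txt"),  // file to read (made-up path)
		0,                                  // start offset in bytes
		1024,                               // length in bytes
		null);                              // preferred hostnames (may be null)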

Example 1: testReadCustomDelimiter

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testReadCustomDelimiter() throws IOException {
	final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2";
	final FileInputSplit split = createTempFile(myString);

	final Configuration parameters = new Configuration();

	format.setDelimiter("$$$");
	format.configure(parameters);
	format.open(split);

	String first = format.nextRecord(null);
	assertNotNull(first);
	assertEquals("my key|my val", first);

	String second = format.nextRecord(null);
	assertNotNull(second);
	assertEquals("my key2\n$$ctd.$$|my value2", second);

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
 
Developer: axbaretto, Project: flink, Lines: 23, Source: DelimitedInputFormatTest.java
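Several tests on this page (Examples 1, 12, and 15) call a createTempFile helper that is not shown. A plausible sketch of such a helper, under the assumption that it writes the content to a temporary file and wraps the entire file in a single split (the original test utility may differ):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

private static FileInputSplit createTempFile(String contents) throws IOException {
	// hypothetical helper: not the verbatim Flink test code
	File tempFile = File.createTempFile("test_contents", "tmp");
	tempFile.deleteOnExit();
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
		wrt.write(contents);
	}
	// a single split spanning the whole file
	return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0,
			tempFile.length(), new String[] {"localhost"});
}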

Example 2: testCsvWithNullEdge

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testCsvWithNullEdge() throws Exception {
	/*
	 * Tests fromCsvReader with vertex and edge paths and NullValue as the edge value.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final String vertexFileContent = "1,one\n" +
			"2,two\n" +
			"3,three\n";
	final String edgeFileContent = "1,2\n" +
			"3,2\n" +
			"3,1\n";
	final FileInputSplit split = createTempFile(vertexFileContent);
	final FileInputSplit edgeSplit = createTempFile(edgeFileContent);

	Graph<Long, String, NullValue> graph = Graph.fromCsvReader(split.getPath().toString(), edgeSplit.getPath().toString(),
			env).vertexTypes(Long.class, String.class);

	List<Triplet<Long, String, NullValue>> result = graph.getTriplets().collect();

	expectedResult = "1,2,one,two,(null)\n" +
			"3,2,three,two,(null)\n" +
			"3,1,three,one,(null)\n";

	compareResultAsTuples(result, expectedResult);
}
 
Developer: axbaretto, Project: flink, Lines: 27, Source: GraphCreationWithCsvITCase.java

Example 3: getInputSplitsSortedByModTime

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
 * Creates the input splits to be forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before
 * being forwarded, and only splits belonging to files in the {@code eligibleFiles}
 * map are processed.
 * @param eligibleFiles The files to process.
 * @return The input splits, grouped and sorted by file modification time.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
			Map<Path, FileStatus> eligibleFiles) throws IOException {

	Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>();
	if (eligibleFiles.isEmpty()) {
		return splitsByModTime;
	}

	for (FileInputSplit split: format.createInputSplits(readerParallelism)) {
		FileStatus fileStatus = eligibleFiles.get(split.getPath());
		if (fileStatus != null) {
			Long modTime = fileStatus.getModificationTime();
			List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime);
			if (splitsToForward == null) {
				splitsToForward = new ArrayList<>();
				splitsByModTime.put(modTime, splitsToForward);
			}
			splitsToForward.add(new TimestampedFileInputSplit(
				modTime, split.getSplitNumber(), split.getPath(),
				split.getStart(), split.getLength(), split.getHostnames()));
		}
	}
	return splitsByModTime;
}
 
Developer: axbaretto, Project: flink, Lines: 32, Source: ContinuousFileMonitoringFunction.java
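On Java 8 and later, the get-then-put bucketing inside the loop above can be collapsed with Map.computeIfAbsent. A sketch of the equivalent loop body, reusing the variable names from the example (behavior unchanged):

Long modTime = fileStatus.getModificationTime();
splitsByModTime
	.computeIfAbsent(modTime, k -> new ArrayList<>())
	.add(new TimestampedFileInputSplit(
		modTime, split.getSplitNumber(), split.getPath(),
		split.getStart(), split.getLength(), split.getHostnames()));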

Example 4: testCreateWithCsvFile

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testCreateWithCsvFile() throws Exception {
	/*
	 * Tests with two CSV files, one with vertex data and one with edge data.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final String fileContent = "1,1\n" +
			"2,2\n" +
			"3,3\n";
	final FileInputSplit split = createTempFile(fileContent);
	final String fileContent2 = "1,2,ot\n" +
			"3,2,tt\n" +
			"3,1,to\n";
	final FileInputSplit split2 = createTempFile(fileContent2);

	Graph<Long, Long, String> graph = Graph.fromCsvReader(split.getPath().toString(), split2.getPath().toString(), env)
			.types(Long.class, Long.class, String.class);

	List<Triplet<Long, Long, String>> result = graph.getTriplets().collect();

	expectedResult = "1,2,1,2,ot\n" +
			"3,2,3,2,tt\n" +
			"3,1,3,1,to\n";

	compareResultAsTuples(result, expectedResult);
}
 
Developer: axbaretto, Project: flink, Lines: 27, Source: GraphCreationWithCsvITCase.java

Example 5: testCsvWithConstantValueMapper

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testCsvWithConstantValueMapper() throws Exception {
	/*
	 * Tests fromCsvReader with an edge path and a mapper that assigns a constant Double as the vertex value.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final String fileContent = "1,2,ot\n" +
			"3,2,tt\n" +
			"3,1,to\n";
	final FileInputSplit split = createTempFile(fileContent);

	Graph<Long, Double, String> graph = Graph.fromCsvReader(split.getPath().toString(),
			new AssignDoubleValueMapper(), env).types(Long.class, Double.class, String.class);

	List<Triplet<Long, Double, String>> result = graph.getTriplets().collect();
	//graph.getTriplets().writeAsCsv(resultPath);
	expectedResult = "1,2,0.1,0.1,ot\n" + "3,1,0.1,0.1,to\n" + "3,2,0.1,0.1,tt\n";
	compareResultAsTuples(result, expectedResult);
}
 
Developer: axbaretto, Project: flink, Lines: 20, Source: GraphCreationWithCsvITCase.java

Example 6: checkJoinWithReplicatedSourceInputBehindMapChangingparallelism

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
 * Tests that the compiler fails for a join program with a replicated data source behind a map and a changed parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap()).setParallelism(DEFAULT_PARALLELISM+1)
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

}
 
Developer: axbaretto, Project: flink, Lines: 28, Source: ReplicatingDataSourceTest.java

Example 7: getOffsetAndLengthForSplit

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
private Tuple2<Long, Long> getOffsetAndLengthForSplit(FileInputSplit split, List<StripeInformation> stripes) {
	long splitStart = split.getStart();
	long splitEnd = splitStart + split.getLength();

	long readStart = Long.MAX_VALUE;
	long readEnd = Long.MIN_VALUE;

	for (StripeInformation s : stripes) {
		if (splitStart <= s.getOffset() && s.getOffset() < splitEnd) {
			// stripe starts in split, so it is included
			readStart = Math.min(readStart, s.getOffset());
			readEnd = Math.max(readEnd, s.getOffset() + s.getLength());
		}
	}

	if (readStart < Long.MAX_VALUE) {
		// at least one stripe is included
		return Tuple2.of(readStart, readEnd - readStart);
	} else {
		return Tuple2.of(0L, 0L);
	}
}
 
Developer: axbaretto, Project: flink, Lines: 23, Source: OrcRowInputFormat.java
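The rule implemented above is that an ORC stripe belongs to the split in which it starts, and the returned range is widened to cover every such stripe completely, even past the split's nominal end. A hypothetical worked example with plain numbers (a standalone illustration, not part of OrcRowInputFormat):

// Split covers bytes [0, 100); stripes sit at offset 0 (length 60)
// and offset 60 (length 60). Both start inside the split, so the
// read range becomes [0, 120), i.e. 20 bytes past the split end.
long splitStart = 0, splitEnd = 100;
long[][] stripes = {{0, 60}, {60, 60}}; // {offset, length}
long readStart = Long.MAX_VALUE;
long readEnd = Long.MIN_VALUE;
for (long[] s : stripes) {
	if (splitStart <= s[0] && s[0] < splitEnd) {
		readStart = Math.min(readStart, s[0]);
		readEnd = Math.max(readEnd, s[0] + s[1]);
	}
}
// readStart == 0, readEnd == 120, so the result is Tuple2.of(0L, 120L)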

Example 8: testNoNestedDirectoryTrue

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
 * Tests a flat (non-nested) directory with recursive.file.enumeration = true.
 */
@Test
public void testNoNestedDirectoryTrue() {
	try {
		String filePath = TestFileUtils.createTempFile("foo");

		this.format.setFilePath(new Path(filePath));
		this.config.setBoolean("recursive.file.enumeration", true);
		format.configure(this.config);

		FileInputSplit[] splits = format.createInputSplits(1);
		Assert.assertEquals(1, splits.length);
	} catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail(ex.getMessage());
	}
}
 
Developer: axbaretto, Project: flink, Lines: 20, Source: EnumerateNestedFilesTest.java

Example 9: testReadDecimalTypeFile

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testReadDecimalTypeFile() throws IOException {
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();
	rowOrcInputFormat.open(splits[0]);

	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);

	// validate first row
	assertNotNull(row);
	assertEquals(1, row.getArity());
	assertEquals(BigDecimal.valueOf(-1000.5d), row.getField(0));

	// check correct number of rows
	long cnt = 1;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	assertEquals(6000, cnt);
}
 
Developer: axbaretto, Project: flink, Lines: 26, Source: OrcRowInputFormatTest.java

Example 10: initReader

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}
	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Developer: axbaretto, Project: flink, Lines: 25, Source: AvroInputFormat.java
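Note that initReader only computes the split's end offset. Because Avro files are block-structured, the reader must also be positioned at the first sync marker at or after the split start before records are consumed; in AvroInputFormat this is typically done in open(), roughly along these lines (a sketch, not the verbatim source; lastSync is assumed to be a field tracking the last sync point seen):

// seek to the first Avro block boundary at or after the split start,
// so parallel readers never begin mid-block
dataFileReader.sync(split.getStart());
lastSync = dataFileReader.previousSync();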

Example 11: testReadFileInSplits

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testReadFileInSplits() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.selectFields(0, 1);

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(4);
	assertEquals(4, splits.length);
	rowOrcInputFormat.openInputFormat();

	long cnt = 0;
	// read all splits
	for (FileInputSplit split : splits) {

		// open split
		rowOrcInputFormat.open(split);
		// read and count all rows
		while (!rowOrcInputFormat.reachedEnd()) {
			assertNotNull(rowOrcInputFormat.nextRecord(null));
			cnt++;
		}
	}
	// check that all rows have been read
	assertEquals(1920800, cnt);
}
 
Developer: axbaretto, Project: flink, Lines: 26, Source: OrcRowInputFormatTest.java

Example 12: testMultiCharDelimiter

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testMultiCharDelimiter() throws IOException {
	final String myString = "www112xx1123yyy11123zzzzz1123";
	final FileInputSplit split = createTempFile(myString);

	final Configuration parameters = new Configuration();

	format.setDelimiter("1123");
	format.configure(parameters);
	format.open(split);

	String first = format.nextRecord(null);
	assertNotNull(first);
	assertEquals("www112xx", first);

	// in "yyy11123" the delimiter "1123" first matches at the second '1',
	// so this record keeps the leading "yyy1"
	String second = format.nextRecord(null);
	assertNotNull(second);
	assertEquals("yyy1", second);

	String third = format.nextRecord(null);
	assertNotNull(third);
	assertEquals("zzzzz", third);

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
 
Developer: axbaretto, Project: flink, Lines: 27, Source: DelimitedInputFormatTest.java

Example 13: open

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	pojoFields = new Field[pojoFieldNames.length];

	Map<String, Field> allFields = new HashMap<String, Field>();

	findAllFields(pojoTypeClass, allFields);

	for (int i = 0; i < pojoFieldNames.length; i++) {
		pojoFields[i] = allFields.get(pojoFieldNames[i]);

		if (pojoFields[i] != null) {
			pojoFields[i].setAccessible(true);
		} else {
			throw new RuntimeException("There is no field called \"" + pojoFieldNames[i] + "\" in " + pojoTypeClass.getName());
		}
	}
}
 
Developer: axbaretto, Project: flink, Lines: 21, Source: PojoCsvInputFormat.java
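The findAllFields helper called in open() is not shown on this page. A plausible sketch, assuming it gathers the declared fields of the POJO class and all of its superclasses (the original implementation may differ, e.g. in how shadowed names are handled):

import java.lang.reflect.Field;
import java.util.Map;

private void findAllFields(Class<?> clazz, Map<String, Field> allFields) {
	// fields declared directly on this class, regardless of visibility
	for (Field field : clazz.getDeclaredFields()) {
		// putIfAbsent keeps subclass fields when a superclass declares the same name
		allFields.putIfAbsent(field.getName(), field);
	}
	// recurse so inherited fields are found as well
	if (clazz.getSuperclass() != null) {
		findAllFields(clazz.getSuperclass(), allFields);
	}
}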

Example 14: checkJoinWithReplicatedSourceInputChangingparallelism

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
/**
 * Tests that the compiler fails for a join program with a replicated data source and a changed parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputChangingparallelism() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2)
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
 
Developer: axbaretto, Project: flink, Lines: 26, Source: ReplicatingDataSourceTest.java

Example 15: testDelimiterOnBufferBoundary

import org.apache.flink.core.fs.FileInputSplit; // import the required package/class
@Test
public void testDelimiterOnBufferBoundary() throws IOException {

	String[] records = new String[]{"1234567890<DEL?NO!>1234567890", "1234567890<DEL?NO!>1234567890", "<DEL?NO!>"};
	String delimiter = "<DELIM>";
	String fileContent = StringUtils.join(records, delimiter);
	final FileInputSplit split = createTempFile(fileContent);
	final Configuration parameters = new Configuration();

	// a 12-byte read buffer cannot hold a full record, so the 7-byte
	// delimiter "<DELIM>" is forced to straddle buffer refills
	format.setBufferSize(12);
	format.setDelimiter(delimiter);
	format.configure(parameters);
	format.open(split);

	for (String record : records) {
		String value = format.nextRecord(null);
		assertEquals(record, value);
	}

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());

	format.close();
}
 
Developer: axbaretto, Project: flink, Lines: 27, Source: DelimitedInputFormatTest.java


Note: The org.apache.flink.core.fs.FileInputSplit class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their original authors, who retain copyright of the source code; consult the corresponding project's license before distributing or using it. Do not reproduce without permission.