This article collects typical usage examples of the Java class org.apache.beam.sdk.io.BoundedSource.BoundedReader. If you are wondering what the BoundedReader class does, how to use it, or want to see it in context, the curated examples below may help.
The BoundedReader class belongs to the org.apache.beam.sdk.io.BoundedSource package. 15 code examples of the class are presented below, ordered by popularity.
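Before the examples, here is a minimal sketch of the canonical read loop that most snippets below build on: start() positions the reader on the first record, advance() moves to each subsequent one, both return false once input is exhausted, and try-with-resources closes the reader. CountingSource.upTo is used only as a convenient (deprecated) bounded test source.

import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class BoundedReaderReadLoop {
  public static void main(String[] args) throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    @SuppressWarnings("deprecation") // CountingSource is a convenient bounded test source
    BoundedSource<Long> source = CountingSource.upTo(10);
    List<Long> values = new ArrayList<>();
    // start() reads the first record; advance() reads each subsequent record.
    try (BoundedSource.BoundedReader<Long> reader = source.createReader(options)) {
      for (boolean more = reader.start(); more; more = reader.advance()) {
        values.add(reader.getCurrent());
      }
    }
    System.out.println(values); // prints [0, 1, ..., 9]
  }
}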
Example 1: testToUnsplittableSource
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testToUnsplittableSource() throws Exception {
PipelineOptions options = PipelineOptionsFactory.create();
BoundedSource<Long> baseSource = CountingSource.upTo(100);
BoundedSource<Long> unsplittableSource = SourceTestUtils.toUnsplittableSource(baseSource);
List<?> splits = unsplittableSource.split(1, options);
assertEquals(1, splits.size());
assertEquals(unsplittableSource, splits.get(0));
BoundedReader<Long> unsplittableReader = unsplittableSource.createReader(options);
assertEquals(0, unsplittableReader.getFractionConsumed(), 1e-15);
Set<Long> expected = Sets.newHashSet(SourceTestUtils.readFromSource(baseSource, options));
Set<Long> actual = Sets.newHashSet();
actual.addAll(SourceTestUtils.readNItemsFromUnstartedReader(unsplittableReader, 40));
assertNull(unsplittableReader.splitAtFraction(0.5));
actual.addAll(SourceTestUtils.readRemainingFromReader(unsplittableReader, true /* started */));
assertEquals(1, unsplittableReader.getFractionConsumed(), 1e-15);
assertEquals(100, actual.size());
assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual));
}
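Example 1 depends on two parts of the unsplittable contract: split(...) returns the source itself as the only bundle, and the reader's splitAtFraction(...) returns null to reject dynamic splitting. SourceTestUtils.toUnsplittableSource's real implementation is not shown in this listing; the wrapper below is a simplified sketch of the source half of that contract (the class name and structure are illustrative assumptions):

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.options.PipelineOptions;

// Simplified sketch only; Beam's real toUnsplittableSource also wraps the
// reader so that splitAtFraction(...) always returns null.
class UnsplittableSource<T> extends BoundedSource<T> {
  private final BoundedSource<T> delegate;

  UnsplittableSource(BoundedSource<T> delegate) {
    this.delegate = delegate;
  }

  @Override
  public List<? extends BoundedSource<T>> split(
      long desiredBundleSizeBytes, PipelineOptions options) {
    return Collections.singletonList(this); // refuse to split into bundles
  }

  @Override
  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
    return delegate.getEstimatedSizeBytes(options);
  }

  @Override
  public BoundedReader<T> createReader(PipelineOptions options) throws IOException {
    return delegate.createReader(options);
  }

  @Override
  public Coder<T> getOutputCoder() {
    return delegate.getOutputCoder();
  }
}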
Example 2: verifyReadContents
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
private void verifyReadContents(byte[] expected, File inputFile,
@Nullable DecompressingChannelFactory decompressionFactory) throws IOException {
CompressedSource<Byte> source =
CompressedSource.from(new ByteSource(inputFile.toPath().toString(), 1));
if (decompressionFactory != null) {
source = source.withDecompression(decompressionFactory);
}
List<KV<Long, Byte>> actualOutput = Lists.newArrayList();
try (BoundedReader<Byte> reader = source.createReader(PipelineOptionsFactory.create())) {
for (boolean more = reader.start(); more; more = reader.advance()) {
actualOutput.add(KV.of(reader.getCurrentTimestamp().getMillis(), reader.getCurrent()));
}
}
List<KV<Long, Byte>> expectedOutput = Lists.newArrayList();
for (int i = 0; i < expected.length; i++) {
expectedOutput.add(KV.of((long) i, expected[i]));
}
assertEquals(expectedOutput, actualOutput);
}
Example 3: testEmptyGzipProgress
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testEmptyGzipProgress() throws IOException {
File tmpFile = tmpFolder.newFile("empty.gz");
String filename = tmpFile.toPath().toString();
writeFile(tmpFile, new byte[0], CompressionMode.GZIP);
PipelineOptions options = PipelineOptionsFactory.create();
CompressedSource<Byte> source = CompressedSource.from(new ByteSource(filename, 1));
try (BoundedReader<Byte> readerOrig = source.createReader(options)) {
assertThat(readerOrig, instanceOf(CompressedReader.class));
CompressedReader<Byte> reader = (CompressedReader<Byte>) readerOrig;
// before starting
assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(1, reader.getSplitPointsRemaining());
// confirm empty
assertFalse(reader.start());
// after reading empty source
assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(0, reader.getSplitPointsRemaining());
}
}
Example 4: testProgressEmptySource
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testProgressEmptySource() throws IOException {
PipelineOptions options = PipelineOptionsFactory.create();
CoarseRangeSource source = new CoarseRangeSource(13, 17, 1, 100);
try (OffsetBasedReader<Integer> reader = source.createReader(options)) {
// before starting
assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
// confirm empty
assertFalse(reader.start());
// after reading empty source
assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(0, reader.getSplitPointsRemaining());
}
}
Example 5: testGetProgressFromUnstartedReader
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testGetProgressFromUnstartedReader() throws Exception {
List<FixedRecord> records = createFixedRecords(DEFAULT_RECORD_COUNT);
String filename = generateTestFile("tmp.avro", records, SyncBehavior.SYNC_DEFAULT, 1000,
AvroCoder.of(FixedRecord.class), DataFileConstants.NULL_CODEC);
File file = new File(filename);
AvroSource<FixedRecord> source = AvroSource.from(filename).withSchema(FixedRecord.class);
try (BoundedSource.BoundedReader<FixedRecord> reader = source.createReader(null)) {
assertEquals(Double.valueOf(0.0), reader.getFractionConsumed());
}
List<? extends BoundedSource<FixedRecord>> splits =
source.split(file.length() / 3, null);
for (BoundedSource<FixedRecord> subSource : splits) {
try (BoundedSource.BoundedReader<FixedRecord> reader = subSource.createReader(null)) {
assertEquals(Double.valueOf(0.0), reader.getFractionConsumed());
}
}
}
Example 6: testProgressEmptySource
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testProgressEmptySource() throws Exception {
// 0 records, 2 per block.
List<FixedRecord> records = Collections.emptyList();
String filename = generateTestFile("tmp.avro", records, SyncBehavior.SYNC_REGULAR, 2,
AvroCoder.of(FixedRecord.class), DataFileConstants.NULL_CODEC);
AvroSource<FixedRecord> source = AvroSource.from(filename).withSchema(FixedRecord.class);
try (BoundedSource.BoundedReader<FixedRecord> readerOrig = source.createReader(null)) {
assertThat(readerOrig, Matchers.instanceOf(BlockBasedReader.class));
BlockBasedReader<FixedRecord> reader = (BlockBasedReader<FixedRecord>) readerOrig;
// before starting
assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
// confirm empty
assertFalse(reader.start());
// after reading empty source
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(0, reader.getSplitPointsRemaining());
assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
}
}
Example 7: testProgress
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testProgress() throws IOException {
final int numRecords = 5;
@SuppressWarnings("deprecation") // testing CountingSource
BoundedSource<Long> source = CountingSource.upTo(numRecords);
try (BoundedReader<Long> reader = source.createReader(PipelineOptionsFactory.create())) {
// Check preconditions before starting. Note that CountingReader can always give an accurate
// remaining parallelism.
assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(numRecords, reader.getSplitPointsRemaining());
assertTrue(reader.start());
int i = 0;
do {
assertEquals(i, reader.getSplitPointsConsumed());
assertEquals(numRecords - i, reader.getSplitPointsRemaining());
++i;
} while (reader.advance());
assertEquals(numRecords, i); // exactly numRecords calls to advance()
assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
assertEquals(numRecords, reader.getSplitPointsConsumed());
assertEquals(0, reader.getSplitPointsRemaining());
}
}
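The split-point and fraction signals exercised above are what a runner consults for dynamic work rebalancing. A hedged sketch of that pattern follows (trySplitAtCurrentProgress is an illustrative helper, not a Beam API): getFractionConsumed() may return null when progress is unknown, and splitAtFraction(...) returns null when the reader declines to split, as the readers in Examples 1 and 14 always do.

// Illustrative helper, not part of the Beam SDK: attempt a dynamic split at
// the reader's current progress, returning the residual source or null.
static <T> BoundedSource<T> trySplitAtCurrentProgress(
    BoundedSource.BoundedReader<T> reader) {
  Double fraction = reader.getFractionConsumed();
  if (fraction == null) {
    return null; // the reader cannot estimate its progress
  }
  // Returns a source for the remainder, or null if the split was rejected.
  return reader.splitAtFraction(fraction);
}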
Example 8: testReadersStartWhenZeroRecords
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
/**
* This test validates the behavior of the
* {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method when the
* InputFormat's {@link InputFormat#getSplits() getSplits()} returns an InputSplit list containing
* zero records.
*/
@Test
public void testReadersStartWhenZeroRecords() throws Exception {
InputFormat mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
Mockito.when(
mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
Mockito.any(TaskAttemptContext.class))).thenReturn(mockReader);
Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
new HadoopInputFormatBoundedSource<Text, Employee>(
serConf,
WritableCoder.of(Text.class),
AvroCoder.of(Employee.class),
null, // No key translation required.
null, // No value translation required.
new SerializableSplit(mockInputSplit));
boundedSource.setInputFormatObj(mockInputFormat);
BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
assertFalse(reader.start());
assertEquals(Double.valueOf(1), reader.getFractionConsumed());
reader.close();
}
Example 9: testReaderAndParentSourceReadsSameData
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
/**
* This test validates that a reader and its parent source read the same records.
*/
@Test
public void testReaderAndParentSourceReadsSameData() throws Exception {
InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
new HadoopInputFormatBoundedSource<Text, Employee>(
serConf,
WritableCoder.of(Text.class),
AvroCoder.of(Employee.class),
null, // No key translation required.
null, // No value translation required.
new SerializableSplit(mockInputSplit));
BoundedReader<KV<Text, Employee>> reader = boundedSource
.createReader(p.getOptions());
SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(reader, p.getOptions());
}
Example 10: testGetCurrentSourceFunction
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
/**
* This test verifies that the method
* {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#getCurrentSource()
* getCurrentSource()} returns the correct source object.
*/
@Test
public void testGetCurrentSourceFunction() throws Exception {
SerializableSplit split = new SerializableSplit();
BoundedSource<KV<Text, Employee>> source =
new HadoopInputFormatBoundedSource<Text, Employee>(
serConf,
WritableCoder.of(Text.class),
AvroCoder.of(Employee.class),
null, // No key translation required.
null, // No value translation required.
split);
BoundedReader<KV<Text, Employee>> hifReader = source.createReader(p.getOptions());
BoundedSource<KV<Text, Employee>> hifSource = hifReader.getCurrentSource();
assertEquals(hifSource, source);
}
Example 11: readFromSource
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
/**
* Reads all elements from the given {@link BoundedSource}.
*/
public static <T> List<T> readFromSource(BoundedSource<T> source, PipelineOptions options)
throws IOException {
try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
return readFromUnstartedReader(reader);
}
}
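The helper readFromUnstartedReader called above is not included in this listing. Below is a minimal reconstruction, assuming it simply drives the standard start()/advance() loop (the actual SourceTestUtils implementation may differ):

// Assumed shape of the helper: read every record from a reader that has not
// been started yet; the caller is responsible for closing the reader.
public static <T> List<T> readFromUnstartedReader(BoundedSource.BoundedReader<T> reader)
    throws IOException {
  List<T> result = new ArrayList<>();
  for (boolean more = reader.start(); more; more = reader.advance()) {
    result.add(reader.getCurrent());
  }
  return result;
}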
Example 12: assertUnstartedReaderReadsSameAsItsSource
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
/**
* Assert that a {@code Reader} returns a {@code Source} that, when read from, produces the same
* records as the reader.
*/
public static <T> void assertUnstartedReaderReadsSameAsItsSource(
BoundedSource.BoundedReader<T> reader, PipelineOptions options) throws Exception {
Coder<T> coder = reader.getCurrentSource().getOutputCoder();
List<T> expected = readFromUnstartedReader(reader);
List<T> actual = readFromSource(reader.getCurrentSource(), options);
List<ReadableStructuralValue<T>> expectedStructural = createStructuralValues(coder, expected);
List<ReadableStructuralValue<T>> actualStructural = createStructuralValues(coder, actual);
assertThat(actualStructural, containsInAnyOrder(expectedStructural.toArray()));
}
Example 13: testGzipProgress
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testGzipProgress() throws IOException {
int numRecords = 3;
File tmpFile = tmpFolder.newFile("nonempty.gz");
String filename = tmpFile.toPath().toString();
writeFile(tmpFile, new byte[numRecords], CompressionMode.GZIP);
PipelineOptions options = PipelineOptionsFactory.create();
CompressedSource<Byte> source = CompressedSource.from(new ByteSource(filename, 1));
try (BoundedReader<Byte> readerOrig = source.createReader(options)) {
assertThat(readerOrig, instanceOf(CompressedReader.class));
CompressedReader<Byte> reader = (CompressedReader<Byte>) readerOrig;
// before starting
assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(1, reader.getSplitPointsRemaining());
// confirm the file has three records
for (int i = 0; i < numRecords; ++i) {
if (i == 0) {
assertTrue(reader.start());
} else {
assertTrue(reader.advance());
}
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(1, reader.getSplitPointsRemaining());
}
assertFalse(reader.advance());
// after reading the source
assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
assertEquals(1, reader.getSplitPointsConsumed());
assertEquals(0, reader.getSplitPointsRemaining());
}
}
Example 14: testUnsplittable
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testUnsplittable() throws IOException {
String baseName = "test-input";
File compressedFile = tmpFolder.newFile(baseName + ".gz");
byte[] input = generateInput(10000);
writeFile(compressedFile, input, CompressionMode.GZIP);
CompressedSource<Byte> source =
CompressedSource.from(new ByteSource(compressedFile.getPath(), 1));
List<Byte> expected = Lists.newArrayList();
for (byte i : input) {
expected.add(i);
}
PipelineOptions options = PipelineOptionsFactory.create();
BoundedReader<Byte> reader = source.createReader(options);
List<Byte> actual = Lists.newArrayList();
for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) {
actual.add(reader.getCurrent());
// every 9 elements, verify that splitting at the current fraction is rejected
if (actual.size() % 9 == 0) {
Double fractionConsumed = reader.getFractionConsumed();
assertNotNull(fractionConsumed);
assertNull(reader.splitAtFraction(fractionConsumed));
}
}
assertEquals(expected.size(), actual.size());
assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual));
}
Example 15: testSplittableProgress
import org.apache.beam.sdk.io.BoundedSource.BoundedReader; // import the required package/class
@Test
public void testSplittableProgress() throws IOException {
File tmpFile = tmpFolder.newFile("nonempty.txt");
String filename = tmpFile.toPath().toString();
Files.write(new byte[2], tmpFile);
PipelineOptions options = PipelineOptionsFactory.create();
CompressedSource<Byte> source = CompressedSource.from(new ByteSource(filename, 1));
try (BoundedReader<Byte> readerOrig = source.createReader(options)) {
assertThat(readerOrig, not(instanceOf(CompressedReader.class)));
assertThat(readerOrig, instanceOf(FileBasedReader.class));
FileBasedReader<Byte> reader = (FileBasedReader<Byte>) readerOrig;
// Check preconditions before starting
assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
// First record: none consumed, unknown remaining.
assertTrue(reader.start());
assertEquals(0, reader.getSplitPointsConsumed());
assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
// Second record: 1 consumed, know that we're on the last record.
assertTrue(reader.advance());
assertEquals(1, reader.getSplitPointsConsumed());
assertEquals(1, reader.getSplitPointsRemaining());
// Confirm empty and check post-conditions
assertFalse(reader.advance());
assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
assertEquals(2, reader.getSplitPointsConsumed());
assertEquals(0, reader.getSplitPointsRemaining());
}
}