This article collects typical usage examples of the Java class org.apache.beam.sdk.transforms.Count. If you are wondering what the Count class does, how to use it, or what Count looks like in real code, the selected examples below should help.
The Count class belongs to the org.apache.beam.sdk.transforms package. Fifteen code examples are shown below, sorted by popularity by default.
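Before working through the examples, here is a minimal, self-contained sketch (not taken from any of the examples below) of the three Count entry points they rely on: Count.perElement() produces one KV<T, Long> per distinct element, Count.globally() produces a single Long per window, and Count.combineFn() exposes the underlying CombineFn for use inside Combine.globally(...). The pipeline and input values are illustrative only.

import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

public class CountSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    PCollection<String> words = p.apply(Create.of(Arrays.asList("a", "b", "a", "c")));

    // Count.perElement(): one KV per distinct element, e.g. ("a", 2L).
    PCollection<KV<String, Long>> perElement = words.apply(Count.<String>perElement());

    // Count.globally(): the total number of elements, 4L here.
    PCollection<Long> total = words.apply(Count.<String>globally());

    // Count.combineFn(): the same counting logic as a CombineFn, composed with Combine.globally().
    PCollection<Long> viaCombineFn =
        words.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());

    p.run().waitUntilFinish();
  }
}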
Example 1: expand
import org.apache.beam.sdk.transforms.Count; // import the required package/class
@Override
public PCollection<KV<String, List<CompletionCandidate>>> expand(PCollection<String> input) {
PCollection<CompletionCandidate> candidates = input
// First count how often each token appears.
.apply(Count.<String>perElement())
// Map the KV outputs of Count into our own CompletionCandidate class.
.apply("CreateCompletionCandidates", ParDo.of(
new DoFn<KV<String, Long>, CompletionCandidate>() {
@ProcessElement
public void processElement(ProcessContext c) {
c.output(new CompletionCandidate(c.element().getKey(), c.element().getValue()));
}
}));
// Compute the top via either a flat or recursive algorithm.
if (recursive) {
return candidates
.apply(new ComputeTopRecursive(candidatesPerPrefix, 1))
.apply(Flatten.<KV<String, List<CompletionCandidate>>>pCollections());
} else {
return candidates
.apply(new ComputeTopFlat(candidatesPerPrefix, 1));
}
}
Example 2: expand
import org.apache.beam.sdk.transforms.Count; // import the required package/class
@Override
public PCollection<TableRow> expand(PCollection<TableRow> rows) {
// row... => month...
PCollection<Integer> tornadoes = rows.apply(
ParDo.of(new ExtractTornadoesFn()));
// month... => <month,count>...
PCollection<KV<Integer, Long>> tornadoCounts =
tornadoes.apply(Count.<Integer>perElement());
// <month,count>... => row...
PCollection<TableRow> results = tornadoCounts.apply(
ParDo.of(new FormatCountsFn()));
return results;
}
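ExtractTornadoesFn and FormatCountsFn are helper DoFns defined elsewhere in this example. As a rough illustration, the sketch below shows what a FormatCountsFn-style DoFn could look like; the BigQuery column names "month" and "tornado_count" are assumptions made for this sketch, not necessarily the names used by the original code.

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;

// Hypothetical sketch: turn each <month, count> pair into a BigQuery TableRow.
static class FormatCountsFn extends DoFn<KV<Integer, Long>, TableRow> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    TableRow row = new TableRow()
        .set("month", c.element().getKey())            // assumed column name
        .set("tornado_count", c.element().getValue()); // assumed column name
    c.output(row);
  }
}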
Example 3: testMergingCustomWindows
import org.apache.beam.sdk.transforms.Count; // import the required package/class
@Test
@Category({ValidatesRunner.class, UsesCustomWindowMerging.class})
public void testMergingCustomWindows() {
Instant startInstant = new Instant(0L);
List<TimestampedValue<String>> input = new ArrayList<>();
PCollection<String> inputCollection =
pipeline.apply(
Create.timestamped(
TimestampedValue.of("big", startInstant.plus(Duration.standardSeconds(10))),
TimestampedValue.of("small1", startInstant.plus(Duration.standardSeconds(20))),
// This element will be outside of bigWindow and therefore not merged
TimestampedValue.of("small2", startInstant.plus(Duration.standardSeconds(39)))));
PCollection<String> windowedCollection =
inputCollection.apply(Window.into(new CustomWindowFn<String>()));
PCollection<Long> count =
windowedCollection.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
// "small1" and "big" elements merged into bigWindow "small2" not merged
// because timestamp is not in bigWindow
PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L);
pipeline.run();
}
Example 4: testMergingCustomWindowsKeyedCollection
import org.apache.beam.sdk.transforms.Count; // import the required package/class
@Test
@Category({ValidatesRunner.class, UsesCustomWindowMerging.class})
public void testMergingCustomWindowsKeyedCollection() {
Instant startInstant = new Instant(0L);
PCollection<KV<Integer, String>> inputCollection =
pipeline.apply(
Create.timestamped(
TimestampedValue.of(
KV.of(0, "big"), startInstant.plus(Duration.standardSeconds(10))),
TimestampedValue.of(
KV.of(1, "small1"), startInstant.plus(Duration.standardSeconds(20))),
// This element is not contained within bigWindow and is therefore not merged
TimestampedValue.of(
KV.of(2, "small2"), startInstant.plus(Duration.standardSeconds(39)))));
PCollection<KV<Integer, String>> windowedCollection =
inputCollection.apply(Window.into(new CustomWindowFn<KV<Integer, String>>()));
PCollection<Long> count =
windowedCollection.apply(
Combine.globally(Count.<KV<Integer, String>>combineFn()).withoutDefaults());
// "small1" and "big" elements merged into bigWindow "small2" not merged
// because it is not contained in bigWindow
PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L);
pipeline.run();
}
Example 5: addCountingAsserts
import org.apache.beam.sdk.transforms.Count; // import the required package/class
public static void addCountingAsserts(PCollection<Long> input, long numElements) {
// Count == numElements
PAssert
.thatSingleton(input.apply("Count", Count.<Long>globally()))
.isEqualTo(numElements);
// Unique count == numElements
PAssert
.thatSingleton(input.apply(Distinct.<Long>create())
.apply("UniqueCount", Count.<Long>globally()))
.isEqualTo(numElements);
// Min == 0
PAssert
.thatSingleton(input.apply("Min", Min.<Long>globally()))
.isEqualTo(0L);
// Max == numElements-1
PAssert
.thatSingleton(input.apply("Max", Max.<Long>globally()))
.isEqualTo(numElements - 1);
}
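GenerateSequence produces exactly the longs 0 through numElements-1, so it satisfies all four assertions above (total count, distinct count, minimum 0, maximum numElements-1). The following is a minimal usage sketch, assuming a TestPipeline named pipeline is in scope; it is not part of the original example.

import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.values.PCollection;

// Hypothetical usage of the helper above with a generated sequence of longs.
long numElements = 1000L;
PCollection<Long> longs = pipeline.apply(GenerateSequence.from(0).to(numElements));
addCountingAsserts(longs, numElements);
pipeline.run();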
Example 6: testHifIOWithElastic
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * Test to read data from an embedded Elasticsearch instance and verify that the data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() {
// The expected hashcode is computed once at insertion time and hardcoded here.
String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc";
Configuration conf = getConfiguration();
PCollection<KV<Text, LinkedMapWritable>> esData =
pipeline.apply(HadoopInputFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
PCollection<Long> count = esData.apply(Count.<KV<Text, LinkedMapWritable>>globally());
// Verify that the count of objects fetched using HIFInputFormat IO is correct.
PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT);
PCollection<LinkedMapWritable> values = esData.apply(Values.<LinkedMapWritable>create());
PCollection<String> textValues = values.apply(transformFunc);
// Verify the output values using checksum comparison.
PCollection<String> consolidatedHashcode =
textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
pipeline.run().waitUntilFinish();
}
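transformFunc is a member of the test class and is not shown here. A plausible sketch, assuming it simply extracts a single field from each Elasticsearch document as a String (the field name "id" is an assumption for illustration):

import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.hadoop.io.Text;
import org.elasticsearch.hadoop.mr.LinkedMapWritable;

// Hypothetical: map each LinkedMapWritable document to one of its String fields.
MapElements<LinkedMapWritable, String> transformFunc =
    MapElements.via(
        new SimpleFunction<LinkedMapWritable, String>() {
          @Override
          public String apply(LinkedMapWritable document) {
            return document.get(new Text("id")).toString(); // field name "id" is assumed
          }
        });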
Example 7: testHifIOWithElastic
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * This test reads data from the Elasticsearch instance and verifies that the data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() throws SecurityException, IOException {
// The expected hashcode is computed once at insertion time and hardcoded here.
final long expectedRowCount = 1000L;
String expectedHashCode = "42e254c8689050ed0a617ff5e80ea392";
Configuration conf = getConfiguration(options);
PCollection<KV<Text, LinkedMapWritable>> esData =
pipeline.apply(HadoopInputFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
// Verify that the count of objects fetched using HIFInputFormat IO is correct.
PCollection<Long> count = esData.apply(Count.<KV<Text, LinkedMapWritable>>globally());
PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
PCollection<LinkedMapWritable> values = esData.apply(Values.<LinkedMapWritable>create());
PCollection<String> textValues = values.apply(transformFunc);
// Verify the output values using checksum comparison.
PCollection<String> consolidatedHashcode =
textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
pipeline.run().waitUntilFinish();
}
Example 8: testHIFReadForCassandra
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * This test reads data from the Cassandra instance and verifies that the data is read
 * successfully.
 */
@Test
public void testHIFReadForCassandra() {
// The expected hashcode is computed once at insertion time and hardcoded here.
String expectedHashCode = "1a30ad400afe4ebf5fde75f5d2d95408";
Long expectedRecordsCount = 1000L;
Configuration conf = getConfiguration(options);
PCollection<KV<Long, String>> cassandraData = pipeline.apply(HadoopInputFormatIO
.<Long, String>read().withConfiguration(conf).withValueTranslation(myValueTranslate));
PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
.isEqualTo(expectedRecordsCount);
PCollection<String> textValues = cassandraData.apply(Values.<String>create());
// Verify the output values using checksum comparison.
PCollection<String> consolidatedHashcode =
textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
pipeline.run().waitUntilFinish();
}
Example 9: testHIFReadForCassandraQuery
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * This test reads data from the Cassandra instance based on a query and verifies that the data
 * is read successfully.
 */
@Test
public void testHIFReadForCassandraQuery() {
String expectedHashCode = "7bead6d6385c5f4dd0524720cd320b49";
Long expectedNumRows = 1L;
Configuration conf = getConfiguration(options);
conf.set("cassandra.input.cql", "select * from " + CASSANDRA_KEYSPACE + "." + CASSANDRA_TABLE
+ " where token(y_id) > ? and token(y_id) <= ? "
+ "and field0 = 'user48:field0:431531'");
PCollection<KV<Long, String>> cassandraData =
pipeline.apply(HadoopInputFormatIO.<Long, String>read().withConfiguration(conf)
.withValueTranslation(myValueTranslate));
PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
.isEqualTo(expectedNumRows);
PCollection<String> textValues = cassandraData.apply(Values.<String>create());
// Verify the output values using checksum comparison.
PCollection<String> consolidatedHashcode =
textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
pipeline.run().waitUntilFinish();
}
Example 10: testHIFReadForCassandra
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * Test to read data from an embedded Cassandra instance and verify that the data is read
 * successfully.
 * @throws Exception
 */
@Test
public void testHIFReadForCassandra() throws Exception {
// The expected hashcode is computed once at insertion time and hardcoded here.
String expectedHashCode = "1b9780833cce000138b9afa25ba63486";
Configuration conf = getConfiguration();
PCollection<KV<Long, String>> cassandraData =
p.apply(HadoopInputFormatIO.<Long, String>read().withConfiguration(conf)
.withValueTranslation(myValueTranslate));
// Verify the count of data retrieved from Cassandra matches expected count.
PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
.isEqualTo(TEST_DATA_ROW_COUNT);
PCollection<String> textValues = cassandraData.apply(Values.<String>create());
// Verify the output values using checksum comparison.
PCollection<String> consolidatedHashcode =
textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
p.run().waitUntilFinish();
}
Example 11: testHIFReadForCassandraQuery
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * Test to read data from an embedded Cassandra instance based on a query and verify that the
 * data is read successfully.
 */
@Test
public void testHIFReadForCassandraQuery() throws Exception {
Long expectedCount = 1L;
String expectedChecksum = "f11caabc7a9fc170e22b41218749166c";
Configuration conf = getConfiguration();
conf.set("cassandra.input.cql", "select * from " + CASSANDRA_KEYSPACE + "." + CASSANDRA_TABLE
+ " where token(id) > ? and token(id) <= ? and scientist='Faraday1' allow filtering");
PCollection<KV<Long, String>> cassandraData =
p.apply(HadoopInputFormatIO.<Long, String>read().withConfiguration(conf)
.withValueTranslation(myValueTranslate));
// Verify the count of data retrieved from Cassandra matches expected count.
PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
.isEqualTo(expectedCount);
PCollection<String> textValues = cassandraData.apply(Values.<String>create());
// Verify the output values using checksum comparison.
PCollection<String> consolidatedHashcode =
textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedChecksum);
p.run().waitUntilFinish();
}
Example 12: testE2EBigtableRead
import org.apache.beam.sdk.transforms.Count; // import the required package/class
@Test
public void testE2EBigtableRead() throws Exception {
PipelineOptionsFactory.register(BigtableTestOptions.class);
BigtableTestOptions options = TestPipeline.testingPipelineOptions()
.as(BigtableTestOptions.class);
String project = options.as(GcpOptions.class).getProject();
BigtableOptions.Builder bigtableOptionsBuilder = new BigtableOptions.Builder()
.setProjectId(project)
.setInstanceId(options.getInstanceId());
final String tableId = "BigtableReadTest";
final long numRows = 1000L;
Pipeline p = Pipeline.create(options);
PCollection<Long> count = p
.apply(BigtableIO.read().withBigtableOptions(bigtableOptionsBuilder).withTableId(tableId))
.apply(Count.<Row>globally());
PAssert.thatSingleton(count).isEqualTo(numRows);
p.run();
}
Example 13: testE2EV1Read
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * An end-to-end test for {@link DatastoreV1.Read#withQuery(Query)}.
 *
 * <p>Writes some test entities to Datastore, then runs a pipeline that reads and counts the
 * total number of entities, and verifies that the count matches the number of entities written.
 */
@Test
public void testE2EV1Read() throws Exception {
// Read from datastore
Query query = V1TestUtil.makeAncestorKindQuery(
options.getKind(), options.getNamespace(), ancestor);
DatastoreV1.Read read = DatastoreIO.v1().read()
.withProjectId(project)
.withQuery(query)
.withNamespace(options.getNamespace());
// Count the total number of entities
Pipeline p = Pipeline.create(options);
PCollection<Long> count = p
.apply(read)
.apply(Count.<Entity>globally());
PAssert.thatSingleton(count).isEqualTo(numEntities);
p.run();
}
Example 14: testE2EV1ReadWithGQLQuery
import org.apache.beam.sdk.transforms.Count; // import the required package/class
/**
 * An end-to-end test for {@link DatastoreV1.Read#withLiteralGqlQuery(String)}.
 *
 * <p>Writes some test entities to Datastore, then runs a pipeline that reads and counts the
 * total number of entities, and verifies that the count matches the number of entities written.
 */
private void testE2EV1ReadWithGQLQuery(long limit) throws Exception {
String gqlQuery = String.format(
"SELECT * from %s WHERE __key__ HAS ANCESTOR KEY(%s, '%s')",
options.getKind(), options.getKind(), ancestor);
long expectedNumEntities = numEntities;
if (limit > 0) {
gqlQuery = String.format("%s LIMIT %d", gqlQuery, limit);
expectedNumEntities = limit;
}
DatastoreV1.Read read = DatastoreIO.v1().read()
.withProjectId(project)
.withLiteralGqlQuery(gqlQuery)
.withNamespace(options.getNamespace());
// Count the total number of entities
Pipeline p = Pipeline.create(options);
PCollection<Long> count = p
.apply(read)
.apply(Count.<Entity>globally());
PAssert.thatSingleton(count).isEqualTo(expectedNumEntities);
p.run();
}
Example 15: testRead
import org.apache.beam.sdk.transforms.Count; // import the required package/class
@Test
public void testRead() throws Exception {
PCollection<TestRow> rows = pipeline.apply(
JdbcIO.<TestRow>read()
.withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(dataSource))
.withQuery("select name,id from " + readTableName)
.withRowMapper(new JdbcTestHelper.CreateTestRowOfNameAndId())
.withCoder(SerializableCoder.of(TestRow.class)));
PAssert.thatSingleton(
rows.apply("Count All", Count.<TestRow>globally()))
.isEqualTo((long) EXPECTED_ROW_COUNT);
Iterable<TestRow> expectedValues = TestRow.getExpectedValues(0, EXPECTED_ROW_COUNT);
PAssert.that(rows).containsInAnyOrder(expectedValues);
pipeline.run();
}