本文整理汇总了Java中org.apache.beam.sdk.values.KV类的典型用法代码示例。如果您正苦于以下问题：Java KV类的具体用法？Java KV怎么用？Java KV使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
KV类属于org.apache.beam.sdk.values包，在下文中一共展示了KV类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: filterAlreadyProcessedUrls
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Runs the input content through {@code FilterProcessedUrls}, giving that transform a map
 * side input built from the "processed URLs" BigQuery query.
 *
 * <p>The query text comes from
 * {@code IndexerPipelineUtils.buildBigQueryProcessedUrlsQuery(options)}; its rows are converted
 * to {@code KV<String, Long>} pairs by {@code GetUrlFn} and materialized with
 * {@code View.asMap()}. Presumably the keys are URLs and the filter drops content whose URL is
 * in the map; confirm against {@code GetUrlFn} and {@code FilterProcessedUrls}.
 *
 * @param readContent the content to be filtered
 * @param pipeline the pipeline on which the BigQuery read is applied
 * @param options pipeline options used to build the query
 * @return the output of the {@code FilterProcessedUrls} transform
 */
private static PCollection<InputContent> filterAlreadyProcessedUrls(
    PCollection<InputContent> readContent, Pipeline pipeline,
    IndexerPipelineOptions options) {
  final String processedUrlsQuery = IndexerPipelineUtils.buildBigQueryProcessedUrlsQuery(options);

  final PCollection<KV<String, Long>> processedUrls = pipeline
      .apply("Get processed URLs", BigQueryIO.read().fromQuery(processedUrlsQuery))
      .apply(ParDo.of(new GetUrlFn()));

  // The same view instance must be handed to the DoFn and registered as a side input.
  final PCollectionView<Map<String, Long>> processedUrlsView =
      processedUrls.apply(View.<String, Long>asMap());

  return readContent.apply(
      ParDo.of(new FilterProcessedUrls(processedUrlsView))
          .withSideInputs(processedUrlsView));
}
示例2: processElement
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Converts an {@code InputContent} element into a row key plus a single set-cell mutation
 * and emits them as a {@code KV}.
 *
 * <p>The row key is {@code "<job name>#<expected document hash>"}; the mutation stores the
 * element's text under the {@code "text"} qualifier in the
 * {@code IndexerPipelineUtils.DEAD_LETTER_TABLE_ERR_CF} column family.
 *
 * @param c the process context supplying the element and the pipeline options
 */
@ProcessElement
public void processElement(ProcessContext c) {
  final InputContent content = c.element();

  final String jobName = c.getPipelineOptions().getJobName();
  final ByteString rowKey =
      ByteString.copyFromUtf8(jobName + "#" + content.expectedDocumentHash);
  final ByteString textBytes = ByteString.copyFromUtf8(content.text);

  final Mutation.SetCell.Builder textCell = Mutation.SetCell.newBuilder()
      .setFamilyName(IndexerPipelineUtils.DEAD_LETTER_TABLE_ERR_CF)
      .setColumnQualifier(ByteString.copyFromUtf8("text"))
      .setValue(textBytes);

  final Iterable<Mutation> mutations =
      ImmutableList.of(Mutation.newBuilder().setSetCell(textCell).build());

  c.output(KV.of(rowKey, mutations));
}
示例3: testUnwritableRemoveContainerPipeline
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Runs {@code BeamProcessor} (with its boolean flag set to {@code false}) over
 * {@code CONTAINER_KV} against a repository directory that has been made read-only, and
 * asserts that the transform emits nothing.
 *
 * <p>The test is skipped via {@code assumeTrue} when the directory cannot be made read-only.
 * Write permission is restored in a {@code finally} block so that a failing pipeline or
 * assertion does not leave the shared test directory read-only for other tests.
 *
 * @throws Exception if the test resource cannot be resolved or the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void testUnwritableRemoveContainerPipeline() throws Exception {
  final Map<String, String> dataConfiguration = singletonMap("repository",
      getClass().getResource("/dataDirectory2").toURI().toString());

  final File root = new File(getClass().getResource("/dataDirectory2").toURI());
  assumeTrue(root.setReadOnly());

  try {
    final PCollection<KV<String, String>> pCollection = pipeline
        .apply("Create", Create.of(CONTAINER_KV))
        .apply(ParDo.of(new BeamProcessor(dataConfiguration, LDP.PreferContainment.getIRIString(), false)));

    PAssert.that(pCollection).empty();

    pipeline.run();
  } finally {
    // Always undo setReadOnly(), even when the run or the assertion throws.
    root.setWritable(true);
  }
}
示例4: testUnwritableAddContainerPipeline
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Runs {@code BeamProcessor} (with its boolean flag set to {@code true}) over
 * {@code CONTAINER_KV} against a repository directory that has been made read-only, and
 * asserts that the transform emits nothing.
 *
 * <p>The test is skipped via {@code assumeTrue} when the directory cannot be made read-only.
 * Write permission is restored in a {@code finally} block so that a failing pipeline or
 * assertion does not leave the shared test directory read-only for other tests.
 *
 * @throws Exception if the test resource cannot be resolved or the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void testUnwritableAddContainerPipeline() throws Exception {
  final Map<String, String> dataConfiguration = singletonMap("repository",
      getClass().getResource("/dataDirectory2").toURI().toString());

  final File root = new File(getClass().getResource("/dataDirectory2").toURI());
  assumeTrue(root.setReadOnly());

  try {
    final PCollection<KV<String, String>> pCollection = pipeline
        .apply("Create", Create.of(CONTAINER_KV))
        .apply(ParDo.of(new BeamProcessor(dataConfiguration, LDP.PreferContainment.getIRIString(), true)));

    PAssert.that(pCollection).empty();

    pipeline.run();
  } finally {
    // Always undo setReadOnly(), even when the run or the assertion throws.
    root.setWritable(true);
  }
}
示例5: processElement
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Writes the quads of the configured graph for one element and re-emits the element when the
 * write succeeds.
 *
 * <p>The element key is resolved to a directory with {@code resourceDirectory(config, key)} and
 * the element value is deserialized into a {@code Dataset}. The quads of {@code graph} are passed
 * to {@code VersionedResource.write} as its third argument when {@code add} is true and as its
 * second argument otherwise; the other of the two receives an empty stream. Presumably these are
 * the "add" and "remove" streams respectively; confirm against {@code VersionedResource}.
 *
 * <p>Nothing is emitted when the directory cannot be resolved, the write reports failure, or
 * any exception is raised; each case is logged instead.
 *
 * @param c the context
 */
@ProcessElement
public void processElement(final ProcessContext c) {
  final KV<String, String> element = c.element();
  final File dir = resourceDirectory(config, element.getKey());
  if (isNull(dir)) {
    LOGGER.error("Unable to write {} quads to {}", graph, element.getKey());
    return;
  }

  LOGGER.debug("Writing {} to directory: {}", graph, dir);
  try (final Dataset dataset = deserialize(element.getValue())) {
    if (VersionedResource.write(dir,
          add ? empty() : dataset.stream(of(rdf.createIRI(graph)), null, null, null),
          add ? dataset.stream(of(rdf.createIRI(graph)), null, null, null) : empty(), now())) {
      c.output(element);
    } else {
      logError(add, graph, element.getKey());
    }
  } catch (final Exception ex) {
    // Pass the exception itself as the trailing argument so the logger records its type and
    // stack trace; the message alone is frequently null or uninformative.
    LOGGER.error("Error processing graph: {}", ex.getMessage(), ex);
  }
}
示例6: processElement
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Builds a {@code Notification} for one element, serializes it, and emits the serialized form
 * keyed by the original element key.
 *
 * <p>The base URL is looked up in {@code baseUrls} using the part of the key between the first
 * {@code ':'} and the next {@code '/'} (for example {@code "repository"} in
 * {@code "trellis:repository/resource"}). A key that contains no {@code ':'} yields no base URL
 * and is handled by the warning branch rather than throwing from an array index.
 *
 * <p>Nothing is emitted when no base URL is found, when the element value does not deserialize
 * to a dataset, or when {@code service.serialize} returns an empty result.
 *
 * @param c the context
 */
@ProcessElement
public void processElement(final ProcessContext c) {
  final KV<String, String> element = c.element();
  final String key = element.getKey();
  final Dataset data = deserialize(element.getValue());

  // Extract the segment between the first ':' and the following '/', without relying on
  // String.split, whose result can be too short to index for keys such as "abc" or "a:/".
  final int colon = key.indexOf(':');
  final String repository;
  if (colon < 0) {
    repository = null;
  } else {
    final String path = key.substring(colon + 1);
    final int slash = path.indexOf('/');
    repository = slash < 0 ? path : path.substring(0, slash);
  }
  // Skip the lookup for a missing segment: some Map implementations reject null keys.
  final String baseUrl = repository == null ? null : baseUrls.get(repository);

  if (nonNull(baseUrl) && nonNull(data)) {
    final String identifier = toExternalTerm(key, baseUrl);
    final Notification notification = new Notification(identifier, data);
    LOGGER.debug("Serializing notification for {}", key);
    LOGGER.debug("Using baseURL: {} for {}", baseUrl, key);
    service.serialize(notification).ifPresent(evt -> c.output(of(key, evt)));
  } else {
    LOGGER.warn("Unable to serialize notification: baseUrl or data values not present for: {}",
        key);
  }
}
示例7: processElement
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Writes the cached form of one resource and passes the element through on success.
 *
 * <p>The element key is resolved to a directory with {@code resourceDirectory(config, key)}.
 * If no directory is returned, or {@code CachedResource.write} reports failure, an error is
 * logged and nothing is emitted.
 *
 * @param c the context
 */
@ProcessElement
public void processElement(final ProcessContext c) {
  final KV<String, String> kv = c.element();
  final String resourceKey = kv.getKey();

  final File cacheDir = resourceDirectory(config, resourceKey);
  if (isNull(cacheDir)) {
    LOGGER.error("Error accessing cached resource location for {}", resourceKey);
    return;
  }

  LOGGER.debug("Writing cache for: {}", resourceKey);
  if (!CachedResource.write(cacheDir, resourceKey)) {
    LOGGER.error("Error writing cached resource for {}", resourceKey);
    return;
  }

  c.output(kv);
}
示例8: testCachePipeline
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Feeds a single key (with a null value) through {@code CacheWriter}, configured with the
 * {@code /root} test resource as the "repository" location, and asserts that the same
 * element comes out.
 *
 * @throws Exception if the test resource cannot be resolved or the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void testCachePipeline() throws Exception {
  final KV<String, String> input = KV.of("trellis:repository/resource", null);

  final String repositoryLocation = getClass().getResource("/root").toURI().toString();
  final Map<String, String> writerConfig = singletonMap("repository", repositoryLocation);

  final PCollection<KV<String, String>> written = pipeline
      .apply("Create", Create.of(input))
      .apply(ParDo.of(new CacheWriter(writerConfig)));

  PAssert.that(written).containsInAnyOrder(asList(input));

  pipeline.run();
}
示例9: testUnableToCachePipeline
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Feeds the key {@code trellis:repository/some-other-resource} (with a null value) through
 * {@code CacheWriter}, configured with the {@code /root} test resource as the "repository"
 * location, and asserts that nothing is emitted.
 *
 * @throws Exception if the test resource cannot be resolved or the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void testUnableToCachePipeline() throws Exception {
  final KV<String, String> input = KV.of("trellis:repository/some-other-resource", null);

  final String repositoryLocation = getClass().getResource("/root").toURI().toString();
  final Map<String, String> writerConfig = singletonMap("repository", repositoryLocation);

  final PCollection<KV<String, String>> written = pipeline
      .apply("Create", Create.of(input))
      .apply(ParDo.of(new CacheWriter(writerConfig)));

  PAssert.that(written).empty();

  pipeline.run();
}
示例10: testInvalidDirectoryPipeline
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Feeds a {@code trellis:repository/...} key through {@code CacheWriter} whose configuration
 * only has an entry named "foo" (none named "repository"), and asserts that nothing is emitted.
 *
 * @throws Exception if the test resource cannot be resolved or the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void testInvalidDirectoryPipeline() throws Exception {
  final KV<String, String> input = KV.of("trellis:repository/resource", null);

  // Deliberately registered under a name that does not match the key's "repository" prefix.
  final String location = getClass().getResource("/root").toURI().toString();
  final Map<String, String> writerConfig = singletonMap("foo", location);

  final PCollection<KV<String, String>> written = pipeline
      .apply("Create", Create.of(input))
      .apply(ParDo.of(new CacheWriter(writerConfig)));

  PAssert.that(written).empty();

  pipeline.run();
}
示例11: testInvalidDataPipeline
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Feeds an element whose value is a truncated statement (the final IRI is never closed)
 * through {@code BeamProcessor} and asserts that nothing is emitted.
 *
 * @throws Exception if the test resource cannot be resolved or the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void testInvalidDataPipeline() throws Exception {
  // The last IRI is cut off mid-token, so the payload is not well-formed.
  final String truncatedQuads = "<trellis:repository/resource> " +
      "<http://purl.org/dc/terms/subject> <trellis:repository/resource/member> " +
      "<http://www.w3.org/ns/ldp#PreferConta";
  final KV<String, String> input = KV.of("trellis:repository/resource", truncatedQuads);

  final String repositoryLocation = getClass().getResource("/dataDirectory").toURI().toString();
  final Map<String, String> processorConfig = singletonMap("repository", repositoryLocation);

  final PCollection<KV<String, String>> processed = pipeline
      .apply("Create", Create.of(input))
      .apply(ParDo.of(new BeamProcessor(processorConfig, LDP.PreferContainment.getIRIString(), false)));

  PAssert.that(processed).empty();

  pipeline.run();
}
示例12: testInvalidDataPipeline
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Feeds an element whose value is a truncated statement (the final IRI is never closed)
 * through {@code EventProcessor} and asserts that nothing is emitted.
 *
 * @throws Exception if the pipeline fails
 */
@Test
@Category(NeedsRunner.class)
public void testInvalidDataPipeline() throws Exception {
  // The last IRI is cut off mid-token, so the payload is not well-formed.
  final String truncatedQuads = "<trellis:repository/resource> " +
      "<http://purl.org/dc/terms/subject> <trellis:repository/resource/member> " +
      "<http://www.w3.org/ns/ldp#PreferConta";
  final KV<String, String> input = KV.of("trellis:repository/resource", truncatedQuads);

  final Map<String, String> processorConfig = singletonMap("repository", "http://localhost/");

  final PCollection<KV<String, String>> processed = pipeline
      .apply("Create", Create.of(input))
      .apply(ParDo.of(new EventProcessor(processorConfig)));

  PAssert.that(processed).empty();

  pipeline.run();
}
示例13: processElement
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Writes the quads of the configured graph for one element and re-emits the element when the
 * write succeeds.
 *
 * <p>The element key is resolved to a directory with
 * {@code resourceDirectory(dataLocation, key)} and the element value is deserialized into a
 * {@code Dataset}. The quads of {@code graph} are passed to {@code VersionedResource.write} as
 * its third argument when {@code add} is true and as its second argument otherwise; the other
 * of the two receives an empty stream. Presumably these are the "add" and "remove" streams
 * respectively; confirm against {@code VersionedResource}.
 *
 * <p>Nothing is emitted when the directory cannot be resolved, the write reports failure, or
 * any exception is raised; each case is logged instead.
 *
 * @param c the context
 */
@ProcessElement
public void processElement(final ProcessContext c) {
  final KV<String, String> element = c.element();
  final File dir = resourceDirectory(dataLocation, element.getKey());
  if (isNull(dir)) {
    LOGGER.error("Unable to write {} quads to {}", graph, element.getKey());
    return;
  }

  LOGGER.debug("Writing {} to directory: {}", graph, dir);
  try (final Dataset dataset = deserialize(element.getValue())) {
    if (VersionedResource.write(dir,
          add ? empty() : dataset.stream(of(rdf.createIRI(graph)), null, null, null),
          add ? dataset.stream(of(rdf.createIRI(graph)), null, null, null) : empty(), now())) {
      c.output(element);
    } else {
      logError(add, graph, element.getKey());
    }
  } catch (final Exception ex) {
    // Pass the exception itself as the trailing argument so the logger records its type and
    // stack trace; the message alone is frequently null or uninformative.
    LOGGER.error("Error processing graph: {}", ex.getMessage(), ex);
  }
}
示例14: processElement
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Builds a {@code Notification} for one element, serializes it, and emits the serialized form
 * keyed by the original element key.
 *
 * <p>Nothing is emitted, and a warning is logged, when {@code baseUrl} is null or the element
 * value does not deserialize to a dataset. Nothing is emitted either when
 * {@code service.serialize} returns an empty result.
 *
 * @param c the context
 */
@ProcessElement
public void processElement(final ProcessContext c) {
  final KV<String, String> kv = c.element();
  final Dataset dataset = deserialize(kv.getValue());

  if (baseUrl == null || dataset == null) {
    LOGGER.warn("Unable to serialize notification: baseUrl or data values not present for: {}",
        kv.getKey());
    return;
  }

  final Notification notification = new Notification(toExternalTerm(kv.getKey()), dataset);
  LOGGER.debug("Serializing notification for {}", kv.getKey());
  LOGGER.debug("Using baseURL: {} for {}", baseUrl, kv.getKey());
  service.serialize(notification)
      .ifPresent(serialized -> c.output(of(kv.getKey(), serialized)));
}
示例15: processElement
import org.apache.beam.sdk.values.KV; //导入依赖的package包/类
/**
 * Writes the cached form of one resource and passes the element through on success.
 *
 * <p>The element key is resolved to a directory with
 * {@code resourceDirectory(dataLocation, key)}. If no directory is returned, or
 * {@code CachedResource.write} reports failure, an error is logged and nothing is emitted.
 *
 * @param c the context
 */
@ProcessElement
public void processElement(final ProcessContext c) {
  final KV<String, String> kv = c.element();
  final String resourceKey = kv.getKey();

  final File cacheDir = resourceDirectory(dataLocation, resourceKey);
  if (isNull(cacheDir)) {
    LOGGER.error("Error accessing cached resource location for {}", resourceKey);
    return;
  }

  LOGGER.debug("Writing cache for: {}", resourceKey);
  if (!CachedResource.write(cacheDir, resourceKey)) {
    LOGGER.error("Error writing cached resource for {}", resourceKey);
    return;
  }

  c.output(kv);
}