本文整理汇总了Java中com.google.cloud.dataflow.sdk.values.KV类的典型用法代码示例。如果您正苦于以下问题:Java KV类的具体用法?Java KV怎么用?Java KV使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
KV类属于com.google.cloud.dataflow.sdk.values包,在下文中一共展示了KV类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Pipeline entry point: reads rows from the configured BigQuery table, groups them with
 * {@code GroupKeywords}, converts the groups with {@code CreateEntities} and writes the
 * resulting entities through {@code DatastoreIO} to the configured project.
 *
 * @param args command-line arguments, parsed into {@code BigQueryToDatastoreOptions}
 */
public static void main(String[] args) {
  BigQueryToDatastoreOptions options =
      PipelineOptionsFactory.fromArgs(args)
          .withValidation()
          .as(BigQueryToDatastoreOptions.class);

  // Resolve the option values once and log them before building the pipeline.
  String sourceTable = options.getInputTable().get();
  String targetProject = options.getOutputProjectID().get();
  String entityKind = options.getOutputKind().get();
  LOG.info("Input_Table : " + sourceTable);
  LOG.info("ProjectID : " + targetProject);
  LOG.info("Kind : " + entityKind);

  Pipeline pipeline = Pipeline.create(options);

  // Read the table and group its rows by an Integer key.
  PCollection<KV<Integer, Iterable<TableRow>>> groupedRows =
      pipeline
          .apply(BigQueryIO.Read.named("ReadUtterance").from(sourceTable))
          .apply(new GroupKeywords());

  // Turn each group into an entity of the requested kind, then write to Datastore.
  CreateEntities toEntities = new CreateEntities();
  toEntities.setKind(entityKind);
  groupedRows
      .apply(toEntities)
      .apply(DatastoreIO.v1().write().withProjectId(targetProject));

  pipeline.run();
}
示例2: apply
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Exercise 1 skeleton. As written, it applies two {@code ChangeMe} placeholder transforms to
 * {@code gameEvents} and returns the result; the inline comments describe the key/score
 * extraction and per-key sum that are meant to replace the placeholders.
 *
 * @param gameEvents the input collection of game events
 * @return the output of the second applied transform, typed as key/score-sum pairs
 */
@Override
public PCollection<KV<String, Integer>> apply(PCollection<GameEvent> gameEvents) {
// [START EXERCISE 1]:
// JavaDoc: https://cloud.google.com/dataflow/java-sdk/JavaDoc
// Developer Docs: https://cloud.google.com/dataflow/model/par-do
//
// Fill in the code to:
// 1. Extract a KV<String, Integer> from each GameEvent corresponding to the given
// field and the score.
// 2. Compute the sum of the scores for each key.
// 3. Run your pipeline on the Dataflow service.
return gameEvents
// MapElements is a PTransform for mapping a function over the elements of a PCollection.
// MapElements.via() takes a lambda expression defining the function to apply.
// Write the expression that creates key-value pairs, using the KeyField as the key and
// the score as the value. KV.of(key, value) creates a key-value pair. Java erasure means
// we can't determine the output type of our MapElements. We declare the output type
// explicitly using withOutputType. Use the following code to add the output type:
// .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}))
// Placeholder for step 1 (key/score extraction).
.apply(new ChangeMe<>() /* TODO: YOUR CODE GOES HERE */)
// Sum is a family of PTransforms for computing the sum of elements in a PCollection.
// Select the appropriate method to compute the sum over each key.
// Placeholder for step 2 (per-key sum).
.apply(new ChangeMe<>() /* TODO: YOUR CODE GOES HERE */);
// [END EXERCISE 1]:
}
示例3: apply
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Exercise 4 (part 2) skeleton. As written, it only applies
 * {@code Exercise1.ExtractAndSumScore("team")} to the input; the windowing and triggering
 * described in the inline comments still have to be inserted at the TODO marker.
 *
 * @param infos the input collection of game events
 * @return the key/score-sum pairs produced by the "ExtractTeamScore" step
 */
@Override
public PCollection<KV<String, Integer>> apply(PCollection<GameEvent> infos) {
  // [START EXERCISE 4 PART 2]:
  // JavaDoc: https://cloud.google.com/dataflow/java-sdk/JavaDoc
  // Developer Docs: https://cloud.google.com/dataflow/model/par-do
  //
  // Fill in the code to:
  //   1. Window the incoming input into fixed windows of team window duration
  //   2. trigger on time results at the watermark
  //   3. trigger speculative results every ten seconds
  //   4. trigger late data results with a delay of thirty seconds
  PCollection<KV<String, Integer>> teamScores =
      infos
          /* TODO: SOLUTION CODE HERE */
          // Extract and sum teamname/score pairs from the event data.
          .apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"));
  // [END EXERCISE 4 PART 2]:
  return teamScores;
}
示例4: apply
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Windows the incoming game events into fixed windows of {@code teamWindowDuration} and then
 * sums scores per team with {@code Exercise1.ExtractAndSumScore("team")}.
 *
 * @param infos the input collection of game events
 * @return the key/score-sum pairs produced by the "ExtractTeamScore" step
 */
@Override
public PCollection<KV<String, Integer>> apply(PCollection<GameEvent> infos) {
  // Step 1: fixed windows. The trigger fires once the watermark passes the end of the
  // window, with early (speculative) firings TEN_SECONDS after the first element in a pane
  // and late firings THIRTY_SECONDS after the first late element in a pane. Fired panes
  // accumulate, so late data is processed cumulatively.
  PCollection<GameEvent> windowedEvents =
      infos.apply(
          "LeaderboardTeamFixedWindows",
          Window.<GameEvent>into(FixedWindows.of(teamWindowDuration))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_SECONDS))
                      .withLateFirings(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(THIRTY_SECONDS)))
              .withAllowedLateness(allowedLateness)
              .accumulatingFiredPanes());

  // Step 2: extract and sum teamname/score pairs from the windowed events.
  return windowedEvents.apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"));
}
示例5: apply
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
@Override
/**
 * Exercise 5 (part a) skeleton. Sums the scores per user with
 * {@code Sum.integersPerKey()}, then applies a {@code ChangeMe} placeholder to the sums and
 * returns its output. The global-mean side input is still a {@code null} placeholder and is
 * not yet used by any transform.
 *
 * @param userScores user/score pairs
 * @return the output of the placeholder filter applied to the per-user sums
 */
public PCollection<KV<String, Integer>> apply(PCollection<KV<String, Integer>> userScores) {
// [START EXERCISE 5 PART a]:
// Get the sum of scores for each user.
PCollection<KV<String, Integer>> sumScores =
userScores.apply("UserSum", Sum.<String>integersPerKey());
// Extract the score from each element, and use it to find the global mean.
// Use built-in transforms Values and Mean.
// Placeholder: currently null until the exercise code is filled in.
final PCollectionView<Double> globalMeanScore = null; /* TODO: YOUR CODE GOES HERE */
// Filter the user sums using the global mean.
// Developer Docs: https://cloud.google.com/dataflow/model/par-do#side-inputs
//
// Use ParDo with globalMeanScore as a side input and a custom DoFn to keep only users
// with scores that are > (mean * SCORE_WEIGHT)
PCollection<KV<String, Integer>> filtered =
sumScores.apply(new ChangeMe<>() /* TODO: YOUR CODE GOES HERE */);
// [END EXERCISE 5 PART a]:
return filtered;
}
示例6: apply
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Exercise 2 skeleton. As written, it applies two {@code ChangeMe} placeholder transforms to
 * {@code input} and returns the result; the inline comments describe the fixed windowing and
 * the team-score computation that are meant to replace the placeholders.
 *
 * @param input the input collection of game events
 * @return the output of the second applied transform, typed as key/score-sum pairs
 */
@Override
public PCollection<KV<String, Integer>> apply(PCollection<GameEvent> input) {
// [START EXERCISE 2]:
// JavaDoc: https://cloud.google.com/dataflow/java-sdk/JavaDoc
// Developer Docs: https://cloud.google.com/dataflow/model/windowing
//
return input
// Window.into() takes a WindowFn and returns a PTransform that
// applies windowing to the PCollection. FixedWindows.of() returns a
// WindowFn that assigns elements to windows of a fixed size. Use
// these methods to apply fixed windows of size
// this.duration to the PCollection.
// Placeholder for the windowing step.
.apply(new ChangeMe<>() /* TODO: YOUR CODE GOES HERE */)
// Remember the ExtractAndSumScore PTransform from Exercise 1? We
// parameterized it over the key field. Use it here to compute the "team"
// scores.
// Placeholder for the team-score step.
.apply(new ChangeMe<>() /* TODO: YOUR CODE GOES HERE */);
// [END EXERCISE 2]
}
示例7: processElement
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Maps a policy file path to the GCP project it refers to and emits the project together
 * with the policy returned by {@code GCPProject.getPolicy()}.
 *
 * <p>Only project policies are supported: the path must have exactly three segments of the
 * form {@code org_id/project_id/POLICY_FILE}. If fetching the policy throws, the error is
 * logged at WARNING level and nothing is emitted for this element.
 *
 * @param processContext The ProcessContext object that contains context-specific
 *     methods and objects.
 * @throws IOException Declared by the signature; errors raised by {@code getPolicy()} are
 *     caught and logged rather than propagated.
 * @throws GeneralSecurityException Declared by the signature; see {@code IOException}.
 * @throws IllegalArgumentException If the path does not have three segments or its last
 *     segment is not the policy file name.
 */
@Override
public void processElement(ProcessContext processContext)
    throws IOException, GeneralSecurityException {
  List<String> pathSegments = processContext.element();

  // Reject anything that is not org_id/project_id/POLICY_FILE up front.
  boolean isProjectPolicyPath =
      pathSegments.size() == 3
          && pathSegments.get(2).equals(GCPResourcePolicy.getPolicyFile());
  if (!isProjectPolicyPath) {
    throw new IllegalArgumentException("Malformed input to FilePathToLiveState.");
  }

  // Segment 1 is the project id, segment 0 the organization id.
  GCPProject project = new GCPProject(pathSegments.get(1), pathSegments.get(0));

  GCPResourceState livePolicy;
  try {
    livePolicy = project.getPolicy();
  } catch (Exception e) {
    // Best effort: a project whose policy cannot be fetched is skipped, not fatal.
    logger.log(Level.WARNING, "Error getting policy", e);
    return;
  }

  if (livePolicy != null) {
    processContext.output(KV.of((GCPResource) project, livePolicy));
  }
}
示例8: processElement
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Emits a key-value pair of the incoming project and its policy.
 *
 * <p>Projects with a null id, and projects whose policy is null or cannot be fetched, are
 * passed to {@code addToSideOutput} with an explanatory message instead of being emitted on
 * the main output.
 *
 * @param processContext The ProcessContext object that contains processContext-specific
 *     methods and objects.
 */
@Override
public void processElement(ProcessContext processContext) {
  GCPProject project = processContext.element();
  if (project.getId() == null) {
    this.addToSideOutput(processContext, project, "Null project id");
    return;
  }

  GCPResourceState policy = null;
  String failureReason = null;
  try {
    policy = project.getPolicy();
  } catch (Exception e) {
    // Remember the message for the side output; the full exception is logged at FINE.
    failureReason = e.getMessage();
    logger.log(Level.FINE, "Error getting policy", e);
  }

  if (policy != null) {
    processContext.output(KV.of((GCPResource) project, policy));
    return;
  }
  // failureReason stays null when getPolicy() returned null without throwing.
  this.addToSideOutput(
      processContext, project, String.format("Policy error %s", failureReason));
}
示例9: processElement
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Process an element of the type KV<GCPResource, KV<StateSource, GCPResourceState>>.
 *
 * <p>The key is the resource being described (here, the GCP project) and the value is a
 * state of that resource tagged with the StateSource it came from:
 *  - a state that was checked in as known-good, or
 *  - the live state of the resource.
 * States tagged with one StateSource (say, DESIRED) arrive through the side input, and
 * those tagged with the other arrive through the main input.
 *
 * <p>When the side input contains the same resource, a map holding both tagged states is
 * emitted for it; otherwise nothing is emitted.
 *
 * @param context The ProcessContext object that contains context-specific methods and objects.
 */
@Override
public void processElement(ProcessContext context) {
  GCPResource project = context.element().getKey();
  KV<StateSource, GCPResourceState> mainState = context.element().getValue();

  // Nothing to pair with unless the side input also knows this project.
  if (!context.sideInput(this.view).containsKey(project)) {
    return;
  }
  KV<StateSource, GCPResourceState> sideState = context.sideInput(this.view).get(project);

  // Two entries are expected: one per StateSource.
  Map<StateSource, GCPResourceState> statesBySource = new HashMap<>(2);
  statesBySource.put(mainState.getKey(), mainState.getValue());
  statesBySource.put(sideState.getKey(), sideState.getValue());
  context.output(KV.of(project, statesBySource));
}
示例10: processElement
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Process an element of the type KV<GCPResource, KV<StateSource, GCPResourceState>>
 * and output only those states that do not match.
 *
 * <p>The key is the resource being described (here, the GCP project) and the value is a
 * state of that resource tagged with the StateSource it came from:
 *  - a state that was checked in as known-good, or
 *  - the live state of the resource.
 * States tagged with one StateSource (say, DESIRED) arrive through the side input, and
 * those tagged with the other arrive through the main input.
 *
 * <p>A map holding both tagged states is emitted only when the side input contains the same
 * resource and the two states are not equal.
 *
 * @param context The ProcessContext object that contains context-specific methods and objects.
 */
@Override
public void processElement(ProcessContext context) {
  GCPResource project = context.element().getKey();
  KV<StateSource, GCPResourceState> mainState = context.element().getValue();

  // Nothing to compare against unless the side input also knows this project.
  if (!context.sideInput(this.view).containsKey(project)) {
    return;
  }
  KV<StateSource, GCPResourceState> sideState = context.sideInput(this.view).get(project);

  // Matching states are not reported.
  if (mainState.getValue().equals(sideState.getValue())) {
    return;
  }

  // Two entries are expected: one per StateSource.
  Map<StateSource, GCPResourceState> mismatchBySource = new HashMap<>(2);
  mismatchBySource.put(mainState.getKey(), mainState.getValue());
  mismatchBySource.put(sideState.getKey(), sideState.getValue());
  context.output(KV.of(project, mismatchBySource));
}
示例11: getCurrent
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Get the file at the head of the queue.
 *
 * <p>The file's content is fetched through the source, and its path is split on the source's
 * directory delimiter. Failures while fetching are reported as
 * {@code NoSuchElementException} with the underlying exception attached as the cause.
 *
 * @return A Key-Value pair where the key is a list of strings representing the path of
 *     the file and the value is a string representing the content of the file.
 * @throws NoSuchElementException If the queue is empty, or if the file can't be read from
 *     the GCS API.
 */
@Override
public KV<List<String>, String> getCurrent() throws NoSuchElementException {
  // Report an empty queue with the declared exception type rather than letting
  // IndexOutOfBoundsException escape from get(0).
  if (this.currentFiles.isEmpty()) {
    throw new NoSuchElementException("No current file: the file queue is empty");
  }
  String filePath = this.currentFiles.get(0);

  String fileContent;
  try {
    fileContent = this.source.getFileContent(filePath);
  } catch (IOException ioe) {
    // NoSuchElementException(String) leaves the cause unset, so initCause is safe here.
    throw (NoSuchElementException)
        new NoSuchElementException(
                "Object " + filePath + " not found in bucket " + this.source.bucket)
            .initCause(ioe);
  } catch (GeneralSecurityException gse) {
    throw (NoSuchElementException)
        new NoSuchElementException(
                "Cannot access object "
                    + filePath
                    + " in bucket "
                    + this.source.bucket
                    + " due to security reasons")
            .initCause(gse);
  }

  // NOTE(review): String.split treats the delimiter as a regular expression; presumably the
  // delimiter is a plain separator such as "/" — confirm before using regex metacharacters.
  List<String> splitPath = Arrays.asList(filePath.split(this.source.getDirDelimiter()));
  return KV.of(splitPath, fileContent);
}
示例12: testMultipleElements
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Feeds the same sample project through the tester five times and checks that every input
 * produces one (project, policy) pair equal to the sample project and sample policy.
 *
 * @throws IOException declared because the stubbed {@code execute()} call declares it
 */
@Test
public void testMultipleElements() throws IOException {
  int elementCount = 5;
  GCPProject project = getSampleProject("");
  List<GCPProject> projects = new ArrayList<>(elementCount);
  for (int i = 0; i < elementCount; ++i) {
    projects.add(project);
  }
  when(this.getIamPolicy.execute()).thenReturn(getSamplePolicy(1));

  List<KV<GCPResource, GCPResourceState>> results = this.tester.processBatch(projects);

  // assertEquals takes (expected, actual); keeping that order makes failure messages correct.
  assertEquals(elementCount, results.size());
  for (int i = 0; i < elementCount; ++i) {
    assertEquals(getSampleProject(""), results.get(i).getKey());
    assertEquals(getSampleGCPResourcePolicy(project), results.get(i).getValue());
  }
}
示例13: testMultipleElements
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Feeds the same (file path, file content) pair through the tester five times and checks
 * that every input produces one (project, policy) pair equal to the sample project and the
 * sample policy.
 */
@Test
public void testMultipleElements() {
  int elementCount = 5;
  GCPProject project = getSampleProject();
  List<String> filePath = getSampleProjectFilePath(project);
  String fileContent = getSamplePolicyBindingsString(1);
  GCPResourceState policy = getSampleGCPResourcePolicy(project, 1);
  List<KV<List<String>, String>> inputs = new ArrayList<>(elementCount);
  for (int i = 0; i < elementCount; ++i) {
    inputs.add(KV.of(filePath, fileContent));
  }

  List<KV<GCPResource, GCPResourceState>> results = this.tester.processBatch(inputs);

  // assertEquals takes (expected, actual); keeping that order makes failure messages correct.
  assertEquals(elementCount, results.size());
  for (int i = 0; i < elementCount; ++i) {
    assertEquals(project, results.get(i).getKey());
    assertEquals(policy, results.get(i).getValue());
  }
}
示例14: testInvalidFilePathCreatesSideOutput
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Replaces the last segment of the sample policy file path with "POLICY.txt", runs the
 * input through the side-output tester and checks that the error side output holds exactly
 * one error entry for the sample project with the "Invalid policy filepath" message.
 */
@Test
public void testInvalidFilePathCreatesSideOutput() {
  // Arrange: a sample path whose file-name segment has been swapped out.
  List<String> badPath = getSampleProjectFilePath(getSampleProject());
  badPath.set(2, "POLICY.txt");
  String content = getSamplePolicyBindingsString(1);
  GCPProject project = getSampleProject();
  List<KV<List<String>, String>> inputs = Arrays.asList(KV.of(badPath, content));

  // Act: process the batch and collect what was sent to the error tag.
  sideOutputTester.processBatch(inputs);
  List<GCPResourceErrorInfo> actualErrors = sideOutputTester.takeSideOutputElements(errorTag);

  // Assert: one error entry carrying the full offending path.
  String expectedMessage =
      String.format(
          "Invalid policy filepath %s/%s/%s", badPath.get(0), badPath.get(1), badPath.get(2));
  List<GCPResourceErrorInfo> expectedErrors = new ArrayList<>();
  expectedErrors.add(new GCPResourceErrorInfo(project, expectedMessage));
  Assert.assertEquals(expectedErrors, actualErrors);
}
示例15: testValidFile
import com.google.cloud.dataflow.sdk.values.KV; //导入依赖的package包/类
/**
 * Checks that a well-formed path (org / project id / "POLICY") yields exactly one
 * (project, policy) pair, and that the policy is requested for the project id taken from
 * the path.
 *
 * @throws IOException declared because the stubbed {@code execute()} calls declare it
 */
@Test
public void testValidFile() throws IOException {
  String projectId = "sampleProject";
  List<String> filePath = Arrays.asList("sampleOrg", projectId, "POLICY");
  GCPProject project = new GCPProject(projectId);
  GCPResourcePolicy gcpResourcePolicy = getSampleGCPResourcePolicy(project);
  Policy policy = getSamplePolicy();

  // Two stubbed requests: the one for the expected project id returns the sample policy,
  // the catch-all one throws. The specific stub is registered after the catch-all one;
  // presumably the later stubbing wins for a matching id — confirm against Mockito's rules.
  GetIamPolicy correctRequest = mock(GetIamPolicy.class);
  GetIamPolicy wrongRequest = mock(GetIamPolicy.class);
  when(projects.getIamPolicy(anyString(), any(GetIamPolicyRequest.class)))
      .thenReturn(wrongRequest);
  when(projects.getIamPolicy(eq(projectId), any(GetIamPolicyRequest.class)))
      .thenReturn(correctRequest);
  when(correctRequest.execute()).thenReturn(policy);
  when(wrongRequest.execute()).thenThrow(new NoSuchElementException());

  try {
    // assertEquals takes (expected, actual); keeping that order makes failure messages correct.
    assertEquals(Arrays.asList(KV.of(project, gcpResourcePolicy)), tester.processBatch(filePath));
  } catch (IllegalArgumentException ignored) {
    fail("Exception thrown on valid statefile name");
  } catch (NoSuchElementException nse) {
    fail("Tried accessing the wrong project ID");
  }
}