本文整理汇总了Java中com.google.cloud.dataflow.sdk.PipelineResult类的典型用法代码示例。如果您正苦于以下问题:Java PipelineResult类的具体用法?Java PipelineResult怎么用?Java PipelineResult使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
PipelineResult类属于com.google.cloud.dataflow.sdk包,在下文中一共展示了PipelineResult类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: waitToFinish
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Waits for the given pipeline to finish when it is a Dataflow service job.
 *
 * <p>If {@code result} is a {@code DataflowPipelineJob}, the job is added to the list of jobs to
 * cancel, a shutdown hook is registered for that list unless the "keep jobs running" option is
 * set, and this method then blocks on the job while printing its messages to standard output.
 * Any other kind of result (for example the one returned by a direct, in-process runner) is
 * ignored because it does not support waiting.
 *
 * @param result the result returned from running the pipeline
 * @throws RuntimeException if waiting for the job fails; the underlying failure is attached as
 *     the cause
 */
public void waitToFinish(PipelineResult result) {
  if (!(result instanceof DataflowPipelineJob)) {
    // Do nothing if the given PipelineResult doesn't support waitToFinish(),
    // such as EvaluationResults returned by DirectPipelineRunner.
    return;
  }

  final DataflowPipelineJob job = (DataflowPipelineJob) result;
  jobsToCancel.add(job);
  if (!options.as(DataflowExampleOptions.class).getKeepJobsRunning()) {
    addShutdownHook(jobsToCancel);
  }

  try {
    // NOTE(review): the negative timeout presumably means "wait indefinitely" - confirm against
    // DataflowPipelineJob.waitToFinish.
    job.waitToFinish(-1, TimeUnit.SECONDS, new MonitoringUtil.PrintHandler(System.out));
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // Restore the interrupt flag so callers further up the stack can still observe it.
      Thread.currentThread().interrupt();
    }
    // Keep the original exception as the cause so the real failure is not lost.
    throw new RuntimeException("Failed to wait for job to finish: " + job.getJobId(), e);
  }
}
示例2: run
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Builds the Dataflow pipeline for a Docker workflow and runs it.
 *
 * @param w the workflow to run; its arguments must carry a non-null project id
 * @param a workflow arguments, forwarded unchanged to {@code DataflowFactory.dataflow}
 * @param o Dataflow pipeline options, forwarded unchanged to {@code DataflowFactory.dataflow}
 * @throws IOException declared by this method; presumably raised while building the pipeline
 * @throws IllegalArgumentException if the workflow has no project id
 */
public static void run(Workflow w, Map<String, WorkflowArgs> a, DataflowPipelineOptions o)
    throws IOException {
  LOG.info("Running workflow graph");

  final boolean projectIdMissing = w.getArgs().getProjectId() == null;
  if (projectIdMissing) {
    throw new IllegalArgumentException("Project id is required");
  }

  final Pipeline workflowPipeline = DataflowFactory.dataflow(w, a, o);
  LOG.info("Created Dataflow pipeline");
  LOG.debug(w.toString());

  final PipelineResult outcome = workflowPipeline.run();
  LOG.info("Dataflow pipeline completed");
  LOG.info("Result state: " + outcome.getState());
}
示例3: run
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Runs the pipeline and stores the total value of the "total enforced states" aggregator in
 * {@code enforcedStates}.
 *
 * @return this enforcer, so calls can be chained
 * @throws AggregatorRetrievalException declared by this method; presumably raised when the
 *     aggregator values cannot be read from the pipeline result
 */
public DesiredStateEnforcer run() throws AggregatorRetrievalException {
  final PipelineResult pipelineResult = this.pipeline.run();

  final AggregatorValues<Long> enforcedTotals =
      pipelineResult.getAggregatorValues(
          discrepancyAutoFixMessenger.getTotalEnforcedStatesAggregator());

  // Fold the per-step values into a single total using the aggregator's own combine function.
  this.enforcedStates =
      enforcedTotals.getTotalValue(
          discrepancyAutoFixMessenger.getTotalEnforcedStatesAggregator().getCombineFn());

  return this;
}
示例4: main
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Exercise 6 skeleton: builds a streaming pipeline that reads game events, extracts
 * user/score pairs, and is meant to compute mean session durations and append them to a
 * BigQuery table, then runs the pipeline.
 *
 * <p>The two {@code ChangeMe} transforms are placeholders that the reader is expected to replace
 * with the session windowing and the re-windowing steps.
 *
 * @param args command-line arguments parsed into {@code Exercise6Options}
 * @throws Exception declared by this method; nothing in the body catches failures
 */
public static void main(String[] args) throws Exception {
Exercise6Options options =
PipelineOptionsFactory.fromArgs(args).withValidation().as(Exercise6Options.class);
// Enforce that this pipeline is always run in streaming mode.
options.setStreaming(true);
// Select the Dataflow service runner.
// NOTE(review): the original comment said this lets the pipeline be cancelled automatically;
// nothing visible in this method establishes that - confirm.
options.setRunner(DataflowPipelineRunner.class);
Pipeline pipeline = Pipeline.create(options);
// Destination BigQuery table, taken entirely from the options.
TableReference sessionsTable = new TableReference();
sessionsTable.setDatasetId(options.getOutputDataset());
sessionsTable.setProjectId(options.getProject());
sessionsTable.setTableId(options.getOutputTableName());
PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));
// Extract username/score pairs from the event stream
PCollection<KV<String, Integer>> userEvents =
rawEvents.apply(
"ExtractUserScore",
MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
.withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
// [START EXERCISE 6]:
// Detect user sessions-- that is, a burst of activity separated by a gap from further
// activity. Find and record the mean session lengths.
// This information could help the game designers track the changing user engagement
// as their set of games changes.
userEvents
// Window the user events into sessions with gap options.getSessionGap() minutes. Make sure
// to use an outputTimeFn that sets the output timestamp to the end of the window. This will
// allow us to compute means on sessions based on their end times, rather than their start
// times.
.apply(
/* TODO: YOUR CODE GOES HERE */
new ChangeMe<PCollection<KV<String, Integer>>, KV<String, Integer>>())
// For this use, we care only about the existence of the session, not any particular
// information aggregated over it, so the following is an efficient way to do that.
.apply(Combine.perKey(x -> 0))
// Get the duration per session.
.apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
// Re-window to process groups of session sums according to when the sessions complete.
// In streaming we don't just ask "what is the mean value" we must ask "what is the mean
// value for some window of time". To compute periodic means of session durations, we
// re-window the session durations.
.apply(
/* TODO: YOUR CODE GOES HERE */
new ChangeMe<PCollection<Integer>, Integer>())
// Find the mean session duration in each window.
.apply(Mean.<Integer>globally().withoutDefaults())
// Write this info to a BigQuery table.
.apply(ParDo.named("FormatSessions").of(new FormatSessionWindowFn()))
.apply(
BigQueryIO.Write.to(sessionsTable)
.withSchema(FormatSessionWindowFn.getSchema())
.withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
.withWriteDisposition(WriteDisposition.WRITE_APPEND));
// [END EXERCISE 6]:
// Run the pipeline.
// NOTE(review): the returned result is never used, so nothing in this method visibly waits
// for the pipeline to finish or handles cancellation requests - confirm intended behavior.
PipelineResult result = pipeline.run();
}
示例5: main
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Builds and runs a streaming pipeline that reads game events and appends windowed team scores
 * and user scores to two BigQuery tables (the configured table name suffixed with
 * {@code _team} and {@code _user}).
 *
 * @param args command-line arguments parsed into {@code Exercise4Options}
 * @throws Exception declared by this method; nothing in the body catches failures
 */
public static void main(String[] args) throws Exception {
  final Exercise4Options opts =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Exercise4Options.class);
  // This pipeline is always a streaming pipeline, run by the Dataflow service runner.
  opts.setStreaming(true);
  opts.setRunner(DataflowPipelineRunner.class);
  final Pipeline p = Pipeline.create(opts);

  // Output table for per-team scores.
  final TableReference teamScoresTable = new TableReference();
  teamScoresTable.setDatasetId(opts.getOutputDataset());
  teamScoresTable.setProjectId(opts.getProject());
  teamScoresTable.setTableId(opts.getOutputTableName() + "_team");

  // Output table for per-user scores.
  final TableReference userScoresTable = new TableReference();
  userScoresTable.setDatasetId(opts.getOutputDataset());
  userScoresTable.setProjectId(opts.getProject());
  userScoresTable.setTableId(opts.getOutputTableName() + "_user");

  final PCollection<GameEvent> events = p.apply(new Exercise3.ReadGameEvents(opts));

  // Branch 1: team scores, formatted and appended to the team table.
  final Duration teamWindow = Duration.standardMinutes(opts.getTeamWindowDuration());
  final Duration teamLateness = Duration.standardMinutes(opts.getAllowedLateness());
  events
      .apply("CalculateTeamScores", new CalculateTeamScores(teamWindow, teamLateness))
      .apply(ParDo.named("FormatTeamScores").of(new FormatTeamScoreFn()))
      .apply(
          BigQueryIO.Write.to(teamScoresTable)
              .withSchema(FormatTeamScoreFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  // Branch 2: user scores, formatted and appended to the user table.
  final Duration userLateness = Duration.standardMinutes(opts.getAllowedLateness());
  events
      .apply("CalculateUserScores", new CalculateUserScores(userLateness))
      .apply(ParDo.named("FormatUserScores").of(new FormatUserScoreFn()))
      .apply(
          BigQueryIO.Write.to(userScoresTable)
              .withSchema(FormatUserScoreFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  // Launch the pipeline; the returned handle is kept in a local but not used further here.
  final PipelineResult runResult = p.run();
}
示例6: main
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Exercise 7 skeleton: sets up a streaming pipeline intended to detect "bad" users from the
 * latency between game events and play events, and to append one row per bad user to a
 * BigQuery table (the configured table name suffixed with {@code _bad_users}).
 *
 * <p>The four intermediate collections are left as {@code null} placeholders and the
 * {@code DetectBadUsers} function throws; the reader is expected to fill these in before the
 * pipeline can be built and run.
 *
 * @param args command-line arguments parsed into {@code Exercise7Options}
 * @throws Exception declared by this method; nothing in the body catches failures
 */
public static void main(String[] args) throws Exception {
  Exercise7Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Exercise7Options.class);
  // Enforce that this pipeline is always run in streaming mode.
  options.setStreaming(true);
  // Select the Dataflow service runner.
  options.setRunner(DataflowPipelineRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  TableReference badUserTable = new TableReference();
  badUserTable.setDatasetId(options.getOutputDataset());
  badUserTable.setProjectId(options.getProject());
  badUserTable.setTableId(options.getOutputTableName() + "_bad_users");
  // 1. Read game events with message id and timestamp
  // 2. Parse events
  // 3. Key by event id
  // 4. Sessionize.
  PCollection<KV<String, GameEvent>> sessionedEvents = null; /* TODO: YOUR CODE GOES HERE */
  // 1. Read play events with message id and timestamp
  // 2. Parse events
  // 3. Key by event id
  // 4. Sessionize.
  PCollection<KV<String, PlayEvent>> sessionedPlayEvents = null; /* TODO: YOUR CODE GOES HERE */
  // 1. Join events
  // 2. Compute latency using ComputeLatencyFn
  PCollection<KV<String, Long>> userLatency = null; /* TODO: YOUR CODE GOES HERE */
  // 1. Get the values of userLatencies
  // 2. Re-window into GlobalWindows with periodic repeated triggers
  // 3. Compute global approximate quantiles with fanout
  PCollectionView<List<Long>> globalQuantiles = null; /* TODO: YOUR CODE GOES HERE */
  userLatency
      // Use the computed latency distribution as a side-input to filter out likely bad users.
      .apply(
          "DetectBadUsers",
          ParDo.withSideInputs(globalQuantiles)
              .of(
                  new DoFn<KV<String, Long>, String>() {
                    @Override
                    public void processElement(ProcessContext c) {
                      /* TODO: YOUR CODE GOES HERE */
                      throw new RuntimeException("Not implemented");
                    }
                  }))
      // We want to only emit a single BigQuery row for every bad user. To do this, we
      // re-key by user, then window globally and trigger on the first element for each key.
      .apply(
          "KeyByUser",
          WithKeys.of((String user) -> user).withKeyType(TypeDescriptor.of(String.class)))
      .apply(
          "GlobalWindowsTriggerOnFirst",
          Window.<KV<String, String>>into(new GlobalWindows())
              .triggering(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardSeconds(10)))
              .accumulatingFiredPanes())
      .apply("GroupByUser", GroupByKey.<String, String>create())
      .apply("FormatBadUsers", ParDo.of(new FormatBadUserFn()))
      .apply(
          "WriteBadUsers",
          BigQueryIO.Write.to(badUserTable)
              .withSchema(FormatBadUserFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  // Run the pipeline.
  // NOTE(review): the returned result is never used, so nothing in this method visibly waits
  // for the pipeline to finish or handles cancellation requests - confirm intended behavior.
  PipelineResult result = pipeline.run();
}
示例7: main
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Builds and runs a streaming pipeline that groups each user's events into sessions, measures
 * each session, and appends the mean session duration per fixed window to a BigQuery table.
 *
 * @param args command-line arguments parsed into {@code Exercise6Options}
 * @throws Exception declared by this method; nothing in the body catches failures
 */
public static void main(String[] args) throws Exception {
  final Exercise6Options opts =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Exercise6Options.class);
  // This pipeline is always a streaming pipeline, run by the Dataflow service runner.
  opts.setStreaming(true);
  opts.setRunner(DataflowPipelineRunner.class);
  final Pipeline p = Pipeline.create(opts);

  // Destination table, taken entirely from the options.
  final TableReference outputTable = new TableReference();
  outputTable.setDatasetId(opts.getOutputDataset());
  outputTable.setProjectId(opts.getProject());
  outputTable.setTableId(opts.getOutputTableName());

  final PCollection<GameEvent> events = p.apply(new Exercise3.ReadGameEvents(opts));

  // Reduce every event to a (user, score) pair.
  final PCollection<KV<String, Integer>> scoresByUser =
      events.apply(
          "ExtractUserScore",
          MapElements.via((GameEvent event) -> KV.of(event.getUser(), event.getScore()))
              .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));

  final Duration sessionGap = Duration.standardMinutes(opts.getSessionGap());
  final Duration meanWindow = Duration.standardMinutes(opts.getUserActivityWindowDuration());

  // A session is a burst of activity by one user, closed by a gap of inactivity. Output
  // timestamps are set to the end of the session window.
  final PCollection<Integer> sessionLengths =
      scoresByUser
          .apply(
              Window.named("WindowIntoSessions")
                  .<KV<String, Integer>>into(Sessions.withGapDuration(sessionGap))
                  .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
          // Only the existence of the session matters, so collapse each key to a constant.
          .apply(Combine.perKey(ignored -> 0))
          // Turn each session into its duration.
          .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()));

  sessionLengths
      // Regroup the durations into fixed windows so that a mean can be taken per window.
      .apply(
          Window.named("WindowToExtractSessionMean").<Integer>into(FixedWindows.of(meanWindow)))
      .apply(Mean.<Integer>globally().withoutDefaults())
      // Format and append the means to BigQuery.
      .apply(ParDo.named("FormatSessions").of(new FormatSessionWindowFn()))
      .apply(
          BigQueryIO.Write.to(outputTable)
              .withSchema(FormatSessionWindowFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  // Launch the pipeline; the returned handle is kept in a local but not used further here.
  final PipelineResult runResult = p.run();
}
示例8: main
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Builds and runs a streaming pipeline that identifies suspected spammer users per fixed
 * window, drops their events via a side input, sums the remaining scores per team, and appends
 * the results to a BigQuery table.
 *
 * @param args command-line arguments parsed into {@code Exercise5Options}
 * @throws Exception declared by this method; nothing in the body catches failures
 */
public static void main(String[] args) throws Exception {
  final Exercise5Options opts =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Exercise5Options.class);
  // This pipeline is always a streaming pipeline, run by the Dataflow service runner.
  opts.setStreaming(true);
  opts.setRunner(DataflowPipelineRunner.class);
  final Pipeline p = Pipeline.create(opts);

  // Destination table, taken entirely from the options.
  final TableReference outputTable = new TableReference();
  outputTable.setDatasetId(opts.getOutputDataset());
  outputTable.setProjectId(opts.getProject());
  outputTable.setTableId(opts.getOutputTableName());

  final PCollection<GameEvent> events = p.apply(new Exercise3.ReadGameEvents(opts));

  // Reduce every event to a (user, score) pair.
  final PCollection<KV<String, Integer>> scoresByUser =
      events.apply(
          "ExtractUserScore",
          MapElements.via((GameEvent event) -> KV.of(event.getUser(), event.getScore()))
              .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));

  // Per fixed window, keep only the users flagged by CalculateSpammyUsers and expose them as a
  // map-shaped side input for the team-score branch below.
  final PCollectionView<Map<String, Integer>> suspectedSpammers =
      scoresByUser
          .apply(
              Window.named("FixedWindowsUser")
                  .<KV<String, Integer>>into(
                      FixedWindows.of(Duration.standardMinutes(opts.getFixedWindowDuration()))))
          .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
          .apply("CreateSpammersView", View.<String, Integer>asMap());

  // Window the raw events with the same fixed-window duration.
  final PCollection<GameEvent> windowedEvents =
      events.apply(
          Window.named("WindowIntoFixedWindows")
              .<GameEvent>into(
                  FixedWindows.of(Duration.standardMinutes(opts.getFixedWindowDuration()))));

  // Drop every event whose (trimmed) user name appears in the spammers map.
  final PCollection<GameEvent> legitimateEvents =
      windowedEvents.apply(
          ParDo.named("FilterOutSpammers")
              .withSideInputs(suspectedSpammers)
              .of(
                  new DoFn<GameEvent, GameEvent>() {
                    @Override
                    public void processElement(ProcessContext c) {
                      final Map<String, Integer> spammers = c.sideInput(suspectedSpammers);
                      final GameEvent event = c.element();
                      if (spammers.get(event.getUser().trim()) != null) {
                        return;
                      }
                      c.output(event);
                    }
                  }));

  legitimateEvents
      // Sum the scores per team.
      .apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"))
      // Format and append the sums to BigQuery.
      .apply(ParDo.named("FormatTeamWindows").of(new FormatTeamWindowFn()))
      .apply(
          BigQueryIO.Write.to(outputTable)
              .withSchema(FormatTeamWindowFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  // Launch the pipeline; the returned handle is kept in a local but not used further here.
  final PipelineResult runResult = p.run();
}
示例9: main
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Exercise 5 (part b) skeleton: builds a streaming pipeline that derives a side input of
 * suspected spammer users and is meant to filter their events out before summing scores per
 * team and appending the sums to a BigQuery table, then runs the pipeline.
 *
 * <p>The {@code ChangeMe} transform is a placeholder that the reader is expected to replace with
 * a {@code ParDo} that uses {@code spammersView} as a side input.
 *
 * @param args command-line arguments parsed into {@code Exercise5Options}
 * @throws Exception declared by this method; nothing in the body catches failures
 */
public static void main(String[] args) throws Exception {
Exercise5Options options =
PipelineOptionsFactory.fromArgs(args).withValidation().as(Exercise5Options.class);
// Enforce that this pipeline is always run in streaming mode.
options.setStreaming(true);
// Select the Dataflow service runner.
// NOTE(review): the original comment said this lets the pipeline be cancelled automatically;
// nothing visible in this method establishes that - confirm.
options.setRunner(DataflowPipelineRunner.class);
Pipeline pipeline = Pipeline.create(options);
// Destination BigQuery table, taken entirely from the options.
TableReference teamTable = new TableReference();
teamTable.setDatasetId(options.getOutputDataset());
teamTable.setProjectId(options.getProject());
teamTable.setTableId(options.getOutputTableName());
PCollection<GameEvent> rawEvents = pipeline.apply(new Exercise3.ReadGameEvents(options));
// Extract username/score pairs from the event stream
PCollection<KV<String, Integer>> userEvents =
rawEvents.apply(
"ExtractUserScore",
MapElements.via((GameEvent gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
.withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
// Calculate the total score per user over fixed windows, and
// cumulative updates for late data.
final PCollectionView<Map<String, Integer>> spammersView =
userEvents
.apply(
Window.named("FixedWindowsUser")
.<KV<String, Integer>>into(
FixedWindows.of(
Duration.standardMinutes(options.getFixedWindowDuration()))))
// Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
// These might be robots/spammers.
.apply("CalculateSpammyUsers", new CalculateSpammyUsers())
// Derive a view from the collection of spammer users. It will be used as a side input
// in calculating the team score sums, below.
.apply("CreateSpammersView", View.<String, Integer>asMap());
// [START EXERCISE 5 PART b]:
// Calculate the total score per team over fixed windows,
// and emit cumulative updates for late data. Uses the side input derived above-- the set of
// suspected robots-- to filter out scores from those users from the sum.
// Write the results to BigQuery.
rawEvents
.apply(
Window.named("WindowIntoFixedWindows")
.<GameEvent>into(
FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration()))))
// Filter out the detected spammer users, using the side input derived above.
// Use ParDo with spammersView side input to filter out spammers.
.apply(/* TODO: YOUR CODE GOES HERE */ new ChangeMe<PCollection<GameEvent>, GameEvent>())
// Extract and sum teamname/score pairs from the event data.
.apply("ExtractTeamScore", new Exercise1.ExtractAndSumScore("team"))
// Write the result to BigQuery
.apply(ParDo.named("FormatTeamWindows").of(new FormatTeamWindowFn()))
.apply(
BigQueryIO.Write.to(teamTable)
.withSchema(FormatTeamWindowFn.getSchema())
.withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
.withWriteDisposition(WriteDisposition.WRITE_APPEND));
// [END EXERCISE 5 PART b]:
// Run the pipeline.
// NOTE(review): the returned result is never used, so nothing in this method visibly waits
// for the pipeline to finish or handles cancellation requests - confirm intended behavior.
PipelineResult result = pipeline.run();
}
示例10: main
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Builds and runs a windowed word-count pipeline that reads either from a PubSub topic
 * (unbounded) or from a text file (bounded), counts words per fixed window, and writes the
 * counts to BigQuery. After launching the pipeline it hands the result to
 * {@code DataflowExampleUtils.mockUnboundedSource}.
 *
 * @param args command-line arguments parsed into {@code Options}
 * @throws IOException declared by this method; nothing in the body catches failures
 */
public static void main(String[] args) throws IOException {
  final Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  opts.setBigQuerySchema(getSchema());

  // DataflowExampleUtils creates the necessary input sources to simplify execution of this
  // Pipeline.
  final DataflowExampleUtils exampleUtils = new DataflowExampleUtils(opts, opts.isUnbounded());
  final Pipeline p = Pipeline.create(opts);

  // Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
  // unbounded input source.
  final PCollection<String> lines;
  if (opts.isUnbounded()) {
    LOG.info("Reading from PubSub.");
    // Concept #3: Read from the PubSub topic. A topic will be created if it wasn't specified
    // as an argument. The data elements' timestamps will come from the pubsub injection.
    lines = p.apply(PubsubIO.Read.topic(opts.getPubsubTopic()));
  } else {
    // Bounded case: read from the GCS file.
    // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
    // See AddTimestampFn for more detail on this.
    lines =
        p.apply(TextIO.Read.from(opts.getInputFile())).apply(ParDo.of(new AddTimestampFn()));
  }

  // Concept #4: Window into fixed windows. The fixed window size for this example defaults to
  // 1 minute (you can change this with a command-line option). See the documentation for more
  // information on how fixed windows work, and for information on the other types of windowing
  // available (e.g., sliding windows).
  //
  // Concept #5: Re-use our existing CountWords transform that does not have knowledge of
  // windows over a PCollection containing windowed values.
  final PCollection<KV<String, Long>> countsPerWindow =
      lines
          .apply(
              Window.<String>into(
                  FixedWindows.of(Duration.standardMinutes(opts.getWindowSize()))))
          .apply(new WordCount.CountWords());

  // Concept #6: Format the results for a BigQuery table, then write to BigQuery.
  // The BigQuery output source supports both bounded and unbounded data.
  countsPerWindow
      .apply(ParDo.of(new FormatAsTableRowFn()))
      .apply(BigQueryIO.Write.to(getTableReference(opts)).withSchema(getSchema()));

  final PipelineResult runResult = p.run();

  // To mock unbounded input from PubSub, we'll now start an auxiliary 'injector' pipeline that
  // runs for a limited time, and publishes to the input PubSub topic.
  //
  // With an unbounded input source, you will need to explicitly shut down this pipeline when
  // you are done with it, so that you do not continue to be charged for the instances. You can
  // do this via a ctrl-C from the command line, or from the developer's console UI for Dataflow
  // pipelines. The PubSub topic will also be deleted at this time.
  exampleUtils.mockUnboundedSource(opts.getInputFile(), runResult);
}
示例11: mockUnboundedSource
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Starts the auxiliary injector pipeline (if needed), then waits for the given pipeline to
 * finish.
 *
 * @param inputFile passed to {@code startInjectorIfNeeded}; presumably the file the injector
 *     reads its data from - confirm against that method
 * @param result the result of the main pipeline, passed to {@code waitToFinish}
 */
public void mockUnboundedSource(String inputFile, PipelineResult result) {
// The injector is started first so that it is running while this call waits on the pipeline.
startInjectorIfNeeded(inputFile);
waitToFinish(result);
}
示例12: PipelineRunnerType
import com.google.cloud.dataflow.sdk.PipelineResult; //导入依赖的package包/类
/**
 * Creates a runner type that pairs a pipeline runner class with a piece of text.
 *
 * @param runner the pipeline runner class this type stands for
 * @param doc text stored alongside the runner; presumably a human-readable description
 */
PipelineRunnerType(
    Class<? extends PipelineRunner<? extends PipelineResult>> runner, String doc) {
  this.doc = doc;
  this.runner = runner;
}