This article collects typical usage examples of the Python method apache_beam.metrics.execution.MetricsEnvironment.set_metrics_supported. If you are wondering what MetricsEnvironment.set_metrics_supported does, how to call it, or what real uses look like, the curated examples below should help; see also the containing class, apache_beam.metrics.execution.MetricsEnvironment.
Seven code examples of MetricsEnvironment.set_metrics_supported are shown below, ordered by popularity by default.
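Before the examples, here is a minimal sketch of the call itself. It assumes a Beam release contemporary with the snippets below (an early 2.x line, before the runner/metrics refactoring), where MetricsEnvironment is process-global state that runners toggle before executing a pipeline:

from apache_beam.metrics.execution import MetricsEnvironment

# A runner advertises metrics support before any bundle executes;
# passing False signals that metric values will not be collected or
# queryable from the pipeline result.
MetricsEnvironment.set_metrics_supported(True)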
Example 1: run_pipeline
# Required import: from apache_beam.metrics.execution import MetricsEnvironment [as alias]
# Or: from apache_beam.metrics.execution.MetricsEnvironment import set_metrics_supported [as alias]
def run_pipeline(self, pipeline):
  """Execute the entire pipeline and return a DirectPipelineResult."""
  # TODO: Move imports to top. The Pipeline <-> Runner dependency causes
  # problems with resolving imports when they are at the top.
  # pylint: disable=wrong-import-position
  from apache_beam.pipeline import PipelineVisitor
  from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import \
      ConsumerTrackingPipelineVisitor
  from apache_beam.runners.direct.evaluation_context import EvaluationContext
  from apache_beam.runners.direct.executor import Executor
  from apache_beam.runners.direct.transform_evaluator import \
      TransformEvaluatorRegistry
  from apache_beam.testing.test_stream import TestStream

  # Perform configured PTransform overrides.
  pipeline.replace_all(_get_transform_overrides(pipeline.options))

  # If the TestStream I/O is used, use a mock test clock.
  class _TestStreamUsageVisitor(PipelineVisitor):
    """Visitor determining whether a Pipeline uses a TestStream."""

    def __init__(self):
      self.uses_test_stream = False

    def visit_transform(self, applied_ptransform):
      if isinstance(applied_ptransform.transform, TestStream):
        self.uses_test_stream = True

  visitor = _TestStreamUsageVisitor()
  pipeline.visit(visitor)
  clock = TestClock() if visitor.uses_test_stream else RealClock()

  MetricsEnvironment.set_metrics_supported(True)
  logging.info('Running pipeline with DirectRunner.')
  self.consumer_tracking_visitor = ConsumerTrackingPipelineVisitor()
  pipeline.visit(self.consumer_tracking_visitor)

  evaluation_context = EvaluationContext(
      pipeline._options,
      BundleFactory(stacked=pipeline._options.view_as(DirectOptions)
                    .direct_runner_use_stacked_bundle),
      self.consumer_tracking_visitor.root_transforms,
      self.consumer_tracking_visitor.value_to_consumers,
      self.consumer_tracking_visitor.step_names,
      self.consumer_tracking_visitor.views,
      clock)

  executor = Executor(self.consumer_tracking_visitor.value_to_consumers,
                      TransformEvaluatorRegistry(evaluation_context),
                      evaluation_context)
  # DirectRunner does not support injecting PipelineOptions values at runtime.
  RuntimeValueProvider.set_runtime_options({})

  # Start the executor. This is a non-blocking call; it starts execution
  # in background threads and returns.
  executor.start(self.consumer_tracking_visitor.root_transforms)
  result = DirectPipelineResult(executor, evaluation_context)

  return result
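Because this runner enables metrics support, a caller can query metric values from the returned result. The following driver is a minimal sketch of that round trip; the pipeline contents, namespace 'example', and counter name 'elements' are illustrative, not taken from the source above:

import apache_beam as beam
from apache_beam.metrics import Metrics
from apache_beam.metrics.metric import MetricsFilter

class CountingFn(beam.DoFn):
  def __init__(self):
    # 'example'/'elements' are hypothetical namespace and counter names.
    self.elements = Metrics.counter('example', 'elements')

  def process(self, element):
    self.elements.inc()
    yield element

p = beam.Pipeline(runner='DirectRunner')
_ = p | beam.Create([1, 2, 3]) | beam.ParDo(CountingFn())
result = p.run()
result.wait_until_finish()

# Querying works because DirectRunner called set_metrics_supported(True).
counters = result.metrics().query(
    MetricsFilter().with_name('elements'))['counters']
print(counters[0].committed if counters else 'no counters')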
Example 2: run_pipeline
# Required import: from apache_beam.metrics.execution import MetricsEnvironment [as alias]
# Or: from apache_beam.metrics.execution.MetricsEnvironment import set_metrics_supported [as alias]
def run_pipeline(self, pipeline):
  """Execute the entire pipeline and return a DirectPipelineResult."""
  # Perform configured PTransform overrides.
  pipeline.replace_all(self._ptransform_overrides)

  # TODO: Move imports to top. The Pipeline <-> Runner dependency causes
  # problems with resolving imports when they are at the top.
  # pylint: disable=wrong-import-position
  from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import \
      ConsumerTrackingPipelineVisitor
  from apache_beam.runners.direct.evaluation_context import EvaluationContext
  from apache_beam.runners.direct.executor import Executor
  from apache_beam.runners.direct.transform_evaluator import \
      TransformEvaluatorRegistry

  MetricsEnvironment.set_metrics_supported(True)
  logging.info('Running pipeline with DirectRunner.')
  self.consumer_tracking_visitor = ConsumerTrackingPipelineVisitor()
  pipeline.visit(self.consumer_tracking_visitor)
  clock = TestClock() if self._use_test_clock else RealClock()

  evaluation_context = EvaluationContext(
      pipeline._options,
      BundleFactory(stacked=pipeline._options.view_as(DirectOptions)
                    .direct_runner_use_stacked_bundle),
      self.consumer_tracking_visitor.root_transforms,
      self.consumer_tracking_visitor.value_to_consumers,
      self.consumer_tracking_visitor.step_names,
      self.consumer_tracking_visitor.views,
      clock)
  evaluation_context.use_pvalue_cache(self._cache)

  executor = Executor(self.consumer_tracking_visitor.value_to_consumers,
                      TransformEvaluatorRegistry(evaluation_context),
                      evaluation_context)
  # DirectRunner does not support injecting PipelineOptions values at runtime.
  RuntimeValueProvider.set_runtime_options({})

  # Start the executor. This is a non-blocking call; it starts execution
  # in background threads and returns.
  executor.start(self.consumer_tracking_visitor.root_transforms)
  result = DirectPipelineResult(executor, evaluation_context)

  if self._cache:
    # We are running in eager mode; block until pipeline execution
    # completes in order to have full results in the cache.
    result.wait_until_finish()
    self._cache.finalize()

  return result
Example 3: run
# Required import: from apache_beam.metrics.execution import MetricsEnvironment [as alias]
# Or: from apache_beam.metrics.execution.MetricsEnvironment import set_metrics_supported [as alias]
def run(self, pipeline):
  """Execute the entire pipeline and return a DirectPipelineResult."""
  # TODO: Move imports to top. The Pipeline <-> Runner dependency causes
  # problems with resolving imports when they are at the top.
  # pylint: disable=wrong-import-position
  from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import \
      ConsumerTrackingPipelineVisitor
  from apache_beam.runners.direct.evaluation_context import EvaluationContext
  from apache_beam.runners.direct.executor import Executor
  from apache_beam.runners.direct.transform_evaluator import \
      TransformEvaluatorRegistry

  MetricsEnvironment.set_metrics_supported(True)
  logging.info('Running pipeline with DirectRunner.')
  self.visitor = ConsumerTrackingPipelineVisitor()
  pipeline.visit(self.visitor)

  evaluation_context = EvaluationContext(
      pipeline.options,
      BundleFactory(stacked=pipeline.options.view_as(DirectOptions)
                    .direct_runner_use_stacked_bundle),
      self.visitor.root_transforms,
      self.visitor.value_to_consumers,
      self.visitor.step_names,
      self.visitor.views)
  evaluation_context.use_pvalue_cache(self._cache)

  executor = Executor(self.visitor.value_to_consumers,
                      TransformEvaluatorRegistry(evaluation_context),
                      evaluation_context)

  # Start the executor. This is a non-blocking call; it starts execution
  # in background threads and returns.
  if pipeline.options:
    RuntimeValueProvider.set_runtime_options(pipeline.options._options_id, {})
  executor.start(self.visitor.root_transforms)
  result = DirectPipelineResult(executor, evaluation_context)

  if self._cache:
    # We are running in eager mode; block until pipeline execution
    # completes in order to have full results in the cache.
    result.wait_until_finish()
    self._cache.finalize()

  # Unset runtime options after the pipeline finishes.
  # TODO: Move this to a post-finish hook and clean up for all cases.
  if pipeline.options:
    RuntimeValueProvider.unset_runtime_options(pipeline.options._options_id)

  return result
Example 4: run_pipeline
# Required import: from apache_beam.metrics.execution import MetricsEnvironment [as alias]
# Or: from apache_beam.metrics.execution.MetricsEnvironment import set_metrics_supported [as alias]
def run_pipeline(self, pipeline, options):
  MetricsEnvironment.set_metrics_supported(False)
  RuntimeValueProvider.set_runtime_options({})

  # This is sometimes needed if type checking is disabled, to enforce that
  # the inputs (and outputs) of GroupByKey operations are known to be KVs.
  from apache_beam.runners.dataflow.dataflow_runner import DataflowRunner
  pipeline.visit(DataflowRunner.group_by_key_input_visitor())

  self._bundle_repeat = self._bundle_repeat or options.view_as(
      pipeline_options.DirectOptions).direct_runner_bundle_repeat
  self._profiler_factory = profiler.Profile.factory_from_options(
      options.view_as(pipeline_options.ProfilingOptions))

  return self.run_via_runner_api(pipeline.to_runner_api(
      default_environment=self._default_environment))
Example 5: run_pipeline
# Required import: from apache_beam.metrics.execution import MetricsEnvironment [as alias]
# Or: from apache_beam.metrics.execution.MetricsEnvironment import set_metrics_supported [as alias]
def run_pipeline(self, pipeline):
  MetricsEnvironment.set_metrics_supported(self.has_metrics_support())
  # List of map tasks. Each map task is a list of
  # (stage_name, operation_specs.WorkerOperation) instructions.
  self.map_tasks = []
  # Map of pvalues to
  # (map_task_index, producer_operation_index, producer_output_index).
  self.outputs = {}
  # Unique mappings of PCollections to strings.
  self.side_input_labels = collections.defaultdict(
      lambda: str(len(self.side_input_labels)))
  # Mapping of map task indices to all map tasks that must precede them.
  self.dependencies = collections.defaultdict(set)

  # Visit the graph, building up the map_tasks and their metadata.
  super(MapTaskExecutorRunner, self).run_pipeline(pipeline)

  # Now run the tasks in topological order.
  def compute_depth_map(deps):
    memoized = {}

    def compute_depth(x):
      if x not in memoized:
        memoized[x] = 1 + max([-1] + [compute_depth(y) for y in deps[x]])
      return memoized[x]

    return {x: compute_depth(x) for x in deps.keys()}

  map_task_depths = compute_depth_map(self.dependencies)
  ordered_map_tasks = sorted((map_task_depths.get(ix, -1), map_task)
                             for ix, map_task in enumerate(self.map_tasks))

  profile_options = pipeline.options.view_as(
      pipeline_options.ProfilingOptions)
  if profile_options.profile_cpu:
    with profiler.Profile(
        profile_id='worker-runner',
        profile_location=profile_options.profile_location,
        log_results=True, file_copy_fn=_dependency_file_copy):
      self.execute_map_tasks(ordered_map_tasks)
  else:
    self.execute_map_tasks(ordered_map_tasks)

  return WorkerRunnerResult(PipelineState.UNKNOWN)
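The compute_depth_map helper above assigns each map task a depth of one plus the maximum depth of its prerequisites (the [-1] sentinel makes dependency-free tasks depth 0), so the subsequent sort yields a topological ordering. The same helper rendered standalone, with made-up dependency data:

import collections

def compute_depth_map(deps):
  memoized = {}

  def compute_depth(x):
    if x not in memoized:
      # Depth is 1 + the deepest prerequisite; [-1] handles leaves.
      memoized[x] = 1 + max([-1] + [compute_depth(y) for y in deps[x]])
    return memoized[x]

  return {x: compute_depth(x) for x in deps}

# Task 2 depends on 0 and 1; task 1 depends on 0 (illustrative data).
deps = collections.defaultdict(set, {0: set(), 1: {0}, 2: {0, 1}})
print(compute_depth_map(deps))  # {0: 0, 1: 1, 2: 2}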
Example 6: run
# Required import: from apache_beam.metrics.execution import MetricsEnvironment [as alias]
# Or: from apache_beam.metrics.execution.MetricsEnvironment import set_metrics_supported [as alias]
def run(self, pipeline):
  MetricsEnvironment.set_metrics_supported(self.has_metrics_support())
  if pipeline._verify_runner_api_compatible():
    return self.run_via_runner_api(pipeline.to_runner_api())
  else:
    return super(FnApiRunner, self).run(pipeline)
Example 7: run_pipeline
# Required import: from apache_beam.metrics.execution import MetricsEnvironment [as alias]
# Or: from apache_beam.metrics.execution.MetricsEnvironment import set_metrics_supported [as alias]
def run_pipeline(self, pipeline):
  MetricsEnvironment.set_metrics_supported(False)
  return self.run_via_runner_api(pipeline.to_runner_api())
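Taken together, the examples share one pattern: each runner declares, up front and process-wide, whether it can honor metrics: True for DirectRunner and metrics-capable Fn API runners, False otherwise. A hypothetical custom runner would follow the same shape; MyRunner and SUPPORTS_METRICS below are illustrative names, not part of Beam, and the run_pipeline signature varies across Beam versions:

from apache_beam.metrics.execution import MetricsEnvironment
from apache_beam.runners.runner import PipelineRunner

class MyRunner(PipelineRunner):
  SUPPORTS_METRICS = True  # hypothetical capability flag

  def run_pipeline(self, pipeline):
    # Declare metrics support before executing anything, exactly as
    # the runners above do.
    MetricsEnvironment.set_metrics_supported(self.SUPPORTS_METRICS)
    raise NotImplementedError('execution elided in this sketch')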