本文整理汇总了Python中apache_beam.transforms.window.GlobalWindows类的典型用法代码示例。如果您正苦于以下问题：Python GlobalWindows类的具体用法？Python GlobalWindows怎么用？Python GlobalWindows使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了GlobalWindows类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: finish_bundle
def finish_bundle(self):
    """Turn the messages read from Pub/Sub into a bundle and build the result.

    Every ``(timestamp, message)`` pair returned by ``_read_from_pubsub`` is
    emitted in the global window at that timestamp. The whole message is
    emitted when ``self.source.with_attributes`` is truthy, otherwise only
    ``message.data``.

    Returns:
        A ``TransformResult`` carrying the output bundle (if anything was
        read), one freshly created bundle for the transform's first input
        (or a ``PBegin`` when it has no inputs), and a hold mapping ``None``
        to the current wall-clock time.
    """
    pulled = self._read_from_pubsub(self.source.timestamp_attribute)
    bundles = []
    if pulled:
        target = list(self._outputs)[0]
        out_bundle = self._evaluation_context.create_bundle(target)
        # TODO(ccy): Respect the PubSub source's id_label field.
        for timestamp, message in pulled:
            payload = message if self.source.with_attributes else message.data
            out_bundle.output(
                GlobalWindows.windowed_value(payload, timestamp=timestamp))
        bundles.append(out_bundle)

    input_pvalue = (
        self._applied_ptransform.inputs[0]
        if self._applied_ptransform.inputs
        else pvalue.PBegin(self._applied_ptransform.transform.pipeline))
    unprocessed_bundle = self._evaluation_context.create_bundle(input_pvalue)

    # TODO(udim): Correct value for watermark hold.
    holds = {None: Timestamp.of(time.time())}
    return TransformResult(self, bundles, [unprocessed_bundle], None, holds)
示例2: __iter__
def __iter__(self):
    """Iterate over a single encoded chunk holding every grouped entry.

    Each ``(encoded_key, values)`` entry of ``self._table`` is decoded back
    to its key, wrapped as a ``(key, values)`` pair in the global window and
    encoded into one output stream.

    Returns:
        An iterator over a one-element list containing the contents of the
        output stream.
    """
    output_stream = create_OutputStream()
    # The coder implementation does not depend on the entry being encoded,
    # so resolve it once instead of once per key.
    coder_impl = self._post_grouped_coder.get_impl()
    for encoded_key, values in self._table.items():
        key = self._key_coder.decode(encoded_key)
        coder_impl.encode_to_stream(
            GlobalWindows.windowed_value((key, values)), output_stream, True)
    return iter([output_stream.get()])
示例3: test_update_int
def test_update_int(self):
    """Verify counters with 0 before, and 1 after, one windowed int update."""
    counters = OperationCounters(
        CounterFactory(), 'some-name', coders.PickleCoder(), 0)
    self.verify_counters(counters, 0)

    windowed_one = GlobalWindows.windowed_value(1)
    counters.update_from(windowed_one)
    counters.update_collect()

    self.verify_counters(counters, 1)
示例4: test_update_multiple
def test_update_multiple(self):
    """Verify counters after feeding three windowed strings one at a time.

    The expected third argument to ``verify_counters`` is the running total
    of the coder's size estimates divided by the number of updates so far;
    it is checked after the second and third update (and is NaN up front).
    """
    coder = coders.PickleCoder()
    counters = OperationCounters(CounterFactory(), 'some-name', coder, 0)
    self.verify_counters(counters, 0, float('nan'))

    def feed(payload):
        # Push one windowed payload through the counters and report the
        # coder's size estimate for it.
        windowed = GlobalWindows.windowed_value(payload)
        counters.update_from(windowed)
        return coder.estimate_size(windowed)

    running_size = 0
    running_size += feed('abcde')
    running_size += feed('defghij')
    self.verify_counters(counters, 2, (float(running_size) / 2))

    running_size += feed('klmnop')
    self.verify_counters(counters, 3, (float(running_size) / 3))
示例5: test_update_str
def test_update_str(self):
    """Verify counters after one windowed string update.

    The expected third argument to ``verify_counters`` is the coder's size
    estimate for the single windowed value.
    """
    coder = coders.PickleCoder()
    counters = OperationCounters(CounterFactory(), 'some-name', coder, 0)
    self.verify_counters(counters, 0, float('nan'))

    windowed = GlobalWindows.windowed_value('abcde')
    counters.update_from(windowed)

    self.verify_counters(counters, 1, coder.estimate_size(windowed))
示例6: test_update_old_object
def test_update_old_object(self):
    """Verify counters after updating with an object lacking ``__len__``.

    The payload is an ``OldClassThatDoesNotImplementLen`` instance; the
    expected third argument to ``verify_counters`` is the coder's size
    estimate for the windowed value wrapping it.
    """
    coder = coders.PickleCoder()
    counters = OperationCounters(CounterFactory(), 'some-name', coder, 0)
    self.verify_counters(counters, 0, float('nan'))

    windowed = GlobalWindows.windowed_value(OldClassThatDoesNotImplementLen())
    counters.update_from(windowed)

    self.verify_counters(counters, 1, coder.estimate_size(windowed))
示例7: get_root_bundles
def get_root_bundles(self):
    """Return the initial bundles for the applied test stream transform.

    Returns:
        An empty list when the test stream has no events; otherwise a list
        with one committed bundle holding the value ``0`` in the global
        window at ``MIN_TIMESTAMP``.
    """
    test_stream = self._applied_ptransform.transform
    if len(test_stream.events) == 0:
        return []

    seed_bundle = self._evaluation_context.create_bundle(
        pvalue.PBegin(self._applied_ptransform.transform.pipeline))
    # Explicitly set timestamp to MIN_TIMESTAMP to ensure that we hold the
    # watermark.
    seed_bundle.add(GlobalWindows.windowed_value(0, timestamp=MIN_TIMESTAMP))
    seed_bundle.commit(None)
    return [seed_bundle]
示例8: finish_bundle
def finish_bundle(self):
    """Build the result for the event just handled and schedule the next one.

    While events remain after ``self.current_index``, a new bundle holding
    the next event index (stamped with the current watermark) is added as
    unprocessed work, and the watermark is passed on as the hold. Otherwise
    there is no unprocessed work and the hold is ``None``.

    Returns:
        A ``TransformResult`` built from the applied transform, the bundles
        accumulated on ``self.bundles``, the unprocessed bundles and the
        hold.
    """
    pending = []
    hold = None
    if self.current_index + 1 < len(self.test_stream.events):
        follow_up = self._evaluation_context.create_bundle(
            pvalue.PBegin(self._applied_ptransform.transform.pipeline))
        next_index_value = GlobalWindows.windowed_value(
            self.current_index + 1, timestamp=self.watermark)
        follow_up.add(next_index_value)
        pending.append(follow_up)
        hold = self.watermark

    return TransformResult(
        self._applied_ptransform, self.bundles, pending, None, hold)
示例9: __iter__
def __iter__(self):
    """Iterate over a single encoded chunk holding every grouped entry.

    With default windowing, each ``(key, values)`` pair is wrapped in one
    globally windowed value. Otherwise a trigger driver created from
    ``self._windowing`` produces the windowed values for each key.

    Returns:
        An iterator over a one-element list containing the contents of the
        output stream.
    """
    output_stream = create_OutputStream()

    if self._windowing.is_default():
        # Build one globally windowed template and re-value it per pair.
        in_global_window = GlobalWindows.windowed_value(None).with_value

        def windowed_key_values(key, values):
            return [in_global_window((key, values))]
    else:
        driver = trigger.create_trigger_driver(self._windowing, True)
        windowed_key_values = driver.process_entire_key

    coder_impl = self._post_grouped_coder.get_impl()
    key_coder_impl = self._key_coder.get_impl()
    for encoded_key, grouped in self._table.items():
        decoded_key = key_coder_impl.decode(encoded_key)
        for windowed_pair in windowed_key_values(decoded_key, grouped):
            coder_impl.encode_to_stream(windowed_pair, output_stream, True)

    return iter([output_stream.get()])
示例10: _flush_batch
def _flush_batch(self, destination):
    """Flush the buffered rows for ``destination`` to BigQuery.

    Rows are inserted with ``skip_invalid_rows=True``. After each attempt
    the rows reported in ``errors`` become the next batch; the loop repeats
    (sleeping for the next backoff interval first) for as long as the retry
    strategy says at least one reported error is retryable.

    Args:
        destination: Key into ``self._rows_buffer``; also parsed into the
            table reference used for the insert.

    Returns:
        A list of ``pvalue.TaggedOutput`` values tagged
        ``BigQueryWriteFn.FAILED_ROWS``, one globally windowed
        ``(destination, row)`` pair per row that failed the last attempt.
    """
    pending_rows = self._rows_buffer[destination]
    table_reference = bigquery_tools.parse_table_reference(destination)
    if table_reference.projectId is None:
        table_reference.projectId = vp.RuntimeValueProvider.get_value(
            'project', str, '')

    logging.debug('Flushing data to %s. Total %s rows.',
                  destination, len(pending_rows))

    while True:
        # TODO: Figure out an insertId to make calls idempotent.
        passed, errors = self.bigquery_wrapper.insert_rows(
            project_id=table_reference.projectId,
            dataset_id=table_reference.datasetId,
            table_id=table_reference.tableId,
            rows=pending_rows,
            skip_invalid_rows=True)
        logging.debug("Passed: %s. Errors are %s", passed, errors)

        # Error indices refer to the batch just sent, so resolve them
        # before swapping in the next batch.
        failed_rows = [pending_rows[entry.index] for entry in errors]
        should_retry = any(
            bigquery_tools.RetryStrategy.should_retry(
                self._retry_strategy, entry.errors[0].reason)
            for entry in errors)
        pending_rows = failed_rows

        if not should_retry:
            break

        retry_backoff = next(self._backoff_calculator)
        logging.info('Sleeping %s seconds before retrying insertion.',
                     retry_backoff)
        time.sleep(retry_backoff)

    # The buffer entry still holds the full original batch; drop it and
    # adjust the running total by its size.
    self._total_buffered_rows -= len(self._rows_buffer[destination])
    del self._rows_buffer[destination]

    return [
        pvalue.TaggedOutput(
            BigQueryWriteFn.FAILED_ROWS,
            GlobalWindows.windowed_value((destination, row)))
        for row in failed_rows
    ]
示例11: finish_bundle
def finish_bundle(self):
    """Turn the messages read from Pub/Sub into a bundle and build the result.

    All messages read in this call are emitted in the global window with
    the same timestamp: the wall-clock time taken just before they are
    emitted.

    Returns:
        A ``TransformResult`` carrying the output bundle (if anything was
        read), one freshly created bundle for the transform's first input
        (or a ``PBegin`` when it has no inputs), and a hold mapping ``None``
        to the current wall-clock time.
    """
    pulled = self._read_from_pubsub()
    bundles = []
    if pulled:
        target = list(self._outputs)[0]
        out_bundle = self._evaluation_context.create_bundle(target)
        # TODO(ccy): we currently do not use the PubSub message timestamp or
        # respect the PubSub source's id_label field.
        now = Timestamp.of(time.time())
        for message_data in pulled:
            out_bundle.output(
                GlobalWindows.windowed_value(message_data, timestamp=now))
        bundles.append(out_bundle)

    input_pvalue = (
        self._applied_ptransform.inputs[0]
        if self._applied_ptransform.inputs
        else pvalue.PBegin(self._applied_ptransform.transform.pipeline))
    unprocessed_bundle = self._evaluation_context.create_bundle(input_pvalue)

    holds = {None: Timestamp.of(time.time())}
    return TransformResult(
        self._applied_ptransform, bundles, [unprocessed_bundle], None, holds)
示例12: process_element
def process_element(self, element):
    """Handle the test stream event whose index is carried by ``element``.

    The element's value is the index of the event to handle and its
    timestamp becomes the current watermark.

    * ``ElementEvent``: every timestamped value of the event is emitted in
      the global window into a new bundle, which is appended to
      ``self.bundles``.
    * ``WatermarkEvent``: the watermark moves to the event's new watermark,
      which must not be earlier than the current one.
    * ``ProcessingTimeEvent``: currently ignored.

    Args:
        element: Windowed value whose ``value`` is an event index and whose
            ``timestamp`` is the watermark to adopt.

    Raises:
        AssertionError: If the index is not an int within the bounds of the
            event list, or another invariant above is violated.
        ValueError: If the event is of an unknown type.
    """
    index = element.value
    self.watermark = element.timestamp
    assert isinstance(index, int)
    # The index is used directly for the lookup below, so the upper bound
    # must be exclusive; ``index == len(events)`` would otherwise pass the
    # check and then fail with an IndexError.
    assert 0 <= index < len(self.test_stream.events)
    self.current_index = index
    event = self.test_stream.events[self.current_index]
    if isinstance(event, ElementEvent):
        assert len(self._outputs) == 1
        output_pcollection = list(self._outputs)[0]
        bundle = self._evaluation_context.create_bundle(output_pcollection)
        for tv in event.timestamped_values:
            bundle.output(
                GlobalWindows.windowed_value(tv.value, timestamp=tv.timestamp))
        self.bundles.append(bundle)
    elif isinstance(event, WatermarkEvent):
        assert event.new_watermark >= self.watermark
        self.watermark = event.new_watermark
    elif isinstance(event, ProcessingTimeEvent):
        # TODO(ccy): advance processing time in the context's mock clock.
        pass
    else:
        raise ValueError('Invalid TestStream event: %s.' % event)
示例13: process_timer
def process_timer(self, timer_firing):
    """Default process_timer() impl. generating KeyedWorkItem element.

    The firing is wrapped in a ``KeyedWorkItem`` keyed by its encoded key,
    placed in the global window and passed to ``process_element``.
    """
    work_item = KeyedWorkItem(
        timer_firing.encoded_key, timer_firings=[timer_firing])
    windowed_item = GlobalWindows.windowed_value(work_item)
    self.process_element(windowed_item)
示例14: _read_values_to_bundles
def _read_values_to_bundles(reader):
    """Wrap every element from ``reader`` in the global window and bundle them.

    ``self`` and ``output_pcollection`` come from the enclosing scope. Each
    element is given a weight of 1 when the list is split, with
    ``_BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE`` as the limit.
    """
    windowed_elements = list(map(GlobalWindows.windowed_value, reader))
    per_bundle_limit = _BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE
    return self._split_list_into_bundles(
        output_pcollection, windowed_elements, per_bundle_limit, lambda _: 1)
示例15: set
def set(self, ts):
    """Send ``(key, {'timestamp': Timestamp.of(ts)})`` to the receiver.

    The pair is wrapped in the global window before being handed to
    ``self._receiver.receive``.

    Args:
        ts: Value converted with ``timestamp.Timestamp.of``.
    """
    # Imported inside the method, as in the original code.
    from apache_beam.transforms.window import GlobalWindows

    fire_at = timestamp.Timestamp.of(ts)
    keyed_payload = (self._key, dict(timestamp=fire_at))
    self._receiver.receive(GlobalWindows.windowed_value(keyed_payload))