本文整理汇总了 Python 中 singer.StateMessage 方法的典型用法代码示例。如果您正苦于以下问题：Python singer.StateMessage 方法的具体用法？Python singer.StateMessage 怎么用？Python singer.StateMessage 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 singer 的用法示例。
在下文中一共展示了singer.StateMessage方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def add(self, message):
    """Fold one parsed Singer message into the running per-stream tallies.

    * ``RecordMessage``: the record is validated against the latest schema
      seen for its stream and the stream's record count is incremented.
      If no schema has been seen for that stream yet, a message is printed
      and the process exits with status 1.
    * ``SchemaMessage``: the stream's schema count is incremented and the
      schema is remembered as that stream's latest schema.
    * ``StateMessage``: the value is remembered as the latest state and the
      state count is incremented.

    Any other message type is ignored.
    """
    if isinstance(message, singer.RecordMessage):
        stream = self.ensure_stream(message.stream)
        if not stream.latest_schema:
            print('I saw a record for stream {} before the schema'.format(
                message.stream))
            # Raise SystemExit directly instead of calling the bare exit()
            # helper: that name is injected by the ``site`` module and is
            # not guaranteed to exist in every interpreter configuration.
            raise SystemExit(1)
        validator_fn = extend_with_default(Draft4Validator)
        validator = validator_fn(
            stream.latest_schema, format_checker=FormatChecker())
        # Validation runs on a deep copy of the record; presumably the
        # extended validator fills in defaults in place -- TODO confirm.
        validator.validate(copy.deepcopy(message.record))
        stream.num_records += 1
    elif isinstance(message, singer.SchemaMessage):
        stream = self.ensure_stream(message.stream)
        stream.num_schemas += 1
        stream.latest_schema = message.schema
    elif isinstance(message, singer.StateMessage):
        self.latest_state = message.value
        self.num_states += 1
示例2: test_round_trip
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def test_round_trip(self):
    """Each message kind survives format_message -> parse_message unchanged."""
    # All three messages are built up front, then checked in the order
    # record, schema, state.
    originals = (
        singer.RecordMessage(
            record={'name': 'foo'},
            stream='users'),
        singer.SchemaMessage(
            stream='users',
            key_properties=['name'],
            schema={'type': 'object',
                    'properties': {
                        'name': {'type': 'string'}}}),
        singer.StateMessage(value={'seq': 1}),
    )
    for original in originals:
        serialized = singer.format_message(original)
        self.assertEqual(original, singer.parse_message(serialized))
## These three tests just confirm that writing doesn't throw
示例3: overloaded_parse_message
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def overloaded_parse_message(msg):
    """Parse a message string into a Message object.

    The JSON object's required 'type' key selects the result: 'RECORD',
    'SCHEMA', 'STATE' and 'ACTIVATE_VERSION' produce the matching singer
    message; any other type yields None.
    """
    # NOTE(review): use_decimal=True is passed to simplejson here, so JSON
    # floats should come back as Decimal rather than float -- confirm this
    # is the intended numeric handling.
    obj = simplejson.loads(msg, use_decimal=True)
    msg_type = _required_key(obj, 'type')

    parsed = None
    if msg_type == 'RECORD':
        extracted_at = obj.get('time_extracted')
        if extracted_at:
            try:
                extracted_at = ciso8601.parse_datetime(extracted_at)
            except Exception:
                # Best effort: a timestamp that cannot be parsed is dropped
                # instead of failing the whole record.
                extracted_at = None
        parsed = singer.RecordMessage(
            stream=_required_key(obj, 'stream'),
            record=_required_key(obj, 'record'),
            version=obj.get('version'),
            time_extracted=extracted_at)
    elif msg_type == 'SCHEMA':
        parsed = singer.SchemaMessage(
            stream=_required_key(obj, 'stream'),
            schema=_required_key(obj, 'schema'),
            key_properties=_required_key(obj, 'key_properties'),
            bookmark_properties=obj.get('bookmark_properties'))
    elif msg_type == 'STATE':
        parsed = singer.StateMessage(value=_required_key(obj, 'value'))
    elif msg_type == 'ACTIVATE_VERSION':
        parsed = singer.ActivateVersionMessage(
            stream=_required_key(obj, 'stream'),
            version=_required_key(obj, 'version'))
    return parsed
示例4: test_initial_full_table
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def test_initial_full_table(self):
    """A sync from empty state emits the expected message types and records."""
    state = {}
    # The returned position is not used by any assertion below; the call
    # itself is kept.
    _log_file, _log_pos = binlog.fetch_current_log_file_and_pos(self.conn)

    # Only mutated in place, so no ``global`` declaration is needed.
    SINGER_MESSAGES.clear()
    tap_mysql.do_sync(self.conn, {}, self.catalog, state)

    expected_types = [
        singer.StateMessage,
        singer.SchemaMessage,
        singer.ActivateVersionMessage,
        singer.RecordMessage,
        singer.RecordMessage,
        singer.StateMessage,
        singer.ActivateVersionMessage,
        singer.StateMessage,
    ]
    self.assertEqual([type(m) for m in SINGER_MESSAGES], expected_types)

    # Expected from 0.7.11
    expected_records = [
        {'datetime_col': None,
         'id': 1,
         'timestamp_col': None,
         'time_col': '1970-01-01T00:00:00.000000Z',
         'date_col': None},
        {'datetime_col': None,
         'id': 2,
         'timestamp_col': None,
         'time_col': None,
         'date_col': None},
    ]
    actual_records = [
        m.asdict()['record']
        for m in SINGER_MESSAGES
        if isinstance(m, singer.RecordMessage)
    ]
    self.assertEqual(expected_records, actual_records)
示例5: currently_syncing_seq
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def currently_syncing_seq(messages):
    """Return one character per StateMessage found in *messages*.

    For each state message the character is the last character of its
    'currently_syncing' value, or '_' when that key is missing or its value
    is falsy.  Messages of any other type are skipped.
    """
    chars = []
    for message in messages:
        if not isinstance(message, singer.StateMessage):
            continue
        syncing = message.value.get('currently_syncing', '_') or '_'
        chars.append(syncing[-1])
    return ''.join(chars)
示例6: test_parse_message_state_good
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def test_parse_message_state_good(self):
    """A well-formed STATE line parses to an equal StateMessage."""
    raw_line = '{"type": "STATE", "value": {"seq": 1}}'
    parsed = singer.parse_message(raw_line)
    expected = singer.StateMessage(value={'seq': 1})
    self.assertEqual(parsed, expected)
示例7: handle_line
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def handle_line(self, line):
    '''Parse one raw input line and update the batching state for it.

    Schema messages flush any pending batch and replace the stored
    StreamMeta for their stream.  Record and ActivateVersion messages are
    added to the pending batch, which is flushed beforehand when the stream
    or version differs from the first pending message, and flushed
    afterwards once a byte, message-count or elapsed-time threshold is
    reached.  State messages replace self.state and flush only on the
    elapsed-time threshold.  Any other parse result is ignored.
    '''
    message = overloaded_parse_message(line)

    if isinstance(message, singer.SchemaMessage):
        # Flush before swapping metadata, in case the pending batch was
        # built against a different schema.
        self.flush()
        self.stream_meta[message.stream] = StreamMeta(
            message.schema,
            message.key_properties,
            message.bookmark_properties)
        return

    if isinstance(message, (singer.RecordMessage, singer.ActivateVersionMessage)):
        # A batch holds a single (stream, version) pair; flush on a change.
        if self.messages:
            head = self.messages[0]
            if message.stream != head.stream or message.version != head.version:
                self.flush()

        self.messages.append(message)
        self.buffer_size_bytes += len(line)
        if isinstance(message, singer.ActivateVersionMessage):
            self.contains_activate_version = True

        batch_bytes = self.buffer_size_bytes
        batch_count = len(self.messages)
        elapsed = time.time() - self.time_last_batch_sent
        limits_hit = (
            batch_bytes >= self.max_batch_bytes,
            batch_count >= self.max_batch_records,
            elapsed >= self.batch_delay_seconds,
        )
        if any(limits_hit):
            LOGGER.debug('Flushing %d bytes, %d messages, after %.2f seconds',
                         batch_bytes, batch_count, elapsed)
            self.flush()
        return

    if isinstance(message, singer.StateMessage):
        self.state = message.value

        # Only elapsed time is checked: a state message adds neither
        # messages nor bytes to the batch.
        elapsed = time.time() - self.time_last_batch_sent
        if elapsed >= self.batch_delay_seconds:
            LOGGER.debug('Flushing %d bytes, %d messages, after %.2f seconds',
                         self.buffer_size_bytes, len(self.messages), elapsed)
            self.flush()
            # NOTE(review): the nesting of this reset was reconstructed from
            # whitespace-stripped source; it is placed inside the
            # state-triggered flush -- confirm against the original file.
            self.time_last_batch_sent = time.time()
示例8: test_initial_full_table
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def test_initial_full_table(self):
    """Initial sync of both binlog streams emits the expected messages and bookmarks."""
    state = {}
    # The returned position is not used by any assertion below; the call
    # itself is kept.
    _log_file, _log_pos = binlog.fetch_current_log_file_and_pos(self.conn)

    # Only mutated in place, so no ``global`` declaration is needed.
    SINGER_MESSAGES.clear()
    tap_mysql.do_sync(self.conn, {}, self.catalog, state)

    # The same eight-message sequence is expected once per stream.
    per_stream_types = [
        singer.StateMessage,
        singer.SchemaMessage,
        singer.ActivateVersionMessage,
        singer.RecordMessage,
        singer.RecordMessage,
        singer.StateMessage,
        singer.ActivateVersionMessage,
    ]
    expected_types = per_stream_types + per_stream_types + [singer.StateMessage]
    self.assertEqual([type(m) for m in SINGER_MESSAGES], expected_types)

    streams = (
        ('binlog_1', 'tap_mysql_test-binlog_1'),
        ('binlog_2', 'tap_mysql_test-binlog_2'),
    )

    # First ActivateVersionMessage seen for each stream, looked up before
    # any bookmark assertion runs.
    first_activate = {}
    for stream_name, _ in streams:
        matches = [
            m for m in SINGER_MESSAGES
            if isinstance(m, singer.ActivateVersionMessage) and m.stream == stream_name
        ]
        first_activate[stream_name] = matches[0]

    # NOTE(review): these read self.state although do_sync above was given
    # the local ``state`` -- confirm which object is meant to be checked.
    for _, tap_stream_id in streams:
        for bookmark_key in ('log_file', 'log_pos'):
            self.assertIsNotNone(
                singer.get_bookmark(self.state, tap_stream_id, bookmark_key))

    for stream_name, tap_stream_id in streams:
        self.assertEqual(singer.get_bookmark(state, tap_stream_id, 'version'),
                         first_activate[stream_name].version)
示例9: test_binlog_stream
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def test_binlog_stream(self):
    """A binlog sync emits the expected message types, records and bookmarks."""
    # Only mutated in place, so no ``global`` declaration is needed.
    SINGER_MESSAGES.clear()

    config = test_utils.get_db_config()
    config['server_id'] = "100"
    tap_mysql.do_sync(self.conn, config, self.catalog, self.state)

    record_messages = [
        m for m in SINGER_MESSAGES if isinstance(m, singer.RecordMessage)
    ]

    # One state, two schemas, ten records, one closing state.
    expected_types = (
        [singer.StateMessage, singer.SchemaMessage, singer.SchemaMessage]
        + [singer.RecordMessage] * 10
        + [singer.StateMessage]
    )
    self.assertEqual([type(m) for m in SINGER_MESSAGES], expected_types)

    # (stream, id, updated, has a deleted-at marker)
    expected_rows = [
        ('binlog_1', 1, '2017-06-01T00:00:00.000000Z', False),
        ('binlog_1', 2, '2017-06-20T00:00:00.000000Z', False),
        ('binlog_1', 3, '2017-09-22T00:00:00.000000Z', False),
        ('binlog_2', 1, '2017-10-22T00:00:00.000000Z', False),
        ('binlog_2', 2, '2017-11-10T00:00:00.000000Z', False),
        ('binlog_2', 3, '2017-12-10T00:00:00.000000Z', False),
        ('binlog_1', 3, '2018-06-18T00:00:00.000000Z', False),
        ('binlog_2', 2, '2018-06-18T00:00:00.000000Z', False),
        ('binlog_1', 2, '2017-06-20T00:00:00.000000Z', True),
        ('binlog_2', 1, '2017-10-22T00:00:00.000000Z', True),
    ]
    actual_rows = []
    for m in record_messages:
        was_deleted = m.record.get(binlog.SDC_DELETED_AT) is not None
        actual_rows.append(
            (m.stream, m.record['id'], m.record['updated'], was_deleted))
    self.assertEqual(expected_rows, actual_rows)

    for tap_stream_id in ('tap_mysql_test-binlog_1', 'tap_mysql_test-binlog_2'):
        for bookmark_key in ('log_file', 'log_pos'):
            self.assertIsNotNone(
                singer.get_bookmark(self.state, tap_stream_id, bookmark_key))
示例10: persist_lines
# 需要导入模块: import singer [as 别名]
# 或者: from singer import StateMessage [as 别名]
def persist_lines(service, spreadsheet, lines):
    """Consume Singer message lines, appending each record to a sheet named after its stream.

    Schema messages are stored per stream; a record arriving before its
    stream's schema raises.  Each record is validated, flattened and
    appended as one row ordered by that stream's header row.  A sheet is
    created (with a header row) when the spreadsheet has none titled after
    the stream.

    Returns the value of the last StateMessage seen, provided no
    RecordMessage followed it; otherwise None.
    """
    state = None
    schemas = {}
    key_properties = {}
    headers_by_stream = {}

    for line in lines:
        try:
            msg = singer.parse_message(line)
        except json.decoder.JSONDecodeError:
            logger.error("Unable to parse:\n{}".format(line))
            raise

        if isinstance(msg, singer.RecordMessage):
            stream_name = msg.stream
            if stream_name not in schemas:
                raise Exception("A record for stream {} was encountered before a corresponding schema".format(msg.stream))

            validate(msg.record, schemas[stream_name])
            flattened_record = flatten(msg.record)

            sheet_titles = [s['properties']['title'] for s in spreadsheet['sheets']]
            range_name = "{}!A1:ZZZ".format(stream_name)
            append = functools.partial(
                append_to_sheet, service, spreadsheet['spreadsheetId'], range_name)

            if stream_name not in sheet_titles:
                # No sheet for this stream yet: create it and write headers.
                add_sheet(service, spreadsheet['spreadsheetId'], stream_name)
                # Refresh the spreadsheet for future iterations.
                spreadsheet = get_spreadsheet(service, spreadsheet['spreadsheetId'])
                headers_by_stream[stream_name] = list(flattened_record.keys())
                append(headers_by_stream[stream_name])
            elif stream_name not in headers_by_stream:
                # Sheet exists but its headers have not been read in this run.
                first_row = get_values(
                    service, spreadsheet['spreadsheetId'], range_name + '1')
                if 'values' in first_row:
                    headers_by_stream[stream_name] = first_row['values'][0]
                else:
                    headers_by_stream[stream_name] = list(flattened_record.keys())
                    append(headers_by_stream[stream_name])

            # Order the row by the headers actually found in the sheet.
            append([flattened_record.get(column)
                    for column in headers_by_stream[stream_name]])

            # A record after a state message invalidates that state.
            state = None
        elif isinstance(msg, singer.StateMessage):
            logger.debug('Setting state to {}'.format(msg.value))
            state = msg.value
        elif isinstance(msg, singer.SchemaMessage):
            schemas[msg.stream] = msg.schema
            key_properties[msg.stream] = msg.key_properties
        else:
            raise Exception("Unrecognized message {}".format(msg))

    return state