This article collects typical usage examples of the singer.get_bookmark method in Python. If you have been wondering what exactly singer.get_bookmark does, how to call it, or what real-world uses look like, the hand-picked code examples below should help. You can also explore further usage examples from the singer module.
The following shows 15 code examples of the singer.get_bookmark method, sorted by popularity by default.
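Before working through them, here is a minimal, self-contained sketch of what singer.get_bookmark actually reads: the nested "bookmarks" section of a Singer state dict, with singer.write_bookmark as its counterpart. This assumes the singer-python package is installed; the stream name "orders" and the bookmark key "updated_at" are made up for illustration.

import singer

state = {
    "bookmarks": {
        "orders": {"updated_at": "2021-01-01T00:00:00Z"}
    }
}

# Reads state["bookmarks"]["orders"]["updated_at"] -> "2021-01-01T00:00:00Z"
print(singer.get_bookmark(state, "orders", "updated_at"))

# A missing stream or key simply yields None
print(singer.get_bookmark(state, "orders", "version"))

# write_bookmark creates the nested structure as needed and returns the state
state = singer.write_bookmark(state, "customers", "updated_at", "2021-02-01T00:00:00Z")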
Example 1: binlog_stream_requires_historical
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def binlog_stream_requires_historical(catalog_entry, state):
    log_file = singer.get_bookmark(state,
                                   catalog_entry.tap_stream_id,
                                   'log_file')

    log_pos = singer.get_bookmark(state,
                                  catalog_entry.tap_stream_id,
                                  'log_pos')

    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    if (log_file and log_pos) and (not max_pk_values and not last_pk_fetched):
        return False

    return True
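A hypothetical illustration of the two situations Example 1 distinguishes, assuming the function above and singer-python are in scope. CatalogEntry here is just a stand-in exposing the single tap_stream_id attribute the function reads; the stream name and bookmark values are invented.

from collections import namedtuple
import singer

CatalogEntry = namedtuple("CatalogEntry", ["tap_stream_id"])
entry = CatalogEntry(tap_stream_id="mydb-orders")

# Binlog coordinates are bookmarked and no resumable-paging keys remain:
# the historical load already finished, so no historical sync is required.
state = singer.write_bookmark({}, "mydb-orders", "log_file", "mysql-bin.000003")
state = singer.write_bookmark(state, "mydb-orders", "log_pos", 154)
print(binlog_stream_requires_historical(entry, state))  # False

# A leftover max_pk_values bookmark means the historical load was interrupted.
state = singer.write_bookmark(state, "mydb-orders", "max_pk_values", {"id": 100})
print(binlog_stream_requires_historical(entry, state))  # True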
Example 2: build_state
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def build_state(raw_state, catalog):
    state = {}

    for catalog_entry in catalog['streams']:
        tap_stream_id = catalog_entry['tap_stream_id']
        catalog_metadata = metadata.to_map(catalog_entry['metadata'])
        replication_method = catalog_metadata.get((), {}).get('replication-method')

        version = singer.get_bookmark(raw_state,
                                      tap_stream_id,
                                      'version')

        # Preserve state that deals with resuming an incomplete bulk job
        if singer.get_bookmark(raw_state, tap_stream_id, 'JobID'):
            job_id = singer.get_bookmark(raw_state, tap_stream_id, 'JobID')
            batches = singer.get_bookmark(raw_state, tap_stream_id, 'BatchIDs')
            current_bookmark = singer.get_bookmark(raw_state, tap_stream_id, 'JobHighestBookmarkSeen')
            state = singer.write_bookmark(state, tap_stream_id, 'JobID', job_id)
            state = singer.write_bookmark(state, tap_stream_id, 'BatchIDs', batches)
            state = singer.write_bookmark(state, tap_stream_id, 'JobHighestBookmarkSeen', current_bookmark)

        if replication_method == 'INCREMENTAL':
            replication_key = catalog_metadata.get((), {}).get('replication-key')
            replication_key_value = singer.get_bookmark(raw_state,
                                                        tap_stream_id,
                                                        replication_key)
            if version is not None:
                state = singer.write_bookmark(
                    state, tap_stream_id, 'version', version)
            if replication_key_value is not None:
                state = singer.write_bookmark(
                    state, tap_stream_id, replication_key, replication_key_value)
        elif replication_method == 'FULL_TABLE' and version is None:
            state = singer.write_bookmark(state, tap_stream_id, 'version', version)

    return state

# pylint: disable=undefined-variable
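A hypothetical run of build_state, assuming the function above plus singer and singer.metadata (imported as metadata) are in scope. The "Account" stream and "SystemModstamp" replication key are invented, Salesforce-style names. Only the version and replication-key bookmarks are carried over into the fresh state.

raw_state = {
    "bookmarks": {
        "Account": {
            "version": 1,
            "SystemModstamp": "2021-01-01T00:00:00Z",
            "some_stale_key": "dropped"
        }
    }
}
catalog = {
    "streams": [{
        "tap_stream_id": "Account",
        "metadata": [{"breadcrumb": [], "metadata": {
            "replication-method": "INCREMENTAL",
            "replication-key": "SystemModstamp"}}]
    }]
}

print(build_state(raw_state, catalog))
# {'bookmarks': {'Account': {'version': 1, 'SystemModstamp': '2021-01-01T00:00:00Z'}}}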
Example 3: get_stream_version
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_stream_version(catalog_entry, state):
    tap_stream_id = catalog_entry['tap_stream_id']
    catalog_metadata = metadata.to_map(catalog_entry['metadata'])
    replication_key = catalog_metadata.get((), {}).get('replication-key')

    if singer.get_bookmark(state, tap_stream_id, 'version') is None:
        stream_version = int(time.time() * 1000)
    else:
        stream_version = singer.get_bookmark(state, tap_stream_id, 'version')

    if replication_key:
        return stream_version
    return int(time.time() * 1000)
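A small usage sketch, assuming Example 3's function and its imports (singer, singer.metadata as metadata, time) are in scope; the stream and key names are hypothetical. The bookmarked version is reused only when a replication key is configured.

catalog_entry = {
    "tap_stream_id": "orders",
    "metadata": [{"breadcrumb": [], "metadata": {"replication-key": "updated_at"}}]
}
state = {"bookmarks": {"orders": {"version": 1609459200000}}}

# Replication key present and a version bookmarked: the stored version is reused.
print(get_stream_version(catalog_entry, state))   # 1609459200000

# No version bookmarked: a fresh millisecond timestamp is generated instead.
print(get_stream_version(catalog_entry, {}))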
Example 4: get_start_date
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_start_date(self, state, catalog_entry):
    catalog_metadata = metadata.to_map(catalog_entry['metadata'])
    replication_key = catalog_metadata.get((), {}).get('replication-key')

    return (singer.get_bookmark(state,
                                catalog_entry['tap_stream_id'],
                                replication_key) or self.default_start_date)
Example 5: get_start
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_start(state, tap_stream_id, bookmark_key):
    current_bookmark = singer.get_bookmark(state, tap_stream_id, bookmark_key)
    if current_bookmark is None:
        return CONFIG['start_date']
    return current_bookmark
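Hypothetical usage of Example 5, assuming the CONFIG module-level dict below lives in the same module as the function: the bookmark wins when present, otherwise the configured start_date is returned.

CONFIG = {"start_date": "2020-01-01T00:00:00Z"}

print(get_start({}, "orders", "updated_at"))
# 2020-01-01T00:00:00Z  (no bookmark yet, falls back to start_date)

state = {"bookmarks": {"orders": {"updated_at": "2021-06-01T00:00:00Z"}}}
print(get_start(state, "orders", "updated_at"))
# 2021-06-01T00:00:00Z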
Example 6: get_current_sync_start
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_current_sync_start(state, tap_stream_id):
    current_sync_start_value = singer.get_bookmark(state, tap_stream_id, "current_sync_start")
    if current_sync_start_value is None:
        return current_sync_start_value
    return utils.strptime_to_utc(current_sync_start_value)
Example 7: get_start
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_start(stream, bookmark_key):
    tap_stream_id = stream.name
    state = stream.state or {}
    current_bookmark = singer.get_bookmark(state, tap_stream_id, bookmark_key)
    if current_bookmark is None:
        if isinstance(stream, IncrementalStream):
            return None
        else:
            LOGGER.info("no bookmark found for %s, using start_date instead...%s",
                        tap_stream_id, CONFIG['start_date'])
            return pendulum.parse(CONFIG['start_date'])

    LOGGER.info("found current bookmark for %s: %s", tap_stream_id, current_bookmark)
    return pendulum.parse(current_bookmark)
Example 8: get_stream_version
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_stream_version(tap_stream_id, state):
    stream_version = singer.get_bookmark(state, tap_stream_id, 'version')

    if stream_version is None:
        stream_version = int(time.time() * 1000)

    return stream_version
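A quick sketch of Example 8's behaviour, assuming the function and its singer/time imports are in scope; the stream names are invented.

state = {"bookmarks": {"orders": {"version": 1609459200000}}}

# Bookmarked version is returned as-is.
print(get_stream_version("orders", state))   # 1609459200000

# No bookmark: a new version based on the current time in milliseconds.
print(get_stream_version("users", {}))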
Example 9: update_incremental_full_table_state
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def update_incremental_full_table_state(catalog_entry, state, cursor):
    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values') or get_max_pk_values(cursor, catalog_entry)

    if not max_pk_values:
        LOGGER.info("No max value for PK found for table {}".format(catalog_entry.table))
    else:
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'max_pk_values',
                                      max_pk_values)

    return state
Example 10: sync_stream
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def sync_stream(config, state, table_spec, stream):
    table_name = table_spec['table_name']
    modified_since = utils.strptime_with_tz(singer.get_bookmark(state, table_name, 'modified_since') or
                                            config['start_date'])

    LOGGER.info('Syncing table "%s".', table_name)
    LOGGER.info('Getting files modified since %s.', modified_since)

    s3_files = s3.get_input_files_for_table(
        config, table_spec, modified_since)

    records_streamed = 0

    # We sort here so that tracking the modified_since bookmark makes
    # sense. This means that we can't sync s3 buckets that are larger than
    # we can sort in memory which is suboptimal. If we could bookmark
    # based on anything else then we could just sync files as we see them.
    for s3_file in sorted(s3_files, key=lambda item: item['last_modified']):
        records_streamed += sync_table_file(
            config, s3_file['key'], table_spec, stream)

        state = singer.write_bookmark(state, table_name, 'modified_since', s3_file['last_modified'].isoformat())
        singer.write_state(state)

    LOGGER.info('Wrote %s records for table "%s".', records_streamed, table_name)

    return records_streamed
Example 11: get_bookmark
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_bookmark(self):
    bookmark = (singer.get_bookmark(Context.state,
                                    # name is overridden by some substreams
                                    self.name,
                                    self.replication_key)
                or Context.config["start_date"])
    return utils.strptime_with_tz(bookmark)
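The same pattern as Example 11, written outside the class as a self-contained sketch (assuming singer-python; the stream name, replication key, and start_date value are hypothetical): fall back to the configured start_date when nothing has been bookmarked yet.

import singer
from singer import utils

config = {"start_date": "2020-01-01T00:00:00Z"}
state = {"bookmarks": {}}  # nothing bookmarked yet

bookmark = singer.get_bookmark(state, "orders", "updated_at") or config["start_date"]
print(utils.strptime_with_tz(bookmark))  # a timezone-aware datetime for 2020-01-01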
Example 12: get_since_id
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def get_since_id(self):
    return singer.get_bookmark(Context.state,
                               # name is overridden by some substreams
                               self.name,
                               'since_id')
Example 13: resume_syncing_bulk_query
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def resume_syncing_bulk_query(sf, catalog_entry, job_id, state, counter):
    bulk = Bulk(sf)
    current_bookmark = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'JobHighestBookmarkSeen') or sf.get_start_date(state, catalog_entry)
    current_bookmark = singer_utils.strptime_with_tz(current_bookmark)
    batch_ids = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'BatchIDs')

    start_time = singer_utils.now()
    stream = catalog_entry['stream']
    stream_alias = catalog_entry.get('stream_alias')
    catalog_metadata = metadata.to_map(catalog_entry.get('metadata'))
    replication_key = catalog_metadata.get((), {}).get('replication-key')
    stream_version = get_stream_version(catalog_entry, state)
    schema = catalog_entry['schema']

    if not bulk.job_exists(job_id):
        LOGGER.info("Found stored Job ID that no longer exists, resetting bookmark and removing JobID from state.")
        return counter

    # Iterate over the remaining batches, removing them once they are synced
    for batch_id in batch_ids[:]:
        with Transformer(pre_hook=transform_bulk_data_hook) as transformer:
            for rec in bulk.get_batch_results(job_id, batch_id, catalog_entry):
                counter.increment()
                rec = transformer.transform(rec, schema)
                rec = fix_record_anytype(rec, schema)
                singer.write_message(
                    singer.RecordMessage(
                        stream=(
                            stream_alias or stream),
                        record=rec,
                        version=stream_version,
                        time_extracted=start_time))

                # Update bookmark if necessary
                replication_key_value = replication_key and singer_utils.strptime_with_tz(rec[replication_key])
                if replication_key_value and replication_key_value <= start_time and replication_key_value > current_bookmark:
                    current_bookmark = singer_utils.strptime_with_tz(rec[replication_key])

        state = singer.write_bookmark(state,
                                      catalog_entry['tap_stream_id'],
                                      'JobHighestBookmarkSeen',
                                      singer_utils.strftime(current_bookmark))
        batch_ids.remove(batch_id)
        LOGGER.info("Finished syncing batch %s. Removing batch from state.", batch_id)
        LOGGER.info("Batches to go: %d", len(batch_ids))
        singer.write_state(state)

    return counter
Example 14: generate_pk_clause
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def generate_pk_clause(catalog_entry, state):
    key_properties = common.get_key_properties(catalog_entry)

    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    last_pk_clause = ''
    max_pk_comparisons = []

    if not max_pk_values:
        return ""

    if last_pk_fetched:
        for pk in key_properties:
            column_type = catalog_entry.schema.properties.get(pk).type

            # Add AND to interpolate along with max_pk_values clauses
            last_pk_clause = '({}) AND '.format(generate_pk_bookmark_clause(key_properties,
                                                                            last_pk_fetched,
                                                                            catalog_entry))
            max_pk_comparisons.append("{} <= {}".format(common.escape(pk),
                                                        quote_where_clause_value(max_pk_values[pk],
                                                                                 column_type)))
    else:
        for pk in key_properties:
            column_schema = catalog_entry.schema.properties.get(pk)
            column_type = column_schema.type

            pk_val = quote_where_clause_value(max_pk_values[pk],
                                              column_type)

            max_pk_comparisons.append("{} <= {}".format(common.escape(pk), pk_val))

    order_by_columns = [common.escape(c) for c in key_properties]
    sql = " WHERE {}{} ORDER BY {} ASC".format(last_pk_clause,
                                               " AND ".join(max_pk_comparisons),
                                               ", ".join(order_by_columns))

    return sql
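For orientation only, here is the rough shape of the clause Example 14 assembles for a hypothetical table with a single integer primary key id, last_pk_fetched = {'id': 500} and max_pk_values = {'id': 1000}. The clause is rebuilt by hand below with the same final format call; the "`id` > 500" piece stands in for whatever generate_pk_bookmark_clause, common.escape, and quote_where_clause_value would actually produce, so treat this as a sketch rather than verbatim output.

# Rebuilding the same clause shape by hand for a single-column key:
last_pk_clause = "(`id` > 500) AND "
max_pk_comparisons = ["`id` <= 1000"]
order_by_columns = ["`id`"]

sql = " WHERE {}{} ORDER BY {} ASC".format(last_pk_clause,
                                           " AND ".join(max_pk_comparisons),
                                           ", ".join(order_by_columns))
print(sql)  # ' WHERE (`id` > 500) AND `id` <= 1000 ORDER BY `id` ASC'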
Example 15: sync_table
# Required import: import singer [as alias]
# Or: from singer import get_bookmark [as alias]
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry), catalog_entry.tap_stream_id, state)

    bookmark = state.get('bookmarks', {}).get(catalog_entry.tap_stream_id, {})
    version_exists = True if 'version' in bookmark else False

    initial_full_table_complete = singer.get_bookmark(state,
                                                      catalog_entry.tap_stream_id,
                                                      'initial_full_table_complete')

    state_version = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version
    )

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if not initial_full_table_complete and not (version_exists and state_version is None):
        singer.write_message(activate_version_message)

    perform_resumable_sync = sync_is_resumable(mysql_conn, catalog_entry)

    pk_clause = ""

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)

            if perform_resumable_sync:
                LOGGER.info("Full table sync is resumable based on primary key definition, will replicate incrementally")

                state = update_incremental_full_table_state(catalog_entry, state, cur)
                pk_clause = generate_pk_clause(catalog_entry, state)

            select_sql += pk_clause
            params = {}

            common.sync_query(cur,
                              catalog_entry,
                              state,
                              select_sql,
                              columns,
                              stream_version,
                              params)

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'last_pk_fetched')

    singer.write_message(activate_version_message)
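What the two clear_bookmark calls at the end of Example 15 do to a hypothetical state (assuming singer-python; the stream name and values are invented): the resumable-paging keys are dropped while everything else is kept.

import singer

state = {"bookmarks": {"mydb-orders": {"max_pk_values": {"id": 100},
                                       "last_pk_fetched": {"id": 42},
                                       "version": 1}}}

# clear_bookmark removes a single key from that stream's bookmarks, if present.
singer.clear_bookmark(state, "mydb-orders", "max_pk_values")
singer.clear_bookmark(state, "mydb-orders", "last_pk_fetched")

print(state)  # {'bookmarks': {'mydb-orders': {'version': 1}}}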