本文整理汇总了Python中singer.write_bookmark方法的典型用法代码示例。如果您正苦于以下问题:Python singer.write_bookmark方法的具体用法?Python singer.write_bookmark怎么用?Python singer.write_bookmark使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块singer的用法示例。
在下文中一共展示了singer.write_bookmark方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: advance_bookmark
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def advance_bookmark(stream, bookmark_key, date):
    """Advance a stream's bookmark to ``date`` when it is newer than the current one.

    Args:
        stream: Object exposing ``name`` and ``state`` attributes.
        bookmark_key: Key of the bookmark inside the stream's state entry.
        date: Date string handed to ``pendulum.parse``; a falsy value means
            "no date" and leaves the bookmark untouched.

    Returns:
        ``stream.state`` (or a new dict when that is falsy), with the
        bookmark rewritten only in the advancing case.
    """
    stream_id = stream.name
    state = stream.state or {}
    LOGGER.info('advance(%s, %s)', stream_id, date)

    candidate = pendulum.parse(date) if date else None
    existing = get_start(stream, bookmark_key)

    # Nothing to compare against: keep whatever bookmark is already there.
    if candidate is None:
        LOGGER.info('Did not get a date for stream %s  not advancing bookmark',
                    stream_id)
        return state

    # An existing bookmark wins unless the candidate is strictly later.
    if existing and not candidate > existing:
        LOGGER.info('Bookmark for stream %s is currently %s not changing to %s',
                    stream_id, existing, candidate)
        return state

    LOGGER.info('Bookmark for stream %s is currently %s, advancing to %s',
                stream_id, existing, candidate)
    return singer.write_bookmark(state, stream_id, bookmark_key, str(candidate))
示例2: do_sync_full_table
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def do_sync_full_table(mysql_conn, config, catalog_entry, state, columns):
    """Run a full-table sync for one stream and emit the resulting state.

    Args:
        mysql_conn: Connection object forwarded to ``full_table.sync_table``.
        config: Accepted for interface compatibility; not read here.
        catalog_entry: Catalog entry exposing ``stream`` and ``tap_stream_id``.
        state: Singer state dict; its bookmarks for this stream are updated.
        columns: Columns forwarded to ``full_table.sync_table``.
    """
    stream_id = catalog_entry.tap_stream_id
    LOGGER.info("Stream %s is using full table replication", catalog_entry.stream)

    # The return value is not used in this function; the call is kept as-is.
    common.get_key_properties(catalog_entry)

    write_schema_message(catalog_entry)

    version = common.get_stream_version(stream_id, state)
    full_table.sync_table(mysql_conn, catalog_entry, state, columns, version)

    # Prefer initial_full_table_complete going forward
    singer.clear_bookmark(state, stream_id, 'version')
    state = singer.write_bookmark(state, stream_id, 'initial_full_table_complete', True)

    # Emit a deep copy of the state in the STATE message.
    state_snapshot = copy.deepcopy(state)
    singer.write_message(singer.StateMessage(value=state_snapshot))
示例3: get_all_issue_labels
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def get_all_issue_labels(schemas, repo_path, state, mdata):
    """Emit every issue label of a repository as ``issue_labels`` records.

    API reference: https://developer.github.com/v3/issues/labels/

    Args:
        schemas: Schema passed to the transformer for each label.
        repo_path: Repository path interpolated into the API URL and stored
            on each record as ``_sdc_repository``.
        state: Singer state dict; the ``issue_labels`` bookmark for
            ``repo_path`` is rewritten after every record.
        mdata: Stream metadata, converted with ``metadata.to_map``.

    Returns:
        The ``state`` object that was passed in.
    """
    # not sure if incremental key
    # 'https://api.github.com/repos/{}/labels?sort=created_at&direction=desc'.format(repo_path)
    labels_url = 'https://api.github.com/repos/{}/labels'.format(repo_path)

    with metrics.record_counter('issue_labels') as counter:
        for page in authed_get_all_pages('issue_labels', labels_url):
            labels = page.json()
            extracted_at = singer.utils.now()

            for label in labels:
                label['_sdc_repository'] = repo_path

                # Transform the label, then write the record and its bookmark.
                with singer.Transformer() as transformer:
                    record = transformer.transform(
                        label, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('issue_labels', record, time_extracted=extracted_at)

                bookmark = {'since': singer.utils.strftime(extracted_at)}
                singer.write_bookmark(state, repo_path, 'issue_labels', bookmark)
                counter.increment()

    return state
示例4: get_all_releases
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def get_all_releases(schemas, repo_path, state, mdata):
    """Emit every release of a repository as ``releases`` records.

    Args:
        schemas: Schema passed to the transformer for each release.
        repo_path: Repository path interpolated into the API URL and stored
            on each record as ``_sdc_repository``.
        state: Singer state dict; the ``releases`` bookmark for
            ``repo_path`` is rewritten after every record.
        mdata: Stream metadata, converted with ``metadata.to_map``.

    Returns:
        The ``state`` object that was passed in.
    """
    # Releases doesn't seem to have an `updated_at` property, yet can be edited.
    # For this reason and since the volume of release can safely be considered low,
    # bookmarks were ignored for releases (no bookmark is read here; one is
    # still written with the extraction time).
    releases_url = (
        'https://api.github.com/repos/{}/releases?sort=created_at&direction=desc'
        .format(repo_path)
    )

    with metrics.record_counter('releases') as counter:
        for page in authed_get_all_pages('releases', releases_url):
            page_releases = page.json()
            extracted_at = singer.utils.now()

            for release in page_releases:
                release['_sdc_repository'] = repo_path

                # transform and write release record
                with singer.Transformer() as transformer:
                    record = transformer.transform(
                        release, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('releases', record, time_extracted=extracted_at)

                bookmark = {'since': singer.utils.strftime(extracted_at)}
                singer.write_bookmark(state, repo_path, 'releases', bookmark)
                counter.increment()

    return state
示例5: get_all_assignees
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def get_all_assignees(schema, repo_path, state, mdata):
    """Emit every assignee of a repository as ``assignees`` records.

    https://developer.github.com/v3/issues/assignees/#list-assignees

    Args:
        schema: Schema passed to the transformer for each assignee.
        repo_path: Repository path interpolated into the API URL and stored
            on each record as ``_sdc_repository``.
        state: Singer state dict; the ``assignees`` bookmark for
            ``repo_path`` is rewritten after every record.
        mdata: Stream metadata, converted with ``metadata.to_map``.

    Returns:
        The ``state`` object that was passed in.
    """
    assignees_url = 'https://api.github.com/repos/{}/assignees'.format(repo_path)

    with metrics.record_counter('assignees') as counter:
        for page in authed_get_all_pages('assignees', assignees_url):
            page_assignees = page.json()
            extracted_at = singer.utils.now()

            for person in page_assignees:
                person['_sdc_repository'] = repo_path

                with singer.Transformer() as transformer:
                    record = transformer.transform(
                        person, schema, metadata=metadata.to_map(mdata))
                singer.write_record('assignees', record, time_extracted=extracted_at)

                bookmark = {'since': singer.utils.strftime(extracted_at)}
                singer.write_bookmark(state, repo_path, 'assignees', bookmark)
                counter.increment()

    return state
示例6: get_all_collaborators
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def get_all_collaborators(schema, repo_path, state, mdata):
    """Emit every collaborator of a repository as ``collaborators`` records.

    https://developer.github.com/v3/repos/collaborators/#list-collaborators

    Args:
        schema: Schema passed to the transformer for each collaborator.
        repo_path: Repository path interpolated into the API URL and stored
            on each record as ``_sdc_repository``.
        state: Singer state dict; the ``collaborators`` bookmark for
            ``repo_path`` is rewritten after every record.
        mdata: Stream metadata, converted with ``metadata.to_map``.

    Returns:
        The ``state`` object that was passed in.
    """
    stream_name = 'collaborators'

    with metrics.record_counter(stream_name) as counter:
        for response in authed_get_all_pages(
                stream_name,
                'https://api.github.com/repos/{}/collaborators'.format(repo_path)
        ):
            collaborators = response.json()
            extraction_time = singer.utils.now()

            for collaborator in collaborators:
                collaborator['_sdc_repository'] = repo_path

                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        collaborator, schema, metadata=metadata.to_map(mdata))
                singer.write_record(stream_name, rec, time_extracted=extraction_time)

                # The bookmark is keyed by the stream name. It used to be
                # written under the misspelled key 'collaborator', which did
                # not match the stream; any value persisted under that old
                # key is no longer updated.
                singer.write_bookmark(
                    state,
                    repo_path,
                    stream_name,
                    {'since': singer.utils.strftime(extraction_time)})
                counter.increment()

    return state
示例7: update_bookmark
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def update_bookmark(self, bookmark_value, bookmark_key=None):
    """Store ``bookmark_value`` for this stream in the shared state and emit it.

    Args:
        bookmark_value: Value to record.
        bookmark_key: Bookmark key to write; when falsy, ``self.replication_key``
            is used instead.
    """
    # NOTE: Bookmarking can never be updated to not get the most
    # recent thing it saw the next time you run, because the querying
    # only allows greater than or equal semantics.
    shared_state = Context.state
    # name is overridden by some substreams
    stream_name = self.name
    key = bookmark_key or self.replication_key

    singer.write_bookmark(shared_state, stream_name, key, bookmark_value)
    singer.write_state(Context.state)
# This function can be overridden by subclasses for specialized API
# interactions. If you override it you need to remember to decorate it
# with shopify_error_handling to get 429 and 500 handling.
示例8: build_state
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def build_state(raw_state, catalog):
    """Build a fresh state dict holding only the bookmarks relevant to ``catalog``.

    For every stream in ``catalog['streams']`` the following bookmarks are
    copied from ``raw_state``:

    * ``JobID``, ``BatchIDs`` and ``JobHighestBookmarkSeen`` when a ``JobID``
      bookmark is present (truthy);
    * for ``INCREMENTAL`` streams, ``version`` and the replication-key
      bookmark, each only when not ``None``;
    * for ``FULL_TABLE`` streams, a ``version`` bookmark is written only when
      the stored version is ``None`` (so the written value is ``None``).

    Args:
        raw_state: Previously persisted Singer state.
        catalog: Mapping with a ``'streams'`` list whose entries carry
            ``'tap_stream_id'`` and ``'metadata'``.

    Returns:
        The newly built state dict.
    """
    state = {}

    for entry in catalog['streams']:
        stream_id = entry['tap_stream_id']
        stream_md = metadata.to_map(entry['metadata']).get((), {})
        method = stream_md.get('replication-method')
        version = singer.get_bookmark(raw_state, stream_id, 'version')

        # Preserve state that deals with resuming an incomplete bulk job
        job_id = singer.get_bookmark(raw_state, stream_id, 'JobID')
        if job_id:
            batch_ids = singer.get_bookmark(raw_state, stream_id, 'BatchIDs')
            highest_seen = singer.get_bookmark(
                raw_state, stream_id, 'JobHighestBookmarkSeen')
            state = singer.write_bookmark(state, stream_id, 'JobID', job_id)
            state = singer.write_bookmark(state, stream_id, 'BatchIDs', batch_ids)
            state = singer.write_bookmark(
                state, stream_id, 'JobHighestBookmarkSeen', highest_seen)

        if method == 'INCREMENTAL':
            rep_key = stream_md.get('replication-key')
            rep_key_value = singer.get_bookmark(raw_state, stream_id, rep_key)

            if version is not None:
                state = singer.write_bookmark(state, stream_id, 'version', version)
            if rep_key_value is not None:
                state = singer.write_bookmark(state, stream_id, rep_key, rep_key_value)
        elif method == 'FULL_TABLE' and version is None:
            # Kept as in the original logic: the version written here is None.
            state = singer.write_bookmark(state, stream_id, 'version', version)

    return state
# pylint: disable=undefined-variable
示例9: _bulk_query
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def _bulk_query(self, catalog_entry, state):
    """Run a bulk query for one stream and yield its results.

    A job with a single batch is created, closed and polled. If that batch
    did not fail, its results are yielded. If it failed and
    ``self._can_pk_chunk_job`` accepts the failure message, the query is
    retried through ``self._bulk_query_with_pk_chunking`` and the completed
    batches are yielded one by one, with state written after each batch.

    Args:
        catalog_entry: Catalog entry mapping; ``'tap_stream_id'`` is read here.
        state: Singer state dict; ``JobID`` and ``BatchIDs`` bookmarks are
            written on the PK-chunking path.

    Yields:
        Each item produced by ``self.get_batch_results``.

    Raises:
        TapSalesforceException: When the batch failed and the job cannot be
            PK-chunked; the message is the batch's ``stateMessage``.
    """
    job_id = self._create_job(catalog_entry)
    start_date = self.sf.get_start_date(state, catalog_entry)
    batch_id = self._add_batch(catalog_entry, job_id, start_date)
    self._close_job(job_id)
    batch_status = self._poll_on_batch_status(job_id, batch_id)

    # Plain path: the single batch did not fail.
    if batch_status['state'] != 'Failed':
        yield from self.get_batch_results(job_id, batch_id, catalog_entry)
        return

    if not self._can_pk_chunk_job(batch_status['stateMessage']):
        raise TapSalesforceException(batch_status['stateMessage'])

    batch_status = self._bulk_query_with_pk_chunking(catalog_entry, start_date)
    job_id = batch_status['job_id']

    # Set pk_chunking to True to indicate that we should write a bookmark differently
    self.sf.pk_chunking = True

    # Add the bulk Job ID and its batches to the state so it can be resumed if necessary
    tap_stream_id = catalog_entry['tap_stream_id']
    state = singer.write_bookmark(state, tap_stream_id, 'JobID', job_id)
    # A copy is stored so removing finished batches below does not mutate
    # the list being iterated.
    state = singer.write_bookmark(
        state, tap_stream_id, 'BatchIDs', batch_status['completed'][:])

    for completed_batch_id in batch_status['completed']:
        yield from self.get_batch_results(job_id, completed_batch_id, catalog_entry)

        # Remove the completed batch ID and write state
        remaining = state['bookmarks'][tap_stream_id]["BatchIDs"]
        remaining.remove(completed_batch_id)
        LOGGER.info("Finished syncing batch %s. Removing batch from state.", completed_batch_id)
        LOGGER.info("Batches to go: %d", len(remaining))
        singer.write_state(state)
示例10: write_current_sync_start
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def write_current_sync_start(state, tap_stream_id, start):
    """Record the ``current_sync_start`` bookmark for a stream.

    Args:
        state: Singer state dict.
        tap_stream_id: Stream whose bookmark is written.
        start: Value formatted with ``utils.strftime``; ``None`` is stored
            unchanged.

    Returns:
        Whatever ``singer.write_bookmark`` returns.
    """
    formatted = None if start is None else utils.strftime(start)
    return singer.write_bookmark(state, tap_stream_id, "current_sync_start", formatted)
示例11: sync_contacts
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def sync_contacts(STATE, ctx):
    """Sync the ``contacts`` stream and advance its ``versionTimestamp`` bookmark.

    Rows from the ``contacts_all`` endpoint are scanned; the ``vid`` of every
    row without a timestamp, or with one at or after the starting bookmark,
    is collected and handed to ``_sync_contact_vids`` in groups of 100.

    Args:
        STATE: Singer state dict.
        ctx: Context object providing ``get_catalog_from_id``.

    Returns:
        The state with the ``contacts`` bookmark set to the highest
        timestamp seen (or the starting value when none was higher).
    """
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    bookmark_key = 'versionTimestamp'
    start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    high_water_mark = start
    schema = load_schema("contacts")
    singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias'))

    url = get_url("contacts_all")
    pending_vids = []

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as transformer:
        rows = gen_request(STATE, 'contacts', url, default_contact_params,
                           'contacts', 'has-more', ['vid-offset'], ['vidOffset'])
        for row in rows:
            if bookmark_key in row:
                raw_timestamp = _transform_datetime(  # pylint: disable=protected-access
                    row[bookmark_key],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
                modified_time = utils.strptime_with_tz(raw_timestamp)
            else:
                modified_time = None

            # Rows without a timestamp are always synced.
            if not modified_time or modified_time >= start:
                pending_vids.append(row['vid'])

            if modified_time and modified_time >= high_water_mark:
                high_water_mark = modified_time

            # Flush a full group of 100 vids.
            if len(pending_vids) == 100:
                _sync_contact_vids(catalog, pending_vids, schema, transformer)
                pending_vids = []

        # Flush whatever is left (called even when nothing is pending).
        _sync_contact_vids(catalog, pending_vids, schema, transformer)

    STATE = singer.write_bookmark(
        STATE, 'contacts', bookmark_key, utils.strftime(high_water_mark))
    singer.write_state(STATE)
    return STATE
示例12: sync_forms
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def sync_forms(STATE, ctx):
    """Sync the ``forms`` stream and advance its ``updatedAt`` bookmark.

    Args:
        STATE: Singer state dict.
        ctx: Context object providing ``get_catalog_from_id``.

    Returns:
        The state with the ``forms`` bookmark set to the highest
        ``updatedAt`` value seen (or the starting value).
    """
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("forms")
    bookmark_key = 'updatedAt'
    stream_alias = catalog.get('stream_alias')

    singer.write_schema("forms", schema, ["guid"], [bookmark_key], stream_alias)

    start = get_start(STATE, "forms", bookmark_key)
    high_water_mark = start
    LOGGER.info("sync_forms from %s", start)

    forms = request(get_url("forms")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as transformer:
        for form in forms:
            record = transformer.transform(lift_properties_and_versions(form), schema, mdata)
            updated_at = record[bookmark_key]

            # Only records at or after the starting bookmark are emitted.
            if updated_at >= start:
                singer.write_record("forms", record, catalog.get('stream_alias'),
                                    time_extracted=time_extracted)
            if updated_at >= high_water_mark:
                high_water_mark = updated_at

    STATE = singer.write_bookmark(STATE, 'forms', bookmark_key, high_water_mark)
    singer.write_state(STATE)
    return STATE
示例13: sync_workflows
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def sync_workflows(STATE, ctx):
    """Sync the ``workflows`` stream and advance its ``updatedAt`` bookmark.

    The starting bookmark is written to state before any request is made,
    and the final bookmark is written again once all rows are processed.

    Args:
        STATE: Singer state dict.
        ctx: Context object providing ``get_catalog_from_id``.

    Returns:
        The state with the ``workflows`` bookmark set to the highest
        ``updatedAt`` value seen (or the starting value).
    """
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("workflows")
    bookmark_key = 'updatedAt'
    stream_alias = catalog.get('stream_alias')

    singer.write_schema("workflows", schema, ["id"], [bookmark_key], stream_alias)

    start = get_start(STATE, "workflows", bookmark_key)
    high_water_mark = start

    # Persist the starting bookmark up front.
    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, high_water_mark)
    singer.write_state(STATE)

    LOGGER.info("sync_workflows from %s", start)

    payload = request(get_url("workflows")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as transformer:
        for workflow in payload['workflows']:
            record = transformer.transform(
                lift_properties_and_versions(workflow), schema, mdata)
            updated_at = record[bookmark_key]

            # Only records at or after the starting bookmark are emitted.
            if updated_at >= start:
                singer.write_record("workflows", record, catalog.get('stream_alias'),
                                    time_extracted=time_extracted)
            if updated_at >= high_water_mark:
                high_water_mark = updated_at

    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, high_water_mark)
    singer.write_state(STATE)
    return STATE
示例14: sync_owners
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def sync_owners(STATE, ctx):
    """Sync the ``owners`` stream and advance its ``updatedAt`` bookmark.

    When the ``include_inactives`` config entry is truthy, the request is
    sent with ``includeInactives=true``.

    Args:
        STATE: Singer state dict.
        ctx: Context object providing ``get_catalog_from_id``.

    Returns:
        The state with the ``owners`` bookmark set to the highest
        ``updatedAt`` value seen (or the starting value).
    """
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("owners")
    bookmark_key = 'updatedAt'
    stream_alias = catalog.get('stream_alias')

    singer.write_schema("owners", schema, ["ownerId"], [bookmark_key], stream_alias)

    start = get_start(STATE, "owners", bookmark_key)
    high_water_mark = start
    LOGGER.info("sync_owners from %s", start)

    params = {}
    if CONFIG.get('include_inactives'):
        params['includeInactives'] = "true"

    owners = request(get_url("owners"), params).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as transformer:
        for owner in owners:
            record = transformer.transform(lift_properties_and_versions(owner), schema, mdata)
            updated_at = record[bookmark_key]

            if updated_at >= high_water_mark:
                high_water_mark = updated_at

            # Only records at or after the starting bookmark are emitted.
            if updated_at >= start:
                singer.write_record("owners", record, catalog.get('stream_alias'),
                                    time_extracted=time_extracted)

    STATE = singer.write_bookmark(STATE, 'owners', bookmark_key, high_water_mark)
    singer.write_state(STATE)
    return STATE
示例15: setUp
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_bookmark [as 别名]
def setUp(self):
    """Prepare the test environment and replace singer's writer functions.

    Verifies the environment variables, seeds the tap config, then rebinds
    several ``singer`` module attributes to test doubles from ``utils``
    (and ``set_offset`` to ``set_offset_with_exception``).
    """
    utils.verify_environment_vars()
    utils.seed_tap_hubspot_config()

    # (attribute on the singer module, replacement callable), applied in order.
    replacements = (
        ('write_bookmark', utils.our_write_bookmark),
        ('write_state', utils.our_write_state),
        ('write_record', utils.our_write_record),
        ('write_schema', utils.our_write_schema),
        ('set_offset', set_offset_with_exception),
    )
    for attribute, replacement in replacements:
        setattr(singer, attribute, replacement)
#NB> test accounts must have > 1 companies for this to work