This article collects typical usage examples of the Python method singer.write_state. If you are wondering what singer.write_state does, how to use it, or where to find real-world examples of it, the curated code samples below may help. You can also explore further usage examples from the singer module that this method belongs to.
Fifteen code examples of singer.write_state are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
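A quick orientation before the examples: per the Singer specification, singer.write_state serializes the given state dictionary into a STATE message, one JSON object per line on stdout, so a downstream target or runner can persist it and resume later. A minimal sketch of the equivalent behavior, for illustration only (the real implementation lives in singer-python):

import json
import sys

def write_state_sketch(value):
    # Roughly what singer.write_state(value) does: emit a single-line
    # JSON STATE message on stdout, per the Singer specification.
    json.dump({"type": "STATE", "value": value}, sys.stdout)
    sys.stdout.write("\n")
    sys.stdout.flush()

write_state_sketch({"bookmarks": {"users": {"updated_at": "2020-01-01T00:00:00Z"}}})
# {"type": "STATE", "value": {"bookmarks": {"users": {"updated_at": "2020-01-01T00:00:00Z"}}}}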
Example 1: sync
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync():
    streams_.validate_dependencies()

    # Two loops through the streams are necessary so that every schema is
    # output BEFORE any stream is synced. Otherwise, the first stream might
    # generate data for the second stream before the second stream has
    # output its schema.
    for stream in streams_.ALL_STREAMS:
        output_schema(stream)

    for stream in streams_.ALL_STREAMS:
        if not Context.is_selected(stream.tap_stream_id):
            continue

        # indirect_stream indicates that the data for this stream comes from
        # some other stream, so we don't sync it directly.
        if stream.indirect_stream:
            continue

        Context.state["currently_syncing"] = stream.tap_stream_id
        singer.write_state(Context.state)
        stream.sync()

    Context.state["currently_syncing"] = None
    singer.write_state(Context.state)
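The "currently_syncing" key written above is what lets an interrupted run resume from the stream it was on. Assuming two hypothetical selected streams, "issues" and "worklogs", the loop emits STATE messages roughly like this:

# {"type": "STATE", "value": {"currently_syncing": "issues", ...}}
# {"type": "STATE", "value": {"currently_syncing": "worklogs", ...}}
# {"type": "STATE", "value": {"currently_syncing": null, ...}}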
Example 2: sync
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync(self):
    updated_bookmark = [self.tap_stream_id, "updated"]
    last_updated = Context.update_start_date_bookmark(updated_bookmark)

    while True:
        ids_page = self._fetch_ids(last_updated)
        if not ids_page["values"]:
            break
        ids = [x["worklogId"] for x in ids_page["values"]]
        worklogs = self._fetch_worklogs(ids)

        # Grab last_updated before the transform in write_page
        new_last_updated = advance_bookmark(worklogs)
        self.write_page(worklogs)

        last_updated = new_last_updated
        Context.set_bookmark(updated_bookmark, last_updated)
        singer.write_state(Context.state)

        # lastPage is a boolean value based on
        # https://developer.atlassian.com/cloud/jira/platform/rest/v3/?utm_source=%2Fcloud%2Fjira%2Fplatform%2Frest%2F&utm_medium=302#api-api-3-worklog-updated-get
        last_page = ids_page.get("lastPage")
        if last_page:
            break
Example 3: do_sync
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def do_sync(config, catalog, state):
    LOGGER.info('Starting sync.')

    for stream in catalog['streams']:
        stream_name = stream['tap_stream_id']
        mdata = metadata.to_map(stream['metadata'])
        table_spec = next(s for s in config['tables'] if s['table_name'] == stream_name)

        if not stream_is_selected(mdata):
            LOGGER.info("%s: Skipping - not selected", stream_name)
            continue

        singer.write_state(state)
        key_properties = metadata.get(mdata, (), 'table-key-properties')
        singer.write_schema(stream_name, stream['schema'], key_properties)

        LOGGER.info("%s: Starting sync", stream_name)
        counter_value = sync_stream(config, state, table_spec, stream)
        LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter_value)

    LOGGER.info('Done syncing.')
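stream_is_selected is not shown in this excerpt; in many taps it is a small helper over the compiled catalog metadata, where the empty-tuple breadcrumb () addresses the stream-level entry. A typical definition, offered as an assumption rather than this tap's exact code:

def stream_is_selected(mdata):
    # () is the stream-level breadcrumb in singer.metadata's map form;
    # the 'selected' key is set by the user in the catalog.
    return mdata.get((), {}).get('selected', False)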
Example 4: update_bookmark
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def update_bookmark(self, bookmark_value, bookmark_key=None):
    # NOTE: A bookmark can never be updated in a way that skips the most
    # recent record seen, because the querying only allows
    # greater-than-or-equal semantics.
    singer.write_bookmark(
        Context.state,
        # name is overridden by some substreams
        self.name,
        bookmark_key or self.replication_key,
        bookmark_value,
    )
    singer.write_state(Context.state)

# This method can be overridden by subclasses for specialized API
# interactions. If you override it, remember to decorate it with
# shopify_error_handling to get 429 and 500 handling.
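For reference, singer.write_bookmark nests the value under state['bookmarks'][stream][key], creating the intermediate dictionaries as needed, and returns the state. A small sketch of the resulting shape (the stream name "orders" is made up):

import singer

state = singer.write_bookmark({}, 'orders', 'updated_at', '2020-01-01T00:00:00Z')
# state == {'bookmarks': {'orders': {'updated_at': '2020-01-01T00:00:00Z'}}}
singer.write_state(state)
# emits: {"type": "STATE", "value": {"bookmarks": {"orders": {"updated_at": "2020-01-01T00:00:00Z"}}}}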
Example 5: sync_time_filtered
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync_time_filtered(entity):
    bookmark_property = 'updated_at'

    singer.write_schema(entity,
                        utils.load_schema(entity),
                        ["id"],
                        bookmark_properties=[bookmark_property])
    start = get_start(entity)

    logger.info("Syncing {} from {}".format(entity, start))
    for row in gen_request(get_url(entity)):
        if row[bookmark_property] >= start:
            if 'custom_fields' in row:
                row['custom_fields'] = transform_dict(row['custom_fields'], force_str=True)

            utils.update_state(STATE, entity, row[bookmark_property])
            singer.write_record(entity, row, time_extracted=singer.utils.now())

    singer.write_state(STATE)
Example 6: sync_stream
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync_stream(sf, catalog_entry, state):
    stream = catalog_entry['stream']

    with metrics.record_counter(stream) as counter:
        try:
            sync_records(sf, catalog_entry, state, counter)
            singer.write_state(state)
        except RequestException as ex:
            raise Exception("Error syncing {}: {} Response: {}".format(
                stream, ex, ex.response.text)) from ex
        except Exception as ex:
            raise Exception("Error syncing {}: {}".format(
                stream, ex)) from ex

        return counter
Example 7: _bulk_query
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def _bulk_query(self, catalog_entry, state):
    job_id = self._create_job(catalog_entry)
    start_date = self.sf.get_start_date(state, catalog_entry)
    batch_id = self._add_batch(catalog_entry, job_id, start_date)

    self._close_job(job_id)

    batch_status = self._poll_on_batch_status(job_id, batch_id)

    if batch_status['state'] == 'Failed':
        if self._can_pk_chunk_job(batch_status['stateMessage']):
            batch_status = self._bulk_query_with_pk_chunking(catalog_entry, start_date)
            job_id = batch_status['job_id']

            # Set pk_chunking to True to indicate that the bookmark should be written differently
            self.sf.pk_chunking = True

            # Add the bulk Job ID and its batches to the state so the job can be resumed if necessary
            tap_stream_id = catalog_entry['tap_stream_id']
            state = singer.write_bookmark(state, tap_stream_id, 'JobID', job_id)
            state = singer.write_bookmark(state, tap_stream_id, 'BatchIDs', batch_status['completed'][:])

            for completed_batch_id in batch_status['completed']:
                for result in self.get_batch_results(job_id, completed_batch_id, catalog_entry):
                    yield result
                # Remove the completed batch ID and write state
                state['bookmarks'][tap_stream_id]['BatchIDs'].remove(completed_batch_id)
                LOGGER.info("Finished syncing batch %s. Removing batch from state.", completed_batch_id)
                LOGGER.info("Batches to go: %d", len(state['bookmarks'][tap_stream_id]['BatchIDs']))
                singer.write_state(state)
        else:
            raise TapSalesforceException(batch_status['stateMessage'])
    else:
        for result in self.get_batch_results(job_id, batch_id, catalog_entry):
            yield result
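The point of writing JobID and BatchIDs into the bookmark is resumability: if the tap dies mid-job, the remaining batch IDs are still present in the last emitted state. The bookmark section evolves roughly like this (stream, job, and batch IDs are made up):

# after the two write_bookmark calls:
# {"bookmarks": {"Account": {"JobID": "JOB1", "BatchIDs": ["B1", "B2", "B3"]}}}
# after the first batch has been drained and removed:
# {"bookmarks": {"Account": {"JobID": "JOB1", "BatchIDs": ["B2", "B3"]}}}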
Example 8: gen_request
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def gen_request(STATE, tap_stream_id, url, params, path, more_key, offset_keys, offset_targets):
    if len(offset_keys) != len(offset_targets):
        raise ValueError("Number of offset_keys must match number of offset_targets")

    if singer.get_offset(STATE, tap_stream_id):
        params.update(singer.get_offset(STATE, tap_stream_id))

    with metrics.record_counter(tap_stream_id) as counter:
        while True:
            data = request(url, params).json()

            for row in data[path]:
                counter.increment()
                yield row

            if not data.get(more_key, False):
                break

            STATE = singer.clear_offset(STATE, tap_stream_id)
            for key, target in zip(offset_keys, offset_targets):
                if key in data:
                    params[target] = data[key]
                    STATE = singer.set_offset(STATE, tap_stream_id, target, data[key])
            singer.write_state(STATE)

    STATE = singer.clear_offset(STATE, tap_stream_id)
    singer.write_state(STATE)
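The offset helpers used here come from singer-python's bookmarks module: set_offset stores values under state['bookmarks'][tap_stream_id]['offset'], get_offset reads that dict back, and clear_offset resets it once pagination completes. A short sketch under those assumptions (stream and key names are illustrative):

import singer

STATE = singer.set_offset({}, 'contacts', 'vidOffset', 12345)
# STATE == {'bookmarks': {'contacts': {'offset': {'vidOffset': 12345}}}}
assert singer.get_offset(STATE, 'contacts') == {'vidOffset': 12345}
STATE = singer.clear_offset(STATE, 'contacts')
# the stored offset is now empty, so the next run starts pagination fresh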
Example 9: sync_contacts
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync_contacts(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    bookmark_key = 'versionTimestamp'
    start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")

    singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params, 'contacts', 'has-more', ['vid-offset'], ['vidOffset']):
            modified_time = None
            if bookmark_key in row:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime(  # pylint: disable=protected-access
                        row[bookmark_key],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, 'contacts', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
Example 10: sync_forms
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync_forms(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("forms")
    bookmark_key = 'updatedAt'

    singer.write_schema("forms", schema, ["guid"], [bookmark_key], catalog.get('stream_alias'))
    start = get_start(STATE, "forms", bookmark_key)
    max_bk_value = start

    LOGGER.info("sync_forms from %s", start)

    data = request(get_url("forms")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)

            if record[bookmark_key] >= start:
                singer.write_record("forms", record, catalog.get('stream_alias'), time_extracted=time_extracted)
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]

    STATE = singer.write_bookmark(STATE, 'forms', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE
Example 11: sync_workflows
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync_workflows(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("workflows")
    bookmark_key = 'updatedAt'

    singer.write_schema("workflows", schema, ["id"], [bookmark_key], catalog.get('stream_alias'))
    start = get_start(STATE, "workflows", bookmark_key)
    max_bk_value = start

    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value)
    singer.write_state(STATE)

    LOGGER.info("sync_workflows from %s", start)

    data = request(get_url("workflows")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data['workflows']:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
            if record[bookmark_key] >= start:
                singer.write_record("workflows", record, catalog.get('stream_alias'), time_extracted=time_extracted)
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]

    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE
Example 12: sync_owners
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync_owners(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("owners")
    bookmark_key = 'updatedAt'

    singer.write_schema("owners", schema, ["ownerId"], [bookmark_key], catalog.get('stream_alias'))
    start = get_start(STATE, "owners", bookmark_key)
    max_bk_value = start

    LOGGER.info("sync_owners from %s", start)
    params = {}
    if CONFIG.get('include_inactives'):
        params['includeInactives'] = "true"
    data = request(get_url("owners"), params).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]

            if record[bookmark_key] >= start:
                singer.write_record("owners", record, catalog.get('stream_alias'), time_extracted=time_extracted)

    STATE = singer.write_bookmark(STATE, 'owners', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE
Example 13: sync_deal_pipelines
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def sync_deal_pipelines(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema('deal_pipelines')
    singer.write_schema('deal_pipelines', schema, ['pipelineId'], catalog.get('stream_alias'))
    LOGGER.info('sync_deal_pipelines')

    data = request(get_url('deal_pipelines')).json()
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
            singer.write_record("deal_pipelines", record, catalog.get('stream_alias'), time_extracted=utils.now())

    singer.write_state(STATE)
    return STATE
Example 14: setUp
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def setUp(self):
    utils.verify_environment_vars()
    utils.seed_tap_hubspot_config()
    singer.write_bookmark = utils.our_write_bookmark
    singer.write_state = utils.our_write_state
    singer.write_record = utils.our_write_record
    singer.write_schema = utils.our_write_schema
    singer.set_offset = set_offset_with_exception
    # NB: test accounts must have more than one company for this to work
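utils.our_write_state and the other our_* functions are test doubles defined elsewhere in this suite; the pattern is simply to monkey-patch singer's writer functions so the test can capture emitted messages instead of printing them. A purely hypothetical capture-style replacement, to show the idea:

CAUGHT_STATES = []

def our_write_state(state):
    # Hypothetical stand-in for singer.write_state: record the state
    # instead of emitting it, so a test can assert on it afterwards.
    CAUGHT_STATES.append(state)
    return state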
Example 15: do_sync
# Required import: import singer [as alias]
# Or: from singer import write_state [as alias]
def do_sync(account, catalog, state):
    streams_to_sync = get_streams_to_sync(account, catalog, state)
    refs = load_shared_schema_refs()

    for stream in streams_to_sync:
        LOGGER.info('Syncing %s, fields %s', stream.name, stream.fields())
        schema = singer.resolve_schema_references(load_schema(stream), refs)
        metadata_map = metadata.to_map(stream.catalog_entry.metadata)
        bookmark_key = BOOKMARK_KEYS.get(stream.name)
        singer.write_schema(stream.name, schema, stream.key_properties, bookmark_key, stream.stream_alias)

        # NB: The AdCreative stream is not an iterator
        if stream.name == 'adcreative':
            stream.sync()
            continue

        with Transformer(pre_hook=transform_date_hook) as transformer:
            with metrics.record_counter(stream.name) as counter:
                for message in stream:
                    if 'record' in message:
                        counter.increment()
                        time_extracted = utils.now()
                        record = transformer.transform(message['record'], schema, metadata=metadata_map)
                        singer.write_record(stream.name, record, stream.stream_alias, time_extracted)
                    elif 'state' in message:
                        singer.write_state(message['state'])
                    else:
                        raise TapFacebookException('Unrecognized message {}'.format(message))