本文整理汇总了Python中singer.write_schema方法的典型用法代码示例。如果您正苦于以下问题:Python singer.write_schema方法的具体用法?Python singer.write_schema怎么用?Python singer.write_schema使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类singer
的用法示例。
在下文中一共展示了singer.write_schema方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sync_campaigns
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_campaigns(STATE, ctx):
    """Sync the HubSpot campaigns stream (full table, no bookmark).

    Pages through the campaign id list, fetches each campaign's detail
    record, transforms it, and emits it. Returns the unchanged STATE.
    """
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("campaigns")
    # BUG FIX: the alias was previously passed as the 4th positional arg,
    # which bound it to write_schema's `bookmark_properties` parameter
    # instead of `stream_alias`. Pass it by keyword so the SCHEMA message
    # carries the alias, matching the write_record call below.
    singer.write_schema("campaigns", schema, ["id"],
                        stream_alias=catalog.get('stream_alias'))
    LOGGER.info("sync_campaigns(NO bookmarks)")
    url = get_url("campaigns_all")
    params = {'limit': 500}
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'campaigns', url, params, "campaigns", "hasMore", ["offset"], ["offset"]):
            # The list endpoint only returns ids; a second request fetches the full record.
            record = request(get_url("campaigns_detail", campaign_id=row['id'])).json()
            record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata)
            singer.write_record("campaigns", record, catalog.get('stream_alias'), time_extracted=utils.now())
    return STATE
示例2: do_sync
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def do_sync(config, catalog, state):
    """Sync every selected stream in the catalog, one at a time."""
    LOGGER.info('Starting sync.')
    for stream in catalog['streams']:
        name = stream['tap_stream_id']
        meta_map = metadata.to_map(stream['metadata'])
        # Locate the table spec in config that matches this stream.
        spec = next(s for s in config['tables'] if s['table_name'] == name)
        if not stream_is_selected(meta_map):
            LOGGER.info("%s: Skipping - not selected", name)
            continue
        singer.write_state(state)
        keys = metadata.get(meta_map, (), 'table-key-properties')
        singer.write_schema(name, stream['schema'], keys)
        LOGGER.info("%s: Starting sync", name)
        row_count = sync_stream(config, state, spec, stream)
        LOGGER.info("%s: Completed sync (%s rows)", name, row_count)
    LOGGER.info('Done syncing.')
示例3: sync_time_filtered
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_time_filtered(entity):
    """Sync rows of *entity* whose bookmark field is at or after the stored start."""
    bookmark_property = 'updated_at'
    singer.write_schema(entity,
                        utils.load_schema(entity),
                        ["id"],
                        bookmark_properties=[bookmark_property])
    start = get_start(entity)
    logger.info("Syncing {} from {}".format(entity, start))
    for row in gen_request(get_url(entity)):
        # Skip anything older than the bookmark.
        if row[bookmark_property] < start:
            continue
        if 'custom_fields' in row:
            row['custom_fields'] = transform_dict(row['custom_fields'], force_str=True)
        utils.update_state(STATE, entity, row[bookmark_property])
        singer.write_record(entity, row, time_extracted=singer.utils.now())
    singer.write_state(STATE)
示例4: output_schema
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def output_schema(stream):
    """Emit the SCHEMA message for *stream* keyed on its pk fields."""
    singer.write_schema(stream.tap_stream_id,
                        load_schema(stream.tap_stream_id),
                        stream.pk_fields)
示例5: sync_contacts
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_contacts(STATE, ctx):
    """Sync the HubSpot contacts stream, bookmarked on versionTimestamp.

    Pages through the contact id list, collects vids in batches of 100,
    and delegates the detail fetch/emit to _sync_contact_vids. Advances
    the bookmark to the newest versionTimestamp seen and returns STATE.
    """
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    bookmark_key = 'versionTimestamp'
    start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")
    singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params, 'contacts', 'has-more', ['vid-offset'], ['vidOffset']):
            modified_time = None
            if bookmark_key in row:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime(  # pylint: disable=protected-access
                        row[bookmark_key],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            # Only contacts at or after the bookmark are fetched in detail;
            # rows missing the bookmark field are always included.
            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            # Fetch and emit details once a full batch of 100 vids accumulates.
            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        # Flush the final partial batch.
        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, 'contacts', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
示例6: sync_forms
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_forms(STATE, ctx):
    """Sync the HubSpot forms stream, bookmarked on updatedAt."""
    bookmark_key = 'updatedAt'
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("forms")
    singer.write_schema("forms", schema, ["guid"], [bookmark_key], catalog.get('stream_alias'))

    start = get_start(STATE, "forms", bookmark_key)
    max_bk_value = start
    LOGGER.info("sync_forms from %s", start)

    data = request(get_url("forms")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for raw_row in data:
            record = bumble_bee.transform(lift_properties_and_versions(raw_row), schema, mdata)
            # Emit only records at or after the bookmark, tracking the max seen.
            if record[bookmark_key] >= start:
                singer.write_record("forms", record, catalog.get('stream_alias'), time_extracted=time_extracted)
                if record[bookmark_key] >= max_bk_value:
                    max_bk_value = record[bookmark_key]

    STATE = singer.write_bookmark(STATE, 'forms', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE
示例7: sync_workflows
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_workflows(STATE, ctx):
    """Sync the HubSpot workflows stream, bookmarked on updatedAt."""
    bookmark_key = 'updatedAt'
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("workflows")
    singer.write_schema("workflows", schema, ["id"], [bookmark_key], catalog.get('stream_alias'))

    start = get_start(STATE, "workflows", bookmark_key)
    max_bk_value = start

    # Persist the starting bookmark up front so an interrupted run resumes here.
    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value)
    singer.write_state(STATE)

    LOGGER.info("sync_workflows from %s", start)
    data = request(get_url("workflows")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for raw_row in data['workflows']:
            record = bumble_bee.transform(lift_properties_and_versions(raw_row), schema, mdata)
            if record[bookmark_key] >= start:
                singer.write_record("workflows", record, catalog.get('stream_alias'), time_extracted=time_extracted)
                if record[bookmark_key] >= max_bk_value:
                    max_bk_value = record[bookmark_key]

    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE
示例8: sync_owners
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_owners(STATE, ctx):
    """Sync the HubSpot owners stream, bookmarked on updatedAt."""
    bookmark_key = 'updatedAt'
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("owners")
    singer.write_schema("owners", schema, ["ownerId"], [bookmark_key], catalog.get('stream_alias'))

    start = get_start(STATE, "owners", bookmark_key)
    max_bk_value = start
    LOGGER.info("sync_owners from %s", start)

    # Optionally include deactivated owners, per config.
    params = {'includeInactives': "true"} if CONFIG.get('include_inactives') else {}
    data = request(get_url("owners"), params).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for raw_row in data:
            record = bumble_bee.transform(lift_properties_and_versions(raw_row), schema, mdata)
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]
            if record[bookmark_key] >= start:
                singer.write_record("owners", record, catalog.get('stream_alias'), time_extracted=time_extracted)

    STATE = singer.write_bookmark(STATE, 'owners', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE
示例9: sync_deal_pipelines
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_deal_pipelines(STATE, ctx):
    """Sync the HubSpot deal_pipelines stream (full table, no bookmark)."""
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema('deal_pipelines')
    # BUG FIX: the alias was previously passed as the 4th positional arg,
    # which bound it to write_schema's `bookmark_properties` parameter
    # instead of `stream_alias`. Pass it by keyword so the SCHEMA message
    # carries the alias, matching the write_record call below.
    singer.write_schema('deal_pipelines', schema, ['pipelineId'],
                        stream_alias=catalog.get('stream_alias'))
    LOGGER.info('sync_deal_pipelines')
    data = request(get_url('deal_pipelines')).json()
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
            singer.write_record("deal_pipelines", record, catalog.get('stream_alias'), time_extracted=utils.now())
    singer.write_state(STATE)
    return STATE
示例10: do_sync
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def do_sync(account, catalog, state):
    """Sync every selected Facebook Ads stream.

    Resolves shared schema refs, emits each stream's SCHEMA message, then
    drains the stream iterator, dispatching on each message's shape:
    'record' messages are transformed and written, 'state' messages are
    forwarded, anything else raises TapFacebookException.
    """
    streams_to_sync = get_streams_to_sync(account, catalog, state)
    refs = load_shared_schema_refs()
    for stream in streams_to_sync:
        LOGGER.info('Syncing %s, fields %s', stream.name, stream.fields())
        schema = singer.resolve_schema_references(load_schema(stream), refs)
        metadata_map = metadata.to_map(stream.catalog_entry.metadata)
        bookmark_key = BOOKMARK_KEYS.get(stream.name)
        singer.write_schema(stream.name, schema, stream.key_properties, bookmark_key, stream.stream_alias)

        # NB: The AdCreative stream is not an iterator
        if stream.name == 'adcreative':
            stream.sync()
            continue

        with Transformer(pre_hook=transform_date_hook) as transformer:
            with metrics.record_counter(stream.name) as counter:
                for message in stream:
                    if 'record' in message:
                        counter.increment()
                        time_extracted = utils.now()
                        record = transformer.transform(message['record'], schema, metadata=metadata_map)
                        singer.write_record(stream.name, record, stream.stream_alias, time_extracted)
                    elif 'state' in message:
                        singer.write_state(message['state'])
                    else:
                        raise TapFacebookException('Unrecognized message {}'.format(message))
示例11: write_schema
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def write_schema(stream_name, schema, primary_keys, bookmark_properties=None):
    """Emit a SCHEMA message for *stream_name*, deep-copying the schema first
    so downstream mutation cannot corrupt the caller's copy."""
    singer.write_schema(stream_name,
                        copy.deepcopy(schema),
                        primary_keys,
                        bookmark_properties=bookmark_properties)

# No rate limit here, since this request is only made once
# per discovery (not sync) job
示例12: sync_tickets
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_tickets():
    """Sync tickets plus their child streams, then run the filtered passes.

    Emits schemas for the four ticket-related streams and syncs tickets
    three times: default, deleted, and spam filters.
    """
    bookmark_property = 'updated_at'
    # All four streams share the same key and bookmark property; emit their
    # schemas in a loop instead of four copy-pasted calls (same order as before).
    for stream in ("tickets", "conversations", "satisfaction_ratings", "time_entries"):
        singer.write_schema(stream,
                            utils.load_schema(stream),
                            ["id"],
                            bookmark_properties=[bookmark_property])
    sync_tickets_by_filter(bookmark_property)
    sync_tickets_by_filter(bookmark_property, "deleted")
    sync_tickets_by_filter(bookmark_property, "spam")
示例13: test_write_schema
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def test_write_schema(self):
    """Smoke test: write_schema accepts a minimal object schema."""
    users_schema = {
        'type': 'object',
        'properties': {
            'name': {'type': 'string'},
        },
    }
    singer.write_schema("users", users_schema, ["name"])
示例14: sync
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync(config, state, catalog):
    """ Sync data from tap source """
    # Loop over selected streams in catalog
    for stream in catalog.get_selected_streams(state):
        LOGGER.info("Syncing stream:" + stream.tap_stream_id)

        bookmark_column = stream.replication_key
        is_sorted = True  # TODO: indicate whether data is sorted ascending on bookmark value
        singer.write_schema(
            stream_name=stream.tap_stream_id,
            schema=stream.schema,
            key_properties=stream.key_properties,
        )

        # TODO: delete and replace this inline function with your own data
        # retrieval process. NOTE: "${x}" is a literal template artifact, not
        # interpolation — every placeholder row gets the same name.
        def tap_data():
            return [{"id": x, "name": "row${x}"} for x in range(1000)]

        max_bookmark = None
        for row in tap_data():
            # TODO: place type conversions or transformations here
            # write one or more rows to the stream:
            singer.write_records(stream.tap_stream_id, [row])
            if bookmark_column:
                if is_sorted:
                    # update bookmark to latest value
                    singer.write_state({stream.tap_stream_id: row[bookmark_column]})
                elif max_bookmark is None or row[bookmark_column] > max_bookmark:
                    # BUG FIX: the original called max(max_bookmark, ...) which
                    # raises TypeError on the first row (max_bookmark is None).
                    # If data unsorted, save max value until end of writes.
                    max_bookmark = row[bookmark_column]
        if bookmark_column and not is_sorted:
            singer.write_state({stream.tap_stream_id: max_bookmark})
    return
示例15: sync_companies
# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_companies(STATE, ctx):
    """Sync the HubSpot companies stream, bookmarked on hs_lastmodifieddate.

    Pages through all companies, fetches details for those modified at or
    after the bookmark, and (if selected) syncs the contacts_by_company
    child stream per company. The bookmark is capped at this sync's start
    time to avoid skipping records updated mid-sync. Returns STATE.
    """
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    bumble_bee = Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
    bookmark_key = 'hs_lastmodifieddate'
    start = utils.strptime_to_utc(get_start(STATE, "companies", bookmark_key))
    LOGGER.info("sync_companies from %s", start)
    schema = load_schema('companies')
    singer.write_schema("companies", schema, ["companyId"], [bookmark_key], catalog.get('stream_alias'))

    # Because this stream doesn't query by `lastUpdated`, it cycles
    # through the data set every time. The issue with this is that there
    # is a race condition by which records may be updated between the
    # start of this table's sync and the end, causing some updates to not
    # be captured, in order to combat this, we must store the current
    # sync's start in the state and not move the bookmark past this value.
    current_sync_start = get_current_sync_start(STATE, "companies") or utils.now()
    STATE = write_current_sync_start(STATE, "companies", current_sync_start)
    singer.write_state(STATE)

    url = get_url("companies_all")
    max_bk_value = start

    # Emit the child stream's schema up front if it was selected.
    if CONTACTS_BY_COMPANY in ctx.selected_stream_ids:
        contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY)
        singer.write_schema("contacts_by_company", contacts_by_company_schema, ["company-id", "contact-id"])

    with bumble_bee:
        for row in gen_request(STATE, 'companies', url, default_company_params, 'companies', 'has-more', ['offset'], ['offset']):
            row_properties = row['properties']
            modified_time = None
            if bookmark_key in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)
            elif 'createdate' in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            # Detail-fetch and emit records modified at/after the bookmark;
            # rows without either timestamp are always included.
            if not modified_time or modified_time >= start:
                record = request(get_url("companies_detail", company_id=row['companyId'])).json()
                record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata)
                singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now())
                if CONTACTS_BY_COMPANY in ctx.selected_stream_ids:
                    STATE = _sync_contacts_by_company(STATE, ctx, record['companyId'])

    # Don't bookmark past the start of this sync to account for updated records during the sync.
    new_bookmark = min(max_bk_value, current_sync_start)
    STATE = singer.write_bookmark(STATE, 'companies', bookmark_key, utils.strftime(new_bookmark))
    STATE = write_current_sync_start(STATE, 'companies', None)
    singer.write_state(STATE)
    return STATE