本文整理匯總了Python中singer.metadata.to_map方法的典型用法代碼示例。如果您正苦於以下問題:Python metadata.to_map方法的具體用法?Python metadata.to_map怎麼用?Python metadata.to_map使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊singer.metadata的用法示例。
在下文中一共展示了metadata.to_map方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: _sync_contacts_by_company
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def _sync_contacts_by_company(STATE, ctx, company_id):
    """Write one ``contacts_by_company`` record per contact id of a company.

    Requests the contacts-by-company endpoint for ``company_id``, reads the
    ``'vids'`` list from the JSON response, pairs every entry with the company
    id, transforms the pair against the stream schema/metadata and writes it
    as a singer record.

    Args:
        STATE: singer state; only used to look up the currently syncing
            stream. It is returned as received.
        ctx: object exposing ``get_catalog_from_id``.
        company_id: id of the company whose contacts are requested.

    Returns:
        ``STATE``.
    """
    stream_schema = load_schema(CONTACTS_BY_COMPANY)
    stream_catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    stream_mdata = metadata.to_map(stream_catalog.get('metadata'))
    endpoint = get_url("contacts_by_company", company_id=company_id)

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee, \
            metrics.record_counter(CONTACTS_BY_COMPANY) as counter:
        payload = request(endpoint, default_contacts_by_company_params).json()
        for contact_id in payload['vids']:
            counter.increment()
            pair = {'company-id': company_id, 'contact-id': contact_id}
            transformed = bumble_bee.transform(
                lift_properties_and_versions(pair), stream_schema, stream_mdata)
            singer.write_record(
                "contacts_by_company", transformed, time_extracted=utils.now())

    return STATE
示例2: sync_campaigns
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def sync_campaigns(STATE, ctx):
    """Sync the ``campaigns`` stream without bookmarks.

    Writes the stream schema (key property ``id``), then pages through the
    ``campaigns_all`` listing and, for every listed row, requests the
    ``campaigns_detail`` endpoint, transforms the detail payload and writes
    it as a ``campaigns`` record.

    Args:
        STATE: singer state; passed to ``gen_request`` and used to look up
            the currently syncing stream.
        ctx: object exposing ``get_catalog_from_id``.

    Returns:
        ``STATE``.
    """
    stream_catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    stream_mdata = metadata.to_map(stream_catalog.get('metadata'))
    campaign_schema = load_schema("campaigns")
    alias = stream_catalog.get('stream_alias')

    singer.write_schema("campaigns", campaign_schema, ["id"], alias)
    LOGGER.info("sync_campaigns(NO bookmarks)")

    listing_url = get_url("campaigns_all")
    paging_params = {'limit': 500}

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        summaries = gen_request(STATE, 'campaigns', listing_url, paging_params,
                                "campaigns", "hasMore", ["offset"], ["offset"])
        for summary in summaries:
            # The listing row only supplies the id; the record itself comes
            # from a per-campaign detail request.
            detail_url = get_url("campaigns_detail", campaign_id=summary['id'])
            detail = request(detail_url).json()
            cleaned = bumble_bee.transform(
                lift_properties_and_versions(detail), campaign_schema, stream_mdata)
            singer.write_record("campaigns", cleaned, alias, time_extracted=utils.now())

    return STATE
示例3: discover_schemas
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def discover_schemas():
    """Build the discovery catalog for every stream.

    For each stream returned by ``initialize_streams_for_discovery`` the
    schema is loaded, its references are resolved against the shared schema
    refs, and standard metadata is generated from the stream's key
    properties. When the stream's bookmark key equals ``UPDATED_TIME_KEY``
    that property is marked with inclusion ``automatic``.

    Returns:
        A dict ``{'streams': [...]}`` where each entry holds ``stream``,
        ``tap_stream_id``, ``schema`` and ``metadata`` (as a list).
    """
    # Load Facebook's shared schemas
    shared_refs = load_shared_schema_refs()

    entries = []
    for stream in initialize_streams_for_discovery():
        LOGGER.info('Loading schema for %s', stream.name)
        resolved = singer.resolve_schema_references(load_schema(stream), shared_refs)

        standard_md = metadata.get_standard_metadata(
            resolved, key_properties=stream.key_properties)
        md_map = metadata.to_map(standard_md)

        bookmark_key = BOOKMARK_KEYS.get(stream.name)
        if bookmark_key == UPDATED_TIME_KEY:
            md_map = metadata.write(
                md_map, ('properties', bookmark_key), 'inclusion', 'automatic')

        entries.append({
            'stream': stream.name,
            'tap_stream_id': stream.name,
            'schema': resolved,
            'metadata': metadata.to_list(md_map),
        })

    return {'streams': entries}
示例4: do_sync_incremental
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def do_sync_incremental(mysql_conn, catalog_entry, state, columns, optional_limit=None):
    """Run an incremental sync of one table and emit the resulting state.

    Args:
        mysql_conn: connection handed through to ``incremental.sync_table``.
        catalog_entry: catalog entry exposing ``stream`` and ``metadata``.
        state: singer state; a deep copy is emitted as a STATE message.
        columns: columns handed through to ``incremental.sync_table``.
        optional_limit: when truthy, converted with ``int`` and passed as an
            extra positional argument to ``incremental.sync_table``.

    Raises:
        Exception: if the stream-level metadata has no ``replication-key``.
    """
    LOGGER.info("Stream %s is using incremental replication", catalog_entry.stream)

    # The empty breadcrumb () holds the stream-level metadata.
    root_md = metadata.to_map(catalog_entry.metadata).get((), {})
    replication_key = root_md.get('replication-key')
    if not replication_key:
        raise Exception(
            "Cannot use INCREMENTAL replication for table ({}) without a replication key.".format(
                catalog_entry.stream))

    write_schema_message(catalog_entry=catalog_entry,
                         bookmark_properties=[replication_key])

    sync_args = [mysql_conn, catalog_entry, state, columns]
    if optional_limit:
        row_limit = int(optional_limit)
        LOGGER.info("Incremental Stream %s is using an optional limit clause of %d",
                    catalog_entry.stream, row_limit)
        sync_args.append(row_limit)
    incremental.sync_table(*sync_args)

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
示例5: do_sync
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def do_sync(config, catalog, state):
    """Sync every selected stream in ``catalog``.

    For each stream the matching table spec is looked up in
    ``config['tables']`` by ``table_name``. Unselected streams are logged and
    skipped; selected ones get a STATE message, a SCHEMA message (key
    properties read from the stream-level ``table-key-properties`` metadata)
    and are then handed to ``sync_stream``.

    Args:
        config: tap configuration containing a ``'tables'`` list.
        catalog: catalog dict containing a ``'streams'`` list.
        state: singer state written before each stream and passed to
            ``sync_stream``.
    """
    LOGGER.info('Starting sync.')

    for stream in catalog['streams']:
        stream_name = stream['tap_stream_id']
        mdata = metadata.to_map(stream['metadata'])

        # NOTE(review): next() has no default, so a stream without a matching
        # table spec raises StopIteration here, before the selection check.
        matching_specs = (spec for spec in config['tables']
                          if spec['table_name'] == stream_name)
        table_spec = next(matching_specs)

        if stream_is_selected(mdata):
            singer.write_state(state)
            key_properties = metadata.get(mdata, (), 'table-key-properties')
            singer.write_schema(stream_name, stream['schema'], key_properties)

            LOGGER.info("%s: Starting sync", stream_name)
            counter_value = sync_stream(config, state, table_spec, stream)
            LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter_value)
        else:
            LOGGER.info("%s: Skipping - not selected", stream_name)

    LOGGER.info('Done syncing.')
示例6: get_all_team_members
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_all_team_members(team_slug, schemas, repo_path, state, mdata):
    """Yield the transformed member records of one GitHub team.

    Pages through the team's members endpoint, tags every member with
    ``_sdc_repository`` and yields the record after transforming it against
    ``schemas`` and the stream metadata.

    Args:
        team_slug: slug of the team, interpolated into the request URL.
        schemas: schema passed to ``Transformer.transform``.
        repo_path: ``"<org>/<repo>"``; the part before the first ``/`` is
            used as the organisation.
        state: returned unchanged as the generator's return value.
        mdata: raw metadata list, converted with ``metadata.to_map``.

    Yields:
        One transformed record per team member.

    Returns:
        ``state`` (available as ``StopIteration.value``).
    """
    org = repo_path.split('/')[0]
    members_url = ('https://api.github.com/orgs/{}/teams/{}/members'
                   '?sort=created_at&direction=desc').format(org, team_slug)

    # The metadata map does not depend on the record, so build it once
    # instead of once per member.
    mdata_map = metadata.to_map(mdata)

    with metrics.record_counter('team_members') as counter:
        for response in authed_get_all_pages('team_members', members_url):
            for member in response.json():
                member['_sdc_repository'] = repo_path

                # transform the team member record
                with singer.Transformer() as transformer:
                    rec = transformer.transform(member, schemas, metadata=mdata_map)
                counter.increment()

                yield rec

    return state
示例7: get_all_team_memberships
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_all_team_memberships(team_slug, schemas, repo_path, state, mdata):
    """Yield the transformed membership record of every member of a team.

    Pages through the team's members, then requests the membership endpoint
    for each member's ``login``. Every membership payload is tagged with
    ``_sdc_repository``, transformed and yielded.

    Args:
        team_slug: slug of the team, interpolated into the request URLs.
        schemas: schema passed to ``Transformer.transform``.
        repo_path: ``"<org>/<repo>"``; the part before the first ``/`` is
            used as the organisation.
        state: returned unchanged as the generator's return value.
        mdata: raw metadata list, converted with ``metadata.to_map``.

    Yields:
        One transformed membership record per membership response.

    Returns:
        ``state`` (available as ``StopIteration.value``).
    """
    org = repo_path.split('/')[0]
    members_url = ('https://api.github.com/orgs/{}/teams/{}/members'
                   '?sort=created_at&direction=desc').format(org, team_slug)

    # Loop-invariant: previously rebuilt for every membership record.
    mdata_map = metadata.to_map(mdata)

    for response in authed_get_all_pages('team_members', members_url):
        team_members = response.json()
        # A separate counter context is opened for each page of members.
        with metrics.record_counter('team_memberships') as counter:
            for member in team_members:
                username = member['login']
                membership_url = 'https://api.github.com/orgs/{}/teams/{}/memberships/{}'.format(
                    org, team_slug, username)
                for res in authed_get_all_pages('memberships', membership_url):
                    team_membership = res.json()
                    team_membership['_sdc_repository'] = repo_path
                    with singer.Transformer() as transformer:
                        rec = transformer.transform(
                            team_membership, schemas, metadata=mdata_map)
                    counter.increment()
                    yield rec

    return state
示例8: get_all_releases
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_all_releases(schemas, repo_path, state, mdata):
    """Write every release of a repository as a ``releases`` record.

    Each release is tagged with ``_sdc_repository``, transformed and written;
    after every record a ``releases`` bookmark holding the page's extraction
    time is written into ``state``.

    Args:
        schemas: schema passed to ``Transformer.transform``.
        repo_path: ``"<org>/<repo>"``, interpolated into the request URL and
            used as the bookmark's stream id.
        state: singer state receiving the bookmark.
        mdata: raw metadata list, converted with ``metadata.to_map``.

    Returns:
        ``state``.
    """
    # Releases doesn't seem to have an `updated_at` property, yet can be edited.
    # For this reason and since the volume of release can safely be considered low,
    # bookmarks were ignored for releases.
    releases_url = 'https://api.github.com/repos/{}/releases?sort=created_at&direction=desc'.format(
        repo_path)

    # Loop-invariant: previously rebuilt for every release.
    mdata_map = metadata.to_map(mdata)

    with metrics.record_counter('releases') as counter:
        for response in authed_get_all_pages('releases', releases_url):
            releases = response.json()
            extraction_time = singer.utils.now()
            # The extraction time is fixed per page, so format it once.
            since = singer.utils.strftime(extraction_time)

            for release in releases:
                release['_sdc_repository'] = repo_path

                # transform and write release record
                with singer.Transformer() as transformer:
                    rec = transformer.transform(release, schemas, metadata=mdata_map)
                singer.write_record('releases', rec, time_extracted=extraction_time)
                singer.write_bookmark(state, repo_path, 'releases', {'since': since})
                counter.increment()

    return state
示例9: get_commits_for_pr
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_commits_for_pr(pr_number, pr_id, schema, repo_path, state, mdata):
    """Yield the transformed commit records of one pull request.

    Every commit is enriched with ``_sdc_repository``, ``pr_number``,
    ``pr_id`` and a synthetic ``id`` of the form ``"<pr_id>-<sha>"`` before
    being transformed and yielded.

    Args:
        pr_number: pull request number, interpolated into the request URL.
        pr_id: pull request id, stored on each commit and used in its ``id``.
        schema: schema passed to ``Transformer.transform``.
        repo_path: ``"<org>/<repo>"``, interpolated into the request URL.
        state: returned unchanged as the generator's return value.
        mdata: raw metadata list, converted with ``metadata.to_map``.

    Yields:
        One transformed record per commit.

    Returns:
        ``state`` (available as ``StopIteration.value``) once all pages have
        been consumed.
    """
    commits_url = 'https://api.github.com/repos/{}/pulls/{}/commits'.format(
        repo_path, pr_number)

    # Loop-invariant: previously rebuilt for every commit.
    mdata_map = metadata.to_map(mdata)

    for response in authed_get_all_pages('pr_commits', commits_url):
        for commit in response.json():
            commit['_sdc_repository'] = repo_path
            commit['pr_number'] = pr_number
            commit['pr_id'] = pr_id
            # A commit can belong to several PRs, so the id combines both.
            commit['id'] = '{}-{}'.format(pr_id, commit['sha'])
            with singer.Transformer() as transformer:
                rec = transformer.transform(commit, schema, metadata=mdata_map)
            yield rec

    return state
示例10: get_all_assignees
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_all_assignees(schema, repo_path, state, mdata):
    '''
    Write every assignee of a repository as an ``assignees`` record.

    https://developer.github.com/v3/issues/assignees/#list-assignees

    Each assignee is tagged with ``_sdc_repository``, transformed and
    written; after every record an ``assignees`` bookmark holding the page's
    extraction time is written into ``state``.

    Args:
        schema: schema passed to ``Transformer.transform``.
        repo_path: ``"<org>/<repo>"``, interpolated into the request URL and
            used as the bookmark's stream id.
        state: singer state receiving the bookmark.
        mdata: raw metadata list, converted with ``metadata.to_map``.

    Returns:
        ``state``.
    '''
    assignees_url = 'https://api.github.com/repos/{}/assignees'.format(repo_path)

    # Loop-invariant: previously rebuilt for every assignee.
    mdata_map = metadata.to_map(mdata)

    with metrics.record_counter('assignees') as counter:
        for response in authed_get_all_pages('assignees', assignees_url):
            assignees = response.json()
            extraction_time = singer.utils.now()
            # The extraction time is fixed per page, so format it once.
            since = singer.utils.strftime(extraction_time)

            for assignee in assignees:
                assignee['_sdc_repository'] = repo_path
                with singer.Transformer() as transformer:
                    rec = transformer.transform(assignee, schema, metadata=mdata_map)
                singer.write_record('assignees', rec, time_extracted=extraction_time)
                singer.write_bookmark(state, repo_path, 'assignees', {'since': since})
                counter.increment()

    return state
示例11: get_all_collaborators
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_all_collaborators(schema, repo_path, state, mdata):
    '''
    Write every collaborator of a repository as a ``collaborators`` record.

    https://developer.github.com/v3/repos/collaborators/#list-collaborators

    Each collaborator is tagged with ``_sdc_repository``, transformed and
    written; after every record a bookmark holding the page's extraction
    time is written into ``state``.

    Args:
        schema: schema passed to ``Transformer.transform``.
        repo_path: ``"<org>/<repo>"``, interpolated into the request URL and
            used as the bookmark's stream id.
        state: singer state receiving the bookmark.
        mdata: raw metadata list, converted with ``metadata.to_map``.

    Returns:
        ``state``.
    '''
    collaborators_url = 'https://api.github.com/repos/{}/collaborators'.format(repo_path)

    # Loop-invariant: previously rebuilt for every collaborator.
    mdata_map = metadata.to_map(mdata)

    with metrics.record_counter('collaborators') as counter:
        for response in authed_get_all_pages('collaborators', collaborators_url):
            collaborators = response.json()
            extraction_time = singer.utils.now()
            # The extraction time is fixed per page, so format it once.
            since = singer.utils.strftime(extraction_time)

            for collaborator in collaborators:
                collaborator['_sdc_repository'] = repo_path
                with singer.Transformer() as transformer:
                    rec = transformer.transform(collaborator, schema, metadata=mdata_map)
                singer.write_record('collaborators', rec, time_extracted=extraction_time)
                # NOTE(review): the bookmark key is the singular 'collaborator'
                # while the stream is 'collaborators'. Kept as-is because
                # renaming it would change the persisted state format.
                singer.write_bookmark(state, repo_path, 'collaborator', {'since': since})
                counter.increment()

    return state
示例12: build_state
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def build_state(raw_state, catalog):
    """Build a fresh state dict from ``raw_state`` for the catalog's streams.

    Only bookmarks relevant to each stream are copied over: the bulk-job
    bookmarks when a ``JobID`` is present, and, depending on the stream's
    ``replication-method``, the ``version`` and replication-key bookmarks.

    Args:
        raw_state: state to read bookmarks from.
        catalog: catalog dict containing a ``'streams'`` list.

    Returns:
        The newly built state.
    """
    state = {}

    for entry in catalog['streams']:
        stream_id = entry['tap_stream_id']
        # The empty breadcrumb () holds the stream-level metadata.
        root_md = metadata.to_map(entry['metadata']).get((), {})
        method = root_md.get('replication-method')
        version = singer.get_bookmark(raw_state, stream_id, 'version')

        # Preserve state that deals with resuming an incomplete bulk job
        if singer.get_bookmark(raw_state, stream_id, 'JobID'):
            for job_key in ('JobID', 'BatchIDs', 'JobHighestBookmarkSeen'):
                job_value = singer.get_bookmark(raw_state, stream_id, job_key)
                state = singer.write_bookmark(state, stream_id, job_key, job_value)

        if method == 'INCREMENTAL':
            replication_key = root_md.get('replication-key')
            key_value = singer.get_bookmark(raw_state, stream_id, replication_key)
            if version is not None:
                state = singer.write_bookmark(state, stream_id, 'version', version)
            if key_value is not None:
                state = singer.write_bookmark(state, stream_id, replication_key, key_value)
        elif method == 'FULL_TABLE' and version is None:
            # version is None on this branch, so this records an explicit
            # None 'version' bookmark for the stream.
            state = singer.write_bookmark(state, stream_id, 'version', version)

    return state
# pylint: disable=undefined-variable
示例13: get_stream_version
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_stream_version(catalog_entry, state):
    """Return the version to use for a stream.

    The bookmarked ``version`` is reused only when the stream has a
    ``replication-key`` in its stream-level metadata and a version bookmark
    exists; in every other case the current time in epoch milliseconds is
    returned.

    Args:
        catalog_entry: catalog entry dict with ``tap_stream_id`` and
            ``metadata``.
        state: singer state to read the ``version`` bookmark from.

    Returns:
        The bookmarked version, or ``int(time.time() * 1000)``.
    """
    stream_id = catalog_entry['tap_stream_id']
    root_md = metadata.to_map(catalog_entry['metadata']).get((), {})
    has_replication_key = bool(root_md.get('replication-key'))

    bookmarked_version = singer.get_bookmark(state, stream_id, 'version')

    if has_replication_key and bookmarked_version is not None:
        return bookmarked_version
    return int(time.time() * 1000)
示例14: _get_selected_properties
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def _get_selected_properties(self, catalog_entry):
    """Return the names of the schema properties that should be synced.

    For every property in the entry's schema, its ``inclusion`` and
    ``selected`` metadata plus ``self.select_fields_by_default`` are passed
    to ``singer.should_sync_field``; the property is kept when that call is
    truthy.

    Args:
        catalog_entry: catalog entry dict with ``metadata`` and ``schema``.

    Returns:
        List of property names, in schema order.
    """
    md_map = metadata.to_map(catalog_entry['metadata'])
    schema_properties = catalog_entry['schema'].get('properties', {})

    selected = []
    for field_name in schema_properties.keys():
        breadcrumb = ('properties', field_name)
        inclusion = metadata.get(md_map, breadcrumb, 'inclusion')
        is_selected = metadata.get(md_map, breadcrumb, 'selected')
        if singer.should_sync_field(inclusion, is_selected, self.select_fields_by_default):
            selected.append(field_name)
    return selected
示例15: get_start_date
# 需要導入模塊: from singer import metadata [as 別名]
# 或者: from singer.metadata import to_map [as 別名]
def get_start_date(self, state, catalog_entry):
    """Return the stream's replication-key bookmark or the default start date.

    Args:
        state: singer state to read the bookmark from.
        catalog_entry: catalog entry dict with ``tap_stream_id`` and
            ``metadata``.

    Returns:
        The bookmark stored under the stream's ``replication-key`` when it is
        truthy, otherwise ``self.default_start_date``.
    """
    # The empty breadcrumb () holds the stream-level metadata.
    root_md = metadata.to_map(catalog_entry['metadata']).get((), {})
    replication_key = root_md.get('replication-key')

    bookmark = singer.get_bookmark(state, catalog_entry['tap_stream_id'], replication_key)
    if bookmark:
        return bookmark
    return self.default_start_date