当前位置: 首页>>代码示例>>Python>>正文


Python metadata.write方法代码示例

本文整理汇总了Python中singer.metadata.write方法的典型用法代码示例。如果您正苦于以下问题:Python metadata.write方法的具体用法?Python metadata.write怎么用?Python metadata.write使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块singer.metadata的用法示例。


在下文中一共展示了metadata.write方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_property_schema

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def create_property_schema(field, mdata):
    """Write inclusion metadata for one field and build its property schema.

    The ``Id`` field is marked ``automatic``; every other field is marked
    ``available``. The field and the updated metadata are then handed to
    ``salesforce.field_to_property_schema``.

    Args:
        field: Mapping describing the field; only its ``'name'`` entry is
            read directly here.
        mdata: Metadata map to extend.

    Returns:
        The ``(property_schema, mdata)`` pair produced by
        ``salesforce.field_to_property_schema``.
    """
    name = field['name']
    inclusion = 'automatic' if name == "Id" else 'available'
    mdata = metadata.write(mdata, ('properties', name), 'inclusion', inclusion)

    property_schema, mdata = salesforce.field_to_property_schema(field, mdata)
    return (property_schema, mdata)


# pylint: disable=too-many-branches,too-many-statements 
开发者ID:singer-io,项目名称:tap-salesforce,代码行数:18,代码来源:__init__.py

示例2: load_discovered_schema

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def load_discovered_schema(stream):
    """Load a stream's schema and build its discovery metadata.

    Table-level metadata records the key properties, the forced replication
    method and, when the stream has one, the valid replication key. Each
    schema property is marked ``automatic`` if it is a key property or the
    replication key, and ``available`` otherwise.

    Args:
        stream: Object exposing ``tap_stream_id``, ``key_properties``,
            ``replication_method`` and ``replication_key``.

    Returns:
        A ``(schema, metadata_list)`` tuple.
    """
    schema = load_schema(stream.tap_stream_id)

    # Table-level entries live under the empty breadcrumb.
    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', stream.key_properties)
    mdata = metadata.write(mdata, (), 'forced-replication-method', stream.replication_method)
    if stream.replication_key:
        mdata = metadata.write(mdata, (), 'valid-replication-keys', [stream.replication_key])

    for field_name, _ in schema['properties'].items():
        is_automatic = (field_name in stream.key_properties
                        or field_name == stream.replication_key)
        inclusion = 'automatic' if is_automatic else 'available'
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', inclusion)

    # The engagements stream has nested data that we synthesize, so its
    # `engagement` field must always be included.
    if stream.tap_stream_id == "engagements":
        mdata = metadata.write(mdata, ('properties', 'engagement'), 'inclusion', 'automatic')

    return schema, metadata.to_list(mdata)
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:23,代码来源:__init__.py

示例3: discover_schemas

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def discover_schemas():
    """Build the discovery result for every stream available for discovery.

    For each stream the schema is loaded and its references are resolved
    against the shared schema refs. Standard metadata is generated from the
    schema and the stream's key properties. When the stream's bookmark key
    equals ``UPDATED_TIME_KEY``, that property is marked ``automatic``.

    Returns:
        A dict with a single ``'streams'`` key holding one entry per stream
        (``stream``, ``tap_stream_id``, ``schema`` and ``metadata``).
    """
    # Load Facebook's shared schemas
    refs = load_shared_schema_refs()

    result = {'streams': []}
    for stream in initialize_streams_for_discovery():
        LOGGER.info('Loading schema for %s', stream.name)
        schema = singer.resolve_schema_references(load_schema(stream), refs)

        standard_mdata = metadata.get_standard_metadata(
            schema, key_properties=stream.key_properties)
        mdata = metadata.to_map(standard_mdata)

        bookmark_key = BOOKMARK_KEYS.get(stream.name)
        if bookmark_key == UPDATED_TIME_KEY:
            mdata = metadata.write(mdata, ('properties', bookmark_key), 'inclusion', 'automatic')

        entry = {
            'stream': stream.name,
            'tap_stream_id': stream.name,
            'schema': schema,
            'metadata': metadata.to_list(mdata),
        }
        result['streams'].append(entry)

    return result
开发者ID:singer-io,项目名称:tap-facebook,代码行数:24,代码来源:__init__.py

示例4: get_all_team_members

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def get_all_team_members(team_slug, schemas, repo_path, state, mdata):
    """Yield transformed member records for one team.

    The organization is taken from the part of ``repo_path`` before the
    first ``/``. Every fetched member is tagged with ``_sdc_repository``,
    transformed against ``schemas`` and ``mdata``, counted, and yielded.
    This generator does not write records itself.

    Args:
        team_slug: Slug of the team, interpolated into the request URL.
        schemas: Schema passed to the transformer.
        repo_path: Repository path; stored on each record.
        state: Returned unchanged as the generator's return value.
        mdata: Metadata passed through ``metadata.to_map`` for the transform.

    Yields:
        One transformed record per team member.
    """
    org = repo_path.split('/')[0]
    with metrics.record_counter('team_members') as counter:
        members_url = 'https://api.github.com/orgs/{}/teams/{}/members?sort=created_at&direction=desc'.format(org, team_slug)
        for page in authed_get_all_pages('team_members', members_url):
            for member in page.json():
                member['_sdc_repository'] = repo_path

                # Transform the raw member; emitting it is left to the caller.
                with singer.Transformer() as transformer:
                    rec = transformer.transform(member, schemas, metadata=metadata.to_map(mdata))
                counter.increment()

                yield rec

    return state
开发者ID:singer-io,项目名称:tap-github,代码行数:21,代码来源:tap_github.py

示例5: get_all_releases

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def get_all_releases(schemas, repo_path, state, mdata):
    """Fetch, transform and write every release of a repository.

    Releases don't seem to have an `updated_at` property, yet they can be
    edited. For that reason, and because the volume of releases can safely
    be considered low, no bookmark is read here: every release is written on
    each call. A ``since`` bookmark is still written after each record.

    Args:
        schemas: Schema passed to the transformer.
        repo_path: Repository path; used in the URL and stored on each record.
        state: State object updated via ``singer.write_bookmark``.
        mdata: Metadata passed through ``metadata.to_map`` for the transform.

    Returns:
        The ``state`` object.
    """
    with metrics.record_counter('releases') as counter:
        releases_url = 'https://api.github.com/repos/{}/releases?sort=created_at&direction=desc'.format(repo_path)
        for page in authed_get_all_pages('releases', releases_url):
            releases = page.json()
            extraction_time = singer.utils.now()
            for release in releases:
                release['_sdc_repository'] = repo_path

                # Transform and write the release record.
                with singer.Transformer() as transformer:
                    rec = transformer.transform(release, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('releases', rec, time_extracted=extraction_time)
                singer.write_bookmark(state, repo_path, 'releases', {'since': singer.utils.strftime(extraction_time)})
                counter.increment()

    return state
开发者ID:singer-io,项目名称:tap-github,代码行数:25,代码来源:tap_github.py

示例6: generate_metadata

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def generate_metadata(stream, schema):
    """Build the metadata list for a stream from its primary-key fields.

    The primary-key fields are recorded as ``table-key-properties``. Each
    schema property is marked ``automatic`` when it is a primary-key field
    and ``available`` otherwise.

    Args:
        stream: Object exposing ``pk_fields``.
        schema: Object exposing a ``properties`` mapping.

    Returns:
        The metadata in list form (``metadata.to_list``).
    """
    mdata = metadata.write(metadata.new(), (), 'table-key-properties', stream.pk_fields)

    for field_name in schema.properties:
        inclusion = 'automatic' if field_name in stream.pk_fields else 'available'
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', inclusion)

    return metadata.to_list(mdata)
开发者ID:singer-io,项目名称:tap-jira,代码行数:13,代码来源:__init__.py

示例7: create_field_metadata_for_report

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def create_field_metadata_for_report(stream, fields, field_name_lookup):
    """Build the metadata map for the fields of one report.

    The report itself is marked ``available``. For every field the
    behavior, the AdWords field name and the inclusion are recorded, plus
    the list of mutually exclusive fields when the field exposes one.

    Args:
        stream: Report name; ``'GEO_PERFORMANCE_REPORT'`` gets special
            handling.
        fields: Iterable of field descriptors supporting item access.
        field_name_lookup: Mapping from an exclusive-field entry to the
            property name used in the exclusion breadcrumb.

    Returns:
        The metadata map (not converted to list form).
    """
    mdata = metadata.write({}, (), 'inclusion', 'available')

    for field in fields:
        xml_name = field['xmlAttributeName']
        breadcrumb = ('properties', str(xml_name))

        # NOTE(review): this is an attribute check on an object that is
        # otherwise subscripted; presumably the field type supports both
        # access styles -- confirm against the caller.
        if hasattr(field, "exclusiveFields"):
            exclusions = [['properties', field_name_lookup[excluded]]
                          for excluded in field['exclusiveFields']]
            mdata = metadata.write(mdata, breadcrumb, 'fieldExclusions', exclusions)

        mdata = metadata.write(mdata, breadcrumb, 'behavior', field['fieldBehavior'])
        mdata = metadata.write(mdata, breadcrumb, 'adwords.fieldName', field['fieldName'])

        # Every report with the `day` attribute errors with an empty 400 if
        # it is not included in the field list, so it is always selected.
        inclusion = 'automatic' if xml_name == 'day' else 'available'
        mdata = metadata.write(mdata, breadcrumb, 'inclusion', inclusion)

    if stream == 'GEO_PERFORMANCE_REPORT':
        # Requests for this report that don't include countryTerritory
        # fail with an empty 400. There's no evidence for this in the
        # docs but it is what it is.
        mdata = metadata.write(mdata, ('properties', 'countryTerritory'), 'inclusion', 'automatic')

    return mdata
开发者ID:singer-io,项目名称:tap-adwords,代码行数:33,代码来源:__init__.py

示例8: create_column_metadata

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def create_column_metadata(cols):
    """Build the metadata list describing a table's columns.

    The table-level entry is not selected by default. A column is selected
    by default unless its generated schema has inclusion ``unsupported``.
    The lower-cased column type is stored as ``sql-datatype``.

    Args:
        cols: Iterable of column objects exposing ``column_name`` and
            ``column_type``.

    Returns:
        The metadata in list form (``metadata.to_list``).
    """
    mdata = metadata.write({}, (), 'selected-by-default', False)

    for col in cols:
        col_schema = schema_for_column(col)
        breadcrumb = ('properties', col.column_name)
        selected = col_schema.inclusion != 'unsupported'

        mdata = metadata.write(mdata, breadcrumb, 'selected-by-default', selected)
        mdata = metadata.write(mdata, breadcrumb, 'sql-datatype', col.column_type.lower())

    return metadata.to_list(mdata)
开发者ID:singer-io,项目名称:tap-mysql,代码行数:17,代码来源:__init__.py

示例9: load_metadata

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def load_metadata(table_spec, schema):
    """Build the metadata list for a table from its spec and schema.

    ``key_properties`` is treated as optional throughout: it is read once
    with a default of ``[]``, so a spec without the key yields an empty
    ``table-key-properties`` entry instead of raising ``KeyError``.

    Args:
        table_spec: Mapping that may contain a ``'key_properties'`` list.
        schema: Mapping that may contain a ``'properties'`` mapping.

    Returns:
        The metadata in list form (``metadata.to_list``). Properties listed
        in ``key_properties`` are marked ``automatic``; all others are
        marked ``available``.
    """
    key_properties = table_spec.get('key_properties', [])

    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)

    for field_name in schema.get('properties', {}):
        # The truthiness check also guards against an explicit None value.
        if key_properties and field_name in key_properties:
            inclusion = 'automatic'
        else:
            inclusion = 'available'
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', inclusion)

    return metadata.to_list(mdata)
开发者ID:singer-io,项目名称:tap-s3-csv,代码行数:14,代码来源:discover.py

示例10: populate_metadata

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def populate_metadata(schema_name, schema):
    """Build the metadata map for a schema from its configured key properties.

    The key properties are looked up in ``KEY_PROPERTIES`` by schema name
    and recorded as ``table-key-properties``. Each schema property is marked
    ``automatic`` when it is a key property and ``available`` otherwise.

    Args:
        schema_name: Key into ``KEY_PROPERTIES``.
        schema: Mapping with a ``'properties'`` mapping.

    Returns:
        The metadata map (not converted to list form).
    """
    key_props = KEY_PROPERTIES[schema_name]
    mdata = metadata.write(metadata.new(), (), 'table-key-properties', key_props)

    for field_name in schema['properties']:
        inclusion = 'automatic' if field_name in key_props else 'available'
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', inclusion)

    return mdata
开发者ID:singer-io,项目名称:tap-github,代码行数:14,代码来源:tap_github.py

示例11: get_all_teams

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def get_all_teams(schemas, repo_path, state, mdata):
    """Write all teams of the repository's organization and their sub-streams.

    The organization is taken from the part of ``repo_path`` before the
    first ``/``. Each team is tagged with ``_sdc_repository``, transformed,
    written as a ``teams`` record and bookmarked. When ``schemas`` has a
    truthy ``team_members`` / ``team_memberships`` entry, the matching
    records for that team are written as well.

    Args:
        schemas: Passed to the transformer for team records and also queried
            for the ``team_members`` and ``team_memberships`` sub-schemas.
        repo_path: Repository path; stored on each record.
        state: State object updated via ``singer.write_bookmark``.
        mdata: Metadata passed through ``metadata.to_map`` for the transform
            and forwarded to the sub-stream helpers.

    Returns:
        The ``state`` object.
    """
    org = repo_path.split('/')[0]
    with metrics.record_counter('teams') as counter:
        teams_url = 'https://api.github.com/orgs/{}/teams?sort=created_at&direction=desc'.format(org)
        for page in authed_get_all_pages('teams', teams_url):
            teams = page.json()
            extraction_time = singer.utils.now()

            for team in teams:
                team['_sdc_repository'] = repo_path

                # Transform and write the team record.
                with singer.Transformer() as transformer:
                    rec = transformer.transform(team, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('teams', rec, time_extracted=extraction_time)
                singer.write_bookmark(state, repo_path, 'teams', {'since': singer.utils.strftime(extraction_time)})
                counter.increment()

                if schemas.get('team_members'):
                    team_slug = team['slug']
                    members = get_all_team_members(team_slug, schemas['team_members'], repo_path, state, mdata)
                    for team_members_rec in members:
                        singer.write_record('team_members', team_members_rec, time_extracted=extraction_time)
                        singer.write_bookmark(state, repo_path, 'team_members', {'since': singer.utils.strftime(extraction_time)})

                if schemas.get('team_memberships'):
                    team_slug = team['slug']
                    memberships = get_all_team_memberships(team_slug, schemas['team_memberships'], repo_path, state, mdata)
                    for team_memberships_rec in memberships:
                        singer.write_record('team_memberships', team_memberships_rec, time_extracted=extraction_time)

    return state
开发者ID:singer-io,项目名称:tap-github,代码行数:34,代码来源:tap_github.py

示例12: get_all_events

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def get_all_events(schemas, repo_path, state, mdata):
    """Incrementally fetch, transform and write a repository's events.

    The ``since`` bookmark for ``events`` is read first. Each event is
    compared by its ``updated_at`` value, falling back to ``created_at``
    when ``updated_at`` is missing or None. The function returns as soon as
    an event older than the bookmark is seen.

    Reference:
    https://developer.github.com/v3/issues/events/#list-events-for-a-repository

    Args:
        schemas: Schema passed to the transformer.
        repo_path: Repository path; used in the URL and stored on each record.
        state: State object read via ``get_bookmark`` and updated via
            ``singer.write_bookmark``.
        mdata: Metadata passed through ``metadata.to_map`` for the transform.

    Returns:
        The ``state`` object.
    """
    bookmark_value = get_bookmark(state, repo_path, "events", "since")
    # 0 is the falsy "no bookmark" sentinel checked in the loop below.
    bookmark_time = singer.utils.strptime_to_utc(bookmark_value) if bookmark_value else 0

    with metrics.record_counter('events') as counter:
        events_url = 'https://api.github.com/repos/{}/events?sort=created_at&direction=desc'.format(repo_path)
        for page in authed_get_all_pages('events', events_url):
            events = page.json()
            extraction_time = singer.utils.now()
            for event in events:
                event['_sdc_repository'] = repo_path

                # Skip records that haven't been updated since the last run.
                # No ?since param is sent, so the filtering happens here;
                # once the first piece of old data is found we can return,
                # thanks to the sorting.
                updated_at = event.get('updated_at')
                if updated_at is None:
                    updated_at = event.get('created_at')
                if bookmark_time and singer.utils.strptime_to_utc(updated_at) < bookmark_time:
                    return state

                # Transform and write the event record.
                with singer.Transformer() as transformer:
                    rec = transformer.transform(event, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('events', rec, time_extracted=extraction_time)
                singer.write_bookmark(state, repo_path, 'events', {'since': singer.utils.strftime(extraction_time)})
                counter.increment()

    return state
开发者ID:singer-io,项目名称:tap-github,代码行数:39,代码来源:tap_github.py

示例13: get_all_issue_milestones

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def get_all_issue_milestones(schemas, repo_path, state, mdata):
    """Incrementally fetch, transform and write a repository's milestones.

    The ``since`` bookmark for ``issue_milestones`` is read first and each
    milestone's ``due_on`` is compared against it. Milestones without a due
    date (``due_on`` missing or None) cannot be compared; they are always
    written rather than passed to the date parser.

    Reference:
    https://developer.github.com/v3/issues/milestones/#list-milestones-for-a-repository

    Args:
        schemas: Schema passed to the transformer.
        repo_path: Repository path; used in the URL and stored on each record.
        state: State object read via ``get_bookmark`` and updated via
            ``singer.write_bookmark``.
        mdata: Metadata passed through ``metadata.to_map`` for the transform.

    Returns:
        The ``state`` object.
    """
    # NOTE(review): incremental sync keys off `due_on` -- confirm this is
    # the intended replication field.
    bookmark_value = get_bookmark(state, repo_path, "issue_milestones", "since")
    if bookmark_value:
        bookmark_time = singer.utils.strptime_to_utc(bookmark_value)
    else:
        # 0 is the falsy "no bookmark" sentinel checked in the loop below.
        bookmark_time = 0

    with metrics.record_counter('issue_milestones') as counter:
        for response in authed_get_all_pages(
                'milestones',
                'https://api.github.com/repos/{}/milestones?direction=desc'.format(repo_path)
        ):
            milestones = response.json()
            extraction_time = singer.utils.now()
            for r in milestones:
                r['_sdc_repository'] = repo_path

                # Skip records that are older than the last run. No ?since
                # param is sent, so the filtering happens here; once the
                # first piece of old data is found we can return, thanks to
                # the sorting. A milestone with no due date has nothing to
                # compare, so it is never treated as old.
                due_on = r.get('due_on')
                if bookmark_time and due_on and singer.utils.strptime_to_utc(due_on) < bookmark_time:
                    return state

                # Transform and write the milestone record.
                with singer.Transformer() as transformer:
                    rec = transformer.transform(r, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('issue_milestones', rec, time_extracted=extraction_time)
                singer.write_bookmark(state, repo_path, 'issue_milestones', {'since': singer.utils.strftime(extraction_time)})
                counter.increment()

    return state
开发者ID:singer-io,项目名称:tap-github,代码行数:37,代码来源:tap_github.py

示例14: get_all_commit_comments

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def get_all_commit_comments(schemas, repo_path, state, mdata):
    """Incrementally fetch, transform and write a repository's commit comments.

    The ``since`` bookmark for ``commit_comments`` is read first. The
    function returns as soon as a comment whose ``updated_at`` is older than
    the bookmark is seen.

    Reference: https://developer.github.com/v3/repos/comments/

    Args:
        schemas: Schema passed to the transformer.
        repo_path: Repository path; used in the URL and stored on each record.
        state: State object read via ``get_bookmark`` and updated via
            ``singer.write_bookmark``.
        mdata: Metadata passed through ``metadata.to_map`` for the transform.

    Returns:
        The ``state`` object.
    """
    bookmark_value = get_bookmark(state, repo_path, "commit_comments", "since")
    # 0 is the falsy "no bookmark" sentinel checked in the loop below.
    bookmark_time = singer.utils.strptime_to_utc(bookmark_value) if bookmark_value else 0

    with metrics.record_counter('commit_comments') as counter:
        comments_url = 'https://api.github.com/repos/{}/comments?sort=created_at&direction=desc'.format(repo_path)
        for page in authed_get_all_pages('commit_comments', comments_url):
            commit_comments = page.json()
            extraction_time = singer.utils.now()
            for comment in commit_comments:
                comment['_sdc_repository'] = repo_path

                # Skip records that haven't been updated since the last run.
                # No ?since param is sent, so the filtering happens here;
                # once the first piece of old data is found we can return,
                # thanks to the sorting.
                if bookmark_time and singer.utils.strptime_to_utc(comment.get('updated_at')) < bookmark_time:
                    return state

                # Transform and write the commit comment record.
                with singer.Transformer() as transformer:
                    rec = transformer.transform(comment, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('commit_comments', rec, time_extracted=extraction_time)
                singer.write_bookmark(state, repo_path, 'commit_comments', {'since': singer.utils.strftime(extraction_time)})
                counter.increment()

    return state
开发者ID:singer-io,项目名称:tap-github,代码行数:37,代码来源:tap_github.py

示例15: get_all_project_cards

# 需要导入模块: from singer import metadata [as 别名]
# 或者: from singer.metadata import write [as 别名]
def get_all_project_cards(project_id, schemas, repo_path, state, mdata):
    """Yield transformed records for a project, filtered by bookmark.

    The ``since`` bookmark for ``project_cards`` is read first. Each fetched
    record is tagged with ``_sdc_repository``, transformed and yielded. The
    generator stops as soon as a record whose ``updated_at`` is older than
    the bookmark is seen. It neither writes records nor updates bookmarks.

    Args:
        project_id: Identifier interpolated into the request URL.
        schemas: Schema passed to the transformer.
        repo_path: Repository path; stored on each record.
        state: State object read via ``get_bookmark``; also the generator's
            return value.
        mdata: Metadata passed through ``metadata.to_map`` for the transform.

    Yields:
        One transformed record per fetched item.
    """
    bookmark_value = get_bookmark(state, repo_path, "project_cards", "since")
    # 0 is the falsy "no bookmark" sentinel checked in the loop below.
    bookmark_time = singer.utils.strptime_to_utc(bookmark_value) if bookmark_value else 0

    # NOTE: the counter is opened but never incremented in this function.
    with metrics.record_counter('project_cards') as counter:
        # NOTE(review): the URL targets the project's `columns` endpoint
        # although the stream is named project_cards -- confirm.
        cards_url = 'https://api.github.com/projects/{}/columns?sort=created_at&direction=desc'.format(project_id)
        for page in authed_get_all_pages('project_cards', cards_url):
            for card in page.json():
                card['_sdc_repository'] = repo_path

                # Skip records that haven't been updated since the last run.
                # No ?since param is sent, so the filtering happens here;
                # once the first piece of old data is found we can return,
                # thanks to the sorting.
                if bookmark_time and singer.utils.strptime_to_utc(card.get('updated_at')) < bookmark_time:
                    return state

                # Transform the record; emitting it is left to the caller.
                with singer.Transformer() as transformer:
                    rec = transformer.transform(card, schemas, metadata=metadata.to_map(mdata))
                yield rec

    return state
开发者ID:singer-io,项目名称:tap-github,代码行数:32,代码来源:tap_github.py


注:本文中的singer.metadata.write方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。