当前位置: 首页>>代码示例>>Python>>正文


Python singer.write_schema方法代码示例

本文整理汇总了Python中singer.write_schema方法的典型用法代码示例。如果您正苦于以下问题:Python singer.write_schema方法的具体用法?Python singer.write_schema怎么用?Python singer.write_schema使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在singer的用法示例。


在下文中一共展示了singer.write_schema方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sync_campaigns

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_campaigns(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("campaigns")
    singer.write_schema("campaigns", schema, ["id"], catalog.get('stream_alias'))
    LOGGER.info("sync_campaigns(NO bookmarks)")
    url = get_url("campaigns_all")
    params = {'limit': 500}

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'campaigns', url, params, "campaigns", "hasMore", ["offset"], ["offset"]):
            record = request(get_url("campaigns_detail", campaign_id=row['id'])).json()
            record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata)
            singer.write_record("campaigns", record, catalog.get('stream_alias'), time_extracted=utils.now())

    return STATE 
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:18,代码来源:__init__.py

示例2: do_sync

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def do_sync(config, catalog, state):
    LOGGER.info('Starting sync.')

    for stream in catalog['streams']:
        stream_name = stream['tap_stream_id']
        mdata = metadata.to_map(stream['metadata'])
        table_spec = next(s for s in config['tables'] if s['table_name'] == stream_name)
        if not stream_is_selected(mdata):
            LOGGER.info("%s: Skipping - not selected", stream_name)
            continue

        singer.write_state(state)
        key_properties = metadata.get(mdata, (), 'table-key-properties')
        singer.write_schema(stream_name, stream['schema'], key_properties)

        LOGGER.info("%s: Starting sync", stream_name)
        counter_value = sync_stream(config, state, table_spec, stream)
        LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter_value)

    LOGGER.info('Done syncing.') 
开发者ID:singer-io,项目名称:tap-s3-csv,代码行数:22,代码来源:__init__.py

示例3: sync_time_filtered

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_time_filtered(entity):
    bookmark_property = 'updated_at'

    singer.write_schema(entity,
                        utils.load_schema(entity),
                        ["id"],
                        bookmark_properties=[bookmark_property])
    start = get_start(entity)

    logger.info("Syncing {} from {}".format(entity, start))
    for row in gen_request(get_url(entity)):
        if row[bookmark_property] >= start:
            if 'custom_fields' in row:
                row['custom_fields'] = transform_dict(row['custom_fields'], force_str=True)

            utils.update_state(STATE, entity, row[bookmark_property])
            singer.write_record(entity, row, time_extracted=singer.utils.now())

    singer.write_state(STATE) 
开发者ID:singer-io,项目名称:tap-freshdesk,代码行数:21,代码来源:__init__.py

示例4: output_schema

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def output_schema(stream):
    schema = load_schema(stream.tap_stream_id)
    singer.write_schema(stream.tap_stream_id, schema, stream.pk_fields) 
开发者ID:singer-io,项目名称:tap-jira,代码行数:5,代码来源:__init__.py

示例5: sync_contacts

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_contacts(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    bookmark_key = 'versionTimestamp'
    start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")

    singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params, 'contacts', 'has-more', ['vid-offset'], ['vidOffset']):
            modified_time = None
            if bookmark_key in row:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime( # pylint: disable=protected-access
                        row[bookmark_key],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, 'contacts', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE 
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:40,代码来源:__init__.py

示例6: sync_forms

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_forms(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("forms")
    bookmark_key = 'updatedAt'

    singer.write_schema("forms", schema, ["guid"], [bookmark_key], catalog.get('stream_alias'))
    start = get_start(STATE, "forms", bookmark_key)
    max_bk_value = start

    LOGGER.info("sync_forms from %s", start)

    data = request(get_url("forms")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)

            if record[bookmark_key] >= start:
                singer.write_record("forms", record, catalog.get('stream_alias'), time_extracted=time_extracted)
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]

    STATE = singer.write_bookmark(STATE, 'forms', bookmark_key, max_bk_value)
    singer.write_state(STATE)

    return STATE 
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:30,代码来源:__init__.py

示例7: sync_workflows

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_workflows(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("workflows")
    bookmark_key = 'updatedAt'
    singer.write_schema("workflows", schema, ["id"], [bookmark_key], catalog.get('stream_alias'))
    start = get_start(STATE, "workflows", bookmark_key)
    max_bk_value = start

    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value)
    singer.write_state(STATE)

    LOGGER.info("sync_workflows from %s", start)

    data = request(get_url("workflows")).json()
    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data['workflows']:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
            if record[bookmark_key] >= start:
                singer.write_record("workflows", record, catalog.get('stream_alias'), time_extracted=time_extracted)
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]

    STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE 
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:30,代码来源:__init__.py

示例8: sync_owners

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_owners(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema("owners")
    bookmark_key = 'updatedAt'

    singer.write_schema("owners", schema, ["ownerId"], [bookmark_key], catalog.get('stream_alias'))
    start = get_start(STATE, "owners", bookmark_key)
    max_bk_value = start

    LOGGER.info("sync_owners from %s", start)

    params = {}
    if CONFIG.get('include_inactives'):
        params['includeInactives'] = "true"
    data = request(get_url("owners"), params).json()

    time_extracted = utils.now()

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]

            if record[bookmark_key] >= start:
                singer.write_record("owners", record, catalog.get('stream_alias'), time_extracted=time_extracted)

    STATE = singer.write_bookmark(STATE, 'owners', bookmark_key, max_bk_value)
    singer.write_state(STATE)
    return STATE 
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:33,代码来源:__init__.py

示例9: sync_deal_pipelines

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_deal_pipelines(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    schema = load_schema('deal_pipelines')
    singer.write_schema('deal_pipelines', schema, ['pipelineId'], catalog.get('stream_alias'))
    LOGGER.info('sync_deal_pipelines')
    data = request(get_url('deal_pipelines')).json()
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
            singer.write_record("deal_pipelines", record, catalog.get('stream_alias'), time_extracted=utils.now())
    singer.write_state(STATE)
    return STATE 
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:15,代码来源:__init__.py

示例10: do_sync

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def do_sync(account, catalog, state):
    streams_to_sync = get_streams_to_sync(account, catalog, state)
    refs = load_shared_schema_refs()
    for stream in streams_to_sync:
        LOGGER.info('Syncing %s, fields %s', stream.name, stream.fields())
        schema = singer.resolve_schema_references(load_schema(stream), refs)
        metadata_map = metadata.to_map(stream.catalog_entry.metadata)
        bookmark_key = BOOKMARK_KEYS.get(stream.name)
        singer.write_schema(stream.name, schema, stream.key_properties, bookmark_key, stream.stream_alias)

        # NB: The AdCreative stream is not an iterator
        if stream.name == 'adcreative':
            stream.sync()
            continue

        with Transformer(pre_hook=transform_date_hook) as transformer:
            with metrics.record_counter(stream.name) as counter:
                for message in stream:
                    if 'record' in message:
                        counter.increment()
                        time_extracted = utils.now()
                        record = transformer.transform(message['record'], schema, metadata=metadata_map)
                        singer.write_record(stream.name, record, stream.stream_alias, time_extracted)
                    elif 'state' in message:
                        singer.write_state(message['state'])
                    else:
                        raise TapFacebookException('Unrecognized message {}'.format(message)) 
开发者ID:singer-io,项目名称:tap-facebook,代码行数:29,代码来源:__init__.py

示例11: write_schema

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def write_schema(stream_name, schema, primary_keys, bookmark_properties=None):
    schema_copy = copy.deepcopy(schema)
    singer.write_schema(stream_name, schema_copy, primary_keys, bookmark_properties=bookmark_properties)

# No rate limit here, since this request is only made once
# per discovery (not sync) job 
开发者ID:singer-io,项目名称:tap-adwords,代码行数:8,代码来源:__init__.py

示例12: sync_tickets

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_tickets():
    bookmark_property = 'updated_at'

    singer.write_schema("tickets",
                        utils.load_schema("tickets"),
                        ["id"],
                        bookmark_properties=[bookmark_property])

    singer.write_schema("conversations",
                        utils.load_schema("conversations"),
                        ["id"],
                        bookmark_properties=[bookmark_property])

    singer.write_schema("satisfaction_ratings",
                        utils.load_schema("satisfaction_ratings"),
                        ["id"],
                        bookmark_properties=[bookmark_property])

    singer.write_schema("time_entries",
                        utils.load_schema("time_entries"),
                        ["id"],
                        bookmark_properties=[bookmark_property])

    sync_tickets_by_filter(bookmark_property)
    sync_tickets_by_filter(bookmark_property, "deleted")
    sync_tickets_by_filter(bookmark_property, "spam") 
开发者ID:singer-io,项目名称:tap-freshdesk,代码行数:28,代码来源:__init__.py

示例13: test_write_schema

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def test_write_schema(self):
        schema={'type': 'object',
                'properties': {
                    'name': {'type': 'string'}}}
        singer.write_schema("users", schema, ["name"]) 
开发者ID:singer-io,项目名称:singer-python,代码行数:7,代码来源:test_singer.py

示例14: sync

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync(config, state, catalog):
    """ Sync data from tap source """
    # Loop over selected streams in catalog
    for stream in catalog.get_selected_streams(state):
        LOGGER.info("Syncing stream:" + stream.tap_stream_id)

        bookmark_column = stream.replication_key
        is_sorted = True  # TODO: indicate whether data is sorted ascending on bookmark value

        singer.write_schema(
            stream_name=stream.tap_stream_id,
            schema=stream.schema,
            key_properties=stream.key_properties,
        )

        # TODO: delete and replace this inline function with your own data retrieval process:
        tap_data = lambda: [{"id": x, "name": "row${x}"} for x in range(1000)]

        max_bookmark = None
        for row in tap_data():
            # TODO: place type conversions or transformations here

            # write one or more rows to the stream:
            singer.write_records(stream.tap_stream_id, [row])
            if bookmark_column:
                if is_sorted:
                    # update bookmark to latest value
                    singer.write_state({stream.tap_stream_id: row[bookmark_column]})
                else:
                    # if data unsorted, save max value until end of writes
                    max_bookmark = max(max_bookmark, row[bookmark_column])
        if bookmark_column and not is_sorted:
            singer.write_state({stream.tap_stream_id: max_bookmark})
    return 
开发者ID:singer-io,项目名称:singer-tap-template,代码行数:36,代码来源:__init__.py

示例15: sync_companies

# 需要导入模块: import singer [as 别名]
# 或者: from singer import write_schema [as 别名]
def sync_companies(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    bumble_bee = Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
    bookmark_key = 'hs_lastmodifieddate'
    start = utils.strptime_to_utc(get_start(STATE, "companies", bookmark_key))
    LOGGER.info("sync_companies from %s", start)
    schema = load_schema('companies')
    singer.write_schema("companies", schema, ["companyId"], [bookmark_key], catalog.get('stream_alias'))

    # Because this stream doesn't query by `lastUpdated`, it cycles
    # through the data set every time. The issue with this is that there
    # is a race condition by which records may be updated between the
    # start of this table's sync and the end, causing some updates to not
    # be captured, in order to combat this, we must store the current
    # sync's start in the state and not move the bookmark past this value.
    current_sync_start = get_current_sync_start(STATE, "companies") or utils.now()
    STATE = write_current_sync_start(STATE, "companies", current_sync_start)
    singer.write_state(STATE)

    url = get_url("companies_all")
    max_bk_value = start
    if CONTACTS_BY_COMPANY in ctx.selected_stream_ids:
        contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY)
        singer.write_schema("contacts_by_company", contacts_by_company_schema, ["company-id", "contact-id"])

    with bumble_bee:
        for row in gen_request(STATE, 'companies', url, default_company_params, 'companies', 'has-more', ['offset'], ['offset']):
            row_properties = row['properties']
            modified_time = None
            if bookmark_key in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)
            elif 'createdate' in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if not modified_time or modified_time >= start:
                record = request(get_url("companies_detail", company_id=row['companyId'])).json()
                record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata)
                singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now())
                if CONTACTS_BY_COMPANY in ctx.selected_stream_ids:
                    STATE = _sync_contacts_by_company(STATE, ctx, record['companyId'])

    # Don't bookmark past the start of this sync to account for updated records during the sync.
    new_bookmark = min(max_bk_value, current_sync_start)
    STATE = singer.write_bookmark(STATE, 'companies', bookmark_key, utils.strftime(new_bookmark))
    STATE = write_current_sync_start(STATE, 'companies', None)
    singer.write_state(STATE)
    return STATE 
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:57,代码来源:__init__.py


注:本文中的singer.write_schema方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。