当前位置: 首页>>代码示例>>Python>>正文


Python singer.get_bookmark方法代码示例

本文整理汇总了Python中singer.get_bookmark方法的典型用法代码示例。如果您正苦于以下问题:Python singer.get_bookmark方法的具体用法?Python singer.get_bookmark怎么用?Python singer.get_bookmark使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在singer的用法示例。


在下文中一共展示了singer.get_bookmark方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: binlog_stream_requires_historical

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def binlog_stream_requires_historical(catalog_entry, state):
    """Report whether a binlog stream still requires a historical sync.

    Reads the ``log_file``, ``log_pos``, ``max_pk_values`` and
    ``last_pk_fetched`` bookmarks of ``catalog_entry.tap_stream_id``
    from ``state``.

    Returns:
        False only when both ``log_file`` and ``log_pos`` are bookmarked
        and neither ``max_pk_values`` nor ``last_pk_fetched`` is set;
        True in every other case.
    """
    stream_id = catalog_entry.tap_stream_id
    log_file, log_pos, max_pk_values, last_pk_fetched = (
        singer.get_bookmark(state, stream_id, key)
        for key in ('log_file', 'log_pos', 'max_pk_values', 'last_pk_fetched')
    )

    has_binlog_position = log_file and log_pos
    pk_bookmarks_present = max_pk_values or last_pk_fetched

    # No historical sync is needed only when a binlog position exists and
    # no primary-key bookmarks are left in state.
    return not (has_binlog_position and not pk_bookmarks_present)
开发者ID:singer-io,项目名称:tap-mysql,代码行数:23,代码来源:__init__.py

示例2: build_state

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def build_state(raw_state, catalog):
    """Build a new state from ``raw_state`` for the streams in ``catalog``.

    Only selected bookmarks are carried over, per stream:

    * ``JobID``, ``BatchIDs`` and ``JobHighestBookmarkSeen`` when a truthy
      ``JobID`` bookmark is present.
    * For ``INCREMENTAL`` streams: ``version`` and the replication-key
      bookmark, each only when it is not None.
    * For ``FULL_TABLE`` streams without a bookmarked version: a
      ``version`` bookmark whose value is None.

    Args:
        raw_state: State to read the existing bookmarks from.
        catalog: Mapping with a ``'streams'`` list; every entry provides
            ``'tap_stream_id'`` and ``'metadata'``.

    Returns:
        The state produced by the successive ``singer.write_bookmark``
        calls (the initial empty dict when nothing is carried over).
    """
    state = {}
    job_keys = ('JobID', 'BatchIDs', 'JobHighestBookmarkSeen')

    for catalog_entry in catalog['streams']:
        tap_stream_id = catalog_entry['tap_stream_id']
        root_metadata = metadata.to_map(catalog_entry['metadata']).get((), {})
        replication_method = root_metadata.get('replication-method')
        version = singer.get_bookmark(raw_state, tap_stream_id, 'version')

        # Preserve state that deals with resuming an incomplete bulk job
        if singer.get_bookmark(raw_state, tap_stream_id, 'JobID'):
            job_values = [singer.get_bookmark(raw_state, tap_stream_id, key)
                          for key in job_keys]
            for key, value in zip(job_keys, job_values):
                state = singer.write_bookmark(state, tap_stream_id, key, value)

        if replication_method == 'INCREMENTAL':
            replication_key = root_metadata.get('replication-key')
            replication_key_value = singer.get_bookmark(
                raw_state, tap_stream_id, replication_key)

            carried_over = (('version', version),
                            (replication_key, replication_key_value))
            for key, value in carried_over:
                if value is not None:
                    state = singer.write_bookmark(state, tap_stream_id, key, value)
        elif replication_method == 'FULL_TABLE' and version is None:
            # `version` is None here, so this records an explicit None version.
            state = singer.write_bookmark(state, tap_stream_id, 'version', version)

    return state

# pylint: disable=undefined-variable 
开发者ID:singer-io,项目名称:tap-salesforce,代码行数:40,代码来源:__init__.py

示例3: get_stream_version

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_stream_version(catalog_entry, state):
    """Return the version to use for the stream described by ``catalog_entry``.

    The bookmarked ``version`` is reused only when the stream's root
    metadata defines a ``replication-key`` and a version is bookmarked in
    ``state``. In every other case the current time in epoch milliseconds
    is returned.
    """
    tap_stream_id = catalog_entry['tap_stream_id']
    root_metadata = metadata.to_map(catalog_entry['metadata']).get((), {})
    replication_key = root_metadata.get('replication-key')

    bookmarked_version = singer.get_bookmark(state, tap_stream_id, 'version')
    now_ms = int(time.time() * 1000)

    if replication_key and bookmarked_version is not None:
        return bookmarked_version
    return now_ms
开发者ID:singer-io,项目名称:tap-salesforce,代码行数:15,代码来源:sync.py

示例4: get_start_date

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_start_date(self, state, catalog_entry):
        """Return the stream's replication-key bookmark, or the default start date.

        The replication key is read from the root (``()``) entry of the
        catalog entry's metadata. When the bookmark is missing or falsy,
        ``self.default_start_date`` is returned instead.
        """
        root_metadata = metadata.to_map(catalog_entry['metadata']).get((), {})
        replication_key = root_metadata.get('replication-key')

        bookmarked = singer.get_bookmark(state,
                                         catalog_entry['tap_stream_id'],
                                         replication_key)
        if bookmarked:
            return bookmarked
        return self.default_start_date
开发者ID:singer-io,项目名称:tap-salesforce,代码行数:9,代码来源:__init__.py

示例5: get_start

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_start(state, tap_stream_id, bookmark_key):
    """Return the stream's ``bookmark_key`` bookmark, or the configured start date.

    ``CONFIG['start_date']`` is used only when no bookmark is stored
    (the lookup returns None).
    """
    bookmarked = singer.get_bookmark(state, tap_stream_id, bookmark_key)
    return CONFIG['start_date'] if bookmarked is None else bookmarked
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:7,代码来源:__init__.py

示例6: get_current_sync_start

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_current_sync_start(state, tap_stream_id):
    """Return the stream's ``current_sync_start`` bookmark, or None if unset.

    A stored value is converted with ``utils.strptime_to_utc`` before it
    is returned.
    """
    raw_value = singer.get_bookmark(state, tap_stream_id, "current_sync_start")
    if raw_value is not None:
        return utils.strptime_to_utc(raw_value)
    return None
开发者ID:singer-io,项目名称:tap-hubspot,代码行数:7,代码来源:__init__.py

示例7: get_start

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_start(stream, bookmark_key):
    """Return the point from which ``stream`` should start syncing.

    Looks up ``bookmark_key`` for ``stream.name`` in ``stream.state``
    (an empty state is used when ``stream.state`` is falsy).

    Returns:
        The bookmark parsed with ``pendulum.parse`` when one exists.
        Without a bookmark: None for ``IncrementalStream`` instances,
        otherwise ``CONFIG['start_date']`` parsed with ``pendulum.parse``.
    """
    tap_stream_id = stream.name
    current_bookmark = singer.get_bookmark(stream.state or {}, tap_stream_id, bookmark_key)

    if current_bookmark is not None:
        LOGGER.info("found current bookmark for %s:  %s", tap_stream_id, current_bookmark)
        return pendulum.parse(current_bookmark)

    if isinstance(stream, IncrementalStream):
        return None

    LOGGER.info("no bookmark found for %s, using start_date instead...%s", tap_stream_id, CONFIG['start_date'])
    return pendulum.parse(CONFIG['start_date'])
开发者ID:singer-io,项目名称:tap-facebook,代码行数:14,代码来源:__init__.py

示例8: get_stream_version

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_stream_version(tap_stream_id, state):
    """Return the bookmarked ``version`` of the stream.

    When no version is bookmarked, the current time in epoch milliseconds
    is returned instead.
    """
    bookmarked_version = singer.get_bookmark(state, tap_stream_id, 'version')
    return int(time.time() * 1000) if bookmarked_version is None else bookmarked_version
开发者ID:singer-io,项目名称:tap-mysql,代码行数:9,代码来源:common.py

示例9: update_incremental_full_table_state

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def update_incremental_full_table_state(catalog_entry, state, cursor):
    """Make sure ``state`` holds a ``max_pk_values`` bookmark for the stream.

    An existing truthy bookmark is kept; otherwise the value is obtained
    from ``get_max_pk_values(cursor, catalog_entry)``. A truthy result is
    written back as the ``max_pk_values`` bookmark. A falsy result is only
    logged and ``state`` is returned unchanged.

    Returns:
        The state, as returned by ``singer.write_bookmark`` when a value
        was written.
    """
    stream_id = catalog_entry.tap_stream_id

    max_pk_values = singer.get_bookmark(state, stream_id, 'max_pk_values')
    if not max_pk_values:
        max_pk_values = get_max_pk_values(cursor, catalog_entry)

    if max_pk_values:
        return singer.write_bookmark(state, stream_id, 'max_pk_values', max_pk_values)

    LOGGER.info("No max value for PK found for table {}".format(catalog_entry.table))
    return state
开发者ID:singer-io,项目名称:tap-mysql,代码行数:17,代码来源:full_table.py

示例10: sync_stream

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def sync_stream(config, state, table_spec, stream):
    """Sync the S3 files of one table in ``last_modified`` order.

    Starts from the table's ``modified_since`` bookmark, or from
    ``config['start_date']`` when the bookmark is missing or falsy. After
    each file, the bookmark is advanced to that file's ``last_modified``
    timestamp and the state is emitted.

    Returns:
        The sum of the values returned by ``sync_table_file`` for the
        processed files.
    """
    table_name = table_spec['table_name']
    bookmarked = singer.get_bookmark(state, table_name, 'modified_since')
    modified_since = utils.strptime_with_tz(bookmarked or config['start_date'])

    LOGGER.info('Syncing table "%s".', table_name)
    LOGGER.info('Getting files modified since %s.', modified_since)

    # We sort here so that tracking the modified_since bookmark makes
    # sense. This means that we can't sync s3 buckets that are larger than
    # we can sort in memory which is suboptimal. If we could bookmark
    # based on anything else then we could just sync files as we see them.
    pending_files = list(s3.get_input_files_for_table(config, table_spec, modified_since))
    pending_files.sort(key=lambda item: item['last_modified'])

    records_streamed = 0
    for s3_file in pending_files:
        records_streamed += sync_table_file(config, s3_file['key'], table_spec, stream)

        # Persist progress after every file.
        file_timestamp = s3_file['last_modified'].isoformat()
        state = singer.write_bookmark(state, table_name, 'modified_since', file_timestamp)
        singer.write_state(state)

    LOGGER.info('Wrote %s records for table "%s".', records_streamed, table_name)

    return records_streamed
开发者ID:singer-io,项目名称:tap-s3-csv,代码行数:29,代码来源:sync.py

示例11: get_bookmark

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_bookmark(self):
        """Return this stream's replication-key bookmark as a parsed timestamp.

        Uses ``Context.config["start_date"]`` when the bookmark is missing
        or falsy; the chosen value is parsed with
        ``utils.strptime_with_tz``.
        """
        # name is overridden by some substreams
        bookmark = singer.get_bookmark(Context.state, self.name, self.replication_key)
        if not bookmark:
            bookmark = Context.config["start_date"]
        return utils.strptime_with_tz(bookmark)
开发者ID:singer-io,项目名称:tap-shopify,代码行数:9,代码来源:base.py

示例12: get_since_id

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def get_since_id(self):
        """Return the ``since_id`` bookmark stored for this stream in ``Context.state``."""
        current_state = Context.state
        # name is overridden by some substreams
        stream_name = self.name
        return singer.get_bookmark(current_state, stream_name, 'since_id')
开发者ID:singer-io,项目名称:tap-shopify,代码行数:7,代码来源:base.py

示例13: resume_syncing_bulk_query

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def resume_syncing_bulk_query(sf, catalog_entry, job_id, state, counter):
    """Resume a stored bulk job and emit the records of its remaining batches.

    The batches to process come from the stream's ``BatchIDs`` bookmark.
    After each batch the ``JobHighestBookmarkSeen`` bookmark is updated,
    the batch id is removed from the list and the state is emitted.

    Args:
        sf: Object passed to ``Bulk`` and used for ``get_start_date``.
        catalog_entry: Catalog entry of the stream (``tap_stream_id``,
            ``stream``, ``schema`` and optional ``stream_alias`` /
            ``metadata``).
        job_id: Id of the bulk job to resume.
        state: State holding the stream's bookmarks.
        counter: Counter incremented once per emitted record.

    Returns:
        ``counter``. It is returned untouched when ``bulk.job_exists``
        reports that the job no longer exists.
    """
    tap_stream_id = catalog_entry['tap_stream_id']

    bulk = Bulk(sf)
    current_bookmark = (singer.get_bookmark(state, tap_stream_id, 'JobHighestBookmarkSeen')
                        or sf.get_start_date(state, catalog_entry))
    current_bookmark = singer_utils.strptime_with_tz(current_bookmark)
    # NOTE(review): assumes the 'BatchIDs' bookmark is a list; a missing
    # bookmark would make the slice in the loop below fail -- confirm callers.
    batch_ids = singer.get_bookmark(state, tap_stream_id, 'BatchIDs')

    start_time = singer_utils.now()
    stream = catalog_entry['stream']
    stream_alias = catalog_entry.get('stream_alias')
    catalog_metadata = metadata.to_map(catalog_entry.get('metadata'))
    replication_key = catalog_metadata.get((), {}).get('replication-key')
    stream_version = get_stream_version(catalog_entry, state)
    schema = catalog_entry['schema']

    if not bulk.job_exists(job_id):
        # Nothing is changed in state here; this function only logs and returns.
        LOGGER.info("Found stored Job ID that no longer exists, resetting bookmark and removing JobID from state.")
        return counter

    # Iterate over the remaining batches, removing them once they are synced.
    # A copy is iterated because the list is mutated inside the loop.
    for batch_id in batch_ids[:]:
        with Transformer(pre_hook=transform_bulk_data_hook) as transformer:
            for rec in bulk.get_batch_results(job_id, batch_id, catalog_entry):
                counter.increment()
                rec = transformer.transform(rec, schema)
                rec = fix_record_anytype(rec, schema)
                singer.write_message(
                    singer.RecordMessage(
                        stream=(stream_alias or stream),
                        record=rec,
                        version=stream_version,
                        time_extracted=start_time))

                # Update bookmark if necessary. The value is parsed once and
                # reused instead of parsing the same field a second time.
                if replication_key:
                    replication_key_value = singer_utils.strptime_with_tz(rec[replication_key])
                    # Values later than the sync start time never advance
                    # the bookmark.
                    if replication_key_value <= start_time and replication_key_value > current_bookmark:
                        current_bookmark = replication_key_value

        state = singer.write_bookmark(state,
                                      tap_stream_id,
                                      'JobHighestBookmarkSeen',
                                      singer_utils.strftime(current_bookmark))
        # presumably `batch_ids` is the same list object stored in state, so
        # removing the id also shrinks the persisted 'BatchIDs' -- TODO confirm
        batch_ids.remove(batch_id)
        LOGGER.info("Finished syncing batch %s. Removing batch from state.", batch_id)
        LOGGER.info("Batches to go: %d", len(batch_ids))
        singer.write_state(state)

    return counter
开发者ID:singer-io,项目名称:tap-salesforce,代码行数:50,代码来源:sync.py

示例14: generate_pk_clause

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def generate_pk_clause(catalog_entry, state):
    """Build the ``WHERE ... ORDER BY ...`` suffix for a primary-key-bounded query.

    Every key column is bounded above by its value in the stream's
    ``max_pk_values`` bookmark. When a ``last_pk_fetched`` bookmark
    exists, the clause produced by ``generate_pk_bookmark_clause`` is
    AND-ed in front of those bounds. Rows are ordered ascending by the
    key columns.

    Args:
        catalog_entry: Catalog entry providing ``tap_stream_id`` and
            ``schema.properties``.
        state: State holding the stream's bookmarks.

    Returns:
        The SQL suffix (starting with ``" WHERE "``), or an empty string
        when no ``max_pk_values`` bookmark is set.
    """
    key_properties = common.get_key_properties(catalog_entry)
    stream_id = catalog_entry.tap_stream_id

    max_pk_values = singer.get_bookmark(state, stream_id, 'max_pk_values')
    last_pk_fetched = singer.get_bookmark(state, stream_id, 'last_pk_fetched')

    if not max_pk_values:
        return ""

    # The resume clause does not depend on the individual key column, so it
    # is built once rather than once per column. It stays empty when there
    # are no key columns, as before.
    last_pk_clause = ''
    if last_pk_fetched and key_properties:
        # Add AND to interpolate along with max_pk_values clauses
        last_pk_clause = '({}) AND '.format(generate_pk_bookmark_clause(key_properties,
                                                                        last_pk_fetched,
                                                                        catalog_entry))

    # The upper bounds are the same whether or not a resume clause exists.
    properties = catalog_entry.schema.properties
    max_pk_comparisons = [
        "{} <= {}".format(common.escape(pk),
                          quote_where_clause_value(max_pk_values[pk],
                                                   properties.get(pk).type))
        for pk in key_properties
    ]

    order_by_columns = [common.escape(c) for c in key_properties]
    sql = " WHERE {}{} ORDER BY {} ASC".format(last_pk_clause,
                                               " AND ".join(max_pk_comparisons),
                                               ", ".join(order_by_columns))

    return sql
开发者ID:singer-io,项目名称:tap-mysql,代码行数:47,代码来源:full_table.py

示例15: sync_table

# 需要导入模块: import singer [as 别名]
# 或者: from singer import get_bookmark [as 别名]
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    """Run a full-table sync of one stream and emit ACTIVATE_VERSION messages.

    The stream's bookmarks are first restricted through
    ``common.whitelist_bookmark_keys``. When ``sync_is_resumable`` reports
    a resumable sync, the query is bounded and ordered by primary key via
    ``generate_pk_clause``. After the query has been synced, the
    ``max_pk_values`` and ``last_pk_fetched`` bookmarks are cleared and an
    ACTIVATE_VERSION message is written.
    """
    stream_id = catalog_entry.tap_stream_id
    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry), stream_id, state)

    stream_bookmarks = state.get('bookmarks', {}).get(stream_id, {})
    version_exists = 'version' in stream_bookmarks

    initial_full_table_complete = singer.get_bookmark(state, stream_id, 'initial_full_table_complete')
    state_version = singer.get_bookmark(state, stream_id, 'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version,
    )

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    version_explicitly_none = version_exists and state_version is None
    if not (initial_full_table_complete or version_explicitly_none):
        singer.write_message(activate_version_message)

    perform_resumable_sync = sync_is_resumable(mysql_conn, catalog_entry)

    pk_clause = ""

    with connect_with_backoff(mysql_conn) as open_conn, open_conn.cursor() as cur:
        select_sql = common.generate_select_sql(catalog_entry, columns)

        if perform_resumable_sync:
            LOGGER.info("Full table sync is resumable based on primary key definition, will replicate incrementally")

            state = update_incremental_full_table_state(catalog_entry, state, cur)
            pk_clause = generate_pk_clause(catalog_entry, state)

        common.sync_query(cur,
                          catalog_entry,
                          state,
                          select_sql + pk_clause,
                          columns,
                          stream_version,
                          {})

    # clear max pk value and last pk fetched upon successful sync
    for finished_key in ('max_pk_values', 'last_pk_fetched'):
        singer.clear_bookmark(state, stream_id, finished_key)

    singer.write_message(activate_version_message)
开发者ID:singer-io,项目名称:tap-mysql,代码行数:56,代码来源:full_table.py


注:本文中的singer.get_bookmark方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。