

Python utils.strptime_with_tz Method Code Examples

This article collects typical usage examples of the Python method singer.utils.strptime_with_tz. If you are wondering what utils.strptime_with_tz does or how to call it, the curated examples below should help; you can also explore further usage examples from the singer.utils module.


Twelve code examples of utils.strptime_with_tz are shown below, ordered by popularity.
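Before the examples, here is a minimal sketch of what the method does (assuming the singer-python package is installed; the behavior shown, parsing an ISO 8601 string into a timezone-aware datetime and defaulting to UTC when no offset is present, matches how the examples below use it):

from singer import utils

# A string without an explicit offset is treated as UTC...
start = utils.strptime_with_tz("2021-06-01T12:30:00")
print(start)           # 2021-06-01 12:30:00+00:00

# ...while an explicit offset is preserved, so two parsed values can
# always be compared without a naive-vs-aware TypeError.
local = utils.strptime_with_tz("2021-06-01T14:30:00+02:00")
print(start == local)  # True -- the same instant

Nearly every example below follows the same pattern: parse a stored bookmark string once, then compare record timestamps against the resulting aware datetime.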

Example 1: sync_contacts

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def sync_contacts(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    bookmark_key = 'versionTimestamp'
    start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")

    singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params, 'contacts', 'has-more', ['vid-offset'], ['vidOffset']):
            modified_time = None
            if bookmark_key in row:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime( # pylint: disable=protected-access
                        row[bookmark_key],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, 'contacts', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE 
Developer: singer-io | Project: tap-hubspot | Lines: 40 | Source: __init__.py

Example 2: record_to_bk_value

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def record_to_bk_value(self, stream, record):
        # Deals and Companies records have been transformed so the bookmark
        # is prefixed by "property_". There is a nested map structure beneath the value.

        if stream == 'companies':
            bk_value = record.get('property_hs_lastmodifieddate') or record.get('createdate')
            if bk_value is None:
                return None
            return bk_value.get('value')

        if stream == 'deals':
            bk_value = record.get('property_hs_lastmodifieddate')
            if bk_value is None:
                return None
            return bk_value.get('value')
        else:
            bk_columns = self.expected_bookmarks().get(stream, [])
            if len(bk_columns) == 0:
                return None

            bk_column = bk_columns[0] #only consider first bookmark

            bk_value = record.get(bk_column)
            if not bk_value:
                raise Exception("Record received without bookmark value for stream {}: {}".format(stream, record))
            return utils.strftime(utils.strptime_with_tz(bk_value)) 
Developer: singer-io | Project: tap-hubspot | Lines: 28 | Source: test_hubspot_bookmarks1.py
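The utils.strftime(utils.strptime_with_tz(bk_value)) round trip at the end of Example 2 is a common normalization idiom: whatever ISO 8601 variant a record carries, the bookmark is re-emitted in Singer's canonical UTC format. A minimal sketch (the output format shown is what singer-python produces at the time of writing):

from singer import utils

# Differently formatted inputs normalize to one canonical UTC string.
for raw in ("2021-06-01T12:30:00Z", "2021-06-01 12:30:00"):
    print(utils.strftime(utils.strptime_with_tz(raw)))
# Both lines print: 2021-06-01T12:30:00.000000Z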

Example 3: get_attribution_window_bookmark

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def get_attribution_window_bookmark(customer_id, stream_name):
    mid_bk_value = bookmarks.get_bookmark(STATE,
                                          state_key_name(customer_id, stream_name),
                                          'last_attribution_window_date')
    return utils.strptime_with_tz(mid_bk_value) if mid_bk_value else None 
Developer: singer-io | Project: tap-adwords | Lines: 7 | Source: __init__.py

Example 4: get_start_for_stream

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def get_start_for_stream(customer_id, stream_name):
    bk_value = bookmarks.get_bookmark(STATE,
                                      state_key_name(customer_id, stream_name),
                                      'date')
    bk_start_date = utils.strptime_with_tz(bk_value or CONFIG['start_date'])
    return bk_start_date 
Developer: singer-io | Project: tap-adwords | Lines: 8 | Source: __init__.py

Example 5: get_end_date

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def get_end_date():
    if CONFIG.get('end_date'):
        return utils.strptime_with_tz(CONFIG.get('end_date'))

    return utils.now() 
Developer: singer-io | Project: tap-adwords | Lines: 7 | Source: __init__.py

Example 6: sync_stream

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def sync_stream(config, state, table_spec, stream):
    table_name = table_spec['table_name']
    modified_since = utils.strptime_with_tz(singer.get_bookmark(state, table_name, 'modified_since') or
                                            config['start_date'])

    LOGGER.info('Syncing table "%s".', table_name)
    LOGGER.info('Getting files modified since %s.', modified_since)

    s3_files = s3.get_input_files_for_table(
        config, table_spec, modified_since)

    records_streamed = 0

    # We sort here so that tracking the modified_since bookmark makes
    # sense. This means that we can't sync s3 buckets that are larger than
    # we can sort in memory, which is suboptimal. If we could bookmark
    # based on anything else then we could just sync files as we see them.
    for s3_file in sorted(s3_files, key=lambda item: item['last_modified']):
        records_streamed += sync_table_file(
            config, s3_file['key'], table_spec, stream)

        state = singer.write_bookmark(state, table_name, 'modified_since', s3_file['last_modified'].isoformat())
        singer.write_state(state)

    LOGGER.info('Wrote %s records for table "%s".', records_streamed, table_name)

    return records_streamed 
Developer: singer-io | Project: tap-s3-csv | Lines: 29 | Source: sync.py

Example 7: get_bookmark

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def get_bookmark(self):
        bookmark = (singer.get_bookmark(Context.state,
                                        # name is overridden by some substreams
                                        self.name,
                                        self.replication_key)
                    or Context.config["start_date"])
        return utils.strptime_with_tz(bookmark) 
Developer: singer-io | Project: tap-shopify | Lines: 9 | Source: base.py
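Examples 4, 6, and 7 all follow the same pattern: read a saved bookmark, fall back to the configured start_date when none exists, and parse the winner with strptime_with_tz so every later comparison is between aware datetimes. A condensed, self-contained sketch of the pattern (the state and config shapes here are illustrative, not taken from any particular tap):

from singer import utils

state = {"bookmarks": {"orders": {"updated_at": "2021-06-01T00:00:00Z"}}}
config = {"start_date": "2021-01-01T00:00:00Z"}

def get_sync_start(state, config, stream, key):
    # Prefer the saved bookmark; otherwise use the configured start date.
    bookmark = state.get("bookmarks", {}).get(stream, {}).get(key)
    return utils.strptime_with_tz(bookmark or config["start_date"])

print(get_sync_start(state, config, "orders", "updated_at"))
# 2021-06-01 00:00:00+00:00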

Example 8: get_objects

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def get_objects(self):
        since_id = 1
        bookmark = self.get_bookmark()
        max_bookmark = utils.strftime(utils.now())
        while True:
            query_params = {
                "since_id": since_id,
                "limit": RESULTS_PER_PAGE,
            }

            objects = self.call_api(query_params)

            for obj in objects:
                # Syncing Collects is a full sync every time, but only
                # records with an updated_at greater than the bookmark
                # are emitted
                if not obj.updated_at and obj.id:
                    LOGGER.info('Collect with id: %d does not have an updated_at, syncing it!',
                                obj.id)
                if not obj.updated_at or utils.strptime_with_tz(obj.updated_at) > bookmark:
                    if obj.id < since_id:
                        raise OutOfOrderIdsError("obj.id < since_id: {} < {}".format(
                            obj.id, since_id))
                    yield obj

            if len(objects) < RESULTS_PER_PAGE:
                # Update the bookmark at the end of the last page
                self.update_bookmark(max_bookmark)
                break
            if objects[-1].id != max([o.id for o in objects]):
                raise OutOfOrderIdsError("{} is not the max id in objects ({})".format(
                    objects[-1].id, max([o.id for o in objects])))
            since_id = objects[-1].id 
Developer: singer-io | Project: tap-shopify | Lines: 35 | Source: collects.py

Example 9: resume_syncing_bulk_query

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def resume_syncing_bulk_query(sf, catalog_entry, job_id, state, counter):
    bulk = Bulk(sf)
    current_bookmark = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'JobHighestBookmarkSeen') or sf.get_start_date(state, catalog_entry)
    current_bookmark = singer_utils.strptime_with_tz(current_bookmark)
    batch_ids = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'BatchIDs')

    start_time = singer_utils.now()
    stream = catalog_entry['stream']
    stream_alias = catalog_entry.get('stream_alias')
    catalog_metadata = metadata.to_map(catalog_entry.get('metadata'))
    replication_key = catalog_metadata.get((), {}).get('replication-key')
    stream_version = get_stream_version(catalog_entry, state)
    schema = catalog_entry['schema']

    if not bulk.job_exists(job_id):
        LOGGER.info("Found stored Job ID that no longer exists, resetting bookmark and removing JobID from state.")
        return counter

    # Iterate over the remaining batches, removing them once they are synced
    for batch_id in batch_ids[:]:
        with Transformer(pre_hook=transform_bulk_data_hook) as transformer:
            for rec in bulk.get_batch_results(job_id, batch_id, catalog_entry):
                counter.increment()
                rec = transformer.transform(rec, schema)
                rec = fix_record_anytype(rec, schema)
                singer.write_message(
                    singer.RecordMessage(
                        stream=(
                            stream_alias or stream),
                        record=rec,
                        version=stream_version,
                        time_extracted=start_time))

                # Update bookmark if necessary
                replication_key_value = replication_key and singer_utils.strptime_with_tz(rec[replication_key])
                if replication_key_value and replication_key_value <= start_time and replication_key_value > current_bookmark:
                    current_bookmark = singer_utils.strptime_with_tz(rec[replication_key])

        state = singer.write_bookmark(state,
                                      catalog_entry['tap_stream_id'],
                                      'JobHighestBookmarkSeen',
                                      singer_utils.strftime(current_bookmark))
        batch_ids.remove(batch_id)
        LOGGER.info("Finished syncing batch %s. Removing batch from state.", batch_id)
        LOGGER.info("Batches to go: %d", len(batch_ids))
        singer.write_state(state)

    return counter 
Developer: singer-io | Project: tap-salesforce | Lines: 50 | Source: sync.py

Example 10: copy_table

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def copy_table(self, table_name: str, file_path: str) -> None:
        """
        Copies data from all csv files that match the search_pattern into the gzip-compressed csv file at file_path
        :param table_name: Name of the table
        :param file_path: Path of the gzip compressed csv file into which data is copied
        :return: None
        """
        if not re.match(r'^.+\.csv\.gz$', file_path):
            raise Exception(f'Invalid file path: {file_path}')

        # find the specs of the table: search_pattern, key_properties ... etc
        table_spec = self._find_table_spec_by_name(table_name)

        # extract the start_date from the specs
        modified_since = strptime_with_tz(self.connection_config['start_date'])

        # get all the files in the bucket that match the criteria and were modified after start date
        s3_files = S3Helper.get_input_files_for_table(self.connection_config, table_spec, modified_since)

        # variable to hold all the records from all matching files
        records = []
        # variable to hold the set of column names from all matching files
        headers = set()

        # several files may match the search pattern, so keep the most
        # recent modification date among them to use as the state bookmark
        max_last_modified = None

        for s3_file in s3_files:

            # this function will add records to the `records` list passed to it and add to the `headers` set as well
            self._get_file_records(s3_file['key'], table_spec, records, headers)

            # check if the current file has the most recent modification date
            if max_last_modified is None or max_last_modified < s3_file['last_modified']:
                max_last_modified = s3_file['last_modified']

        # add the found last modified date to the dictionary
        self.tables_last_modified[table_name] = max_last_modified

        # write to the given compressed csv file
        with gzip.open(file_path, 'wt') as gzfile:

            writer = csv.DictWriter(gzfile,
                                    fieldnames=sorted(list(headers)),
                                    # we need to sort the headers so that copying into snowflake works
                                    delimiter=',',
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
            # write the header
            writer.writeheader()
            # write all records at once
            writer.writerows(records)

    # pylint: disable=too-many-locals 
Developer: transferwise | Project: pipelinewise | Lines: 57 | Source: tap_s3_csv.py

Example 11: sync_deals

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def sync_deals(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    bookmark_key = 'hs_lastmodifieddate'
    start = utils.strptime_with_tz(get_start(STATE, "deals", bookmark_key))
    max_bk_value = start
    LOGGER.info("sync_deals from %s", start)
    most_recent_modified_time = start
    params = {'count': 250,
              'includeAssociations': False,
              'properties' : []}

    schema = load_schema("deals")
    singer.write_schema("deals", schema, ["dealId"], [bookmark_key], catalog.get('stream_alias'))

    # Check if we should include associations
    for key in mdata.keys():
        if 'associations' in key:
            assoc_mdata = mdata.get(key)
            if (assoc_mdata.get('selected') and assoc_mdata.get('selected') == True):
                params['includeAssociations'] = True

    if mdata.get(('properties', 'properties'), {}).get('selected') or has_selected_custom_field(mdata):
        # On 2/12/20, hubspot added a lot of additional properties for
        # deals, and appending all of them to requests ended up leading to
        # 414 (url-too-long) errors. Hubspot recommended we use the
        # `includeAllProperties` and `allPropertiesFetchMode` params
        # instead.
        params['includeAllProperties'] = True
        params['allPropertiesFetchMode'] = 'latest_version'

    url = get_url('deals_all')
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'deals', url, params, 'deals', "hasMore", ["offset"], ["offset"]):
            row_properties = row['properties']
            modified_time = None
            if bookmark_key in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)
            elif 'createdate' in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)
            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if not modified_time or modified_time >= start:
                record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
                singer.write_record("deals", record, catalog.get('stream_alias'), time_extracted=utils.now())

    STATE = singer.write_bookmark(STATE, 'deals', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE

#NB> no suitable bookmark is available: https://developers.hubspot.com/docs/methods/email/get_campaigns_by_id 
Developer: singer-io | Project: tap-hubspot | Lines: 58 | Source: __init__.py

Example 12: sync_entity_chunked

# Required import: from singer import utils [as alias]
# Alternatively: from singer.utils import strptime_with_tz [as alias]
def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path):
    schema = load_schema(entity_name)
    bookmark_key = 'startTimestamp'

    singer.write_schema(entity_name, schema, key_properties, [bookmark_key], catalog.get('stream_alias'))

    start = get_start(STATE, entity_name, bookmark_key)
    LOGGER.info("sync_%s from %s", entity_name, start)

    now = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC)
    now_ts = int(now.timestamp() * 1000)

    start_ts = int(utils.strptime_with_tz(start).timestamp() * 1000)
    url = get_url(entity_name)

    mdata = metadata.to_map(catalog.get('metadata'))

    if entity_name == 'email_events':
        window_size = int(CONFIG['email_chunk_size'])
    elif entity_name == 'subscription_changes':
        window_size = int(CONFIG['subscription_chunk_size'])

    with metrics.record_counter(entity_name) as counter:
        while start_ts < now_ts:
            end_ts = start_ts + window_size
            params = {
                'startTimestamp': start_ts,
                'endTimestamp': end_ts,
                'limit': 1000,
            }
            with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
                while True:
                    our_offset = singer.get_offset(STATE, entity_name)
                    if our_offset and our_offset.get('offset') is not None:
                        params[StateFields.offset] = our_offset.get('offset')

                    data = request(url, params).json()
                    time_extracted = utils.now()

                    for row in data[path]:
                        counter.increment()
                        record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
                        singer.write_record(entity_name,
                                            record,
                                            catalog.get('stream_alias'),
                                            time_extracted=time_extracted)
                    if data.get('hasMore'):
                        STATE = singer.set_offset(STATE, entity_name, 'offset', data['offset'])
                        singer.write_state(STATE)
                    else:
                        STATE = singer.clear_offset(STATE, entity_name)
                        singer.write_state(STATE)
                        break
            STATE = singer.write_bookmark(STATE, entity_name, 'startTimestamp', utils.strftime(datetime.datetime.fromtimestamp((start_ts / 1000), datetime.timezone.utc ))) # pylint: disable=line-too-long
            singer.write_state(STATE)
            start_ts = end_ts

    STATE = singer.clear_offset(STATE, entity_name)
    singer.write_state(STATE)
    return STATE 
Developer: singer-io | Project: tap-hubspot | Lines: 62 | Source: __init__.py
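Example 12 juggles three time representations: bookmark strings, aware datetimes, and the epoch-millisecond integers the HubSpot API expects for its window parameters. Each conversion is a one-liner; a small sketch using the same expressions as the example (values are illustrative):

import datetime
from singer import utils

# Bookmark string -> epoch milliseconds, as used for startTimestamp.
start = "2021-06-01T00:00:00Z"
start_ts = int(utils.strptime_with_tz(start).timestamp() * 1000)
print(start_ts)  # 1622505600000

# Epoch milliseconds -> canonical bookmark string written back to STATE.
dt = datetime.datetime.fromtimestamp(start_ts / 1000, datetime.timezone.utc)
print(utils.strftime(dt))  # 2021-06-01T00:00:00.000000Z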


Note: The singer.utils.strptime_with_tz examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from community open-source projects, and copyright remains with the original authors; consult each project's license before use, and do not reproduce without permission.