This article collects typical usage examples of the Python method singer.utils.strptime_with_tz. If you are wondering what utils.strptime_with_tz does or how to call it, the curated code examples below should help; you can also explore the other methods of the singer.utils module.
The following presents 12 code examples of utils.strptime_with_tz, sorted by popularity by default.
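Before the examples, here is a minimal sketch of the pattern most of them share: parse a bookmark string into a timezone-aware datetime with strptime_with_tz, compare it against record timestamps, and serialize the largest value back to a string for the next bookmark. It relies only on the singer-python helpers already used in the examples (utils.strptime_with_tz, utils.strftime); the bookmark and record timestamps are made up for illustration.

from singer import utils

# Illustrative starting bookmark; real taps read this from STATE or config.
start = utils.strptime_with_tz("2021-01-01T00:00:00Z")   # timezone-aware datetime
max_bk_value = start

# Illustrative record timestamps; real taps pull these from API responses.
for record_ts in ["2021-02-01T12:30:00Z", "2021-03-15T08:00:00Z"]:
    modified_time = utils.strptime_with_tz(record_ts)
    if modified_time >= max_bk_value:   # keep the newest timestamp seen
        max_bk_value = modified_time

new_bookmark = utils.strftime(max_bk_value)   # back to an ISO 8601 string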
Example 1: sync_contacts
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def sync_contacts(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    bookmark_key = 'versionTimestamp'
    start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")

    singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params, 'contacts', 'has-more', ['vid-offset'], ['vidOffset']):
            modified_time = None
            if bookmark_key in row:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime(  # pylint: disable=protected-access
                        row[bookmark_key],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, 'contacts', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
Example 2: record_to_bk_value
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def record_to_bk_value(self, stream, record):
    # Deals and Companies records have been transformed so the bookmark
    # is prefixed by "property_". There is a nested map structure beneath the value.
    if stream == 'companies':
        bk_value = record.get('property_hs_lastmodifieddate') or record.get('createdate')
        if bk_value is None:
            return None
        return bk_value.get('value')

    if stream == 'deals':
        bk_value = record.get('property_hs_lastmodifieddate')
        if bk_value is None:
            return None
        return bk_value.get('value')
    else:
        bk_columns = self.expected_bookmarks().get(stream, [])
        if len(bk_columns) == 0:
            return None

        bk_column = bk_columns[0]  # only consider first bookmark
        bk_value = record.get(bk_column)
        if not bk_value:
            raise Exception("Record received without bookmark value for stream {}: {}".format(stream, record))
        return utils.strftime(utils.strptime_with_tz(bk_value))
Example 3: get_attribution_window_bookmark
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def get_attribution_window_bookmark(customer_id, stream_name):
    mid_bk_value = bookmarks.get_bookmark(STATE,
                                          state_key_name(customer_id, stream_name),
                                          'last_attribution_window_date')
    return utils.strptime_with_tz(mid_bk_value) if mid_bk_value else None
Example 4: get_start_for_stream
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def get_start_for_stream(customer_id, stream_name):
    bk_value = bookmarks.get_bookmark(STATE,
                                      state_key_name(customer_id, stream_name),
                                      'date')
    bk_start_date = utils.strptime_with_tz(bk_value or CONFIG['start_date'])
    return bk_start_date
Example 5: get_end_date
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def get_end_date():
    if CONFIG.get('end_date'):
        return utils.strptime_with_tz(CONFIG.get('end_date'))

    return utils.now()
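Examples 3 through 5 come from the same tap and together define the sync window. The sketch below shows one hypothetical way such helpers could drive a day-by-day sync loop; sync_one_day is a placeholder and is not part of the code shown above.

import datetime

def sync_date_range(customer_id, stream_name):
    # Walk from the bookmarked start date to the configured end date one day at a time.
    start = get_start_for_stream(customer_id, stream_name)   # bookmark or CONFIG['start_date']
    end = get_end_date()                                      # CONFIG['end_date'] or utils.now()
    current = start
    while current <= end:
        sync_one_day(customer_id, stream_name, current.date())   # hypothetical per-day sync
        current += datetime.timedelta(days=1)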
Example 6: sync_stream
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def sync_stream(config, state, table_spec, stream):
    table_name = table_spec['table_name']
    modified_since = utils.strptime_with_tz(singer.get_bookmark(state, table_name, 'modified_since') or
                                            config['start_date'])

    LOGGER.info('Syncing table "%s".', table_name)
    LOGGER.info('Getting files modified since %s.', modified_since)

    s3_files = s3.get_input_files_for_table(
        config, table_spec, modified_since)

    records_streamed = 0

    # We sort here so that tracking the modified_since bookmark makes
    # sense. This means that we can't sync s3 buckets that are larger than
    # we can sort in memory which is suboptimal. If we could bookmark
    # based on anything else then we could just sync files as we see them.
    for s3_file in sorted(s3_files, key=lambda item: item['last_modified']):
        records_streamed += sync_table_file(
            config, s3_file['key'], table_spec, stream)

        state = singer.write_bookmark(state, table_name, 'modified_since', s3_file['last_modified'].isoformat())
        singer.write_state(state)

    LOGGER.info('Wrote %s records for table "%s".', records_streamed, table_name)

    return records_streamed
Example 7: get_bookmark
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def get_bookmark(self):
    bookmark = (singer.get_bookmark(Context.state,
                                    # name is overridden by some substreams
                                    self.name,
                                    self.replication_key)
                or Context.config["start_date"])
    return utils.strptime_with_tz(bookmark)
Example 8: get_objects
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def get_objects(self):
    since_id = 1
    bookmark = self.get_bookmark()
    max_bookmark = utils.strftime(utils.now())
    while True:
        query_params = {
            "since_id": since_id,
            "limit": RESULTS_PER_PAGE,
        }

        objects = self.call_api(query_params)

        for obj in objects:
            # Syncing Collects is a full sync every time but emitting
            # records that have an updated_date greater than the
            # bookmark
            if not obj.updated_at and obj.id:
                LOGGER.info('Collect with id: %d does not have an updated_at, syncing it!',
                            obj.id)

            if not obj.updated_at or utils.strptime_with_tz(obj.updated_at) > bookmark:
                if obj.id < since_id:
                    raise OutOfOrderIdsError("obj.id < since_id: {} < {}".format(
                        obj.id, since_id))
                yield obj

        if len(objects) < RESULTS_PER_PAGE:
            # Update the bookmark at the end of the last page
            self.update_bookmark(max_bookmark)
            break

        if objects[-1].id != max([o.id for o in objects]):
            raise OutOfOrderIdsError("{} is not the max id in objects ({})".format(
                objects[-1].id, max([o.id for o in objects])))

        since_id = objects[-1].id
Example 9: resume_syncing_bulk_query
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def resume_syncing_bulk_query(sf, catalog_entry, job_id, state, counter):
    bulk = Bulk(sf)
    current_bookmark = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'JobHighestBookmarkSeen') or sf.get_start_date(state, catalog_entry)
    current_bookmark = singer_utils.strptime_with_tz(current_bookmark)
    batch_ids = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'BatchIDs')

    start_time = singer_utils.now()
    stream = catalog_entry['stream']
    stream_alias = catalog_entry.get('stream_alias')
    catalog_metadata = metadata.to_map(catalog_entry.get('metadata'))
    replication_key = catalog_metadata.get((), {}).get('replication-key')
    stream_version = get_stream_version(catalog_entry, state)
    schema = catalog_entry['schema']

    if not bulk.job_exists(job_id):
        LOGGER.info("Found stored Job ID that no longer exists, resetting bookmark and removing JobID from state.")
        return counter

    # Iterate over the remaining batches, removing them once they are synced
    for batch_id in batch_ids[:]:
        with Transformer(pre_hook=transform_bulk_data_hook) as transformer:
            for rec in bulk.get_batch_results(job_id, batch_id, catalog_entry):
                counter.increment()
                rec = transformer.transform(rec, schema)
                rec = fix_record_anytype(rec, schema)
                singer.write_message(
                    singer.RecordMessage(
                        stream=(
                            stream_alias or stream),
                        record=rec,
                        version=stream_version,
                        time_extracted=start_time))

                # Update bookmark if necessary
                replication_key_value = replication_key and singer_utils.strptime_with_tz(rec[replication_key])
                if replication_key_value and replication_key_value <= start_time and replication_key_value > current_bookmark:
                    current_bookmark = singer_utils.strptime_with_tz(rec[replication_key])

        state = singer.write_bookmark(state,
                                      catalog_entry['tap_stream_id'],
                                      'JobHighestBookmarkSeen',
                                      singer_utils.strftime(current_bookmark))
        batch_ids.remove(batch_id)
        LOGGER.info("Finished syncing batch %s. Removing batch from state.", batch_id)
        LOGGER.info("Batches to go: %d", len(batch_ids))
        singer.write_state(state)

    return counter
Example 10: copy_table
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def copy_table(self, table_name: str, file_path: str) -> None:
    """
    Copies data from all csv files that match the search_pattern into the csv file in file_path
    :param table_name: Name of the table
    :param file_path: Path of the gzip compressed csv file into which data is copied
    :return: None
    """
    if not re.match(r'^.+\.csv\.gz$', file_path):
        raise Exception(f'Invalid file path: {file_path}')

    # find the specs of the table: search_pattern, key_properties ... etc
    table_spec = self._find_table_spec_by_name(table_name)

    # extract the start_date from the specs
    modified_since = strptime_with_tz(self.connection_config['start_date'])

    # get all the files in the bucket that match the criteria and were modified after start date
    s3_files = S3Helper.get_input_files_for_table(self.connection_config, table_spec, modified_since)

    # variable to hold all the records from all matching files
    records = []
    # variable to hold the set of column names from all matching files
    headers = set()

    # given that there might be several files matching the search pattern
    # we want to keep the most recent date one of them was modified to use it as state bookmark
    max_last_modified = None

    for s3_file in s3_files:
        # this function will add records to the `records` list passed to it and add to the `headers` set as well
        self._get_file_records(s3_file['key'], table_spec, records, headers)

        # check if the current file has the most recent modification date
        if max_last_modified is None or max_last_modified < s3_file['last_modified']:
            max_last_modified = s3_file['last_modified']

    # add the found last modified date to the dictionary
    self.tables_last_modified[table_name] = max_last_modified

    # write to the given compressed csv file
    with gzip.open(file_path, 'wt') as gzfile:
        writer = csv.DictWriter(gzfile,
                                fieldnames=sorted(list(headers)),
                                # we need to sort the headers so that copying into snowflake works
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        # write the header
        writer.writeheader()
        # write all records at once
        writer.writerows(records)

# pylint: disable=too-many-locals
Example 11: sync_deals
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def sync_deals(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    bookmark_key = 'hs_lastmodifieddate'
    start = utils.strptime_with_tz(get_start(STATE, "deals", bookmark_key))
    max_bk_value = start
    LOGGER.info("sync_deals from %s", start)
    most_recent_modified_time = start
    params = {'count': 250,
              'includeAssociations': False,
              'properties': []}

    schema = load_schema("deals")
    singer.write_schema("deals", schema, ["dealId"], [bookmark_key], catalog.get('stream_alias'))

    # Check if we should include associations
    for key in mdata.keys():
        if 'associations' in key:
            assoc_mdata = mdata.get(key)
            if (assoc_mdata.get('selected') and assoc_mdata.get('selected') == True):
                params['includeAssociations'] = True

    if mdata.get(('properties', 'properties'), {}).get('selected') or has_selected_custom_field(mdata):
        # On 2/12/20, hubspot added a lot of additional properties for
        # deals, and appending all of them to requests ended up leading to
        # 414 (url-too-long) errors. Hubspot recommended we use the
        # `includeAllProperties` and `allpropertiesFetchMode` params
        # instead.
        params['includeAllProperties'] = True
        params['allPropertiesFetchMode'] = 'latest_version'

    url = get_url('deals_all')
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'deals', url, params, 'deals', "hasMore", ["offset"], ["offset"]):
            row_properties = row['properties']
            modified_time = None
            if bookmark_key in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)
            elif 'createdate' in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if not modified_time or modified_time >= start:
                record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
                singer.write_record("deals", record, catalog.get('stream_alias'), time_extracted=utils.now())

    STATE = singer.write_bookmark(STATE, 'deals', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
#NB> no suitable bookmark is available: https://developers.hubspot.com/docs/methods/email/get_campaigns_by_id
Example 12: sync_entity_chunked
# Required import: from singer import utils  [as alias]
# Or: from singer.utils import strptime_with_tz  [as alias]
def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path):
    schema = load_schema(entity_name)
    bookmark_key = 'startTimestamp'

    singer.write_schema(entity_name, schema, key_properties, [bookmark_key], catalog.get('stream_alias'))

    start = get_start(STATE, entity_name, bookmark_key)
    LOGGER.info("sync_%s from %s", entity_name, start)

    now = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC)
    now_ts = int(now.timestamp() * 1000)

    start_ts = int(utils.strptime_with_tz(start).timestamp() * 1000)
    url = get_url(entity_name)

    mdata = metadata.to_map(catalog.get('metadata'))

    if entity_name == 'email_events':
        window_size = int(CONFIG['email_chunk_size'])
    elif entity_name == 'subscription_changes':
        window_size = int(CONFIG['subscription_chunk_size'])

    with metrics.record_counter(entity_name) as counter:
        while start_ts < now_ts:
            end_ts = start_ts + window_size
            params = {
                'startTimestamp': start_ts,
                'endTimestamp': end_ts,
                'limit': 1000,
            }
            with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
                while True:
                    our_offset = singer.get_offset(STATE, entity_name)
                    if bool(our_offset) and our_offset.get('offset') != None:
                        params[StateFields.offset] = our_offset.get('offset')

                    data = request(url, params).json()
                    time_extracted = utils.now()

                    for row in data[path]:
                        counter.increment()
                        record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
                        singer.write_record(entity_name,
                                            record,
                                            catalog.get('stream_alias'),
                                            time_extracted=time_extracted)
                    if data.get('hasMore'):
                        STATE = singer.set_offset(STATE, entity_name, 'offset', data['offset'])
                        singer.write_state(STATE)
                    else:
                        STATE = singer.clear_offset(STATE, entity_name)
                        singer.write_state(STATE)
                        break
            STATE = singer.write_bookmark(STATE, entity_name, 'startTimestamp', utils.strftime(datetime.datetime.fromtimestamp((start_ts / 1000), datetime.timezone.utc)))  # pylint: disable=line-too-long
            singer.write_state(STATE)
            start_ts = end_ts

    STATE = singer.clear_offset(STATE, entity_name)
    singer.write_state(STATE)
    return STATE