This article collects typical usage examples of the Python method singer.utils.strftime. If you are unsure what utils.strftime does, how to call it, or what real-world usage looks like, the curated code examples here should help. You can also explore other usage examples from the containing module, singer.utils.
Fifteen code examples of utils.strftime are shown below, sorted by popularity by default.
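Before the examples, a minimal sketch of what the method does may help: singer.utils.strftime formats a timezone-aware datetime as a UTC ISO 8601 string with microsecond precision and a trailing Z (the exact output format here is inferred from the tests in Examples 11 and 12):

import singer.utils as utils

# Format an aware datetime, e.g. '2023-05-01T12:00:00.000000Z'
now_str = utils.strftime(utils.now())

# Parse it back to an aware UTC datetime; the pair round-trips (see Example 12)
parsed = utils.strptime_to_utc(now_str)
assert utils.strftime(parsed) == now_str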
Example 1: setUp
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def setUp(self):
    self.schema = {
        'type': 'object',
        'properties': {
            'id': {'type': 'integer'},
            'color': {'type': 'string'}
        }
    }
    self.colors = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']
    self.key_names = ['id']
    self.bookmark_names = ['updated_at']
    self.records = [{'id': i, 'color': color, 'updated_at': utils.strftime(utils.now())}
                    for i, color in enumerate(self.colors)]
    self.messages = [RecordMessage(stream='colors', record=r) for r in self.records]
    self.messages.append(ActivateVersionMessage(stream='colors', version=1))
Example 2: to_utc_datetime_str
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def to_utc_datetime_str(val):
    if isinstance(val, datetime.datetime):
        the_datetime = val
    elif isinstance(val, datetime.date):
        the_datetime = datetime.datetime.combine(val, datetime.datetime.min.time())
    elif isinstance(val, datetime.timedelta):
        epoch = datetime.datetime.utcfromtimestamp(0)
        the_datetime = epoch + val
    else:
        raise ValueError("{!r} is not a valid date or time type".format(val))

    if the_datetime.tzinfo is None:
        # The mysql-replication library creates naive date and datetime objects,
        # which use the local timezone, so we must set tzinfo accordingly.
        # See: https://github.com/noplay/python-mysql-replication/blob/master/pymysqlreplication/row_event.py#L143-L145
        # NB> this code only works correctly when the local time is set to UTC,
        # because of how the timestamp() method interprets naive datetimes.
        the_datetime = datetime.datetime.fromtimestamp(the_datetime.timestamp(), pytz.timezone('UTC'))

    return utils.strftime(the_datetime.astimezone(tz=pytz.UTC))
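A brief usage sketch for the helper above, assuming the local clock is set to UTC as the NB comment requires; the expected outputs follow from each branch of the function:

import datetime

# datetime values pass through (naive ones are interpreted as local time):
to_utc_datetime_str(datetime.datetime(2020, 1, 2, 3, 4, 5))
# -> '2020-01-02T03:04:05.000000Z'

# date values are promoted to midnight:
to_utc_datetime_str(datetime.date(2020, 1, 2))
# -> '2020-01-02T00:00:00.000000Z'

# timedelta values (e.g. MySQL TIME columns from the binlog) are added to the Unix epoch:
to_utc_datetime_str(datetime.timedelta(hours=1, minutes=30))
# -> '1970-01-01T01:30:00.000000Z'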
Example 3: _serialize_datetime
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def _serialize_datetime(val):
    """
    Serialize BSON and Python datetime types
    Args:
        val: datetime value
    Returns: serialized datetime value
    """
    if isinstance(val, bson.datetime.datetime):
        timezone = tzlocal.get_localzone()
        try:
            local_datetime = timezone.localize(val)
            utc_datetime = local_datetime.astimezone(pytz.UTC)
        except Exception as ex:
            if str(ex) == 'year is out of range' and val.year == 0:
                # NB: Since datetimes are persisted as strings, it doesn't
                # make sense to blow up on invalid Python datetimes (e.g.,
                # year=0). In this case we're formatting it as a string and
                # passing it along down the pipeline.
                return '{:04d}-{:02d}-{:02d}T{:02d}:{:02d}:{:02d}.{:06d}Z'.format(val.year,
                                                                                  val.month,
                                                                                  val.day,
                                                                                  val.hour,
                                                                                  val.minute,
                                                                                  val.second,
                                                                                  val.microsecond)
            raise MongoDBInvalidDatetimeError('Found invalid datetime {}'.format(val))
        return singer_strftime(utc_datetime)

    if isinstance(val, datetime.datetime):
        timezone = tzlocal.get_localzone()
        local_datetime = timezone.localize(val)
        utc_datetime = local_datetime.astimezone(pytz.UTC)
        return singer_strftime(utc_datetime)
    return None
Example 4: default
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def default(self, o):  # false positive complaint -> pylint: disable=E0202
    """
    Custom function to serialize several sorts of BSON and Python types
    Args:
        o: Object to serialize
    Returns: Serialized value
    """
    encoding_map = {
        bson.objectid.ObjectId: str,
        uuid.UUID: str,
        bson.int64.Int64: str,
        bson.timestamp.Timestamp: lambda value: singer_strftime(value.as_datetime()),
        bytes: lambda value: base64.b64encode(value).decode('utf-8'),
        bson.decimal128.Decimal128: lambda val: val.to_decimal(),
        bson.regex.Regex: lambda val: dict(pattern=val.pattern, flags=val.flags),
        bson.code.Code: lambda val: dict(value=str(val), scope=str(val.scope)) if val.scope else str(val),
        bson.dbref.DBRef: lambda val: dict(id=str(val.id), collection=val.collection, database=val.database),
        datetime.datetime: self._serialize_datetime,
        bson.datetime.datetime: self._serialize_datetime
    }

    if o.__class__ in encoding_map:
        return encoding_map[o.__class__](o)

    return super(MongoDBJsonEncoder, self).default(o)
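A sketch of how such an encoder is typically wired in, assuming the class shown is the MongoDBJsonEncoder named on the last line and that pymongo's bson package is installed:

import json
import datetime
import bson

doc = {'_id': bson.objectid.ObjectId(), 'ts': datetime.datetime(2021, 1, 1)}
# json.dumps calls default() for any value it cannot serialize natively,
# so the ObjectId becomes a string and the datetime a UTC timestamp string.
json.dumps(doc, cls=MongoDBJsonEncoder)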
Example 5: set_bookmark
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def set_bookmark(cls, path, val):
    if isinstance(val, datetime):
        val = utils.strftime(val)

    cls.bookmark(path[:-1])[path[-1]] = val
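A hypothetical call sketch; the enclosing class and its bookmark() helper are not shown in this snippet, so the names below are assumptions:

# `path` is a key path into the tap's state dict; datetime bookmarks are
# stringified with utils.strftime before storage at that path, e.g.:
SomeState.set_bookmark(['bookmarks', 'colors', 'updated_at'], utils.now())
# state['bookmarks']['colors']['updated_at'] now holds a '...Z' string, not a datetime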
Example 6: write_current_sync_start
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def write_current_sync_start(state, tap_stream_id, start):
    value = start
    if start is not None:
        value = utils.strftime(start)

    return singer.write_bookmark(state, tap_stream_id, "current_sync_start", value)
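For illustration, the state shape this produces, assuming singer-python's standard write_bookmark behavior of nesting values under 'bookmarks':

state = write_current_sync_start({}, 'companies', utils.strptime_to_utc('2021-01-01T00:00:00Z'))
# {'bookmarks': {'companies': {'current_sync_start': '2021-01-01T00:00:00.000000Z'}}}

# Passing None clears the marker once the sync completes (see Example 14):
state = write_current_sync_start(state, 'companies', None)
# {'bookmarks': {'companies': {'current_sync_start': None}}}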
Example 7: sync_contacts
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def sync_contacts(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    bookmark_key = 'versionTimestamp'
    start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")
    singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params, 'contacts', 'has-more', ['vid-offset'], ['vidOffset']):
            modified_time = None
            if bookmark_key in row:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime(  # pylint: disable=protected-access
                        row[bookmark_key],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, 'contacts', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
Example 8: record_to_bk_value
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def record_to_bk_value(self, stream, record):
    # Deals and Companies records have been transformed, so the bookmark
    # is prefixed by "property_". There is a nested map structure beneath the value.
    if stream == 'companies':
        bk_value = record.get('property_hs_lastmodifieddate') or record.get('createdate')
        if bk_value is None:
            return None
        return bk_value.get('value')

    if stream == 'deals':
        bk_value = record.get('property_hs_lastmodifieddate')
        if bk_value is None:
            return None
        return bk_value.get('value')
    else:
        bk_columns = self.expected_bookmarks().get(stream, [])
        if len(bk_columns) == 0:
            return None

        bk_column = bk_columns[0]  # only consider the first bookmark
        bk_value = record.get(bk_column)
        if not bk_value:
            raise Exception("Record received without bookmark value for stream {}: {}".format(stream, record))
        return utils.strftime(utils.strptime_with_tz(bk_value))
Example 9: get_objects
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def get_objects(self):
    since_id = 1
    bookmark = self.get_bookmark()
    max_bookmark = utils.strftime(utils.now())
    while True:
        query_params = {
            "since_id": since_id,
            "limit": RESULTS_PER_PAGE,
        }

        objects = self.call_api(query_params)

        for obj in objects:
            # Syncing Collects is a full sync every time, but only records
            # with an updated_at greater than the bookmark are emitted
            if not obj.updated_at and obj.id:
                LOGGER.info('Collect with id: %d does not have an updated_at, syncing it!',
                            obj.id)
            if not obj.updated_at or utils.strptime_with_tz(obj.updated_at) > bookmark:
                if obj.id < since_id:
                    raise OutOfOrderIdsError("obj.id < since_id: {} < {}".format(
                        obj.id, since_id))
                yield obj

        if len(objects) < RESULTS_PER_PAGE:
            # Update the bookmark at the end of the last page
            self.update_bookmark(max_bookmark)
            break

        if objects[-1].id != max([o.id for o in objects]):
            raise OutOfOrderIdsError("{} is not the max id in objects ({})".format(
                objects[-1].id, max([o.id for o in objects])))
        since_id = objects[-1].id
Example 10: asdict
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def asdict(self):
    result = {
        'type': 'RECORD',
        'stream': self.stream,
        'record': self.record,
    }
    if self.version is not None:
        result['version'] = self.version
    if self.time_extracted:
        as_utc = self.time_extracted.astimezone(pytz.utc)
        result['time_extracted'] = u.strftime(as_utc)
    return result
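A sketch of the resulting dict when time_extracted is set; the datetime here is illustrative:

import pytz
import datetime
from singer.messages import RecordMessage

msg = RecordMessage(stream='colors',
                    record={'id': 1, 'color': 'red'},
                    version=1,
                    time_extracted=datetime.datetime(2021, 1, 1, tzinfo=pytz.UTC))
msg.asdict()
# {'type': 'RECORD', 'stream': 'colors', 'record': {'id': 1, 'color': 'red'},
#  'version': 1, 'time_extracted': '2021-01-01T00:00:00.000000Z'}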
Example 11: test_small_years
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def test_small_years(self):
    self.assertEqual(u.strftime(dt(90, 1, 1, tzinfo=pytz.UTC)),
                     "0090-01-01T00:00:00.000000Z")
Example 12: test_round_trip
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def test_round_trip(self):
    now = dt.utcnow().replace(tzinfo=pytz.UTC)
    dtime = u.strftime(now)
    pdtime = u.strptime_to_utc(dtime)
    fdtime = u.strftime(pdtime)
    self.assertEqual(dtime, fdtime)
Example 13: resume_syncing_bulk_query
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def resume_syncing_bulk_query(sf, catalog_entry, job_id, state, counter):
    bulk = Bulk(sf)
    current_bookmark = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'JobHighestBookmarkSeen') or sf.get_start_date(state, catalog_entry)
    current_bookmark = singer_utils.strptime_with_tz(current_bookmark)
    batch_ids = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'BatchIDs')

    start_time = singer_utils.now()
    stream = catalog_entry['stream']
    stream_alias = catalog_entry.get('stream_alias')
    catalog_metadata = metadata.to_map(catalog_entry.get('metadata'))
    replication_key = catalog_metadata.get((), {}).get('replication-key')
    stream_version = get_stream_version(catalog_entry, state)
    schema = catalog_entry['schema']

    if not bulk.job_exists(job_id):
        LOGGER.info("Found stored Job ID that no longer exists, resetting bookmark and removing JobID from state.")
        return counter

    # Iterate over the remaining batches, removing them once they are synced
    for batch_id in batch_ids[:]:
        with Transformer(pre_hook=transform_bulk_data_hook) as transformer:
            for rec in bulk.get_batch_results(job_id, batch_id, catalog_entry):
                counter.increment()
                rec = transformer.transform(rec, schema)
                rec = fix_record_anytype(rec, schema)
                singer.write_message(
                    singer.RecordMessage(
                        stream=(stream_alias or stream),
                        record=rec,
                        version=stream_version,
                        time_extracted=start_time))

                # Update the bookmark if necessary
                replication_key_value = replication_key and singer_utils.strptime_with_tz(rec[replication_key])
                if replication_key_value and replication_key_value <= start_time and replication_key_value > current_bookmark:
                    current_bookmark = singer_utils.strptime_with_tz(rec[replication_key])

        state = singer.write_bookmark(state,
                                      catalog_entry['tap_stream_id'],
                                      'JobHighestBookmarkSeen',
                                      singer_utils.strftime(current_bookmark))
        batch_ids.remove(batch_id)
        LOGGER.info("Finished syncing batch %s. Removing batch from state.", batch_id)
        LOGGER.info("Batches to go: %d", len(batch_ids))
        singer.write_state(state)

    return counter
Example 14: sync_companies
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def sync_companies(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    bumble_bee = Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
    bookmark_key = 'hs_lastmodifieddate'
    start = utils.strptime_to_utc(get_start(STATE, "companies", bookmark_key))
    LOGGER.info("sync_companies from %s", start)
    schema = load_schema('companies')
    singer.write_schema("companies", schema, ["companyId"], [bookmark_key], catalog.get('stream_alias'))

    # Because this stream doesn't query by `lastUpdated`, it cycles
    # through the data set every time. The issue with this is that there
    # is a race condition by which records may be updated between the
    # start of this table's sync and the end, causing some updates to not
    # be captured. In order to combat this, we must store the current
    # sync's start in the state and not move the bookmark past this value.
    current_sync_start = get_current_sync_start(STATE, "companies") or utils.now()
    STATE = write_current_sync_start(STATE, "companies", current_sync_start)
    singer.write_state(STATE)

    url = get_url("companies_all")
    max_bk_value = start
    if CONTACTS_BY_COMPANY in ctx.selected_stream_ids:
        contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY)
        singer.write_schema("contacts_by_company", contacts_by_company_schema, ["company-id", "contact-id"])

    with bumble_bee:
        for row in gen_request(STATE, 'companies', url, default_company_params, 'companies', 'has-more', ['offset'], ['offset']):
            row_properties = row['properties']
            modified_time = None
            if bookmark_key in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)
            elif 'createdate' in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if not modified_time or modified_time >= start:
                record = request(get_url("companies_detail", company_id=row['companyId'])).json()
                record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata)
                singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now())
                if CONTACTS_BY_COMPANY in ctx.selected_stream_ids:
                    STATE = _sync_contacts_by_company(STATE, ctx, record['companyId'])

    # Don't bookmark past the start of this sync to account for updated records during the sync.
    new_bookmark = min(max_bk_value, current_sync_start)
    STATE = singer.write_bookmark(STATE, 'companies', bookmark_key, utils.strftime(new_bookmark))
    STATE = write_current_sync_start(STATE, 'companies', None)
    singer.write_state(STATE)
    return STATE
Example 15: sync_deals
# Required imports: from singer import utils [as alias]
# Or: from singer.utils import strftime [as alias]
def sync_deals(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get('metadata'))
    bookmark_key = 'hs_lastmodifieddate'
    start = utils.strptime_with_tz(get_start(STATE, "deals", bookmark_key))
    max_bk_value = start
    LOGGER.info("sync_deals from %s", start)
    most_recent_modified_time = start
    params = {'count': 250,
              'includeAssociations': False,
              'properties': []}

    schema = load_schema("deals")
    singer.write_schema("deals", schema, ["dealId"], [bookmark_key], catalog.get('stream_alias'))

    # Check if we should include associations
    for key in mdata.keys():
        if 'associations' in key:
            assoc_mdata = mdata.get(key)
            if assoc_mdata.get('selected') and assoc_mdata.get('selected') == True:
                params['includeAssociations'] = True

    if mdata.get(('properties', 'properties'), {}).get('selected') or has_selected_custom_field(mdata):
        # On 2/12/20, Hubspot added a lot of additional properties for
        # deals, and appending all of them to requests ended up leading to
        # 414 (url-too-long) errors. Hubspot recommended we use the
        # `includeAllProperties` and `allPropertiesFetchMode` params
        # instead.
        params['includeAllProperties'] = True
        params['allPropertiesFetchMode'] = 'latest_version'

    url = get_url('deals_all')
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'deals', url, params, 'deals', "hasMore", ["offset"], ["offset"]):
            row_properties = row['properties']
            modified_time = None
            if bookmark_key in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)
            elif 'createdate' in row_properties:
                # Hubspot returns timestamps in millis
                timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0
                modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc)

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if not modified_time or modified_time >= start:
                record = bumble_bee.transform(lift_properties_and_versions(row), schema, mdata)
                singer.write_record("deals", record, catalog.get('stream_alias'), time_extracted=utils.now())

    STATE = singer.write_bookmark(STATE, 'deals', bookmark_key, utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
# NB> no suitable bookmark is available: https://developers.hubspot.com/docs/methods/email/get_campaigns_by_id