This article collects typical usage examples of the DbUtil class in Python. If you have been wondering what the Python DbUtil class is for or how to use it, the selected class code examples below may help.
The following shows 15 code examples that use the DbUtil class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
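Most of the examples below pass a DB-API cursor to DbUtil.dict_cursor or DbUtil.copy_dict_cursor to get rows back as dictionaries keyed by column name (the copy_ variant materializes everything up front, so the same cursor can be re-executed inside a loop). The DbUtil source itself is not reproduced on this page; the following is only a minimal sketch of what such helpers typically look like, assuming a standard DB-API cursor with a populated description attribute:

def dict_cursor(cursor):
    # Yield each row as a {column_name: value} dict; a generator keeps memory use low.
    column_names = [desc[0] for desc in cursor.description]
    row = cursor.fetchone()
    while row:
        yield dict(zip(column_names, row))
        row = cursor.fetchone()

def copy_dict_cursor(cursor):
    # Fetch the full result set as a list of dicts, detaching the data from the cursor.
    column_names = [desc[0] for desc in cursor.description]
    return [dict(zip(column_names, row)) for row in cursor.fetchall()]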
Example 1: update_flow_jobs
def update_flow_jobs(self, last_unixtime=None):
    # Index flows, each with its nested jobs, into the wherehows/flow_jobs Elasticsearch type.
    # When last_unixtime is given, only flows modified since roughly an hour before it are re-indexed.
    if last_unixtime:
        flow_sql = """
            SELECT a.app_code, f.* FROM flow f JOIN cfg_application a on f.app_id = a.app_id
            WHERE from_unixtime(modified_time) >= DATE_SUB(from_unixtime(%f), INTERVAL 1 HOUR)
            """ % last_unixtime
    else:
        flow_sql = """
            SELECT a.app_code, f.* FROM flow f JOIN cfg_application a on f.app_id = a.app_id
            """
    job_sql = """
        SELECT * FROM flow_job WHERE app_id = %d and flow_id = %d
        """
    url = self.elasticsearch_index_url + ':' + str(self.elasticsearch_port) + '/wherehows/flow_jobs/_bulk'
    params = []

    self.wh_cursor.execute(flow_sql)
    rows = DbUtil.copy_dict_cursor(self.wh_cursor)
    row_count = 1
    for row in rows:
        self.wh_cursor.execute(job_sql % (long(row['app_id']), long(row['flow_id'])))
        jobs = []
        job_rows = DbUtil.copy_dict_cursor(self.wh_cursor)
        if job_rows:
            for job_row in job_rows:
                jobs.append({"app_id": job_row['app_id'], "flow_id": job_row['flow_id'], "job_id": job_row['job_id'],
                             "job_name": job_row['job_name'] if job_row['job_name'] else '',
                             "job_path": job_row['job_path'] if job_row['job_path'] else '',
                             "job_type_id": job_row['job_type_id'],
                             "job_type": job_row['job_type'] if job_row['job_type'] else '',
                             "pre_jobs": job_row['pre_jobs'] if job_row['pre_jobs'] else '',
                             "post_jobs": job_row['post_jobs'] if job_row['post_jobs'] else '',
                             "is_current": job_row['is_current'] if job_row['is_current'] else '',
                             "is_first": job_row['is_first'] if job_row['is_first'] else '',
                             "is_last": job_row['is_last'] if job_row['is_last'] else ''})

        # Each document needs a bulk "index" action line followed by the document body.
        params.append('{ "index": { "_id": ' + str(long(row['flow_id']) * 10000 + long(row['app_id'])) + ' }}')
        if len(jobs) > 0:
            params.append(
                """{"app_id": %d, "flow_id": %d, "app_code": "%s", "flow_name": "%s", "flow_group": "%s", "flow_path": "%s", "flow_level": %d, "is_active": "%s", "is_scheduled": "%s", "pre_flows": "%s", "jobs": %s}"""
                % (row['app_id'], row['flow_id'], row['app_code'] if row['app_code'] else '',
                   row['flow_name'] if row['flow_name'] else '', row['flow_group'] if row['flow_group'] else '',
                   row['flow_path'] if row['flow_path'] else '', row['flow_level'],
                   row['is_active'] if row['is_active'] else '', row['is_scheduled'] if row['is_scheduled'] else '',
                   row['pre_flows'] if row['pre_flows'] else '', json.dumps(jobs)))
        else:
            params.append(
                """{"app_id": %d, "flow_id": %d, "app_code": "%s", "flow_name": "%s", "flow_group": "%s", "flow_path": "%s", "flow_level": %d, "is_active": "%s", "is_scheduled": "%s", "pre_flows": "%s", "jobs": ""}"""
                % (row['app_id'], row['flow_id'], row['app_code'] if row['app_code'] else '',
                   row['flow_name'] if row['flow_name'] else '', row['flow_group'] if row['flow_group'] else '',
                   row['flow_path'] if row['flow_path'] else '', row['flow_level'],
                   row['is_active'] if row['is_active'] else '', row['is_scheduled'] if row['is_scheduled'] else '',
                   row['pre_flows'] if row['pre_flows'] else ''))

        # Flush to Elasticsearch every 1000 flows to keep the bulk request small.
        if row_count % 1000 == 0:
            self.bulk_insert(params, url)
            self.logger.info('flow jobs ' + str(row_count))
            params = []
        row_count += 1

    if len(params) > 0:
        self.logger.info('flow_jobs ' + str(len(params)))
        self.bulk_insert(params, url)
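Examples 1, 4, 5, 10 and 12 accumulate pairs of action and document lines in params and hand them to self.bulk_insert(params, url), which is not shown on this page. A plausible minimal sketch, assuming the requests library and the newline-delimited payload that the Elasticsearch _bulk endpoint expects, might be:

import requests

def bulk_insert(self, params, url):
    # The _bulk API takes newline-delimited JSON: an action line ({"index": ...})
    # followed by a document line, with a trailing newline at the very end.
    payload = '\n'.join(params) + '\n'
    try:
        response = requests.post(url, data=payload,
                                 headers={'Content-Type': 'application/x-ndjson'})
        self.logger.debug('bulk insert status: ' + str(response.status_code))
    except Exception as e:
        self.logger.error(str(e))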
Example 2: collect_flow_jobs
def collect_flow_jobs(self, flow_file, job_file, dag_file):
    # Extract Oozie workflow definitions: one flow record per app_path, plus job and DAG-edge records.
    self.logger.info("collect flow&jobs")
    flow_writer = FileWriter(flow_file)
    job_writer = FileWriter(job_file)
    dag_writer = FileWriter(dag_file)
    query = """
        SELECT a.*, b.created_time FROM
          (SELECT w.app_name, w.app_path, max(w.id) as source_version, max(unix_timestamp(w.last_modified_time)) as last_modified_time
           from WF_JOBS w LEFT JOIN WF_JOBS s
           ON w.app_path = s.app_path AND w.created_time < s.created_time
           WHERE s.created_time IS NULL GROUP BY w.app_name, w.app_path) a
        JOIN
          (SELECT app_path, min(unix_timestamp(created_time)) as created_time FROM WF_JOBS GROUP BY app_path) b
        ON a.app_path = b.app_path
        """
    self.oz_cursor.execute(query)
    rows = DbUtil.dict_cursor(self.oz_cursor)

    for row in rows:
        flow_record = OozieFlowRecord(self.app_id,
                                      row['app_name'],
                                      row['app_path'],
                                      0,
                                      row['source_version'],
                                      row['created_time'],
                                      row['last_modified_time'],
                                      self.wh_exec_id)
        flow_writer.append(flow_record)

        query = """
            select name, type, transition from WF_ACTIONS
            where wf_id = '{source_version}'
            """.format(source_version=row['source_version'])
        new_oz_cursor = self.oz_con.cursor()
        new_oz_cursor.execute(query)
        nodes = DbUtil.dict_cursor(new_oz_cursor)

        for node in nodes:
            job_record = OozieJobRecord(self.app_id,
                                        row['app_path'],
                                        row['source_version'],
                                        node['name'],
                                        row['app_path'] + "/" + node['name'],
                                        node['type'],
                                        self.wh_exec_id)
            job_writer.append(job_record)

            # A transition other than "*" is an edge in the workflow DAG.
            if node['transition'] != "*" and node['transition'] is not None:
                dag_edge = OozieFlowDagRecord(self.app_id,
                                              row['app_path'],
                                              row['source_version'],
                                              row['app_path'] + "/" + node['name'],
                                              row['app_path'] + "/" + node['transition'],
                                              self.wh_exec_id)
                dag_writer.append(dag_edge)
        new_oz_cursor.close()

    dag_writer.close()
    job_writer.close()
    flow_writer.close()
Example 3: collect_flow_schedules
def collect_flow_schedules(self, schedule_file):
    # load flow scheduling info from table triggers
    self.logger.info("collect flow schedule")
    timezone = "ALTER SESSION SET TIME_ZONE = 'US/Pacific'"
    self.aw_cursor.execute(timezone)
    schema = "ALTER SESSION SET CURRENT_SCHEMA=APPWORX"
    self.aw_cursor.execute(schema)
    schedule_writer = FileWriter(schedule_file)
    query = \
        """SELECT J.SO_APPLICATION, J.SO_MODULE, S.AW_SCH_NAME, S.AW_SCH_INTERVAL, S.AW_ACTIVE,
           ROUND((cast((FROM_TZ(CAST(S.AW_SCH_START as timestamp), 'US/Pacific') at time zone 'GMT') as date) -
                 to_date('01-JAN-1970','DD-MON-YYYY'))* (86400)) as EFFECT_STARTED,
           ROUND((cast((FROM_TZ(CAST(S.AW_SCH_END as timestamp), 'US/Pacific') at time zone 'GMT') as date) -
                 to_date('01-JAN-1970','DD-MON-YYYY'))* (86400)) as EFFECT_END
           FROM SO_JOB_TABLE J
           JOIN AW_MODULE_SCHED S ON J.SO_JOB_SEQ = S.AW_JOB_SEQ
           WHERE J.SO_COMMAND_TYPE = 'CHAIN' AND S.AW_ACTIVE = 'Y' """
    self.aw_cursor.execute(query)
    rows = DbUtil.dict_cursor(self.aw_cursor)
    for row in rows:
        schedule_record = AppworxFlowScheduleRecord(self.app_id,
                                                    row['SO_APPLICATION'] + ":" + row['SO_MODULE'],
                                                    row['AW_SCH_NAME'],
                                                    int(row['AW_SCH_INTERVAL']),
                                                    long(row['EFFECT_STARTED']),
                                                    long(row['EFFECT_END']),
                                                    '0',
                                                    self.wh_exec_id)
        schedule_writer.append(schedule_record)
    schedule_writer.close()
Example 4: update_dataset
def update_dataset(self, last_unixtime):
    if last_unixtime:
        sql = """
            SELECT * FROM dict_dataset WHERE from_unixtime(modified_time) >= DATE_SUB(from_unixtime(%f), INTERVAL 1 HOUR)
            """ % last_unixtime
    else:
        sql = """
            SELECT * FROM dict_dataset
            """
    url = self.elasticsearch_index_url + ':' + str(self.elasticsearch_port) + '/wherehows/dataset/_bulk'
    params = []
    self.wh_cursor.execute(sql)
    rows = DbUtil.copy_dict_cursor(self.wh_cursor)
    row_count = 1
    for row in rows:
        params.append('{ "index": { "_id": ' + str(row['id']) + ' }}')
        params.append(
            """{ "name": "%s", "source": "%s", "urn": "%s", "location_prefix": "%s", "parent_name": "%s",
            "schema_type": "%s", "properties": %s, "schema": %s , "fields": %s}"""
            % (row['name'] if row['name'] else '', row['source'] if row['source'] else '',
               row['urn'] if row['urn'] else '', row['location_prefix'] if row['location_prefix'] else '',
               row['parent_name'] if row['parent_name'] else '', row['schema_type'] if row['schema_type'] else '',
               json.dumps(row['properties']) if row['properties'] else '',
               json.dumps(row['schema']) if row['schema'] else '', json.dumps(row['fields']) if row['fields'] else ''))
        if row_count % 1000 == 0:
            self.bulk_insert(params, url)
            params = []
        row_count += 1
    if len(params) > 0:
        self.bulk_insert(params, url)
Example 5: update_comment
def update_comment(self, last_time):
    if last_time:
        sql = """
            SELECT * FROM comments WHERE modified >= DATE_SUB(%s, INTERVAL 1 HOUR)
            """ % last_time
    else:
        sql = """
            SELECT * FROM comments
            """
    url = self.elasticsearch_index_url + ':' + str(self.elasticsearch_port) + '/wherehows/comment/_bulk'
    params = []
    self.wh_cursor.execute(sql)
    rows = DbUtil.copy_dict_cursor(self.wh_cursor)
    row_count = 1
    for row in rows:
        params.append('{ "index": { "_id": ' + str(row['id']) + ', "parent": ' + str(row['dataset_id']) + ' }}')
        params.append(
            """{ "text": %s, "user_id": %d, "dataset_id": %d, "comment_type": "%s"}"""
            % (json.dumps(row['text']) if row['text'] else '', row['user_id'] if row['user_id'] else 0,
               row['dataset_id'] if row['dataset_id'] else 0, row['comment_type'] if row['comment_type'] else ''))
        if row_count % 1000 == 0:
            self.bulk_insert(params, url)
            params = []
        row_count += 1
    if len(params) > 0:
        self.bulk_insert(params, url)
Example 6: collect_flow_schedules
def collect_flow_schedules(self, schedule_file):
    self.logger.info("collect flow schedule")
    schedule_writer = FileWriter(schedule_file)
    query = """
        SELECT DISTINCT cj.id as ref_id, cj.frequency, cj.time_unit,
        unix_timestamp(cj.start_time) as start_time, unix_timestamp(cj.end_time) as end_time,
        wj.app_path
        FROM COORD_JOBS cj JOIN COORD_ACTIONS ca ON ca.job_id = cj.id JOIN WF_JOBS wj ON ca.external_id = wj.id
        WHERE cj.status = 'RUNNING'
        """
    self.oz_cursor.execute(query)
    rows = DbUtil.dict_cursor(self.oz_cursor)
    for row in rows:
        schedule_record = OozieFlowScheduleRecord(self.app_id,
                                                  row['app_path'],
                                                  row['time_unit'],
                                                  str(row['frequency']),
                                                  None,
                                                  row['start_time'],
                                                  row['end_time'],
                                                  row['ref_id'],
                                                  self.wh_exec_id)
        schedule_writer.append(schedule_record)
    schedule_writer.close()
Example 7: collect_job_execs
def collect_job_execs(self, job_exec_file, lookback_period):
    self.logger.info("collect job execs")
    job_exec_writer = FileWriter(job_exec_file)
    query = """
        select a.id as job_exec_id, a.name as job_name, j.id as flow_exec_id, a.status, a.user_retry_count,
        unix_timestamp(a.start_time) start_time, unix_timestamp(a.end_time) end_time,
        j.app_name as jname, j.app_path, transition from WF_ACTIONS a JOIN WF_JOBS j on a.wf_id = j.id where j.end_time > now() - INTERVAL %d MINUTE
        """ % (int(lookback_period))
    self.oz_cursor.execute(query)
    rows = DbUtil.dict_cursor(self.oz_cursor)

    for row in rows:
        job_exec_record = OozieJobExecRecord(self.app_id,
                                             row['app_path'],
                                             row['flow_exec_id'],
                                             row['flow_exec_id'],
                                             row['job_name'],
                                             row['app_path'] + "/" + row['job_name'],
                                             row['job_exec_id'],
                                             row['status'],
                                             row['user_retry_count'],
                                             row['start_time'],
                                             row['end_time'],
                                             self.wh_exec_id)
        job_exec_writer.append(job_exec_record)
    job_exec_writer.close()
Example 8: collect_flow_execs
def collect_flow_execs(self, flow_exec_file, job_exec_file, look_back_period):
    self.logger.info("collect flow&job executions")
    flow_exec_writer = FileWriter(flow_exec_file)
    job_exec_writer = FileWriter(job_exec_file)
    cmd = """select * from execution_flows where end_time > UNIX_TIMESTAMP(now() - INTERVAL %d MINUTE) * 1000 """ % (int(look_back_period))
    self.az_cursor.execute(cmd)
    rows = DbUtil.dict_cursor(self.az_cursor)
    row_count = 0
    for row in rows:
        # Azkaban stores the execution detail as a gzip-compressed JSON blob in flow_data.
        json_column = 'flow_data'
        unzipped_content = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(row[json_column].tostring())).read()
        try:
            row[json_column] = json.loads(unzipped_content)
        except Exception as e:
            self.logger.error(e)
            pass
        flow_data = row[json_column]
        flow_path = flow_data['projectName'] + ":" + flow_data['flowId']
        flow_exec_record = AzkabanFlowExecRecord(self.app_id,
                                                 flow_data['flowId'],
                                                 flow_path,
                                                 row['version'],
                                                 row['exec_id'],
                                                 flow_data['status'],
                                                 flow_data['attempt'],
                                                 row['submit_user'],
                                                 long(row['start_time']) / 1000,
                                                 long(row['end_time']) / 1000,
                                                 self.wh_exec_id)
        flow_exec_writer.append(flow_exec_record)

        # Each node in the decoded flow JSON is one job execution.
        nodes = flow_data['nodes']
        job_exec_records = []
        for node in nodes:
            job_exec_record = AzkabanJobExecRecord(self.app_id,
                                                   flow_path,
                                                   row['version'],
                                                   row['exec_id'],
                                                   node['id'],
                                                   flow_path + "/" + node['id'],
                                                   None,
                                                   node['status'],
                                                   node['attempt'],
                                                   long(node['startTime']) / 1000,
                                                   long(node['endTime']) / 1000,
                                                   self.wh_exec_id)
            job_exec_records.append(job_exec_record)
        AzkabanJobExecUtil.sortAndSet(job_exec_records)
        for r in job_exec_records:
            job_exec_writer.append(r)

        row_count += 1
        if row_count % 10000 == 0:
            flow_exec_writer.flush()
            job_exec_writer.flush()
    flow_exec_writer.close()
    job_exec_writer.close()
Example 9: submit_blog
def submit_blog():
    title, blog = (request.form['title'], request.form['blog'])
    # app.logger.debug(request.form['blog'])
    # x=open('temp.txt','w')
    # x.write(request.form['blog'])
    # x.close()
    time = datetime.datetime.now().ctime()
    res = DbUtil.save_blog(title, time, blog)
    return str(res)  # respond with the result of the save
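Example 9 comes from a different project: here DbUtil is a small persistence module behind a Flask view, and save_blog(title, time, blog) is expected to write the post and return a result. Its implementation is not shown; a purely hypothetical version using sqlite3 with a parameterized INSERT (the database file, table name and columns below are made up for illustration) could be:

import sqlite3

def save_blog(title, time, blog):
    # Hypothetical helper: store one post and report success to the caller.
    conn = sqlite3.connect('blog.db')
    try:
        conn.execute("INSERT INTO blog_posts (title, created, body) VALUES (?, ?, ?)",
                     (title, time, blog))
        conn.commit()
        return "saved"
    finally:
        conn.close()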
Example 10: update_dataset_field
def update_dataset_field(self, last_time):
    if last_time:
        sql = """
            SELECT * FROM dict_field_detail WHERE modified >= DATE_SUB(%s, INTERVAL 1 HOUR)
            """ % last_time
    else:
        sql = """
            SELECT * FROM dict_field_detail
            """
    comment_query = """
        SELECT d.field_id, d.dataset_id, f.comment FROM dict_dataset_field_comment d
        LEFT JOIN field_comments f ON d.comment_id = f.id WHERE d.field_id = %d
        """
    url = self.elasticsearch_index_url + ':' + str(self.elasticsearch_port) + '/wherehows/field/_bulk'
    params = []
    self.wh_cursor.execute(sql)
    rows = DbUtil.copy_dict_cursor(self.wh_cursor)
    row_count = 1
    for row in rows:
        self.wh_cursor.execute(comment_query % long(row['field_id']))
        comments = []
        comment_rows = DbUtil.copy_dict_cursor(self.wh_cursor)
        for comment_row in comment_rows:
            comments.append(comment_row['comment'])
        params.append('{ "index": { "_id": ' +
                      str(row['field_id']) + ', "parent": ' + str(row['dataset_id']) + ' }}')
        if len(comments) > 0:
            params.append(
                """{ "comments": %s, "dataset_id": %d, "sort_id": %d, "field_name": "%s", "parent_path": "%s"}"""
                % (json.dumps(comments) if comments else '', row['dataset_id'] if row['dataset_id'] else 0,
                   row['sort_id'] if row['sort_id'] else 0,
                   row['field_name'] if row['field_name'] else '', row['parent_path'] if row['parent_path'] else ''))
        else:
            params.append(
                """{ "comments": "", "dataset_id": %d, "sort_id": %d, "field_name": "%s", "parent_path": "%s"}"""
                % (row['dataset_id'] if row['dataset_id'] else 0, row['sort_id'] if row['sort_id'] else 0,
                   row['field_name'] if row['field_name'] else '', row['parent_path'] if row['parent_path'] else ''))
        if row_count % 1000 == 0:
            self.bulk_insert(params, url)
            params = []
        row_count += 1
    if len(params) > 0:
        self.bulk_insert(params, url)
Example 11: db_lookup
def db_lookup(self, dbname, default=None):
    query = \
        """
        SELECT db_id FROM cfg_database WHERE db_code = '%s' or short_connection_string = '%s'
        """
    self.aw_cursor.execute(query % (dbname, dbname))
    rows = DbUtil.dict_cursor(self.aw_cursor)
    for row in rows:
        return row['db_id']
    return 0
Example 12: update_metric
def update_metric(self):
    sql = """
        SELECT * FROM dict_business_metric
        """
    url = self.elasticsearch_index_url + ':' + str(self.elasticsearch_port) + '/wherehows/metric/_bulk'
    params = []
    self.wh_cursor.execute(sql)
    rows = DbUtil.copy_dict_cursor(self.wh_cursor)
    row_count = 1
    for row in rows:
        params.append('{ "index": { "_id": ' + str(row['metric_id']) + ' }}')
        params.append(
            """{"metric_id": %d, "metric_name": %s, "metric_description": %s, "dashboard_name": %s,
            "metric_group": %s, "metric_category": %s, "metric_sub_category": %s, "metric_level": %s,
            "metric_source_type": %s, "metric_source": %s, "metric_source_dataset_id": %d,
            "metric_ref_id_type": %s, "metric_ref_id": %s, "metric_type": %s, "metric_additive_type": %s,
            "metric_grain": %s, "metric_display_factor": %f, "metric_display_factor_sym": %s,
            "metric_good_direction": %s, "metric_formula": %s, "dimensions": %s, "owners": %s, "tags": %s,
            "urn": %s, "metric_url": %s, "wiki_url": %s, "scm_url": %s}"""
            % (row['metric_id'], json.dumps(row['metric_name']) if row['metric_name'] else json.dumps(''),
               json.dumps(row['metric_description']) if row['metric_description'] else json.dumps(''),
               json.dumps(row['dashboard_name']) if row['dashboard_name'] else json.dumps(''),
               json.dumps(row['metric_group']) if row['metric_group'] else json.dumps(''),
               json.dumps(row['metric_category']) if row['metric_category'] else json.dumps(''),
               json.dumps(row['metric_sub_category']) if row['metric_sub_category'] else json.dumps(''),
               json.dumps(row['metric_level']) if row['metric_level'] else json.dumps(''),
               json.dumps(row['metric_source_type']) if row['metric_source_type'] else json.dumps(''),
               json.dumps(row['metric_source']) if row['metric_source'] else json.dumps(''),
               row['metric_source_dataset_id'] if row['metric_source_dataset_id'] else 0,
               json.dumps(row['metric_ref_id_type']) if row['metric_ref_id_type'] else json.dumps(''),
               json.dumps(row['metric_ref_id']) if row['metric_ref_id'] else json.dumps(''),
               json.dumps(row['metric_type']) if row['metric_type'] else json.dumps(''),
               json.dumps(row['metric_additive_type']) if row['metric_additive_type'] else json.dumps(''),
               json.dumps(row['metric_grain']) if row['metric_grain'] else json.dumps(''),
               row['metric_display_factor'] if row['metric_display_factor'] else 0.0,
               json.dumps(row['metric_display_factor_sym']) if row['metric_display_factor_sym'] else json.dumps(''),
               json.dumps(row['metric_good_direction']) if row['metric_good_direction'] else json.dumps(''),
               json.dumps(row['metric_formula']) if row['metric_formula'] else json.dumps(''),
               json.dumps(row['dimensions']) if row['dimensions'] else json.dumps(''),
               json.dumps(row['owners']) if row['owners'] else json.dumps(''),
               json.dumps(row['tags']) if row['tags'] else json.dumps(''),
               json.dumps(row['urn']) if row['urn'] else json.dumps(''),
               json.dumps(row['metric_url']) if row['metric_url'] else json.dumps(''),
               json.dumps(row['wiki_url']) if row['wiki_url'] else json.dumps(''),
               json.dumps(row['scm_url']) if row['scm_url'] else json.dumps('')))
        if row_count % 1000 == 0:
            self.bulk_insert(params, url)
            params = []
        row_count += 1
    if len(params) > 0:
        self.bulk_insert(params, url)
Example 13: collect_flow_owners
def collect_flow_owners(self, owner_file):
    self.logger.info("collect owners")
    owner_writer = FileWriter(owner_file)
    query = "SELECT DISTINCT app_name, app_path, user_name from WF_JOBS"
    self.oz_cursor.execute(query)
    rows = DbUtil.dict_cursor(self.oz_cursor)

    for row in rows:
        owner_record = OozieFlowOwnerRecord(self.app_id,
                                            row['app_path'],
                                            row['user_name'],
                                            self.wh_exec_id)
        owner_writer.append(owner_record)
    owner_writer.close()
Example 14: collect_flow_schedules
def collect_flow_schedules(self, schedule_file):
    # load flow scheduling info from table triggers
    self.logger.info("collect flow schedule")
    schedule_writer = FileWriter(schedule_file)
    query = "select * from triggers"
    self.az_cursor.execute(query)
    rows = DbUtil.dict_cursor(self.az_cursor)
    for row in rows:
        json_column = "data"
        if row[json_column] != None:
            # The trigger definition is stored as a gzip-compressed JSON blob.
            unzipped_content = gzip.GzipFile(
                mode="r", fileobj=StringIO.StringIO(row[json_column].tostring())
            ).read()
            try:
                row[json_column] = json.loads(unzipped_content)
            except Exception as e:
                self.logger.error(e)
                pass

            if not "projectId" in row[json_column]["actions"][0]["actionJson"]:
                continue
            # print json.dumps(row[json_column], indent=4)
            if row[json_column]["triggerCondition"]["checkers"][0]["checkerJson"]["isRecurring"] == "true":
                unit, frequency, cron_expr = None, None, None
                # The trailing character of "period" selects the time unit; the rest is the frequency.
                period = row[json_column]["triggerCondition"]["checkers"][0]["checkerJson"]["period"]
                if period is not None and period != "null" and period[-1:] in self._period_unit_table:
                    unit = self._period_unit_table[period[-1:]]
                    frequency = int(
                        row[json_column]["triggerCondition"]["checkers"][0]["checkerJson"]["period"][:-1]
                    )
                if "cronExpression" in row[json_column]["triggerCondition"]["checkers"][0]["checkerJson"]:
                    cron_expr = row[json_column]["triggerCondition"]["checkers"][0]["checkerJson"]["cronExpression"]
                schedule_record = AzkabanFlowScheduleRecord(
                    self.app_id,
                    row[json_column]["actions"][0]["actionJson"]["projectName"]
                    + ":"
                    + row[json_column]["actions"][0]["actionJson"]["flowName"],
                    unit,
                    frequency,
                    cron_expr,
                    long(row[json_column]["triggerCondition"]["checkers"][0]["checkerJson"]["firstCheckTime"])
                    / 1000,
                    int(time.mktime(datetime.date(2099, 12, 31).timetuple())),
                    "0",
                    self.wh_exec_id,
                )
                schedule_writer.append(schedule_record)
    schedule_writer.close()
Example 15: get_last_execution_unix_time
def get_last_execution_unix_time(self):
    if self.last_execution_unix_time is None:
        try:
            query = """
                SELECT MAX(end_time) as last_time FROM job_execution where app_id = %d
                """
            self.wh_cursor.execute(query % self.app_id)
            rows = DbUtil.dict_cursor(self.wh_cursor)
            if rows:
                for row in rows:
                    self.last_execution_unix_time = long(row['last_time'])
                    break
        except:
            self.logger.error("Get the last execution time from job_execution failed")
            self.last_execution_unix_time = None
    return self.last_execution_unix_time
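The timestamp cached by get_last_execution_unix_time is the kind of value the incremental updates above (update_flow_jobs, update_dataset) accept as last_unixtime, so a driver run could chain them roughly as follows. This is a hedged sketch: the indexer instance name and the exact wiring are assumptions, not code from the project.

# Sketch only: 'indexer' stands for an object exposing the methods shown in these examples.
last_unixtime = indexer.get_last_execution_unix_time()   # None on a first run -> full re-index
indexer.update_flow_jobs(last_unixtime)
indexer.update_dataset(last_unixtime)
indexer.update_metric()   # no incremental filter; the metric index is always rebuilt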