本文整理汇总了Python中remotespark.utils.log.Log.debug方法的典型用法代码示例。如果您正苦于以下问题:Python Log.debug方法的具体用法?Python Log.debug怎么用?Python Log.debug使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类remotespark.utils.log.Log的用法示例。
在下文中一共展示了Log.debug方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Command
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class Command(ObjectWithGuid):
    """A single block of Spark code plus the logic to run it on a Livy session."""

    def __init__(self, code, spark_events=None):
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = Log(u"Command")
        self._spark_events = SparkEvents() if spark_events is None else spark_events

    def __eq__(self, other):
        # Two commands are interchangeable when their (dedented) code matches.
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def execute(self, session):
        """Post the code to *session* and block until Livy reports a result.

        Emits start/end telemetry events; on failure the end event carries the
        exception class and message before the exception is re-raised.
        """
        self._spark_events.emit_statement_execution_start_event(session.guid, session.kind, session.id, self.guid)
        statement_id = -1
        try:
            session.wait_for_idle()
            response = session.http_client.post_statement(session.id, {u"code": self.code})
            statement_id = response[u'id']
            output = self._get_statement_output(session, statement_id)
        except Exception as e:
            self._spark_events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                                  self.guid, statement_id, False, e.__class__.__name__,
                                                                  str(e))
            raise
        else:
            self._spark_events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                                  self.guid, statement_id, True, "", "")
            return output

    def _get_statement_output(self, session, statement_id):
        """Poll the statement until it leaves the 'running' state.

        Returns (True, text) on success or (False, error text) on failure;
        raises LivyUnexpectedStatusException for any other output status.
        """
        while True:
            statement = session.http_client.get_statement(session.id, statement_id)
            status = statement[u"state"]
            self.logger.debug(u"Status of statement {} is {}.".format(statement_id, status))
            if status == u"running":
                session.sleep()
                continue
            statement_output = statement[u"output"]
            result_status = statement_output[u"status"]
            if result_status == u"ok":
                return (True, statement_output[u"data"][u"text/plain"])
            if result_status == u"error":
                return (False,
                        statement_output[u"evalue"] + u"\n" + u"".join(statement_output[u"traceback"]))
            raise LivyUnexpectedStatusException(u"Unknown output status from Livy: '{}'"
                                                .format(result_status))
示例2: Command
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class Command(ObjectWithGuid):
    """Wraps a dedented block of code to be executed against a Livy session."""

    def __init__(self, code):
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = Log("Command")

    def __eq__(self, other):
        # Equality is purely textual: same (dedented) code, same command.
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def execute(self, session):
        """Submit the code to *session* once it is idle; return the statement output."""
        session.wait_for_idle()
        response = session.http_client.post_statement(session.id, {"code": self.code})
        return self._get_statement_output(session, response['id'])

    def _get_statement_output(self, session, statement_id):
        """Poll Livy until the statement finishes.

        Returns (True, text) on success, (False, error text) on failure;
        raises ValueError for an unrecognized output status.
        """
        while True:
            statement = session.http_client.get_statement(session.id, statement_id)
            status = statement["state"]
            self.logger.debug("Status of statement {} is {}.".format(statement_id, status))
            if status == "running":
                session.sleep()
                continue
            statement_output = statement["output"]
            result_status = statement_output["status"]
            if result_status == "ok":
                return (True, statement_output["data"]["text/plain"])
            if result_status == "error":
                return (False,
                        statement_output["evalue"] + "\n" + "".join(statement_output["traceback"]))
            raise ValueError("Unknown output status: '{}'".format(result_status))
示例3: SparkMagicBase
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class SparkMagicBase(Magics):
    """Base class for the Spark IPython magics; wires display, controller and optional state persistence."""

    def __init__(self, shell, data=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        try:
            if conf.serialize():
                self.logger.debug("Serialization enabled.")
                self.magics_home_path = get_magics_home_path()
                serialize_target = join_paths(self.magics_home_path, "state.json")
                self.logger.debug("Will serialize to {}.".format(serialize_target))
                # Swap in a controller that persists its state to disk.
                self.spark_controller = SparkController(self.ipython_display, serialize_path=serialize_target)
            else:
                self.logger.debug("Serialization NOT enabled.")
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    def execute_sqlquery(self, sqlquery, session, output_var, quiet):
        """Run *sqlquery* on the session; optionally bind the dataframe to *output_var*.

        Returns the dataframe, or None when quiet or on a parse failure
        (which is reported through ipython_display instead of raised).
        """
        try:
            df = self.spark_controller.run_cell_sql(sqlquery, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = df
            return None if quiet else df
        except DataFrameParseException as e:
            self.ipython_display.send_error(e.out)
            return None

    @staticmethod
    def print_endpoint_info(info_sessions):
        # One line per session, prefixed for readability.
        sessions_info = [" {}".format(i) for i in info_sessions]
        print("""Info for endpoint:
Sessions:
{}
""".format("\n".join(sessions_info)))
示例4: SparkMagicBase
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class SparkMagicBase(Magics):
    """Base class for the Spark IPython magics; emits a library-loaded telemetry event on init."""

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)
        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)
        self.logger.debug("Initialized spark magics.")
        events = SparkEvents() if spark_events is None else spark_events
        events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        """Build a SQLQuery from the cell, run it, and optionally bind the result.

        Returns the dataframe, or None when quiet is truthy.
        """
        query = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(query, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        return None if quiet else df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        # Factory hook: wraps the raw cell text in a SQLQuery object.
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    @staticmethod
    def print_endpoint_info(info_sessions):
        sessions_info = [" {}".format(i) for i in info_sessions]
        print("""Info for endpoint:
Sessions:
{}
""".format("\n".join(sessions_info)))
示例5: ReliableHttpClient
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class ReliableHttpClient(object):
    """Http client that is reliable in its requests. Uses requests library."""

    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = Log("ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            # User opted out of SSL verification; silence urllib3's warnings too.
            self.logger.debug("ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks.")
            requests.packages.urllib3.disable_warnings()

    def compose_url(self, relative_url):
        """Join the endpoint base URL with *relative_url*, normalizing slashes."""
        trimmed = relative_url.rstrip("/").lstrip("/")
        return self._endpoint.url + "/{}".format(trimmed)

    def get(self, relative_url, accepted_status_codes):
        """Sends a get request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.get)

    def post(self, relative_url, accepted_status_codes, data):
        """Sends a post request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.post, data)

    def delete(self, relative_url, accepted_status_codes):
        """Sends a delete request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.delete)

    def _send_request(self, relative_url, accepted_status_codes, function, data=None):
        return self._send_request_helper(self.compose_url(relative_url), accepted_status_codes, function, data, 0)

    def _send_request_helper(self, url, accepted_status_codes, function, data, retry_count):
        """Issue the request, retrying per the retry policy until accepted or exhausted.

        Raises HttpClientException when the status is unacceptable and the
        retry policy declines to retry.
        """
        while True:
            try:
                kwargs = {"headers": self._headers, "verify": self.verify_ssl}
                if self._endpoint.authenticate:
                    kwargs["auth"] = (self._endpoint.username, self._endpoint.password)
                if data is not None:
                    kwargs["data"] = json.dumps(data)
                r = function(url, **kwargs)
            except requests.exceptions.RequestException as e:
                error = True
                r = None
                status = None
                self.logger.error("Request to '{}' failed with '{}'".format(url, e))
            else:
                error = False
                status = r.status_code
            if not error and status in accepted_status_codes:
                return r
            if not self._retry_policy.should_retry(status, error, retry_count):
                raise HttpClientException("Invalid status code '{}' or error '{}' from {}"
                                          .format(status, error, url))
            sleep(self._retry_policy.seconds_to_sleep(retry_count))
            retry_count += 1
示例6: ClientManagerStateSerializer
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class ClientManagerStateSerializer(object):
    """Livy client manager state serializer: persists/restores clients as one JSON line."""

    def __init__(self, reader_writer):
        assert reader_writer is not None

        self.logger = Log("ClientManagerStateSerializer")
        self._ipython_display = IpythonDisplay()
        self._reader_writer = reader_writer

    def deserialize_state(self):
        """Read persisted state and rebuild (name, client) pairs for live sessions.

        Sessions already in a final status, or whose status cannot be fetched,
        are skipped with an error log instead of raising.
        """
        self.logger.debug("Deserializing state.")

        clients_to_return = []
        serialized = ''.join(self._reader_writer.read_lines()).strip()
        if serialized == '':
            self.logger.debug("Empty manager state found.")
            return clients_to_return

        self.logger.debug("Read content. Converting to JSON.")
        for client in json.loads(serialized)["clients"]:
            # Ignore the stored version field for now.
            name = client["name"]
            session_id = client["id"]
            sql_context_created = client["sqlcontext"]
            kind = client["kind"].lower()
            connection_string = client["connectionstring"]

            session = self._create_livy_session(connection_string, {"kind": kind}, self._ipython_display,
                                                session_id, sql_context_created)

            # Do not start the session automatically; just recreate the handle
            # and skip it if the remote session no longer exists.
            try:
                # Fetch the status to learn whether the session is still alive.
                status = session.status
                if not session.is_final_status(status):
                    self.logger.debug("Adding session {}".format(session_id))
                    clients_to_return.append((name, self._create_livy_client(session)))
                else:
                    self.logger.error("Skipping serialized session '{}' because session was in status {}."
                                      .format(session.id, status))
            except (ValueError, ConnectionError) as e:
                self.logger.error("Skipping serialized session '{}' because {}".format(session.id, str(e)))

        return clients_to_return

    def serialize_state(self, name_client_dictionary):
        """Persist every client (plus its registry name) as a single JSON line."""
        self.logger.debug("Serializing state.")

        serialized_clients = []
        for name, client in name_client_dictionary.items():
            record = client.serialize()
            record["name"] = name
            serialized_clients.append(record)

        self._reader_writer.overwrite_with_line(json.dumps({"clients": serialized_clients}))

    def _create_livy_session(self, connection_string, properties, ipython_display,
                             session_id, sql_context_created):
        # Factory hook for building a session handle from persisted fields.
        return LivySession.from_connection_string(connection_string, properties, ipython_display,
                                                  session_id, sql_context_created)

    def _create_livy_client(self, session):
        # Factory hook for wrapping a session in a client.
        return LivyClient(session)
示例7: SparkKernelBase
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class SparkKernelBase(IPythonKernel):
    """Base class for Jupyter kernels that proxy cell execution to a remote Spark/Livy endpoint.

    NOTE: the tail of this class (the body of _complete_cell onward) is omitted
    from this excerpt.
    """

    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        # NOTE(review): "_jupyter_kernel".format(...) has no "{}" placeholder, so
        # session_language is silently dropped; likely meant "{}_jupyter_kernel".
        self.logger = Log("_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            # One-time kernel setup: magics, language selection, optional auto-viz.
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        # If a fatal error was recorded earlier, repeat it instead of executing new code.
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()
            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup: tear down the remote session before the kernel itself shuts down.
        self._delete_session()
        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        # Let the parser rewrite user code (e.g. wrap it in magics) before running it.
        code_to_run = self.user_code_parser.get_code_to_run(code)
        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)
        return res

    def _load_magics_extension(self):
        # Failure to load the magics library is fatal for this kernel.
        register_magics_code = "%load_ext remotespark.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        # Point the magics at this kernel's session language.
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        # Install a display hook so pandas DataFrames render via display_dataframe.
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        # Run code; optionally abort the kernel with a fatal error when it failed.
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        # Delegate to the real IPython kernel execution.
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
#.........这里部分代码省略.........
示例8: SparkKernelBase
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
#.........这里部分代码省略.........
def do_shutdown(self, restart):
    """Tear down the Livy session, then let IPythonKernel finish shutting down."""
    self._delete_session()
    return self._do_shutdown_ipykernel(restart)
@staticmethod
def _get_code_transformer(subcommand):
    """Map a parsed user subcommand to the transformer class that handles it."""
    transformers = {
        UserCommandParser.run_command: SparkTransformer,
        UserCommandParser.sql_command: SqlTransformer,
        UserCommandParser.hive_command: HiveTransformer,
        UserCommandParser.config_command: ConfigTransformer,
        UserCommandParser.info_command: InfoTransformer,
        UserCommandParser.delete_command: DeleteSessionTransformer,
        UserCommandParser.clean_up_command: CleanUpTransformer,
        UserCommandParser.logs_command: LogsTransformer,
        UserCommandParser.local_command: PythonTransformer,
    }
    # Unrecognized subcommands fall through to NotSupportedTransformer.
    return transformers.get(subcommand, NotSupportedTransformer)(subcommand)
def _load_magics_extension(self):
    """Load the remotespark magics into IPython; a failure is fatal to the kernel."""
    self._execute_cell("%load_ext remotespark", True, False, shutdown_if_error=True,
                       log_if_error="Failed to load the Spark magics library.")
    self._logger.debug("Loaded magics.")
def _register_auto_viz(self):
    """Install the display hook that renders pandas DataFrames via display_dataframe."""
    auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
    self._execute_cell(auto_viz_code, True, False, shutdown_if_error=True,
                       log_if_error="Failed to register auto viz for notebook.")
    self._logger.debug("Registered auto viz.")
def _start_session(self):
    """Create the Livy session on first use; later calls are no-ops."""
    if self._session_started:
        return
    self._session_started = True
    add_session_code = "%spark add {} {} {} skip".format(
        self.client_name, self.session_language, self.connection_string)
    self._execute_cell(add_session_code, True, False, shutdown_if_error=True,
                       log_if_error="Failed to create a Livy session.")
    self._logger.debug("Added session.")
def _delete_session(self):
    """Run the cleanup magic, but only if a session was ever started."""
    if not self._session_started:
        return
    self._execute_cell_for_user("%spark cleanup", True, False)
    self._session_started = False
def _get_configuration(self):
    """Returns (username, password, url). If there is an error (missing configuration),
    returns False."""
    try:
        # Looks up e.g. conf.kernel_<kernel_conf_name>_credentials() dynamically.
        credentials = getattr(conf, 'kernel_' + self.kernel_conf_name + '_credentials')()
        ret = (credentials['username'], credentials['password'], credentials['url'])
        # NOTE: the remainder of this method (except/return path) is omitted from this excerpt.
示例9: SparkController
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class SparkController(object):
    """Routes magic-level requests to the right Livy session via the session manager."""

    def __init__(self, ipython_display):
        self.logger = Log(u"SparkController")
        self.ipython_display = ipython_display
        self.session_manager = SessionManager()

    def get_app_id(self, client_name=None):
        return self.get_session_by_name_or_default(client_name).get_app_id()

    def get_driver_log_url(self, client_name=None):
        return self.get_session_by_name_or_default(client_name).get_driver_log_url()

    def get_logs(self, client_name=None):
        return self.get_session_by_name_or_default(client_name).get_logs()

    def get_spark_ui_url(self, client_name=None):
        return self.get_session_by_name_or_default(client_name).get_spark_ui_url()

    def run_command(self, command, client_name=None):
        target = self.get_session_by_name_or_default(client_name)
        return command.execute(target)

    def run_sqlquery(self, sqlquery, client_name=None):
        target = self.get_session_by_name_or_default(client_name)
        return sqlquery.execute(target)

    def get_all_sessions_endpoint(self, endpoint):
        """Build a LivySession for every session at *endpoint*, then refresh their statuses."""
        http_client = self._http_client(endpoint)
        raw_sessions = http_client.get_sessions()[u"sessions"]
        sessions = [self._livy_session(http_client, {u"kind": s[u"kind"]},
                                       self.ipython_display, s[u"id"])
                    for s in raw_sessions]
        for session in sessions:
            session.refresh_status()
        return sessions

    def get_all_sessions_endpoint_info(self, endpoint):
        return [str(s) for s in self.get_all_sessions_endpoint(endpoint)]

    def cleanup(self):
        self.session_manager.clean_up_all()

    def cleanup_endpoint(self, endpoint):
        for session in self.get_all_sessions_endpoint(endpoint):
            session.delete()

    def delete_session_by_name(self, name):
        self.session_manager.delete_client(name)

    def delete_session_by_id(self, endpoint, session_id):
        # Look the session up first so its kind can be passed when constructing it.
        http_client = self._http_client(endpoint)
        response = http_client.get_session(session_id)
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, {u"kind": response[u"kind"]},
                                     self.ipython_display, session_id, False)
        session.delete()

    def add_session(self, name, endpoint, skip_if_exists, properties):
        """Create, register and start a new session unless *name* already exists."""
        if skip_if_exists and (name in self.session_manager.get_sessions_list()):
            self.logger.debug(u"Skipping {} because it already exists in list of sessions.".format(name))
            return
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, properties, self.ipython_display)
        self.session_manager.add_session(name, session)
        session.start()

    def get_session_id_for_client(self, name):
        return self.session_manager.get_session_id_for_client(name)

    def get_client_keys(self):
        return self.session_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        return self.session_manager.get_sessions_info()

    def get_session_by_name_or_default(self, client_name):
        # No name -> whichever single session exists; otherwise case-insensitive lookup.
        if client_name is None:
            return self.session_manager.get_any_session()
        return self.session_manager.get_session(client_name.lower())

    def get_managed_clients(self):
        return self.session_manager.sessions

    @staticmethod
    def _livy_session(http_client, properties, ipython_display,
                      session_id=-1, sql_created=None):
        # Factory hook for building sessions.
        return LivySession(http_client, properties, ipython_display,
                           session_id, sql_created)

    @staticmethod
    def _http_client(endpoint):
        # Factory hook for building HTTP clients.
        return LivyReliableHttpClient.from_endpoint(endpoint)
示例10: LivySession
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class LivySession(ObjectWithGuid):
    """Session that is livy specific.

    NOTE: the tail of this class (after delete) is omitted from this excerpt.
    """

    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None):
        super(LivySession, self).__init__()
        assert "kind" in list(properties.keys())
        kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._spark_events = SparkEvents()

        # Polling/timeout knobs come from configuration and must be positive.
        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()
        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0

        # sql_created=True only makes sense when reconnecting to an existing session.
        if session_id == -1 and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")
        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        if session_id == -1:
            # Brand-new session: not started yet, so no SQL context either.
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            # Reconnecting to an existing session; assume busy until refreshed.
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created

    def __str__(self):
        return "Session id: {}\tKind: {}\tState: {}".format(self.id, self.kind, self.status)

    def start(self, create_sql_context=True):
        """Start the session against actual livy server."""
        self._spark_events.emit_session_creation_start_event(self.guid, self.kind)

        r = self._http_client.post_session(self.properties)
        self.id = r["id"]
        self.status = str(r["state"])

        self.ipython_display.writeln("Creating SparkContext as 'sc'")

        # We wait for livy_session_startup_timeout_seconds() for the session to start up.
        try:
            self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
        except LivyClientTimeoutError:
            raise LivyClientTimeoutError("Session {} did not start up in {} seconds."
                                         .format(self.id, conf.livy_session_startup_timeout_seconds()))

        if create_sql_context:
            self.create_sql_context()

        self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status)

    def create_sql_context(self):
        """Create a sqlContext object on the session. Object will be accessible via variable 'sqlContext'."""
        if self.created_sql_context:
            # Idempotent: skip if already created.
            return
        self.logger.debug("Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln("Creating HiveContext as 'sqlContext'")
        command = self._get_sql_context_creation_command()
        try:
            command.execute(self)
        except LivyClientTimeoutError:
            raise LivyClientTimeoutError("Failed to create the SqlContext in time. Timed out after {} seconds."
                                         .format(self._wait_for_idle_timeout_seconds))
        self.created_sql_context = True

    def get_logs(self):
        # Fetch and cache the full session log from Livy.
        log_array = self._http_client.get_all_session_logs(self.id)['log']
        self._logs = "\n".join(log_array)
        return self._logs

    @property
    def http_client(self):
        return self._http_client

    @staticmethod
    def is_final_status(status):
        return status in constants.FINAL_STATUS

    def delete(self):
        # Skip the HTTP call for sessions never started or already dead.
        self.logger.debug("Deleting session '{}'".format(self.id))
        if self.status != constants.NOT_STARTED_SESSION_STATUS and self.status != constants.DEAD_SESSION_STATUS:
            self._http_client.delete_session(self.id)
#.........这里部分代码省略.........
示例11: ClientManager
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class ClientManager(object):
    """Livy client manager: a name -> client registry with optional persistence."""

    def __init__(self, serializer=None):
        periodic = False
        period_seconds = 3
        if serializer is not None:
            periodic = conf.serialize_periodically()
            period_seconds = conf.serialize_period_seconds()

        self.logger = Log("ClientManager")
        self._livy_clients = dict()
        self._serializer = serializer
        self._serialize_timer = None

        if self._serializer is not None:
            # Restore previously persisted clients, then optionally keep
            # persisting state on a timer.
            for (name, client) in self._serializer.deserialize_state():
                self.add_client(name, client)
            if periodic:
                self._serialize_state_periodically(period_seconds)

    def _serialize_state_periodically(self, serialize_period):
        self.logger.debug("Starting state serialize timer.")
        self._serialize_timer = Timer(serialize_period, self._serialize_state)
        self._serialize_timer.start()

    def _serialize_state(self):
        self._serializer.serialize_state(self._livy_clients)

    @property
    def livy_clients(self):
        return self._livy_clients

    def get_sessions_list(self):
        return list(self._livy_clients.keys())

    def get_sessions_info(self):
        return ["Name: {}\t{}".format(name, str(client))
                for name, client in self._livy_clients.items()]

    def add_client(self, name, livy_client):
        """Register a client under *name*; duplicate names are an error."""
        if name in self.get_sessions_list():
            raise ValueError("Session with name '{}' already exists. Please delete the session"
                             " first if you intend to replace it.".format(name))
        self._livy_clients[name] = livy_client

    def get_any_client(self):
        """Return the sole registered client; error if zero or several exist."""
        count = len(self._livy_clients)
        if count == 0:
            raise AssertionError("You need to have at least 1 client created to execute commands.")
        if count > 1:
            raise AssertionError("Please specify the client to use. Possible sessions are {}".format(
                self.get_sessions_list()))
        only_key = self.get_sessions_list()[0]
        return self._livy_clients[only_key]

    def get_client(self, name):
        if name not in self.get_sessions_list():
            raise ValueError("Could not find '{}' session in list of saved sessions. Possible sessions are {}".format(
                name, self.get_sessions_list()))
        return self._livy_clients[name]

    def get_session_id_for_client(self, name):
        # Unknown names yield None rather than an error.
        if name in self.get_sessions_list():
            return self._livy_clients[name].session_id
        return None

    def delete_client(self, name):
        self._remove_session(name)

    def clean_up_all(self):
        for name in self.get_sessions_list():
            self._remove_session(name)
        if self._serializer is not None:
            self._serialize_state()

    def _remove_session(self, name):
        if name not in self.get_sessions_list():
            raise ValueError("Could not find '{}' session in list of saved sessions. Possible sessions are {}"
                             .format(name, self.get_sessions_list()))
        self._livy_clients[name].close_session()
        del self._livy_clients[name]
示例12: SparkController
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class SparkController(object):
    """Coordinates Livy clients for the magics, optionally persisting state to disk."""

    def __init__(self, ipython_display, serialize_path=None):
        self.logger = Log("SparkController")
        self.ipython_display = ipython_display
        self.client_factory = LivyClientFactory()
        if serialize_path is None:
            self.client_manager = ClientManager()
        else:
            serializer = ClientManagerStateSerializer(self.client_factory, FileSystemReaderWriter(serialize_path))
            self.client_manager = ClientManager(serializer)

    def get_logs(self, client_name=None):
        return self.get_client_by_name_or_default(client_name).get_logs()

    def run_cell(self, cell, client_name=None):
        return self.get_client_by_name_or_default(client_name).execute(cell)

    def run_cell_sql(self, cell, client_name=None):
        return self.get_client_by_name_or_default(client_name).execute_sql(cell)

    def run_cell_hive(self, cell, client_name=None):
        return self.get_client_by_name_or_default(client_name).execute_hive(cell)

    def get_all_sessions_endpoint(self, connection_string):
        """List session objects for every session at the endpoint, statuses refreshed."""
        http_client = self.client_factory.create_http_client(connection_string)
        response = http_client.get("/sessions", [200])
        sessions = [self.client_factory.create_session(self.ipython_display, connection_string,
                                                       {"kind": s["kind"]}, s["id"])
                    for s in response.json()["sessions"]]
        for session in sessions:
            session._refresh_status()
        return sessions

    def get_all_sessions_endpoint_info(self, connection_string):
        return [str(s) for s in self.get_all_sessions_endpoint(connection_string)]

    def cleanup(self):
        self.client_manager.clean_up_all()

    def cleanup_endpoint(self, connection_string):
        for session in self.get_all_sessions_endpoint(connection_string):
            session.delete()

    def delete_session_by_name(self, name):
        self.client_manager.delete_client(name)

    def delete_session_by_id(self, connection_string, session_id):
        # A 404 means the session is already gone; nothing to delete then.
        http_client = self.client_factory.create_http_client(connection_string)
        r = http_client.get("/sessions/{}".format(session_id), [200, 404])
        if r.status_code != 404:
            session = self.client_factory.create_session(self.ipython_display, connection_string,
                                                         {"kind": r.json()["kind"]}, session_id, False)
            session.delete()

    def add_session(self, name, connection_string, skip_if_exists, properties):
        """Create, register and start a session unless *name* is already registered."""
        if skip_if_exists and (name in self.client_manager.get_sessions_list()):
            self.logger.debug("Skipping {} because it already exists in list of sessions.".format(name))
            return
        session = self.client_factory.create_session(self.ipython_display, connection_string, properties, "-1", False)
        session.start()
        livy_client = self.client_factory.build_client(session)
        self.client_manager.add_client(name, livy_client)
        livy_client.start()

    def get_client_keys(self):
        return self.client_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        return self.client_manager.get_sessions_info()

    def get_client_by_name_or_default(self, client_name):
        # No name -> the single registered client; names are stored lowercase.
        if client_name is None:
            return self.client_manager.get_any_client()
        return self.client_manager.get_client(client_name.lower())
示例13: LivySession
# 需要导入模块: from remotespark.utils.log import Log [as 别名]
# 或者: from remotespark.utils.log.Log import debug [as 别名]
class LivySession(object):
"""Session that is livy specific."""
def __init__(self, ipython_display, http_client, session_id, sql_created, properties):
    """Create a Livy session wrapper.

    Parameters
    ----------
    ipython_display : display helper used for user-facing output.
    http_client : client for the Livy REST endpoint.
    session_id : str — id of an existing session, or "-1" for one not created yet.
    sql_created : bool — whether the SQL context already exists on the session.
    properties : dict sent to Livy on creation; must contain a "kind" entry.

    Raises
    ------
    ValueError : if sql_created is claimed for a non-existent ("-1") session,
        or the kind is not one of the supported session kinds.
    """
    # Membership test directly on the dict; `.keys()` was redundant.
    assert "kind" in properties
    kind = properties["kind"]
    self.properties = properties
    self.ipython_display = ipython_display
    # Polling/timeout knobs come from configuration and must all be positive.
    status_sleep_seconds = conf.status_sleep_seconds()
    statement_sleep_seconds = conf.statement_sleep_seconds()
    create_sql_context_timeout_seconds = conf.create_sql_context_timeout_seconds()
    assert status_sleep_seconds > 0
    assert statement_sleep_seconds > 0
    assert create_sql_context_timeout_seconds > 0
    # A session that was never started ("-1") cannot already have a SQL context.
    if session_id == "-1" and sql_created is True:
        raise ValueError("Cannot indicate sql state without session id.")
    self.logger = Log("LivySession")
    kind = kind.lower()
    if kind not in Constants.session_kinds_supported:
        raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                         .format(kind, ", ".join(Constants.session_kinds_supported)))
    if session_id == "-1":
        # Brand-new session: not started yet, and no SQL context can exist.
        self._status = Constants.not_started_session_status
        sql_created = False
    else:
        # Existing session: marked busy until refreshed — NOTE(review): confirm intent.
        self._status = Constants.busy_session_status
    self._logs = ""
    self._http_client = http_client
    self._status_sleep_seconds = status_sleep_seconds
    self._statement_sleep_seconds = statement_sleep_seconds
    self._create_sql_context_timeout_seconds = create_sql_context_timeout_seconds
    self._state = LivySessionState(session_id, http_client.connection_string,
                                   kind, sql_created)
def __str__(self):
return "Session id: {}\tKind: {}\tState: {}".format(self.id, self.kind, self._status)
def get_state(self):
return self._state
def start(self):
"""Start the session against actual livy server."""
self.logger.debug("Starting '{}' session.".format(self.kind))
r = self._http_client.post("/sessions", [201], self.properties)
self._state.session_id = str(r.json()["id"])
self._status = str(r.json()["state"])
self.ipython_display.writeln("Creating SparkContext as 'sc'")
self.logger.debug("Session '{}' started.".format(self.kind))
def create_sql_context(self):
"""Create a sqlContext object on the session. Object will be accessible via variable 'sqlContext'."""
if self.started_sql_context:
return
self.logger.debug("Starting '{}' sql and hive session.".format(self.kind))
self.ipython_display.writeln("Creating SqlContext as 'sqlContext'")
self._create_context(Constants.context_name_sql)
self.ipython_display.writeln("Creating HiveContext as 'hiveContext'")
self._create_context(Constants.context_name_hive)
self._state.sql_context_created = True
def _create_context(self, context_type):
    """Run the creation command for one context type ('sql' or 'hive').

    Raises ValueError for unknown context types, and re-raises a timeout from
    wait_for_idle as a LivyClientTimeoutError with a context-specific message.
    """
    if context_type == Constants.context_name_sql:
        creation_command = self._get_sql_context_creation_command()
    elif context_type == Constants.context_name_hive:
        creation_command = self._get_hive_context_creation_command()
    else:
        raise ValueError("Cannot create context of type {}.".format(context_type))
    try:
        # The session must be idle before we can execute the creation statement.
        self.wait_for_idle(self._create_sql_context_timeout_seconds)
        self.execute(creation_command)
        self.logger.debug("Started '{}' {} session.".format(self.kind, context_type))
    except LivyClientTimeoutError:
        raise LivyClientTimeoutError(
            "Failed to create the {} context in time. Timed out after {} seconds."
            .format(context_type, self._create_sql_context_timeout_seconds))
@property
def id(self):
return self._state.session_id
@property
def started_sql_context(self):
return self._state.sql_context_created
@property
def kind(self):
return self._state.kind
#......... part of the code omitted here .........
Example 14: RemoteSparkMagics
# Required import: from remotespark.utils.log import Log [as alias]
# Or: from remotespark.utils.log.Log import debug [as alias]
class RemoteSparkMagics(Magics):
def __init__(self, shell, data=None):
    """Set up logging, display and the Spark controller for the magics.

    When serialization is enabled via configuration, the controller is
    replaced with one that persists its state to 'state.json' under the
    magics home directory; missing env vars disable serialization.
    """
    # The parent (IPython Magics) constructor must run first.
    super(RemoteSparkMagics, self).__init__(shell)
    self.logger = Log("RemoteSparkMagics")
    self.ipython_display = IpythonDisplay()
    self.spark_controller = SparkController(self.ipython_display)
    try:
        if conf.serialize():
            self.logger.debug("Serialization enabled.")
            self.magics_home_path = get_magics_home_path()
            path_to_serialize = join_paths(self.magics_home_path, "state.json")
            self.logger.debug("Will serialize to {}.".format(path_to_serialize))
            # Replace the plain controller with one that persists its state.
            self.spark_controller = SparkController(self.ipython_display,
                                                    serialize_path=path_to_serialize)
        else:
            self.logger.debug("Serialization NOT enabled.")
    except KeyError:
        self.logger.error("Could not read env vars for serialization.")
    self.logger.debug("Initialized spark magics.")
@magic_arguments()
@argument("-c", "--context", type=str, default=Constants.context_name_spark,
help="Context to use: '{}' for spark, '{}' for sql queries, and '{}' for hive queries. "
"Default is '{}'.".format(Constants.context_name_spark,
Constants.context_name_sql,
Constants.context_name_hive,
Constants.context_name_spark))
@argument("-s", "--session", help="The name of the Livy session to use. "
"If only one session has been created, there's no need to specify one.")
@argument("-o", "--output", type=str, default=None, help="If present, output when using SQL or Hive "
"query will be stored in variable of this name.")
@argument("command", type=str, default=[""], nargs="*", help="Commands to execute.")
@needs_local_scope
@line_cell_magic
def spark(self, line, cell="", local_ns=None):
"""Magic to execute spark remotely.
This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.
If invoked with no subcommand, the cell will be executed against the specified session.
Subcommands
-----------
info
Display the available Livy sessions and other configurations for sessions.
add
Add a Livy session. First argument is the name of the session, second argument
is the language, and third argument is the connection string of the Livy endpoint.
A fourth argument specifying if session creation can be skipped if it already exists is optional:
"skip" or empty.
e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p skip`
or
e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p`
config
Override the livy session properties sent to Livy on session creation. All session creations will
contain these config settings from then on.
Expected value is a JSON key-value string to be sent as part of the Request Body for the POST /sessions
endpoint in Livy.
e.g. `%%spark config {"driverMemory":"1000M", "executorCores":4}`
run
Run Spark code against a session.
e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously created
e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
previously created and store the pandas dataframe created in the my_var variable in the
Python environment.
logs
Returns the logs for a given session.
e.g. `%%spark logs -s testsession` will return the logs for the testsession previously created
delete
Delete a Livy session. Argument is the name of the session to be deleted.
e.g. `%%spark delete defaultlivy`
cleanup
Delete all Livy sessions created by the notebook. No arguments required.
e.g. `%%spark cleanup`
"""
usage = "Please look at usage of %spark by executing `%spark?`."
user_input = line
args = parse_argstring(self.spark, user_input)
subcommand = args.command[0].lower()
try:
# info
if subcommand == "info":
if len(args.command) == 2:
connection_string = args.command[1]
info_sessions = self.spark_controller.get_all_sessions_endpoint_info(connection_string)
self._print_endpoint_info(info_sessions)
elif len(args.command) == 1:
self._print_local_info()
#......... part of the code omitted here .........