This article collects typical usage examples of the Python method pyspark.SparkContext._ensure_initialized: what the method does, how it is called, and what real-world uses look like. For more background, see the enclosing class, pyspark.SparkContext.
Below are 5 code examples of SparkContext._ensure_initialized, sorted by popularity by default.
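Before the examples, here is a minimal sketch of the pattern they all share. Note that _ensure_initialized is a private PySpark API (leading underscore), so its signature and behaviour may change between Spark versions; the sketch only illustrates the bootstrap step the examples rely on.

from pyspark import SparkContext

# Bootstrap the Py4J gateway and JVM without (yet) creating a full SparkContext.
SparkContext._ensure_initialized()
gw = SparkContext._gateway    # py4j JavaGateway used by the examples below
jvm = SparkContext._jvm       # entry point for looking up JVM classes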
Example 1: _ensure_initialized
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import _ensure_initialized [as alias]
def _ensure_initialized(cls):
    SparkContext._ensure_initialized()
    gw = SparkContext._gateway

    java_import(gw.jvm, "org.apache.spark.streaming.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")

    # start callback server
    # getattr will fallback to JVM, so we cannot test by hasattr()
    if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
        gw.callback_server_parameters.eager_load = True
        gw.callback_server_parameters.daemonize = True
        gw.callback_server_parameters.daemonize_connections = True
        gw.callback_server_parameters.port = 0
        gw.start_callback_server(gw.callback_server_parameters)
        cbport = gw._callback_server.server_socket.getsockname()[1]
        gw._callback_server.port = cbport
        # gateway with real port
        gw._python_proxy_port = gw._callback_server.port
        # get the GatewayServer object in JVM by ID
        jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
        # update the port of CallbackClient with real port
        jgws.resetCallbackClient(jgws.getCallbackClient().getAddress(), gw._python_proxy_port)

    # register serializer for TransformFunction
    # it happens before creating SparkContext when loading from checkpointing
    cls._transformerSerializer = TransformFunctionSerializer(
        SparkContext._active_spark_context, CloudPickleSerializer(), gw)
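This classmethod appears to be pyspark.streaming.StreamingContext._ensure_initialized. It is normally not called directly; constructing a StreamingContext triggers it internally, roughly as follows (a hedged usage sketch, assuming a local Spark setup):

from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext("local[2]", "streaming-demo")
# Construction runs _ensure_initialized() under the hood, which imports the
# streaming JVM classes and starts the Py4J callback server shown above.
ssc = StreamingContext(sc, batchDuration=1)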
Example 2: __init__
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import _ensure_initialized [as alias]
def __init__(self, sc=None, appName="Hail", master=None, local='local[*]',
             log='hail.log', quiet=False, append=False, parquet_compression='snappy',
             min_block_size=1, branching_factor=50, tmp_dir='/tmp'):

    if Env._hc:
        raise FatalError('Hail Context has already been created, restart session '
                         'or stop Hail context to change configuration.')

    from pyspark import SparkContext
    SparkContext._ensure_initialized()

    self._gateway = SparkContext._gateway
    self._jvm = SparkContext._jvm

    # hail package
    self._hail = getattr(self._jvm, 'is').hail

    Env._jvm = self._jvm
    Env._gateway = self._gateway

    jsc = sc._jsc.sc() if sc else None

    self._jhc = scala_object(self._hail, 'HailContext').apply(
        jsc, appName, joption(master), local, log, quiet, append,
        parquet_compression, min_block_size, branching_factor, tmp_dir)

    self._jsc = self._jhc.sc()
    self.sc = sc if sc else SparkContext(gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
    self._jsql_context = self._jhc.sqlContext()
    self._sql_context = SQLContext(self.sc, self._jsql_context)

    # do this at the end in case something errors, so we don't raise the above error without a real HC
    Env._hc = self
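The constructor above appears to come from Hail 0.1's HailContext. Only one HailContext may exist per session (hence the FatalError check), so typical usage is a single construction at the start of a script; a hedged sketch, assuming a standard Hail 0.1 installation:

from hail import HailContext

# Creating the context calls SparkContext._ensure_initialized() internally
# and wires the Python session to the JVM-side is.hail.HailContext.
hc = HailContext(appName="Hail", tmp_dir='/tmp')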
Example 3: _ensure_initialized
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import _ensure_initialized [as alias]
def _ensure_initialized(cls):
    SparkContext._ensure_initialized()
    gw = SparkContext._gateway

    java_import(gw.jvm, "org.apache.spark.streaming.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")

    # start callback server
    # getattr will fallback to JVM, so we cannot test by hasattr()
    if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
        gw.callback_server_parameters.eager_load = True
        gw.callback_server_parameters.daemonize = True
        gw.callback_server_parameters.daemonize_connections = True
        gw.callback_server_parameters.port = 0
        gw.start_callback_server(gw.callback_server_parameters)
        cbport = gw._callback_server.server_socket.getsockname()[1]
        gw._callback_server.port = cbport
        # gateway with real port
        gw._python_proxy_port = gw._callback_server.port
        # get the GatewayServer object in JVM by ID
        jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
        # update the port of CallbackClient with real port
        gw.jvm.PythonDStream.updatePythonGatewayPort(jgws, gw._python_proxy_port)
        _py4j_cleaner = Py4jCallbackConnectionCleaner(gw)
        _py4j_cleaner.start()

    # register serializer for TransformFunction
    # it happens before creating SparkContext when loading from checkpointing
    if cls._transformerSerializer is None:
        transformer_serializer = TransformFunctionSerializer()
        transformer_serializer.init(
            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
        # SPARK-12511 streaming driver with checkpointing unable to finalize leading to OOM
        # There is an issue that Py4J's PythonProxyHandler.finalize blocks forever.
        # (https://github.com/bartdag/py4j/pull/184)
        #
        # Py4j will create a PythonProxyHandler in Java for "transformer_serializer" when
        # calling "registerSerializer". If we call "registerSerializer" twice, the second
        # PythonProxyHandler will override the first one, then the first one will be GCed and
        # trigger "PythonProxyHandler.finalize". To avoid that, we should not call
        # "registerSerializer" more than once, so that "PythonProxyHandler" in Java side won't
        # be GCed.
        #
        # TODO Once Py4J fixes this issue, we should upgrade Py4j to the latest version.
        transformer_serializer.gateway.jvm.PythonDStream.registerSerializer(
            transformer_serializer)
        cls._transformerSerializer = transformer_serializer
    else:
        cls._transformerSerializer.init(
            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
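Example 3 is an older variant of Example 1: the callback-server port is pushed to the JVM through PythonDStream.updatePythonGatewayPort, a Py4jCallbackConnectionCleaner is started, and the TransformFunctionSerializer is registered at most once. As the inline comments explain, calling registerSerializer twice would let the first PythonProxyHandler be garbage-collected and hang in finalize (SPARK-12511, Py4J PR 184), so the serializer instance is cached on the class and only re-initialized on later calls.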
Example 4: __init__
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import _ensure_initialized [as alias]
def __init__(self, appName="PyHail", master=None, local='local[*]',
             log='hail.log', quiet=False, append=False, parquet_compression='uncompressed',
             block_size=1, branching_factor=50, tmp_dir='/tmp'):
    from pyspark import SparkContext
    SparkContext._ensure_initialized()

    self.gateway = SparkContext._gateway
    self.jvm = SparkContext._jvm

    self.jsc = scala_package_object(self.jvm.org.broadinstitute.hail.driver).configureAndCreateSparkContext(
        appName, joption(self.jvm, master), local,
        log, quiet, append, parquet_compression,
        block_size, branching_factor, tmp_dir)
    self.sc = SparkContext(gateway=self.gateway, jsc=self.jvm.JavaSparkContext(self.jsc))

    self.jsql_context = scala_package_object(self.jvm.org.broadinstitute.hail.driver).createSQLContext(self.jsc)
    self.sql_context = SQLContext(self.sc, self.jsql_context)
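Example 4 belongs to an earlier Hail lineage (note the org.broadinstitute.hail package rather than is.hail): the Spark context is created on the JVM side by configureAndCreateSparkContext and then wrapped in a Python SparkContext. That wrapping pattern, also used in Examples 2 and 5, can be isolated as a small hedged sketch; wrap_jvm_spark_context and its argument are illustrative names, not Hail API:

from pyspark import SparkContext

def wrap_jvm_spark_context(jvm_spark_context):
    """Wrap an existing JVM org.apache.spark.SparkContext (a Py4J handle)
    in a Python SparkContext, mirroring Examples 2, 4 and 5."""
    SparkContext._ensure_initialized()
    gw = SparkContext._gateway
    jvm = SparkContext._jvm
    return SparkContext(gateway=gw, jsc=jvm.JavaSparkContext(jvm_spark_context))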
Example 5: __init__
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import _ensure_initialized [as alias]
def __init__(self, sc=None, app_name="Hail", master=None, local='local[*]',
             log=None, quiet=False, append=False,
             min_block_size=1, branching_factor=50, tmp_dir=None,
             default_reference="GRCh37", idempotent=False,
             global_seed=6348563392232659379, _backend=None):

    if Env._hc:
        if idempotent:
            return
        else:
            raise FatalError('Hail has already been initialized, restart session '
                             'or stop Hail to change configuration.')

    if pkg_resources.resource_exists(__name__, "hail-all-spark.jar"):
        hail_jar_path = pkg_resources.resource_filename(__name__, "hail-all-spark.jar")
        assert os.path.exists(hail_jar_path), f'{hail_jar_path} does not exist'
        sys.stderr.write(f'using hail jar at {hail_jar_path}\n')
        conf = SparkConf()
        conf.set('spark.driver.extraClassPath', hail_jar_path)
        conf.set('spark.executor.extraClassPath', hail_jar_path)
        SparkContext._ensure_initialized(conf=conf)
    else:
        SparkContext._ensure_initialized()

    self._gateway = SparkContext._gateway
    self._jvm = SparkContext._jvm

    # hail package
    self._hail = getattr(self._jvm, 'is').hail

    self._warn_cols_order = True
    self._warn_entries_order = True

    Env._jvm = self._jvm
    Env._gateway = self._gateway

    jsc = sc._jsc.sc() if sc else None

    if _backend is None:
        _backend = SparkBackend()
    self._backend = _backend

    tmp_dir = get_env_or_default(tmp_dir, 'TMPDIR', '/tmp')

    version = read_version_info()
    hail.__version__ = version

    if log is None:
        log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'),
                                        suffix=f'-{version}.log')
    self._log = log

    # we always pass 'quiet' to the JVM because stderr output needs
    # to be routed through Python separately.
    if idempotent:
        self._jhc = self._hail.HailContext.getOrCreate(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir)
    else:
        self._jhc = self._hail.HailContext.apply(
            jsc, app_name, joption(master), local, log, True, append,
            min_block_size, branching_factor, tmp_dir)

    self._jsc = self._jhc.sc()
    self.sc = sc if sc else SparkContext(gateway=self._gateway, jsc=self._jvm.JavaSparkContext(self._jsc))
    self._jsql_context = self._jhc.sqlContext()
    self._sql_context = SQLContext(self.sc, jsqlContext=self._jsql_context)

    super(HailContext, self).__init__()

    # do this at the end in case something errors, so we don't raise the above error without a real HC
    Env._hc = self

    self._default_ref = None
    Env.hail().variant.ReferenceGenome.setDefaultReference(self._jhc, default_reference)

    jar_version = self._jhc.version()

    if jar_version != version:
        raise RuntimeError(f"Hail version mismatch between JAR and Python library\n"
                           f"  JAR:    {jar_version}\n"
                           f"  Python: {version}")

    if not quiet:
        sys.stderr.write('Running on Apache Spark version {}\n'.format(self.sc.version))
        if self._jsc.uiWebUrl().isDefined():
            sys.stderr.write('SparkUI available at {}\n'.format(self._jsc.uiWebUrl().get()))

        connect_logger('localhost', 12888)

        self._hail.HailContext.startProgressBar(self._jsc)

        sys.stderr.write(
            'Welcome to\n'
            '     __  __     <>__\n'
            '    / /_/ /__  __/ /\n'
            '   / __  / _ `/ / /\n'
# ......... (remaining code omitted) .........
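Example 5 matches the Hail 0.2 HailContext. End users normally do not call this constructor directly but go through the public hail.init() entry point, whose parameters mirror the constructor; a hedged sketch, assuming Hail 0.2 is installed:

import hail as hl

# init() builds the HailContext shown above; idempotent=True makes repeated
# calls in the same session a no-op instead of raising FatalError.
hl.init(default_reference='GRCh37', idempotent=True)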