This article collects typical usage examples of the Python method pyspark.context.SparkContext._ensure_initialized. If you have been wondering what SparkContext._ensure_initialized does and how to use it, the curated code examples below may help. You can also read further about the enclosing class, pyspark.context.SparkContext.
The following 7 code examples of SparkContext._ensure_initialized are shown, ordered by popularity.
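Before the examples, a minimal sketch of what the call accomplishes (this is a private API, note the leading underscore, so treat the snippet as illustration only): _ensure_initialized() launches the Py4J gateway if necessary, after which SparkContext._jvm and SparkContext._gateway are usable even though no SparkContext has been constructed yet.

from pyspark.context import SparkContext

SparkContext._ensure_initialized()        # starts the JVM gateway on first call
assert SparkContext._jvm is not None      # JVM view is now available
assert SparkContext._gateway is not None  # Py4J gateway handle is set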
Example 1: _ensure_initialized
# Module to import: from pyspark.context import SparkContext [as alias]
# Or: from pyspark.context.SparkContext import _ensure_initialized [as alias]
from py4j.java_gateway import JavaObject, java_import
from pyspark.context import SparkContext
from pyspark.serializers import CloudPickleSerializer
from pyspark.streaming.util import TransformFunctionSerializer

@classmethod
def _ensure_initialized(cls):
    SparkContext._ensure_initialized()
    gw = SparkContext._gateway
    java_import(gw.jvm, "org.apache.spark.streaming.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")
    # start callback server
    # getattr will fallback to JVM, so we cannot test by hasattr()
    if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
        gw.callback_server_parameters.eager_load = True
        gw.callback_server_parameters.daemonize = True
        gw.callback_server_parameters.daemonize_connections = True
        gw.callback_server_parameters.port = 0
        gw.start_callback_server(gw.callback_server_parameters)
        cbport = gw._callback_server.server_socket.getsockname()[1]
        gw._callback_server.port = cbport
        # gateway with real port
        gw._python_proxy_port = gw._callback_server.port
        # get the GatewayServer object in JVM by ID
        jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
        # update the port of CallbackClient with real port
        jgws.resetCallbackClient(jgws.getCallbackClient().getAddress(), gw._python_proxy_port)
    # register serializer for TransformFunction
    # it happens before creating SparkContext when loading from checkpointing
    cls._transformerSerializer = TransformFunctionSerializer(
        SparkContext._active_spark_context, CloudPickleSerializer(), gw)
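In the Spark source this classmethod lives on pyspark.streaming.StreamingContext, and user code never calls it directly: constructing a StreamingContext triggers it. A minimal usage sketch, assuming a local Spark installation:

from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext("local[2]", "ensure-init-demo")
ssc = StreamingContext(sc, 1)  # __init__ calls StreamingContext._ensure_initialized()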
Example 2: __init__
# Module to import: from pyspark.context import SparkContext [as alias]
# Or: from pyspark.context.SparkContext import _ensure_initialized [as alias]
def __init__(self, loadDefaults=True, _jvm=None):
    super(SparkConf, self).__init__()
    from pyspark.context import SparkContext
    SparkContext._ensure_initialized()
    _jvm = _jvm or SparkContext._jvm
    self._jconf = _jvm.SparkConf(loadDefaults)
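A short usage sketch: constructing a SparkConf like this transparently boots the Py4J gateway, so no explicit _ensure_initialized() call is needed (the methods below are the public SparkConf API):

from pyspark import SparkConf

conf = SparkConf().setAppName("demo").setMaster("local[2]")
print(conf.toDebugString())  # the Java-side SparkConf already exists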
Example 3: __init__
# Module to import: from pyspark.context import SparkContext [as alias]
# Or: from pyspark.context.SparkContext import _ensure_initialized [as alias]
def __init__(self, millis, _jvm=None):
    """
    Create a new Duration.

    @param millis: duration in milliseconds
    """
    self._millis = millis
    from pyspark.context import SparkContext
    SparkContext._ensure_initialized()
    _jvm = _jvm or SparkContext._jvm
    self._jduration = _jvm.Duration(millis)
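Hypothetical usage of this wrapper. Note the hedge: a Python Duration class did not ship in mainline PySpark releases (the streaming API takes plain seconds instead), so the import path below is an assumption based on the snippet's origin:

from pyspark.streaming.duration import Duration  # assumed path, early dev branches only

ten_seconds = Duration(10000)  # 10,000 ms; constructing it also boots the JVM gateway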
Example 4: __init__
# Module to import: from pyspark.context import SparkContext [as alias]
# Or: from pyspark.context.SparkContext import _ensure_initialized [as alias]
def __init__(self, loadDefaults=True, _jvm=None):
    """
    Create a new Spark configuration.

    @param loadDefaults: whether to load values from Java system
           properties (True by default)
    @param _jvm: internal parameter used to pass a handle to the
           Java VM; does not need to be set by users
    """
    from pyspark.context import SparkContext
    SparkContext._ensure_initialized()
    _jvm = _jvm or SparkContext._jvm
    self._jconf = _jvm.SparkConf(loadDefaults)
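The _jvm hook makes the constructor testable. A hypothetical sketch of that angle; MockJVM is an illustration, not part of PySpark, and note that _ensure_initialized() still starts the real gateway first, so this only controls which handle builds the Java SparkConf:

from pyspark import SparkConf

class MockJVM(object):
    """Hypothetical stand-in for the Py4J JVM view, for unit tests."""
    def SparkConf(self, loadDefaults):
        return {"loadDefaults": loadDefaults}  # fake Java SparkConf handle

conf = SparkConf(loadDefaults=False, _jvm=MockJVM())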
Example 5: _ensure_initialized
# Module to import: from pyspark.context import SparkContext [as alias]
# Or: from pyspark.context.SparkContext import _ensure_initialized [as alias]
from py4j.java_gateway import JavaObject, java_import
from pyspark.context import SparkContext
from pyspark.serializers import CloudPickleSerializer
from pyspark.streaming.util import TransformFunctionSerializer
# Py4jCallbackConnectionCleaner is defined alongside this method in the same
# module in the Spark 1.6 source tree.

@classmethod
def _ensure_initialized(cls):
    SparkContext._ensure_initialized()
    gw = SparkContext._gateway
    java_import(gw.jvm, "org.apache.spark.streaming.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")
    # start callback server
    # getattr will fallback to JVM, so we cannot test by hasattr()
    if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
        gw.callback_server_parameters.eager_load = True
        gw.callback_server_parameters.daemonize = True
        gw.callback_server_parameters.daemonize_connections = True
        gw.callback_server_parameters.port = 0
        gw.start_callback_server(gw.callback_server_parameters)
        cbport = gw._callback_server.server_socket.getsockname()[1]
        gw._callback_server.port = cbport
        # gateway with real port
        gw._python_proxy_port = gw._callback_server.port
        # get the GatewayServer object in JVM by ID
        jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
        # update the port of CallbackClient with real port
        gw.jvm.PythonDStream.updatePythonGatewayPort(jgws, gw._python_proxy_port)
        _py4j_cleaner = Py4jCallbackConnectionCleaner(gw)
        _py4j_cleaner.start()
    # register serializer for TransformFunction
    # it happens before creating SparkContext when loading from checkpointing
    if cls._transformerSerializer is None:
        transformer_serializer = TransformFunctionSerializer()
        transformer_serializer.init(
            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
        # SPARK-12511 streaming driver with checkpointing unable to finalize leading to OOM
        # There is an issue that Py4J's PythonProxyHandler.finalize blocks forever.
        # (https://github.com/bartdag/py4j/pull/184)
        #
        # Py4J will create a PythonProxyHandler in Java for "transformer_serializer" when
        # calling "registerSerializer". If we call "registerSerializer" twice, the second
        # PythonProxyHandler will override the first one, then the first one will be GCed
        # and trigger "PythonProxyHandler.finalize". To avoid that, we should not call
        # "registerSerializer" more than once, so that "PythonProxyHandler" on the Java
        # side won't be GCed.
        #
        # TODO Once Py4J fixes this issue, we should upgrade Py4J to the latest version.
        transformer_serializer.gateway.jvm.PythonDStream.registerSerializer(
            transformer_serializer)
        cls._transformerSerializer = transformer_serializer
    else:
        cls._transformerSerializer.init(
            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
Example 6: _ensure_initialized
# Module to import: from pyspark.context import SparkContext [as alias]
# Or: from pyspark.context.SparkContext import _ensure_initialized [as alias]
from py4j.java_gateway import java_import
from pyspark.context import SparkContext
from pyspark.serializers import CloudPickleSerializer
from pyspark.streaming.util import TransformFunctionSerializer

@classmethod
def _ensure_initialized(cls):
    SparkContext._ensure_initialized()
    gw = SparkContext._gateway
    java_import(gw.jvm, "org.apache.spark.streaming.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
    java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")
    from pyspark.java_gateway import ensure_callback_server_started
    ensure_callback_server_started(gw)
    # register serializer for TransformFunction
    # it happens before creating SparkContext when loading from checkpointing
    cls._transformerSerializer = TransformFunctionSerializer(
        SparkContext._active_spark_context, CloudPickleSerializer(), gw)
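The helper used above can also be called by hand. A minimal sketch, assuming PySpark 2.4 or later (the version where pyspark.java_gateway.ensure_callback_server_started appears):

from pyspark.context import SparkContext
from pyspark.java_gateway import ensure_callback_server_started

SparkContext._ensure_initialized()                     # boots the JVM gateway
ensure_callback_server_started(SparkContext._gateway)  # starts it only if needed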
Example 7
# Module to import: from pyspark.context import SparkContext [as alias]
# Or: from pyspark.context.SparkContext import _ensure_initialized [as alias]
import atexit
import os
import platform
import py4j
import pyspark
from pyspark.context import SparkContext
from pyspark.sql import SparkSession, SQLContext
from pyspark.storagelevel import StorageLevel

if os.environ.get("SPARK_EXECUTOR_URI"):
    SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"])
SparkContext._ensure_initialized()

try:
    # Try to access HiveConf, it will raise exception if Hive is not added
    SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf()
    spark = SparkSession.builder \
        .enableHiveSupport() \
        .getOrCreate()
except py4j.protocol.Py4JError:
    spark = SparkSession.builder.getOrCreate()
except TypeError:
    spark = SparkSession.builder.getOrCreate()

sc = spark.sparkContext
sql = spark.sql
atexit.register(lambda: sc.stop())
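This last example is the bootstrap the pyspark interactive shell performs: _ensure_initialized() brings up the JVM early so the Hive probe can run before any SparkContext exists. After it, a session already has working handles; a quick smoke test, assuming the bootstrap above succeeded:

print(sc.version)      # version of the live SparkContext
spark.range(5).show()  # spark is a ready SparkSession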