

Python SparkContext._ensure_initialized Method Code Examples

This article collects typical usage examples of the Python method pyspark.context.SparkContext._ensure_initialized, gathered from open-source projects. If you are unsure what SparkContext._ensure_initialized does or how to call it, the curated examples below should help; you can also explore the broader usage of pyspark.context.SparkContext, the class the method belongs to.


Seven code examples of the SparkContext._ensure_initialized method are shown below, ordered by popularity.
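All seven examples follow the same pattern: call SparkContext._ensure_initialized() to make sure the Py4J gateway and the JVM are running, then reach through SparkContext._jvm (or SparkContext._gateway) to construct Java-side objects. The following minimal sketch distills that pattern; it assumes a local pyspark installation, and note that _ensure_initialized and _jvm are pyspark internals, not public API:

from pyspark.context import SparkContext

SparkContext._ensure_initialized()    # start the Py4J gateway/JVM if not already up
_jvm = SparkContext._jvm              # Py4J handle into the JVM
jconf = _jvm.SparkConf(True)          # e.g. build a Java-side SparkConf, as in Examples 2-4
print(jconf.toDebugString())          # dump the configuration loaded from system properties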

Example 1: _ensure_initialized

# Required import: from pyspark.context import SparkContext
# Alternatively: from pyspark.context.SparkContext import _ensure_initialized
    @classmethod
    def _ensure_initialized(cls):
        SparkContext._ensure_initialized()
        gw = SparkContext._gateway

        java_import(gw.jvm, "org.apache.spark.streaming.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")

        # start callback server
        # getattr will fallback to JVM, so we cannot test by hasattr()
        if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
            gw.callback_server_parameters.eager_load = True
            gw.callback_server_parameters.daemonize = True
            gw.callback_server_parameters.daemonize_connections = True
            gw.callback_server_parameters.port = 0
            gw.start_callback_server(gw.callback_server_parameters)
            cbport = gw._callback_server.server_socket.getsockname()[1]
            gw._callback_server.port = cbport
            # gateway with real port
            gw._python_proxy_port = gw._callback_server.port
            # get the GatewayServer object in JVM by ID
            jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
            # update the port of CallbackClient with real port
            jgws.resetCallbackClient(jgws.getCallbackClient().getAddress(), gw._python_proxy_port)

        # register serializer for TransformFunction
        # it happens before creating SparkContext when loading from checkpointing
        cls._transformerSerializer = TransformFunctionSerializer(
            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
Developer ID: 11wzy001, Project: spark, Lines: 31, Source: context.py
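A design note on the port handling above: the callback server is deliberately started on port 0 so the operating system assigns a free port; the code then reads the actual port back from the server socket and pushes it into the JVM's CallbackClient via resetCallbackClient, so that Java-to-Python callbacks reach the right endpoint.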

Example 2: __init__

# Required import: from pyspark.context import SparkContext
# Alternatively: from pyspark.context.SparkContext import _ensure_initialized
    def __init__(self, loadDefaults=True, _jvm=None):
        super(SparkConf, self).__init__()
        from pyspark.context import SparkContext
        SparkContext._ensure_initialized()
        _jvm = _jvm or SparkContext._jvm
        self._jconf = _jvm.SparkConf(loadDefaults)
Developer ID: CaMeLCa5e, Project: dailysummer2015, Lines: 9, Source: spark_conf.py

Example 3: __init__

# Required import: from pyspark.context import SparkContext
# Alternatively: from pyspark.context.SparkContext import _ensure_initialized
    def __init__(self, millis, _jvm=None):
        """
        Create a new Duration.

        @param millis: milliseconds

        """
        self._millis = millis

        from pyspark.context import SparkContext
        SparkContext._ensure_initialized()
        _jvm = _jvm or SparkContext._jvm
        self._jduration = _jvm.Duration(millis)
Developer ID: giworld, Project: spark, Lines: 15, Source: duration.py
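For context, a hedged sketch of how such a Duration is consumed in this older streaming API; the module path pyspark.streaming.duration is inferred from the source file name (duration.py) and may differ in your checkout:

from pyspark.streaming.duration import Duration  # path inferred from duration.py above

batch_interval = Duration(2000)   # a 2000 ms (2 second) batch interval
print(batch_interval._millis)     # 2000, as stored by __init__ above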

Example 4: __init__

# Required import: from pyspark.context import SparkContext
# Alternatively: from pyspark.context.SparkContext import _ensure_initialized
    def __init__(self, loadDefaults=True, _jvm=None):
        """
        Create a new Spark configuration.

        @param loadDefaults: whether to load values from Java system
               properties (True by default)
        @param _jvm: internal parameter used to pass a handle to the
               Java VM; does not need to be set by users
        """
        from pyspark.context import SparkContext
        SparkContext._ensure_initialized()
        _jvm = _jvm or SparkContext._jvm
        self._jconf = _jvm.SparkConf(loadDefaults)
Developer ID: AsylumCorp, Project: incubator-spark, Lines: 15, Source: conf.py
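For comparison, this is how the same constructor is normally reached through the public pyspark API, sketched with standard calls only:

from pyspark.conf import SparkConf
from pyspark.context import SparkContext

conf = SparkConf(loadDefaults=True).setAppName("demo").setMaster("local[2]")
sc = SparkContext(conf=conf)                  # SparkConf.__init__ above ran inside SparkConf()
print(sc.getConf().get("spark.app.name"))     # demo
sc.stop()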

Example 5: _ensure_initialized

# Required import: from pyspark.context import SparkContext
# Alternatively: from pyspark.context.SparkContext import _ensure_initialized
    @classmethod
    def _ensure_initialized(cls):
        SparkContext._ensure_initialized()
        gw = SparkContext._gateway

        java_import(gw.jvm, "org.apache.spark.streaming.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")

        # start callback server
        # getattr will fallback to JVM, so we cannot test by hasattr()
        if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
            gw.callback_server_parameters.eager_load = True
            gw.callback_server_parameters.daemonize = True
            gw.callback_server_parameters.daemonize_connections = True
            gw.callback_server_parameters.port = 0
            gw.start_callback_server(gw.callback_server_parameters)
            cbport = gw._callback_server.server_socket.getsockname()[1]
            gw._callback_server.port = cbport
            # gateway with real port
            gw._python_proxy_port = gw._callback_server.port
            # get the GatewayServer object in JVM by ID
            jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
            # update the port of CallbackClient with real port
            gw.jvm.PythonDStream.updatePythonGatewayPort(jgws, gw._python_proxy_port)
            _py4j_cleaner = Py4jCallbackConnectionCleaner(gw)
            _py4j_cleaner.start()

        # register serializer for TransformFunction
        # it happens before creating SparkContext when loading from checkpointing
        if cls._transformerSerializer is None:
            transformer_serializer = TransformFunctionSerializer()
            transformer_serializer.init(
                SparkContext._active_spark_context, CloudPickleSerializer(), gw)
            # SPARK-12511 streaming driver with checkpointing unable to finalize leading to OOM
            # There is an issue that Py4J's PythonProxyHandler.finalize blocks forever.
            # (https://github.com/bartdag/py4j/pull/184)
            #
            # Py4j will create a PythonProxyHandler in Java for "transformer_serializer" when
            # calling "registerSerializer". If we call "registerSerializer" twice, the second
            # PythonProxyHandler will override the first one, then the first one will be GCed and
            # trigger "PythonProxyHandler.finalize". To avoid that, we should not call
            # "registerSerializer" more than once, so that "PythonProxyHandler" in Java side won't
            # be GCed.
            #
            # TODO Once Py4J fixes this issue, we should upgrade Py4j to the latest version.
            transformer_serializer.gateway.jvm.PythonDStream.registerSerializer(
                transformer_serializer)
            cls._transformerSerializer = transformer_serializer
        else:
            cls._transformerSerializer.init(
                SparkContext._active_spark_context, CloudPickleSerializer(), gw)
Developer ID: MartinWeindel, Project: spark, Lines: 53, Source: context.py
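The key design choice here, spelled out in the SPARK-12511 comment: registerSerializer is called at most once per process, and subsequent calls only re-run init on the already-registered serializer. The Java-side PythonProxyHandler is therefore never replaced, never garbage-collected, and its potentially blocking finalize is never triggered.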

Example 6: _ensure_initialized

# Required import: from pyspark.context import SparkContext
# Alternatively: from pyspark.context.SparkContext import _ensure_initialized
    @classmethod
    def _ensure_initialized(cls):
        SparkContext._ensure_initialized()
        gw = SparkContext._gateway

        java_import(gw.jvm, "org.apache.spark.streaming.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")

        from pyspark.java_gateway import ensure_callback_server_started
        ensure_callback_server_started(gw)

        # register serializer for TransformFunction
        # it happens before creating SparkContext when loading from checkpointing
        cls._transformerSerializer = TransformFunctionSerializer(
            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
Developer ID: WeichenXu123, Project: spark, Lines: 17, Source: context.py
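Compared with Examples 1 and 5, this newer variant delegates the callback-server bootstrap to pyspark.java_gateway.ensure_callback_server_started, which wraps the same start-on-port-0-then-fix-up-the-real-port logic shown inline in those examples.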

Example 7: shell.py

# Required import: from pyspark.context import SparkContext
# Alternatively: from pyspark.context.SparkContext import _ensure_initialized
import atexit
import os
import platform

import py4j

import pyspark
from pyspark.context import SparkContext
from pyspark.sql import SparkSession, SQLContext
from pyspark.storagelevel import StorageLevel

if os.environ.get("SPARK_EXECUTOR_URI"):
    SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"])

SparkContext._ensure_initialized()

try:
    # Try to access HiveConf; it will raise an exception if Hive is not added
    SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf()
    spark = SparkSession.builder\
        .enableHiveSupport()\
        .getOrCreate()
except (py4j.protocol.Py4JError, TypeError):
    spark = SparkSession.builder.getOrCreate()

sc = spark.sparkContext
sql = spark.sql
atexit.register(lambda: sc.stop())
Developer ID: 1574359445, Project: spark, Lines: 32, Source: shell.py
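Once this bootstrap has run (as it does when the pyspark shell starts), the exported names can be used directly. A short sketch using only public API:

df = spark.range(10)                    # 'spark' is the SparkSession created above
print(df.count())                       # 10
rdd = sc.parallelize([1, 2, 3])         # 'sc' is the underlying SparkContext
print(rdd.sum())                        # 6
print(sql("SELECT 1 AS x").collect())   # 'sql' is bound to spark.sql above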


Note: The pyspark.context.SparkContext._ensure_initialized examples in this article were compiled by 纯净天空 from open-source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from community-contributed open-source projects, and copyright of the source code remains with the original authors; consult the license of the corresponding project before redistributing or reusing the code. Do not repost without permission.