

Python java_gateway.launch_gateway Function Code Examples

This article collects typical usage examples of the pyspark.java_gateway.launch_gateway function in Python. If you are looking for concrete examples of how launch_gateway is called and used in real projects, the selected snippets below should help.


Eight code examples of the launch_gateway function are shown below, sorted by popularity by default.
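
Before the project excerpts, here is a minimal standalone sketch (not taken from any of the projects below) of what launch_gateway does: it starts the Py4J gateway that backs PySpark's driver-side JVM and returns a gateway object whose jvm attribute exposes Java classes. Note that the SparkConf argument is an assumption that holds only for newer Spark versions; older versions take no arguments, as some of the examples below show.

    from pyspark.conf import SparkConf
    from pyspark.java_gateway import launch_gateway

    # Launch the Py4J gateway; this starts the driver-side JVM process.
    # Newer Spark versions accept a SparkConf; older ones take no argument.
    gateway = launch_gateway(SparkConf())

    # The gateway's `jvm` attribute is a Py4J view for calling into Java.
    jvm = gateway.jvm
    print(jvm.java.lang.System.getProperty("java.version"))

The examples that follow show this same pattern embedded in SparkContext initialization and in test fixtures.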

Example 1: _ensure_initialized

    def _ensure_initialized(cls, instance=None, gateway=None, conf=None):
        """
        Checks whether a SparkContext is initialized or not.
        Throws error if a SparkContext is already running.
        """
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = gateway or launch_gateway(conf)
                SparkContext._jvm = SparkContext._gateway.jvm

            if instance:
                if (SparkContext._active_spark_context and
                        SparkContext._active_spark_context != instance):
                    currentMaster = SparkContext._active_spark_context.master
                    currentAppName = SparkContext._active_spark_context.appName
                    callsite = SparkContext._active_spark_context._callsite

                    # Raise error if there is already a running Spark context
                    raise ValueError(
                        "Cannot run multiple SparkContexts at once; "
                        "existing SparkContext(app=%s, master=%s)"
                        " created by %s at %s:%s "
                        % (currentAppName, currentMaster,
                            callsite.function, callsite.file, callsite.linenum))
                else:
                    SparkContext._active_spark_context = instance
Developer: AllenShi, Project: spark, Lines: 26, Source: context.py

Example 2: sparkSession

    def sparkSession(cls):
        if not hasattr(cls, "spark"):
            # We can't use the SparkSession Builder here, since we need to call
            # Scala side's SmvTestHive.createContext to create the HiveTestContext's
            # SparkSession.
            # So we need to
            #   * Create a java_gateway
            #   * Create a SparkConf using the jgw (since without it SparkContext will ignore the given conf)
            #   * Create python SparkContext using the SparkConf (so we can specify the warehouse.dir)
            #   * Create Scala side HiveTestContext SparkSession
            #   * Create python SparkSession
            jgw = launch_gateway(None)
            jvm = jgw.jvm
            import tempfile
            import getpass
            hivedir = "file://{0}/{1}/smv_hive_test".format(tempfile.gettempdir(), getpass.getuser())
            sConf = SparkConf(False, _jvm=jvm) \
                .set("spark.sql.test", "") \
                .set("spark.sql.hive.metastore.barrierPrefixes",
                     "org.apache.spark.sql.hive.execution.PairSerDe") \
                .set("spark.sql.warehouse.dir", hivedir) \
                .set("spark.ui.enabled", "false")
            sc = SparkContext(master="local[1]", appName="SMV Python Test", conf=sConf, gateway=jgw).getOrCreate()
            jss = sc._jvm.org.apache.spark.sql.hive.test.SmvTestHive.createContext(sc._jsc.sc())
            cls.spark = SparkSession(sc, jss.sparkSession())
        return cls.spark
Developer: TresAmigosSD, Project: SMV, Lines: 25, Source: testconfig.py

Example 3: _ensure_initialized

    def _ensure_initialized(cls, instance=None, gateway=None):
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = gateway or launch_gateway()
                SparkContext._jvm = SparkContext._gateway.jvm
                SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile

            if instance:
                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                    raise ValueError("Cannot run multiple SparkContexts at once")
                else:
                    SparkContext._active_spark_context = instance
Developer: ComplexQubit, Project: incubator-spark, Lines: 12, Source: context.py

Example 4: _ensure_initialized

    def _ensure_initialized(cls, instance=None):
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = launch_gateway()
                SparkContext._jvm = SparkContext._gateway.jvm
                SparkContext._writeIteratorToPickleFile = \
                    SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
                SparkContext._takePartition = \
                    SparkContext._jvm.PythonRDD.takePartition

            if instance:
                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                    raise ValueError("Cannot run multiple SparkContexts at once")
                else:
                    SparkContext._active_spark_context = instance
Developer: AustinBGibbons, Project: incubator-spark, Lines: 15, Source: context.py

Example 5: _ensure_initialized

    def _ensure_initialized(cls, instance=None, gateway=None):
        with SparkContext._lock:
            if not SparkContext._gateway:
                SparkContext._gateway = gateway or launch_gateway()
                SparkContext._jvm = SparkContext._gateway.jvm
                SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile

            if instance:
                if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                    currentMaster = SparkContext._active_spark_context.master
                    currentAppName = SparkContext._active_spark_context.appName
                    callsite = SparkContext._active_spark_context._callsite

                    # Raise error if there is already a running Spark context
                    raise ValueError("Cannot run multiple SparkContexts at once; existing SparkContext(app=%s, master=%s)" \
                        " created by %s at %s:%s " \
                        % (currentAppName, currentMaster, callsite.function, callsite.file, callsite.linenum))
                else:
                    SparkContext._active_spark_context = instance
Developer: AndreSchumacher, Project: spark, Lines: 19, Source: context.py

Example 6: __init__

    def __init__(self, master, jobName, sparkHome=None, pyFiles=None,
        environment=None, batchSize=1024):
        """
        Create a new SparkContext.

        @param master: Cluster URL to connect to
               (e.g. mesos://host:port, spark://host:port, local[4]).
        @param jobName: A name for your job, to display on the cluster web UI
        @param sparkHome: Location where Spark is installed on cluster nodes.
        @param pyFiles: Collection of .zip or .py files to send to the cluster
               and add to PYTHONPATH.  These can be paths on the local file
               system or HDFS, HTTP, HTTPS, or FTP URLs.
        @param environment: A dictionary of environment variables to set on
               worker nodes.
        @param batchSize: The number of Python objects represented as a single
               Java object.  Set 1 to disable batching or -1 to use an
               unlimited batch size.
        """
        with SparkContext._lock:
            if SparkContext._active_spark_context:
                raise ValueError("Cannot run multiple SparkContexts at once")
            else:
                SparkContext._active_spark_context = self
                if not SparkContext._gateway:
                    SparkContext._gateway = launch_gateway()
                    SparkContext._jvm = SparkContext._gateway.jvm
                    SparkContext._writeIteratorToPickleFile = \
                        SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
                    SparkContext._takePartition = \
                        SparkContext._jvm.PythonRDD.takePartition
        self.master = master
        self.jobName = jobName
        self.sparkHome = sparkHome or None # None becomes null in Py4J
        self.environment = environment or {}
        self.batchSize = batchSize  # -1 represents an unlimited batch size

        # Create the Java SparkContext through Py4J
        empty_string_array = self._gateway.new_array(self._jvm.String, 0)
        self._jsc = self._jvm.JavaSparkContext(master, jobName, sparkHome,
                                              empty_string_array)

        # Create a single Accumulator in Java that we'll send all our updates through;
        # they will be passed back to us through a TCP server
        self._accumulatorServer = accumulators._start_update_server()
        (host, port) = self._accumulatorServer.server_address
        self._javaAccumulator = self._jsc.accumulator(
                self._jvm.java.util.ArrayList(),
                self._jvm.PythonAccumulatorParam(host, port))

        self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python')
        # Broadcast's __reduce__ method stores Broadcast instances here.
        # This allows other code to determine which Broadcast instances have
        # been pickled, so it can determine which Java broadcast objects to
        # send.
        self._pickled_broadcast_vars = set()

        # Deploy any code dependencies specified in the constructor
        for path in (pyFiles or []):
            self.addPyFile(path)
        SparkFiles._sc = self
        sys.path.append(SparkFiles.getRootDirectory())

        # Create a temporary directory inside spark.local.dir:
        local_dir = self._jvm.spark.Utils.getLocalDir()
        self._temp_dir = \
            self._jvm.spark.Utils.createTempDir(local_dir).getAbsolutePath()
Developer: WuErPing, Project: spark, Lines: 66, Source: context.py

Example 7: setUpClass

    def setUpClass(cls):
        gateway = launch_gateway(SparkConf())
        cls._jvm = gateway.jvm
        cls.longMessage = True
        random.seed(42)
Developer: JkSelf, Project: spark, Lines: 5, Source: test_broadcast.py

Example 8: __init__

    def __init__(self, arglist, _sparkSession, py_module_hotload=True):
        self.smvHome = os.environ.get("SMV_HOME")
        if (self.smvHome is None):
            raise SmvRuntimeError("SMV_HOME env variable not set!")

        self.sparkSession = _sparkSession

        if (self.sparkSession is not None):
            sc = self.sparkSession.sparkContext
            sc.setLogLevel("ERROR")

            self.sc = sc
            self.sqlContext = self.sparkSession._wrapped
            self._jvm = sc._jvm
            self.j_smvPyClient = self._jvm.org.tresamigos.smv.python.SmvPyClientFactory.init(self.sparkSession._jsparkSession)
            self.j_smvApp = self.j_smvPyClient.j_smvApp()
        else:
            _gw = launch_gateway(None)
            self._jvm = _gw.jvm

        self.py_module_hotload = py_module_hotload

        java_import(self._jvm, "org.tresamigos.smv.ColumnHelper")
        java_import(self._jvm, "org.tresamigos.smv.SmvDFHelper")
        java_import(self._jvm, "org.tresamigos.smv.dqm.*")
        java_import(self._jvm, "org.tresamigos.smv.panel.*")
        java_import(self._jvm, "org.tresamigos.smv.python.SmvPythonHelper")
        java_import(self._jvm, "org.tresamigos.smv.SmvHDFS")
        java_import(self._jvm, "org.tresamigos.smv.DfCreator")

        self.smvSchemaObj = self._jvm.SmvPythonHelper.getSmvSchema()

        self.py_smvconf = SmvConfig(arglist)

        # configure spark sql params
        if (self.sparkSession is not None):
            for k, v in self.py_smvconf.spark_sql_props().items():
                self.sqlContext.setConf(k, v)

        # issue #429 set application name from smv config
        if (self.sparkSession is not None):
            sc._conf.setAppName(self.appName())

        # CmdLine is static, so can be an attribute
        cl = self.py_smvconf.cmdline
        self.cmd_line = namedtuple("CmdLine", cl.keys())(*cl.values())

        # shortcut is meant for internal use only
        self.dsm = DataSetMgr(self._jvm, self.py_smvconf)

        # computed df cache, keyed by m.versioned_fqn
        self.data_cache = {}

        # AFTER app is available but BEFORE stages,
        # use the dynamically configured app dir to set the source path, library path
        self.prependDefaultDirs()

        self.repoFactory = DataSetRepoFactory(self)
        self.dsm.register(self.repoFactory)

        # provider cache, keyed by providers' fqn
        self.provider_cache = {}
        self.refresh_provider_cache()

        # Initialize DataFrame and Column with helper methods
        smv.helpers.init_helpers()
Developer: TresAmigosSD, Project: SMV, Lines: 66, Source: smvapp.py


Note: The pyspark.java_gateway.launch_gateway examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their authors; copyright remains with the original authors, and distribution or use should follow each project's license. Do not reproduce without permission.