This article collects typical usage examples of the Python function pyspark.java_gateway.launch_gateway: what the function does, how to call it, and what real-world usage looks like. The eight hand-picked code examples below are sorted by popularity by default.
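Before the examples, here is a minimal sketch of what launch_gateway gives you when called directly. The SparkConf argument follows Example 7 below (newer PySpark versions accept a conf, while older ones, as in Examples 3-6, take no arguments), and shutdown() is the standard Py4J JavaGateway method:

# A minimal, self-contained sketch of using launch_gateway directly.
# Assumes a local PySpark install; the exact signature varies by version.
from pyspark import SparkConf
from pyspark.java_gateway import launch_gateway

gateway = launch_gateway(SparkConf())  # spawn the JVM, connect via Py4J
jvm = gateway.jvm                      # handle for calling Java-side classes
print(jvm.java.lang.System.getProperty("java.version"))
gateway.shutdown()                     # close the Py4J connection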
Example 1: _ensure_initialized
def _ensure_initialized(cls, instance=None, gateway=None, conf=None):
    """
    Checks whether a SparkContext is initialized or not.
    Throws error if a SparkContext is already running.
    """
    with SparkContext._lock:
        if not SparkContext._gateway:
            SparkContext._gateway = gateway or launch_gateway(conf)
            SparkContext._jvm = SparkContext._gateway.jvm

        if instance:
            if (SparkContext._active_spark_context and
                    SparkContext._active_spark_context != instance):
                currentMaster = SparkContext._active_spark_context.master
                currentAppName = SparkContext._active_spark_context.appName
                callsite = SparkContext._active_spark_context._callsite

                # Raise error if there is already a running Spark context
                raise ValueError(
                    "Cannot run multiple SparkContexts at once; "
                    "existing SparkContext(app=%s, master=%s)"
                    " created by %s at %s:%s "
                    % (currentAppName, currentMaster,
                       callsite.function, callsite.file, callsite.linenum))
            else:
                SparkContext._active_spark_context = instance
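The guard above makes gateway creation idempotent: the class-level lock plus the `if not SparkContext._gateway` check ensure launch_gateway runs at most once per Python process, no matter how many contexts are attempted. A simplified sketch of how a constructor would route its arguments through this guard (modeled on PySpark's SparkContext.__init__, not verbatim source):

# Simplified sketch: a constructor delegating to the guard above.
# Argument handling is trimmed; this is not the verbatim PySpark code.
def __init__(self, master=None, appName=None, conf=None, gateway=None):
    SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
    # ...context setup continues only after the JVM gateway exists...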
Example 2: sparkSession
def sparkSession(cls):
    if not hasattr(cls, "spark"):
        # We can't use the SparkSession Builder here, since we need to call
        # Scala side's SmvTestHive.createContext to create the HiveTestContext's
        # SparkSession.
        # So we need to
        #   * Create a java_gateway
        #   * Create a SparkConf using the jgw (since without it SparkContext
        #     will ignore the given conf)
        #   * Create python SparkContext using the SparkConf (so we can
        #     specify the warehouse.dir)
        #   * Create Scala side HiveTestContext SparkSession
        #   * Create python SparkSession
        jgw = launch_gateway(None)
        jvm = jgw.jvm

        import tempfile
        import getpass
        hivedir = "file://{0}/{1}/smv_hive_test".format(
            tempfile.gettempdir(), getpass.getuser())

        sConf = SparkConf(False, _jvm=jvm).set("spark.sql.test", "")\
            .set("spark.sql.hive.metastore.barrierPrefixes",
                 "org.apache.spark.sql.hive.execution.PairSerDe")\
            .set("spark.sql.warehouse.dir", hivedir)\
            .set("spark.ui.enabled", "false")

        sc = SparkContext(master="local[1]", appName="SMV Python Test",
                          conf=sConf, gateway=jgw).getOrCreate()
        jss = sc._jvm.org.apache.spark.sql.hive.test.SmvTestHive.createContext(
            sc._jsc.sc())
        cls.spark = SparkSession(sc, jss.sparkSession())
    return cls.spark
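Note the ordering this example is careful about: the gateway is launched first, and its jvm handle is threaded into SparkConf(False, _jvm=jvm), because a conf not bound to that gateway's JVM would be ignored when the SparkContext is created, as the comment block warns. Only once the Python SparkContext exists can the Scala-side SmvTestHive.createContext wrap it in a Hive-enabled session, which is finally re-wrapped as a Python SparkSession.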
Example 3: _ensure_initialized
def _ensure_initialized(cls, instance=None, gateway=None):
    with SparkContext._lock:
        if not SparkContext._gateway:
            SparkContext._gateway = gateway or launch_gateway()
            SparkContext._jvm = SparkContext._gateway.jvm
            SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile

        if instance:
            if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                raise ValueError("Cannot run multiple SparkContexts at once")
            else:
                SparkContext._active_spark_context = instance
Example 4: _ensure_initialized
def _ensure_initialized(cls, instance=None):
    with SparkContext._lock:
        if not SparkContext._gateway:
            SparkContext._gateway = launch_gateway()
            SparkContext._jvm = SparkContext._gateway.jvm
            SparkContext._writeIteratorToPickleFile = \
                SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
            SparkContext._takePartition = \
                SparkContext._jvm.PythonRDD.takePartition

        if instance:
            if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                raise ValueError("Cannot run multiple SparkContexts at once")
            else:
                SparkContext._active_spark_context = instance
Example 5: _ensure_initialized
def _ensure_initialized(cls, instance=None, gateway=None):
    with SparkContext._lock:
        if not SparkContext._gateway:
            SparkContext._gateway = gateway or launch_gateway()
            SparkContext._jvm = SparkContext._gateway.jvm
            SparkContext._writeToFile = SparkContext._jvm.PythonRDD.writeToFile

        if instance:
            if SparkContext._active_spark_context and SparkContext._active_spark_context != instance:
                currentMaster = SparkContext._active_spark_context.master
                currentAppName = SparkContext._active_spark_context.appName
                callsite = SparkContext._active_spark_context._callsite

                # Raise error if there is already a running Spark context
                raise ValueError(
                    "Cannot run multiple SparkContexts at once; "
                    "existing SparkContext(app=%s, master=%s)"
                    " created by %s at %s:%s "
                    % (currentAppName, currentMaster,
                       callsite.function, callsite.file, callsite.linenum))
            else:
                SparkContext._active_spark_context = instance
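Examples 3, 4, and 5 are variants of the same initialization guard from different PySpark revisions. They differ only in whether a caller-supplied gateway is accepted, which PythonRDD helpers (writeToFile, writeIteratorToPickleFile, takePartition) get cached on the class, and how much diagnostic detail (app name, master, call site) the duplicate-context error carries.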
Example 6: __init__
def __init__(self, master, jobName, sparkHome=None, pyFiles=None,
             environment=None, batchSize=1024):
    """
    Create a new SparkContext.

    @param master: Cluster URL to connect to
           (e.g. mesos://host:port, spark://host:port, local[4]).
    @param jobName: A name for your job, to display on the cluster web UI
    @param sparkHome: Location where Spark is installed on cluster nodes.
    @param pyFiles: Collection of .zip or .py files to send to the cluster
           and add to PYTHONPATH. These can be paths on the local file
           system or HDFS, HTTP, HTTPS, or FTP URLs.
    @param environment: A dictionary of environment variables to set on
           worker nodes.
    @param batchSize: The number of Python objects represented as a single
           Java object. Set 1 to disable batching or -1 to use an
           unlimited batch size.
    """
    with SparkContext._lock:
        if SparkContext._active_spark_context:
            raise ValueError("Cannot run multiple SparkContexts at once")
        else:
            SparkContext._active_spark_context = self
            if not SparkContext._gateway:
                SparkContext._gateway = launch_gateway()
                SparkContext._jvm = SparkContext._gateway.jvm
                SparkContext._writeIteratorToPickleFile = \
                    SparkContext._jvm.PythonRDD.writeIteratorToPickleFile
                SparkContext._takePartition = \
                    SparkContext._jvm.PythonRDD.takePartition
    self.master = master
    self.jobName = jobName
    self.sparkHome = sparkHome or None  # None becomes null in Py4J
    self.environment = environment or {}
    self.batchSize = batchSize  # -1 represents an unlimited batch size

    # Create the Java SparkContext through Py4J
    empty_string_array = self._gateway.new_array(self._jvm.String, 0)
    self._jsc = self._jvm.JavaSparkContext(master, jobName, sparkHome,
                                           empty_string_array)

    # Create a single Accumulator in Java that we'll send all our updates
    # through; they will be passed back to us through a TCP server
    self._accumulatorServer = accumulators._start_update_server()
    (host, port) = self._accumulatorServer.server_address
    self._javaAccumulator = self._jsc.accumulator(
        self._jvm.java.util.ArrayList(),
        self._jvm.PythonAccumulatorParam(host, port))

    self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python')

    # Broadcast's __reduce__ method stores Broadcast instances here.
    # This allows other code to determine which Broadcast instances have
    # been pickled, so it can determine which Java broadcast objects to
    # send.
    self._pickled_broadcast_vars = set()

    # Deploy any code dependencies specified in the constructor
    for path in (pyFiles or []):
        self.addPyFile(path)
    SparkFiles._sc = self
    sys.path.append(SparkFiles.getRootDirectory())

    # Create a temporary directory inside spark.local.dir:
    local_dir = self._jvm.spark.Utils.getLocalDir()
    self._temp_dir = \
        self._jvm.spark.Utils.createTempDir(local_dir).getAbsolutePath()
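For reference, a hypothetical call into this legacy constructor; the master URL, job name, and paths below are illustrative values, not ones taken from the original code:

# Hypothetical invocation of the legacy constructor above.
# All argument values here are illustrative only.
sc = SparkContext(master="local[4]", jobName="word-count",
                  sparkHome="/opt/spark",   # assumed install path on workers
                  pyFiles=["deps.zip"],     # shipped and added to PYTHONPATH
                  batchSize=1024)           # Python objects per Java batch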
Example 7: setUpClass
def setUpClass(cls):
    gateway = launch_gateway(SparkConf())
    cls._jvm = gateway.jvm
    cls.longMessage = True
    random.seed(42)
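Here launch_gateway(SparkConf()) gives a test suite a JVM handle without building a full SparkContext. A minimal sketch of the surrounding test class; only setUpClass appears in the original example, so the test method and class name are assumptions for illustration:

# Minimal sketch of a unittest class around the setUpClass above.
import random
import unittest

from pyspark import SparkConf
from pyspark.java_gateway import launch_gateway

class JvmGatewayTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        gateway = launch_gateway(SparkConf())
        cls._jvm = gateway.jvm
        cls.longMessage = True
        random.seed(42)

    def test_jvm_is_reachable(self):
        # Round-trip a value through the JVM to confirm the gateway works.
        self.assertEqual(self._jvm.java.lang.Math.abs(-3), 3)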
Example 8: __init__
def __init__(self, arglist, _sparkSession, py_module_hotload=True):
    self.smvHome = os.environ.get("SMV_HOME")
    if (self.smvHome is None):
        raise SmvRuntimeError("SMV_HOME env variable not set!")

    self.sparkSession = _sparkSession

    if (self.sparkSession is not None):
        sc = self.sparkSession.sparkContext
        sc.setLogLevel("ERROR")

        self.sc = sc
        self.sqlContext = self.sparkSession._wrapped
        self._jvm = sc._jvm

        self.j_smvPyClient = self._jvm.org.tresamigos.smv.python.SmvPyClientFactory.init(
            self.sparkSession._jsparkSession)
        self.j_smvApp = self.j_smvPyClient.j_smvApp()
    else:
        _gw = launch_gateway(None)
        self._jvm = _gw.jvm

    self.py_module_hotload = py_module_hotload

    java_import(self._jvm, "org.tresamigos.smv.ColumnHelper")
    java_import(self._jvm, "org.tresamigos.smv.SmvDFHelper")
    java_import(self._jvm, "org.tresamigos.smv.dqm.*")
    java_import(self._jvm, "org.tresamigos.smv.panel.*")
    java_import(self._jvm, "org.tresamigos.smv.python.SmvPythonHelper")
    java_import(self._jvm, "org.tresamigos.smv.SmvHDFS")
    java_import(self._jvm, "org.tresamigos.smv.DfCreator")

    self.smvSchemaObj = self._jvm.SmvPythonHelper.getSmvSchema()

    self.py_smvconf = SmvConfig(arglist)

    # configure spark sql params
    if (self.sparkSession is not None):
        for k, v in self.py_smvconf.spark_sql_props().items():
            self.sqlContext.setConf(k, v)

    # issue #429: set application name from smv config
    if (self.sparkSession is not None):
        sc._conf.setAppName(self.appName())

    # CmdLine is static, so it can be an attribute
    cl = self.py_smvconf.cmdline
    self.cmd_line = namedtuple("CmdLine", cl.keys())(*cl.values())

    # shortcut is meant for internal use only
    self.dsm = DataSetMgr(self._jvm, self.py_smvconf)

    # computed df cache, keyed by m.versioned_fqn
    self.data_cache = {}

    # AFTER app is available but BEFORE stages, use the dynamically
    # configured app dir to set the source path and library path
    self.prependDefaultDirs()

    self.repoFactory = DataSetRepoFactory(self)
    self.dsm.register(self.repoFactory)

    # provider cache, keyed by providers' fqn
    self.provider_cache = {}
    self.refresh_provider_cache()

    # Initialize DataFrame and Column with helper methods
    smv.helpers.init_helpers()
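In this last example, launch_gateway(None) acts as a JVM-only fallback: when no SparkSession is supplied, SMV still obtains a jvm handle for the java_import calls and Scala helpers above without starting a full Spark application.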