This page collects typical usage examples of the Python method pyspark.SparkContext.getOrCreate. If you have been wondering how SparkContext.getOrCreate is used in practice, the curated examples below should help. You can also explore further usage examples of the class this method belongs to, pyspark.SparkContext.
The 15 code examples of SparkContext.getOrCreate shown below are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
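Before diving into the examples, here is a minimal sketch of the pattern they all share: getOrCreate returns the already-running SparkContext if one exists (in which case any configuration passed in is ignored), and otherwise creates a new one. The app name below is illustrative.

from pyspark import SparkConf, SparkContext

conf = SparkConf().setAppName("getOrCreate-demo")
sc1 = SparkContext.getOrCreate(conf)  # creates a context on the first call
sc2 = SparkContext.getOrCreate()      # returns the same running context
assert sc1 is sc2
sc1.stop()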
Example 1: main
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def main():
    """Run the belief propagation algorithm for an example problem."""
    # set up the Spark and SQL contexts
    conf = SparkConf().setAppName("BeliefPropagation example")
    sc = SparkContext.getOrCreate(conf)
    sql = SQLContext.getOrCreate(sc)
    with SuppressSparkLogs(sc):
        # create a 3 x 3 grid Ising graphical model g
        g = graphframes.examples.Graphs(sql).gridIsingModel(3)
        print("Original Ising model:")
        g.vertices.show()
        g.edges.show()
        # run belief propagation (BP) for 5 iterations
        numIter = 5
        results = BeliefPropagation.runBPwithGraphFrames(g, numIter)
        # display the resulting beliefs
        beliefs = results.vertices.select('id', 'belief')
        print("Done with BP. Final beliefs after {} iterations:".format(numIter))
        beliefs.show()
    sc.stop()
Example 2: optimize
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def optimize(self):
    """
    Do an optimization.
    """
    jmodel = callJavaFunc(SparkContext.getOrCreate(), self.value.optimize)
    from nn.layer import Model
    return Model.of(jmodel)
Example 3: parse_raw_wikidata
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def parse_raw_wikidata(output):
    spark_conf = SparkConf().setAppName('QB Wikidata').setMaster(QB_SPARK_MASTER)
    sc = SparkContext.getOrCreate(spark_conf)  # type: SparkContext
    wikidata = sc.textFile('s3a://entilzha-us-west-2/wikidata/wikidata-20170306-all.json')

    def parse_line(line):
        if len(line) == 0:
            return []
        if line[0] == '[' or line[0] == ']':
            return []
        elif line.endswith(','):
            return [json.loads(line[:-1])]
        else:
            return [json.loads(line)]

    parsed_wikidata = wikidata.flatMap(parse_line).cache()
    property_map = extract_property_map(parsed_wikidata)
    b_property_map = sc.broadcast(property_map)

    wikidata_items = parsed_wikidata.filter(lambda d: d['type'] == 'item').cache()
    parsed_wikidata.unpersist()
    item_page_map = extract_item_page_map(wikidata_items)
    b_item_page_map = sc.broadcast(item_page_map)

    parsed_item_map = extract_items(wikidata_items, b_property_map, b_item_page_map)

    with open(output, 'wb') as f:
        pickle.dump({
            'parsed_item_map': parsed_item_map,
            'item_page_map': item_page_map,
            'property_map': property_map
        }, f)

    sc.stop()
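The parse_line helper above assumes the layout of the Wikidata JSON dumps, which are a single large JSON array with one entity per line, roughly (entity fields shown here are illustrative):

[
{"type": "item", "id": "Q1", ...},
{"type": "property", "id": "P31", ...}
]

This is why it skips the bracket lines and strips the trailing comma before calling json.loads.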
Example 4: _getScaleHintList
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _getScaleHintList():
    featurizer = SparkContext.getOrCreate()._jvm.com.databricks.sparkdl.DeepImageFeaturizer
    if isinstance(featurizer, py4j.java_gateway.JavaPackage):
        # DeepImageFeaturizer is not on the JVM classpath, possibly running without Spark;
        # instead of failing, return an empty list
        return []
    return dict(featurizer.scaleHintsJava()).keys()
Example 5: readImages
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImages(imageDirectory, numPartition=None):
    """
    Read a directory of images (or a single image) into a DataFrame.

    :param imageDirectory: str, file path.
    :param numPartition: int, number of partitions to use for reading files.
    :return: DataFrame, with columns: (filepath: str, image: imageSchema).
    """
    return _readImages(imageDirectory, numPartition, SparkContext.getOrCreate())
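A hypothetical call, with an illustrative directory path:

df = readImages("/data/images", numPartition=16)
df.select("filepath").show()

Per the docstring, each row carries the file path and the decoded image struct.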
Example 6: readImagesWithCustomFn
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImagesWithCustomFn(path, decode_f, numPartition=None):
    """
    Read a directory of images (or a single image) into a DataFrame using a custom library to
    decode the images.

    :param path: str, file path.
    :param decode_f: function to decode the raw bytes into an array compatible with one of the
        supported OpenCV modes. See @imageIO.PIL_decode for an example.
    :param numPartition: [optional] int, number of partitions to use for reading files.
    :return: DataFrame with schema == ImageSchema.imageSchema.
    """
    return _readImagesWithCustomFn(path, decode_f, numPartition, sc=SparkContext.getOrCreate())
Example 7: load_spark_context
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def load_spark_context(application_name=None):
    if application_name is None:
        application_name = __name__
    conf = SparkConf().setAppName(application_name)
    sc = SparkContext.getOrCreate(conf=conf)
    sql_context = SQLContext(sc)
    # Silence the JVM-side log4j loggers if desired:
    # logger = sc._jvm.org.apache.log4j
    # logger.LogManager.getLogger("org").setLevel(logger.Level.ERROR)
    # logger.LogManager.getLogger("akka").setLevel(logger.Level.ERROR)
    return sc, sql_context
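Usage is then a simple unpacking, with an illustrative application name:

sc, sql_context = load_spark_context("my-etl-job")

If a SparkContext is already running, getOrCreate returns it and the conf passed here has no effect.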
Example 8: callBigDlFunc
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def callBigDlFunc(bigdl_type, name, *args):
    """Call an API in PythonBigDL."""
    sc = SparkContext.getOrCreate()
    if bigdl_type == "float":
        api = getattr(
            sc._jvm.com.intel.analytics.bigdl.python.api.PythonBigDL.ofFloat(),
            name)
    elif bigdl_type == "double":
        api = getattr(
            sc._jvm.com.intel.analytics.bigdl.python.api.PythonBigDL.ofDouble(),
            name)
    else:
        raise Exception("Unsupported bigdl_type: %s" % bigdl_type)
    return callJavaFunc(sc, api, *args)
Example 9: __init__
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def __init__(self, layers, bias=1.0, act_func=None, act_func_prime=None):
    if act_func is None:
        self.act_func = sigmoid
        self.act_func_prime = sigmoid_prime
    else:
        self.act_func = act_func
        self.act_func_prime = act_func_prime
    self.layers = layers
    self.bias = bias
    self.spark_context = SparkContext.getOrCreate()
    log4jLogger = self.spark_context._jvm.org.apache.log4j
    self.logger = log4jLogger.LogManager.getLogger(__name__)
Example 10: installPackage
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def installPackage(self, artifact, base=None, sc=None):
    artifact = self._toArtifact(artifact)
    # Test whether we already have a version installed
    res = self.fetchArtifact(artifact)
    fileLoc = None
    if res:
        fileLoc = res[1]
        print("Package already installed: {0}".format(str(artifact)))
    else:
        # download the package
        art = [artifact]

        def _doDownload(d):
            artifact = art[0]
            if not artifact.version or artifact.version == '0':
                artifact.version = d.resolver._find_latest_version_available(artifact)
            fileLoc = artifact.get_filename(self.DOWNLOAD_DIR)
            if os.path.isfile(fileLoc):
                os.remove(fileLoc)
            results = d.download(artifact, filename=self.DOWNLOAD_DIR)
            if not results[1]:
                raise Exception("Error downloading package {0}".format(str(artifact)))
            else:
                artifact = results[0]
            print("Artifact downloaded successfully {0}".format(str(artifact)))
            printEx("Please restart Kernel to complete installation of the new package", PrintColors.RED)
            fileLoc = self.storeArtifact(artifact, base)
            return fileLoc

        try:
            fileLoc = _doDownload(downloader.Downloader(base) if base is not None else downloader.Downloader())
        except RequestException as e:
            # try another repository base
            try:
                fileLoc = _doDownload(downloader.Downloader("http://dl.bintray.com/spark-packages/maven"))
            except RequestException as e:
                print("Unable to install artifact {0}".format(e.msg))
                raise
        except:
            print(str(sys.exc_info()[1]))
            raise

    if sc is None:
        sc = SparkContext.getOrCreate()
    if sc:
        # convert to a file URI on Windows
        if platform.system() == 'Windows':
            fileLoc = "file://" + urllib.pathname2url(fileLoc)
        sc.addPyFile(fileLoc)
    return artifact
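Assuming this method lives on a package-manager style class (the class name and Maven coordinates below are illustrative), a call might look like:

pm = PackageManager()
pm.installPackage("graphframes:graphframes:0.5.0-spark2.1-s_2.11")

The downloaded artifact is then shipped to the executors via sc.addPyFile.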
Example 11: isImage
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def isImage(df, column):
    """
    Returns True if the column contains images.

    Args:
        df (DataFrame): The DataFrame to be processed
        column (str): The name of the column being inspected

    Returns:
        bool: True if the column is an image column
    """
    jvm = SparkContext.getOrCreate()._jvm
    schema = jvm.com.microsoft.ml.spark.schema.ImageSchema
    return schema.isImage(df._jdf, column)
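A quick hypothetical check, assuming a DataFrame df with an "image" column:

if isImage(df, "image"):
    print("column holds images in ImageSchema format")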
Example 12: toPython
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def toPython(entity):
    from py4j.java_gateway import JavaObject
    if entity is None or not isinstance(entity, JavaObject):
        return entity
    clazz = entity.getClass().getName()
    if clazz == "org.apache.spark.sql.Dataset":
        entity = entity.toDF()
        clazz = "org.apache.spark.sql.DataFrame"
    if clazz == "org.apache.spark.sql.DataFrame":
        from pyspark.sql import DataFrame, SQLContext
        from pyspark import SparkContext
        entity = DataFrame(entity, SQLContext(SparkContext.getOrCreate(), entity.sqlContext()))
    return entity
Example 13: readImages
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImages(sparkSession, path, recursive=False, sampleRatio=1.0, inspectZip=True):
    """
    Reads the directory of images from the local or remote (WASB) source.
    This function is attached to the SparkSession class.
    Example: spark.readImages(path, recursive, ...)

    Args:
        sparkSession (SparkSession): Existing sparkSession
        path (str): Path to the image directory
        recursive (bool): Recursive search flag
        sampleRatio (double): Fraction of the images loaded
        inspectZip (bool): Whether to look for images inside zip files

    Returns:
        DataFrame: DataFrame with a single column of "images", see imageSchema for details
    """
    ctx = SparkContext.getOrCreate()
    reader = ctx._jvm.com.microsoft.ml.spark.ImageReader
    sql_ctx = pyspark.SQLContext.getOrCreate(ctx)
    jsession = sql_ctx.sparkSession._jsparkSession
    jresult = reader.read(path, recursive, jsession, float(sampleRatio), inspectZip)
    return DataFrame(jresult, sql_ctx)
Example 14: train
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def train(filename):
    global model
    sc = SparkContext.getOrCreate()
    user = sc.textFile(filename)
    ratings = user.map(lambda l: l.split("\t")).map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
    # split into training & test sets
    (training, test) = ratings.randomSplit([0.8, 0.2])
    testdata = test.map(lambda p: (p[0], p[1]))
    rank = 10
    numIterations = 10
    # train the model
    model = ALS.train(training, rank, numIterations)
    # validate the model
    predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
    ratesAndPreds = test.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
    MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1]) ** 2).mean()
    rmse = sqrt(MSE)
    print(rmse)
    print("training done")
    return "OK"
Example 15: test_active_session_with_None_and_not_None_context
# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_active_session_with_None_and_not_None_context(self):
    from pyspark.context import SparkContext
    from pyspark.conf import SparkConf
    sc = None
    session = None
    try:
        sc = SparkContext._active_spark_context
        self.assertEqual(sc, None)
        activeSession = SparkSession.getActiveSession()
        self.assertEqual(activeSession, None)
        sparkConf = SparkConf()
        sc = SparkContext.getOrCreate(sparkConf)
        activeSession = sc._jvm.SparkSession.getActiveSession()
        self.assertFalse(activeSession.isDefined())
        session = SparkSession(sc)
        activeSession = sc._jvm.SparkSession.getActiveSession()
        self.assertTrue(activeSession.isDefined())
        activeSession2 = SparkSession.getActiveSession()
        self.assertNotEqual(activeSession2, None)
    finally:
        if session is not None:
            session.stop()
        if sc is not None:
            sc.stop()