

Python SparkContext.getOrCreate Method Code Examples

This article collects typical code examples of the pyspark.SparkContext.getOrCreate method in Python. If you are wondering what SparkContext.getOrCreate does, how to call it, or are simply looking for usage examples, the hand-picked snippets below may help. You can also explore further usage examples of pyspark.SparkContext, the class this method belongs to.


The sections below present 15 code examples of SparkContext.getOrCreate, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps surface better Python code examples.
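Before the individual examples, a minimal sketch of the basic pattern may help: SparkContext.getOrCreate returns the already-active SparkContext if one exists, and otherwise creates one from an optional SparkConf. The application name and master URL below are illustrative placeholders, not taken from any of the cited projects.

from pyspark import SparkConf, SparkContext

# Optional configuration; app name and master are illustrative placeholders.
conf = SparkConf().setAppName("getOrCreate-demo").setMaster("local[*]")

# Returns the active SparkContext if one exists, otherwise creates one from conf.
sc = SparkContext.getOrCreate(conf=conf)

# A second call returns the same context instead of creating another one.
assert SparkContext.getOrCreate() is sc

rdd = sc.parallelize(range(10))
print(rdd.sum())  # 45

sc.stop()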

Example 1: main

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def main():
    """Run the belief propagation algorithm for an example problem."""
    # setup context
    conf = SparkConf().setAppName("BeliefPropagation example")
    sc = SparkContext.getOrCreate(conf)
    sql = SQLContext.getOrCreate(sc)

    with SuppressSparkLogs(sc):

        # create graphical model g of size 3 x 3
        g = graphframes.examples.Graphs(sql).gridIsingModel(3)
        print("Original Ising model:")
        g.vertices.show()
        g.edges.show()

        # run BP for 5 iterations
        numIter = 5
        results = BeliefPropagation.runBPwithGraphFrames(g, numIter)

        # display beliefs
        beliefs = results.vertices.select('id', 'belief')
        print("Done with BP. Final beliefs after {} iterations:".format(numIter))
        beliefs.show()

    sc.stop()
Developer: mengxr, Project: graphframes, Lines of code: 27, Source: belief_propagation.py

Example 2: optimize

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
 def optimize(self):
     """
     Do an optimization. 
     """
     jmodel = callJavaFunc(SparkContext.getOrCreate(), self.value.optimize)
     from nn.layer import Model
     return Model.of(jmodel)
Developer: Kim-Seongjung, Project: BigDL, Lines of code: 9, Source: optimizer.py

Example 3: parse_raw_wikidata

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def parse_raw_wikidata(output):
    spark_conf = SparkConf().setAppName('QB Wikidata').setMaster(QB_SPARK_MASTER)
    sc = SparkContext.getOrCreate(spark_conf)  # type: SparkContext

    wikidata = sc.textFile('s3a://entilzha-us-west-2/wikidata/wikidata-20170306-all.json')

    def parse_line(line):
        if len(line) == 0:
            return []
        if line[0] == '[' or line[0] == ']':
            return []
        elif line.endswith(','):
            return [json.loads(line[:-1])]
        else:
            return [json.loads(line)]

    parsed_wikidata = wikidata.flatMap(parse_line).cache()
    property_map = extract_property_map(parsed_wikidata)
    b_property_map = sc.broadcast(property_map)

    wikidata_items = parsed_wikidata.filter(lambda d: d['type'] == 'item').cache()
    parsed_wikidata.unpersist()
    item_page_map = extract_item_page_map(wikidata_items)
    b_item_page_map = sc.broadcast(item_page_map)

    parsed_item_map = extract_items(wikidata_items, b_property_map, b_item_page_map)

    with open(output, 'wb') as f:
        pickle.dump({
            'parsed_item_map': parsed_item_map,
            'item_page_map': item_page_map,
            'property_map': property_map
        }, f)

    sc.stop()
Developer: Pinafore, Project: qb, Lines of code: 37, Source: wikidata.py

Example 4: _getScaleHintList

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _getScaleHintList():
    featurizer = SparkContext.getOrCreate()._jvm.com.databricks.sparkdl.DeepImageFeaturizer
    if isinstance(featurizer, py4j.java_gateway.JavaPackage):
        # do not see DeepImageFeaturizer, possibly running without spark
        # instead of failing return empty list
        return []
    return dict(featurizer.scaleHintsJava()).keys()
Developer: pawanrana, Project: spark-deep-learning, Lines of code: 9, Source: named_image.py

Example 5: readImages

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImages(imageDirectory, numPartition=None):
    """
    Read a directory of images (or a single image) into a DataFrame.

    :param imageDirectory: str, file path.
    :param numPartition: int, number or partitions to use for reading files.
    :return: DataFrame, with columns: (filepath: str, image: imageSchema).
    """
    return _readImages(imageDirectory, numPartition, SparkContext.getOrCreate())
Developer: mateiz, Project: spark-deep-learning, Lines of code: 12, Source: imageIO.py

Example 6: readImagesWithCustomFn

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImagesWithCustomFn(path, decode_f, numPartition=None):
    """
    Read a directory of images (or a single image) into a DataFrame using a custom library to
    decode the images.

    :param path: str, file path.
    :param decode_f: function to decode the raw bytes into an array compatible with one of the
        supported OpenCv modes. see @imageIO.PIL_decode for an example.
    :param numPartition: [optional] int, number or partitions to use for reading files.
    :return: DataFrame with schema == ImageSchema.imageSchema.
    """
    return _readImagesWithCustomFn(path, decode_f, numPartition, sc=SparkContext.getOrCreate())
Developer: pawanrana, Project: spark-deep-learning, Lines of code: 14, Source: imageIO.py

Example 7: load_spark_context

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def load_spark_context(application_name=None):
    if application_name is None:
        application_name = __name__

    conf = SparkConf().setAppName(application_name)
    sc = SparkContext.getOrCreate(conf=conf)
    sql_context = SQLContext(sc)

    # Close logger
    # logger = sc._jvm.org.apache.log4j
    # logger.LogManager.getLogger("org").setLevel(logger.Level.ERROR)
    # logger.LogManager.getLogger("akka").setLevel(logger.Level.ERROR)
    return sc, sql_context
Developer: WarnWang, Project: Dissertation, Lines of code: 15, Source: __init__.py

Example 8: callBigDlFunc

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def callBigDlFunc(bigdl_type, name, *args):
    """ Call API in PythonBigDL """
    sc = SparkContext.getOrCreate()
    if bigdl_type == "float":
        api = getattr(
            sc._jvm.com.intel.analytics.bigdl.python.api.PythonBigDL.ofFloat(),
            name)
    elif bigdl_type == "double":
        api = getattr(
            sc._jvm.com.intel.analytics.bigdl.python.api.PythonBigDL.ofDouble(),
            name)
    else:
        raise Exception("Not supported bigdl_type: %s" % bigdl_type)
    return callJavaFunc(sc, api, *args)
Developer: Kim-Seongjung, Project: BigDL, Lines of code: 16, Source: common.py

Example 9: __init__

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
    def __init__(self, layers, bias=1.0, act_func=None, act_func_prime=None):
        if act_func is None:
            self.act_func = sigmoid
            self.act_func_prime = sigmoid_prime
        else:
            self.act_func = act_func
            self.act_func_prime = act_func_prime
        self.layers = layers

        self.bias = bias
        self.spark_context = SparkContext.getOrCreate()

        log4jLogger = self.spark_context._jvm.org.apache.log4j
        self.logger = log4jLogger.LogManager.getLogger(__name__)
Developer: WarnWang, Project: Dissertation, Lines of code: 16, Source: distributed_neural_network.py

Example 10: installPackage

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
 def installPackage(self, artifact, base=None, sc=None):
     artifact = self._toArtifact(artifact)
     #Test if we already have a version installed
     res=self.fetchArtifact(artifact)
     fileLoc=None
     if res:
         fileLoc=res[1]
         print("Package already installed: {0}".format(str(artifact)))
     else:
         #download package
         art=[artifact]
         def _doDownload(d):
             artifact=art[0]
             if not artifact.version or artifact.version=='0':
                 artifact.version = d.resolver._find_latest_version_available(artifact)
             fileLoc = artifact.get_filename(self.DOWNLOAD_DIR)
             if os.path.isfile(fileLoc):
                 os.remove(fileLoc)
             results = d.download(artifact,filename=self.DOWNLOAD_DIR)
             if not results[1]:
                 raise Exception("Error downloading package {0}".format(str(artifact)))
             else:
                 artifact=results[0]
                 print("Artifact downloaded successfully {0}".format(str(artifact)))
                 printEx("Please restart Kernel to complete installation of the new package",PrintColors.RED)
             fileLoc=self.storeArtifact(artifact,base)
             return fileLoc
         
         try:
             fileLoc=_doDownload(downloader.Downloader(base) if base is not None else downloader.Downloader())
         except RequestException as e:
             #try another base
             try:
                 fileLoc=_doDownload(downloader.Downloader("http://dl.bintray.com/spark-packages/maven"))
             except RequestException as e:
                 print("Unable to install artifact {0}".format(e.msg))
                 raise
         except:
             print(str(sys.exc_info()[1]))
             raise
     if sc is None:
         sc = SparkContext.getOrCreate()
         
     if sc:
         #convert to file uri for windows platform
         if platform.system()=='Windows':
             fileLoc="file://" + urllib.pathname2url(fileLoc)
         sc.addPyFile(fileLoc)
         
     return artifact
Developer: ygoverdhan, Project: pixiedust, Lines of code: 52, Source: packageManager.py

Example 11: isImage

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def isImage(df, column):
    """
    Returns True if the column contains images

    Args:
        df (DataFrame): The DataFrame to be processed
        column  (str): The name of the column being inspected

    Returns:
        bool: True if the column is an image column
    """

    jvm = SparkContext.getOrCreate()._jvm
    schema = jvm.com.microsoft.ml.spark.schema.ImageSchema
    return schema.isImage(df._jdf, column)
Developer: donghaima, Project: mmlspark, Lines of code: 17, Source: ImageReader.py

Example 12: toPython

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
        def toPython(entity):
            from py4j.java_gateway import JavaObject
            if entity is None or not isinstance(entity, JavaObject):
                return entity

            clazz = entity.getClass().getName()
            if clazz == "org.apache.spark.sql.Dataset":
                entity = entity.toDF()
                clazz = "org.apache.spark.sql.DataFrame"

            if clazz == "org.apache.spark.sql.DataFrame":
                from pyspark.sql import DataFrame, SQLContext
                from pyspark import SparkContext
                entity = DataFrame(entity, SQLContext(SparkContext.getOrCreate(), entity.sqlContext()))

            return entity
Developer: ibm-cds-labs, Project: pixiedust, Lines of code: 18, Source: __init__.py

Example 13: readImages

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImages(sparkSession, path, recursive = False, sampleRatio = 1.0, inspectZip = True):
    """
    Reads the directory of images from the local or remote (WASB) source.
    This function is attached to SparkSession class.
    Example: spark.readImages(path, recursive, ...)

    Args:
        sparkSession (SparkSession): Existing sparkSession
        path (str): Path to the image directory
        recursive (bool): Recursive search flag
        sampleRatio (double): Fraction of the images loaded

    Returns:
        DataFrame: DataFrame with a single column of "images", see imageSchema for details
    """
    ctx = SparkContext.getOrCreate()
    reader = ctx._jvm.com.microsoft.ml.spark.ImageReader
    sql_ctx = pyspark.SQLContext.getOrCreate(ctx)
    jsession = sql_ctx.sparkSession._jsparkSession
    jresult = reader.read(path, recursive, jsession, float(sampleRatio), inspectZip)
    return DataFrame(jresult, sql_ctx)
Developer: donghaima, Project: mmlspark, Lines of code: 23, Source: ImageReader.py

Example 14: train

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def train(filename):
    global model
    sc=SparkContext.getOrCreate()
    user = sc.textFile(filename)

    ratings=user.map(lambda l:l.split("\t")).map(lambda l:Rating(int(l[0]),int(l[1]),float(l[2])))
    #split into training & test 
    (training, test) = ratings.randomSplit([0.8, 0.2])
    testdata = test.map(lambda p: (p[0], p[1]))
    rank = 10
    numIterations = 10

    #training the model
    model = ALS.train(training, rank, numIterations)

    #validating the model
    predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
    ratesAndPreds = test.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
    MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
    rmse = sqrt(MSE)
    print(rmse)
    print("training done")
    return "OK"
Developer: naresh242, Project: tools, Lines of code: 25, Source: recommendation.py
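As a hedged follow-up to Example 14: once train() has run, the trained ALS model can be queried for predictions and recommendations. The user and product IDs below are hypothetical placeholders and assume they occur in the ratings file.

# Assumes train(filename) above has been called and populated the global `model`.
# User ID 1 and product ID 2 are illustrative placeholders.
print(model.predict(1, 2))                # predicted rating of product 2 by user 1
for r in model.recommendProducts(1, 5):   # top-5 product recommendations for user 1
    print(r.user, r.product, r.rating)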

Example 15: test_active_session_with_None_and_not_None_context

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
 def test_active_session_with_None_and_not_None_context(self):
     from pyspark.context import SparkContext
     from pyspark.conf import SparkConf
     sc = None
     session = None
     try:
         sc = SparkContext._active_spark_context
         self.assertEqual(sc, None)
         activeSession = SparkSession.getActiveSession()
         self.assertEqual(activeSession, None)
         sparkConf = SparkConf()
         sc = SparkContext.getOrCreate(sparkConf)
         activeSession = sc._jvm.SparkSession.getActiveSession()
         self.assertFalse(activeSession.isDefined())
         session = SparkSession(sc)
         activeSession = sc._jvm.SparkSession.getActiveSession()
         self.assertTrue(activeSession.isDefined())
         activeSession2 = SparkSession.getActiveSession()
         self.assertNotEqual(activeSession2, None)
     finally:
         if session is not None:
             session.stop()
         if sc is not None:
             sc.stop()
Developer: Brett-A, Project: spark, Lines of code: 26, Source: test_session.py
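A related sketch, not taken from any of the cited projects: since Spark 2.x the usual entry point is SparkSession.builder.getOrCreate(), and the underlying SparkContext is reachable as spark.sparkContext, the same object SparkContext.getOrCreate() would return for the active context. The application name is an illustrative placeholder.

from pyspark.sql import SparkSession

# Returns the active session if one exists, otherwise builds a new one.
spark = SparkSession.builder.appName("getOrCreate-demo").getOrCreate()

# The session wraps a SparkContext; SparkContext.getOrCreate() returns this same object.
sc = spark.sparkContext
print(sc.appName)

spark.stop()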


Note: The pyspark.SparkContext.getOrCreate examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and the source code copyright belongs to the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.