

Python SparkContext.getOrCreate Method Code Examples

This article collects typical usage examples of the Python method pyspark.SparkContext.getOrCreate. If you are unsure what SparkContext.getOrCreate does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of pyspark.SparkContext, the class this method belongs to.


The sections below present 15 code examples of SparkContext.getOrCreate drawn from open-source projects, ordered by popularity by default.
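Before the project examples, here is a minimal, self-contained sketch of the basic pattern the snippets share (the application name and master URL are placeholder values, not taken from any of the projects below): SparkContext.getOrCreate() returns the currently active SparkContext if one exists, and otherwise creates a new one from the supplied SparkConf.

from pyspark import SparkConf, SparkContext

# Illustrative configuration; 'local[*]' runs Spark locally on all available cores.
conf = SparkConf().setAppName('getOrCreate-demo').setMaster('local[*]')

# Reuse the active SparkContext if one exists, otherwise create it from conf.
sc = SparkContext.getOrCreate(conf)

rdd = sc.parallelize(range(10))
print(rdd.sum())  # 45

sc.stop()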

Example 1: create_spark_context

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def create_spark_context(app_name="Quiz Bowl", configs=None) -> SparkContext:
    if QB_SPARK_MASTER != "":
        log.info("Spark master is %s" % QB_SPARK_MASTER)
        spark_conf = SparkConf()\
            .set('spark.rpc.message.maxSize', 300)\
            .setAppName(app_name)\
            .setMaster(QB_SPARK_MASTER)
    else:
        spark_conf = SparkConf()\
            .set('spark.rpc.message.maxSize', 300)\
            .setAppName(app_name)
    if configs is not None:
        for key, value in configs:
            if key in ('spark.executor.cores', 'spark.max.cores'):
                if value > QB_MAX_CORES:
                    log.info('Requested {r_cores} cores when the machine only has {n_cores} cores, reducing number of '
                             'cores to {n_cores}'.format(r_cores=value, n_cores=QB_MAX_CORES))
                    value = QB_MAX_CORES
            spark_conf = spark_conf.set(key, value)
    return SparkContext.getOrCreate(spark_conf) 
Developer: Pinafore, Project: qb, Lines: 22, Source: spark.py

Example 2: parallelize

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def parallelize(self,
                    data: Iterable,
                    name,
                    namespace,
                    partition,
                    include_key,
                    persistent,
                    chunk_size,
                    in_place_computing,
                    create_if_missing,
                    error_if_exist):
        _iter = data if include_key else enumerate(data)
        from pyspark import SparkContext
        rdd = SparkContext.getOrCreate().parallelize(_iter, partition)
        rdd = util.materialize(rdd)
        if namespace is None:
            namespace = self._session_id
        return RDDTable.from_rdd(rdd=rdd, job_id=self._session_id, namespace=namespace, name=name) 
Developer: FederatedAI, Project: FATE, Lines: 20, Source: session.py

Example 3: test_lf_applier_spark_preprocessor_memoized

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark_preprocessor_memoized(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)

        @preprocessor(memoize=True)
        def square_memoize(x: DataPoint) -> DataPoint:
            return Row(num=x.num, num_squared=x.num ** 2)

        @labeling_function(pre=[square_memoize])
        def fp_memoized(x: DataPoint) -> int:
            return 0 if x.num_squared > 42 else -1

        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, fp_memoized])
        L = applier.apply(rdd)
        np.testing.assert_equal(L, L_PREPROCESS_EXPECTED) 
Developer: snorkel-team, Project: snorkel, Lines: 19, Source: test_spark.py

Example 4: _load_pyfunc

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _load_pyfunc(path):
    """
    Load PyFunc implementation. Called by ``pyfunc.load_pyfunc``.

    :param path: Local filesystem path to the MLflow Model with the ``spark`` flavor.
    """
    # NOTE: The getOrCreate() call below may change settings of the active session which we do not
    # intend to do here. In particular, setting master to local[1] can break distributed clusters.
    # To avoid this problem, we explicitly check for an active session. This is not ideal but there
    # is no good workaround at the moment.
    import pyspark

    spark = pyspark.sql.SparkSession._instantiatedSession
    if spark is None:
        spark = pyspark.sql.SparkSession.builder.config("spark.python.worker.reuse", True) \
            .master("local[1]").getOrCreate()
    return _PyFuncModelWrapper(spark, _load_model(model_uri=path)) 
Developer: mlflow, Project: mlflow, Lines: 19, Source: spark.py

Example 5: parse_raw_wikidata

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def parse_raw_wikidata(output):
    spark_conf = SparkConf().setAppName('QB Wikidata').setMaster(QB_SPARK_MASTER)
    sc = SparkContext.getOrCreate(spark_conf)  # type: SparkContext

    wikidata = sc.textFile('s3a://entilzha-us-west-2/wikidata/wikidata-20170306-all.json')

    def parse_line(line):
        if len(line) == 0:
            return []
        if line[0] == '[' or line[0] == ']':
            return []
        elif line.endswith(','):
            return [json.loads(line[:-1])]
        else:
            return [json.loads(line)]

    parsed_wikidata = wikidata.flatMap(parse_line).cache()
    property_map = extract_property_map(parsed_wikidata)
    b_property_map = sc.broadcast(property_map)

    wikidata_items = parsed_wikidata.filter(lambda d: d['type'] == 'item').cache()
    parsed_wikidata.unpersist()
    item_page_map = extract_item_page_map(wikidata_items)
    b_item_page_map = sc.broadcast(item_page_map)

    parsed_item_map = extract_items(wikidata_items, b_property_map, b_item_page_map)

    with open(output, 'wb') as f:
        pickle.dump({
            'parsed_item_map': parsed_item_map,
            'item_page_map': item_page_map,
            'property_map': property_map
        }, f)

    sc.stop() 
Developer: Pinafore, Project: qb, Lines: 37, Source: wikidata.py

Example 6: create_spark_session

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def create_spark_session(app_name='Quiz Bowl', configs=None) -> SparkSession:
    create_spark_context(app_name=app_name, configs=configs)
    return SparkSession.builder.getOrCreate() 
Developer: Pinafore, Project: qb, Lines: 5, Source: spark.py

Example 7: _getScaleHintList

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _getScaleHintList():
    featurizer = SparkContext.getOrCreate()._jvm.com.databricks.sparkdl.DeepImageFeaturizer
    if isinstance(featurizer, py4j.java_gateway.JavaPackage):
        # DeepImageFeaturizer is not visible (possibly running without Spark);
        # instead of failing, return an empty list.
        return []
    return dict(featurizer.scaleHintsJava()).keys() 
Developer: databricks, Project: spark-deep-learning, Lines: 9, Source: named_image.py

Example 8: readImagesWithCustomFn

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImagesWithCustomFn(path, decode_f, numPartition=None):
    """
    Read a directory of images (or a single image) into a DataFrame using a custom library to
    decode the images.

    :param path: str, file path.
    :param decode_f: function to decode the raw bytes into an array compatible with one of the
        supported OpenCv modes. see @imageIO.PIL_decode for an example.
    :param numPartition: [optional] int, number of partitions to use for reading files.
    :return: DataFrame with schema == ImageSchema.imageSchema.
    """
    warnings.warn("readImagesWithCustomFn() will be removed in the next release of sparkdl. "
                  "Please use pillow and Pandas UDF instead.", DeprecationWarning)
    return _readImagesWithCustomFn(path, decode_f, numPartition, sc=SparkContext.getOrCreate()) 
Developer: databricks, Project: spark-deep-learning, Lines: 16, Source: imageIO.py

Example 9: test_start_sentry_listener

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_start_sentry_listener():
    spark_context = SparkContext.getOrCreate()

    gateway = spark_context._gateway
    assert gateway._callback_server is None

    _start_sentry_listener(spark_context)

    assert gateway._callback_server is not None 
Developer: getsentry, Project: sentry-python, Lines: 11, Source: test_spark.py

Example 10: _rdd_from_dtable

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _rdd_from_dtable(self):
        storage_iterator = self._dtable.get_all()
        if self._dtable.count() <= 0:
            storage_iterator = []

        num_partition = self._dtable.get_partitions()

        from pyspark import SparkContext
        self._rdd = SparkContext.getOrCreate() \
            .parallelize(storage_iterator, num_partition) \
            .persist(util.get_storage_level())
        return self._rdd 
Developer: FederatedAI, Project: FATE, Lines: 14, Source: table.py

Example 11: broadcast_eggroll_session

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def broadcast_eggroll_session(work_mode, eggroll_session):
    import pickle
    pickled_client = pickle.dumps((work_mode.value, eggroll_session)).hex()
    from pyspark import SparkContext
    SparkContext.getOrCreate().setLocalProperty(_EGGROLL_CLIENT, pickled_client)


# noinspection PyProtectedMember,PyUnresolvedReferences 
Developer: FederatedAI, Project: FATE, Lines: 10, Source: util.py

Example 12: _rdd_from_dtable

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _rdd_from_dtable(self):
        storage_iterator = self._dtable.collect(use_serialize=True)
        if self._dtable.count() <= 0:
            storage_iterator = []

        num_partition = self._dtable._partitions
        from pyspark import SparkContext
        self._rdd = SparkContext.getOrCreate() \
            .parallelize(storage_iterator, num_partition) \
            .persist(util.get_storage_level())
        return self._rdd 
Developer: FederatedAI, Project: FATE, Lines: 13, Source: table.py

Example 13: test_lf_applier_spark

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)
        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, g])
        L = applier.apply(rdd)
        np.testing.assert_equal(L, L_EXPECTED) 
Developer: snorkel-team, Project: snorkel, Lines: 10, Source: test_spark.py

Example 14: test_lf_applier_spark_fault

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark_fault(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)
        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, f_bad])
        with self.assertRaises(Exception):
            applier.apply(rdd)
        L = applier.apply(rdd, fault_tolerant=True)
        np.testing.assert_equal(L, L_EXPECTED_BAD) 
Developer: snorkel-team, Project: snorkel, Lines: 12, Source: test_spark.py

Example 15: test_lf_applier_spark_preprocessor

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark_preprocessor(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)
        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, fp])
        L = applier.apply(rdd)
        np.testing.assert_equal(L, L_PREPROCESS_EXPECTED) 
Developer: snorkel-team, Project: snorkel, Lines: 10, Source: test_spark.py


Note: The pyspark.SparkContext.getOrCreate examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution or use should follow the license of the corresponding project. Do not reproduce without permission.