

Python SparkContext.getOrCreate Method Code Examples

This article collects representative usage examples of the Python method pyspark.SparkContext.getOrCreate, gathered from open-source projects. If you are unsure how SparkContext.getOrCreate is used in practice, the curated examples below should help. You can also explore further usage examples of the enclosing class, pyspark.SparkContext.


The following presents 15 code examples of SparkContext.getOrCreate, ordered by popularity.
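Before the project-specific examples, here is a minimal orientation sketch (the app name, master URL, and data are illustrative assumptions, not taken from any of the projects below): getOrCreate returns the active SparkContext if one already exists, and otherwise creates one from the supplied SparkConf.

from pyspark import SparkConf, SparkContext

conf = SparkConf().setAppName('getOrCreate-demo').setMaster('local[2]')
sc = SparkContext.getOrCreate(conf)   # reuses the active context if one is running
rdd = sc.parallelize(range(10), 2)    # small sanity check
print(rdd.sum())                      # 45
sc.stop()                             # release the context when finished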

Example 1: create_spark_context

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def create_spark_context(app_name="Quiz Bowl", configs=None) -> SparkContext:
    if QB_SPARK_MASTER != "":
        log.info("Spark master is %s" % QB_SPARK_MASTER)
        spark_conf = SparkConf()\
            .set('spark.rpc.message.maxSize', 300)\
            .setAppName(app_name)\
            .setMaster(QB_SPARK_MASTER)
    else:
        spark_conf = SparkConf()\
            .set('spark.rpc.message.maxSize', 300)\
            .setAppName(app_name)
    if configs is not None:
        for key, value in configs:
            if key in ('spark.executor.cores', 'spark.max.cores'):
                if value > QB_MAX_CORES:
                    log.info('Requested {r_cores} cores when the machine only has {n_cores} cores, reducing number of '
                             'cores to {n_cores}'.format(r_cores=value, n_cores=QB_MAX_CORES))
                    value = QB_MAX_CORES
            spark_conf = spark_conf.set(key, value)
    return SparkContext.getOrCreate(spark_conf) 
Developer: Pinafore, Project: qb, Lines: 22, Source: spark.py
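A hedged usage sketch for create_spark_context (the config pair is an illustrative assumption; QB_SPARK_MASTER and QB_MAX_CORES come from the project's configuration module):

# Illustrative call; the config key and value are assumptions, not taken from the qb project.
sc = create_spark_context(app_name='Quiz Bowl', configs=[('spark.executor.cores', 2)])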

Example 2: parallelize

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def parallelize(self,
                    data: Iterable,
                    name,
                    namespace,
                    partition,
                    include_key,
                    persistent,
                    chunk_size,
                    in_place_computing,
                    create_if_missing,
                    error_if_exist):
        _iter = data if include_key else enumerate(data)
        from pyspark import SparkContext
        rdd = SparkContext.getOrCreate().parallelize(_iter, partition)
        rdd = util.materialize(rdd)
        if namespace is None:
            namespace = self._session_id
        return RDDTable.from_rdd(rdd=rdd, job_id=self._session_id, namespace=namespace, name=name) 
Developer: FederatedAI, Project: FATE, Lines: 20, Source: session.py

Example 3: test_lf_applier_spark_preprocessor_memoized

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark_preprocessor_memoized(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)

        @preprocessor(memoize=True)
        def square_memoize(x: DataPoint) -> DataPoint:
            return Row(num=x.num, num_squared=x.num ** 2)

        @labeling_function(pre=[square_memoize])
        def fp_memoized(x: DataPoint) -> int:
            return 0 if x.num_squared > 42 else -1

        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, fp_memoized])
        L = applier.apply(rdd)
        np.testing.assert_equal(L, L_PREPROCESS_EXPECTED) 
Developer: snorkel-team, Project: snorkel, Lines: 19, Source: test_spark.py

Example 4: _load_pyfunc

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _load_pyfunc(path):
    """
    Load PyFunc implementation. Called by ``pyfunc.load_pyfunc``.

    :param path: Local filesystem path to the MLflow Model with the ``spark`` flavor.
    """
    # NOTE: The getOrCreate() call below may change settings of the active session which we do not
    # intend to do here. In particular, setting master to local[1] can break distributed clusters.
    # To avoid this problem, we explicitly check for an active session. This is not ideal but there
    # is no good workaround at the moment.
    import pyspark

    spark = pyspark.sql.SparkSession._instantiatedSession
    if spark is None:
        spark = pyspark.sql.SparkSession.builder.config("spark.python.worker.reuse", True) \
            .master("local[1]").getOrCreate()
    return _PyFuncModelWrapper(spark, _load_model(model_uri=path)) 
Developer: mlflow, Project: mlflow, Lines: 19, Source: spark.py
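For context, a hedged sketch of how a model saved with the spark flavor is typically consumed through the pyfunc interface, which invokes _load_pyfunc under the hood (the model URI and feature frame are illustrative assumptions):

import mlflow.pyfunc
import pandas as pd

model = mlflow.pyfunc.load_model('models:/my_spark_model/1')  # illustrative model URI
features = pd.DataFrame({'x': [1.0, 2.0, 3.0]})               # assumed feature frame
predictions = model.predict(features)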

Example 5: parse_raw_wikidata

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def parse_raw_wikidata(output):
    spark_conf = SparkConf().setAppName('QB Wikidata').setMaster(QB_SPARK_MASTER)
    sc = SparkContext.getOrCreate(spark_conf)  # type: SparkContext

    wikidata = sc.textFile('s3a://entilzha-us-west-2/wikidata/wikidata-20170306-all.json')

    def parse_line(line):
        if len(line) == 0:
            return []
        if line[0] == '[' or line[0] == ']':
            return []
        elif line.endswith(','):
            return [json.loads(line[:-1])]
        else:
            return [json.loads(line)]

    parsed_wikidata = wikidata.flatMap(parse_line).cache()
    property_map = extract_property_map(parsed_wikidata)
    b_property_map = sc.broadcast(property_map)

    wikidata_items = parsed_wikidata.filter(lambda d: d['type'] == 'item').cache()
    parsed_wikidata.unpersist()
    item_page_map = extract_item_page_map(wikidata_items)
    b_item_page_map = sc.broadcast(item_page_map)

    parsed_item_map = extract_items(wikidata_items, b_property_map, b_item_page_map)

    with open(output, 'wb') as f:
        pickle.dump({
            'parsed_item_map': parsed_item_map,
            'item_page_map': item_page_map,
            'property_map': property_map
        }, f)

    sc.stop() 
Developer: Pinafore, Project: qb, Lines: 37, Source: wikidata.py

Example 6: create_spark_session

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def create_spark_session(app_name='Quiz Bowl', configs=None) -> SparkSession:
    create_spark_context(app_name=app_name, configs=configs)
    return SparkSession.builder.getOrCreate() 
Developer: Pinafore, Project: qb, Lines: 5, Source: spark.py

Example 7: _getScaleHintList

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _getScaleHintList():
    featurizer = SparkContext.getOrCreate()._jvm.com.databricks.sparkdl.DeepImageFeaturizer
    if isinstance(featurizer, py4j.java_gateway.JavaPackage):
        # do not see DeepImageFeaturizer, possibly running without spark
        # instead of failing return empty list
        return []
    return dict(featurizer.scaleHintsJava()).keys() 
Developer: databricks, Project: spark-deep-learning, Lines: 9, Source: named_image.py

Example 8: readImagesWithCustomFn

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def readImagesWithCustomFn(path, decode_f, numPartition=None):
    """
    Read a directory of images (or a single image) into a DataFrame using a custom library to
    decode the images.

    :param path: str, file path.
    :param decode_f: function to decode the raw bytes into an array compatible with one of the
        supported OpenCv modes. see @imageIO.PIL_decode for an example.
    :param numPartition: [optional] int, number or partitions to use for reading files.
    :return: DataFrame with schema == ImageSchema.imageSchema.
    """
    warnings.warn("readImagesWithCustomFn() will be removed in the next release of sparkdl. "
                  "Please use pillow and Pandas UDF instead.", DeprecationWarning)
    return _readImagesWithCustomFn(path, decode_f, numPartition, sc=SparkContext.getOrCreate()) 
Developer: databricks, Project: spark-deep-learning, Lines: 16, Source: imageIO.py
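A hedged usage sketch for readImagesWithCustomFn (the directory path and partition count are illustrative, and it assumes the module is importable as sparkdl.image.imageIO, where the PIL_decode helper mentioned in the docstring lives):

# Illustrative only; the path and partition count are assumptions.
from sparkdl.image import imageIO

image_df = imageIO.readImagesWithCustomFn('/data/images', decode_f=imageIO.PIL_decode, numPartition=4)
image_df.printSchema()  # schema matches ImageSchema.imageSchema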

Example 9: test_start_sentry_listener

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_start_sentry_listener():
    spark_context = SparkContext.getOrCreate()

    gateway = spark_context._gateway
    assert gateway._callback_server is None

    _start_sentry_listener(spark_context)

    assert gateway._callback_server is not None 
Developer: getsentry, Project: sentry-python, Lines: 11, Source: test_spark.py

Example 10: _rdd_from_dtable

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _rdd_from_dtable(self):
        storage_iterator = self._dtable.get_all()
        if self._dtable.count() <= 0:
            storage_iterator = []

        num_partition = self._dtable.get_partitions()

        from pyspark import SparkContext
        self._rdd = SparkContext.getOrCreate() \
            .parallelize(storage_iterator, num_partition) \
            .persist(util.get_storage_level())
        return self._rdd 
Developer: FederatedAI, Project: FATE, Lines: 14, Source: table.py

Example 11: broadcast_eggroll_session

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def broadcast_eggroll_session(work_mode, eggroll_session):
    import pickle
    pickled_client = pickle.dumps((work_mode.value, eggroll_session)).hex()
    from pyspark import SparkContext
    SparkContext.getOrCreate().setLocalProperty(_EGGROLL_CLIENT, pickled_client)


# noinspection PyProtectedMember,PyUnresolvedReferences 
Developer: FederatedAI, Project: FATE, Lines: 10, Source: util.py
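A hypothetical counterpart sketch (not FATE's actual API) showing how a local property stored this way could be read back on the driver, assuming _EGGROLL_CLIENT is the same property key used above:

# Hypothetical read-back; mirrors the hex/pickle encoding used in broadcast_eggroll_session.
from pyspark import SparkContext
import pickle

payload = SparkContext.getOrCreate().getLocalProperty(_EGGROLL_CLIENT)
work_mode_value, eggroll_session = pickle.loads(bytes.fromhex(payload))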

Example 12: _rdd_from_dtable

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def _rdd_from_dtable(self):
        storage_iterator = self._dtable.collect(use_serialize=True)
        if self._dtable.count() <= 0:
            storage_iterator = []

        num_partition = self._dtable._partitions
        from pyspark import SparkContext
        self._rdd = SparkContext.getOrCreate() \
            .parallelize(storage_iterator, num_partition) \
            .persist(util.get_storage_level())
        return self._rdd 
Developer: FederatedAI, Project: FATE, Lines: 13, Source: table.py

Example 13: test_lf_applier_spark

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)
        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, g])
        L = applier.apply(rdd)
        np.testing.assert_equal(L, L_EXPECTED) 
Developer: snorkel-team, Project: snorkel, Lines: 10, Source: test_spark.py

Example 14: test_lf_applier_spark_fault

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark_fault(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)
        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, f_bad])
        with self.assertRaises(Exception):
            applier.apply(rdd)
        L = applier.apply(rdd, fault_tolerant=True)
        np.testing.assert_equal(L, L_EXPECTED_BAD) 
Developer: snorkel-team, Project: snorkel, Lines: 12, Source: test_spark.py

Example 15: test_lf_applier_spark_preprocessor

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import getOrCreate [as alias]
def test_lf_applier_spark_preprocessor(self) -> None:
        sc = SparkContext.getOrCreate()
        sql = SQLContext(sc)
        df = pd.DataFrame(dict(num=DATA))
        rdd = sql.createDataFrame(df).rdd
        applier = SparkLFApplier([f, fp])
        L = applier.apply(rdd)
        np.testing.assert_equal(L, L_PREPROCESS_EXPECTED) 
Developer: snorkel-team, Project: snorkel, Lines: 10, Source: test_spark.py


Note: The pyspark.SparkContext.getOrCreate examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by their respective authors, and copyright remains with the original authors. Distribution and use are subject to each project's license; do not reproduce without permission.