

Python pyspark.__version__ Code Examples

This article collects typical usage examples of pyspark.__version__ from open-source Python projects. If you are wondering what pyspark.__version__ is and how it is used in real code, the curated examples below should help; note that __version__ is a module-level attribute of pyspark holding the installed version string, not a method. You can also browse further usage examples from the pyspark package.


The following presents 15 code examples that use pyspark.__version__, drawn from open-source projects and sorted by popularity by default.
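Before the examples, here is a minimal sketch (not taken from any of the projects below; the variable names are illustrative) of the most common pattern: reading pyspark.__version__ and comparing it with distutils.version.LooseVersion to gate version-dependent behavior.

from distutils.version import LooseVersion

import pyspark

# pyspark.__version__ is a plain version string such as "2.4.5" or "3.0.0".
print("PySpark version:", pyspark.__version__)

# Compare with LooseVersion rather than raw string comparison so that,
# for example, "2.10" sorts after "2.4".
if LooseVersion(pyspark.__version__) < LooseVersion("3.0"):
    legacy_mode = True   # fall back to behavior available on Spark 2.x
else:
    legacy_mode = False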

Example 1: assert_pyspark_version

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def assert_pyspark_version():
    import logging

    pyspark_ver = None
    try:
        import pyspark
    except ImportError:
        raise ImportError(
            "Unable to import pyspark - consider doing a pip install with [spark] "
            "extra to install pyspark with pip"
        )
    else:
        pyspark_ver = getattr(pyspark, "__version__")
        if pyspark_ver is None or pyspark_ver < "2.4":
            logging.warning(
                'Found pyspark version "{}" installed. pyspark>=2.4.0 is recommended.'.format(
                    pyspark_ver if pyspark_ver is not None else "<unknown version>"
                )
            ) 
Author: databricks, Project: koalas, Lines of code: 21, Source file: __init__.py

Example 2: test_rfloordiv

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_rfloordiv(self):
        pdf = pd.DataFrame(
            {"angles": [0, 3, 4], "degrees": [360, 180, 360]},
            index=["circle", "triangle", "rectangle"],
            columns=["angles", "degrees"],
        )
        kdf = ks.from_pandas(pdf)

        if LooseVersion(pd.__version__) < LooseVersion("1.0.0") and LooseVersion(
            pd.__version__
        ) >= LooseVersion("0.24.0"):
            expected_result = pd.DataFrame(
                {"angles": [np.inf, 3.0, 2.0], "degrees": [0.0, 0.0, 0.0]},
                index=["circle", "triangle", "rectangle"],
                columns=["angles", "degrees"],
            )
        else:
            expected_result = pdf.rfloordiv(10)

        self.assert_eq(kdf.rfloordiv(10), expected_result) 
Author: databricks, Project: koalas, Lines of code: 22, Source file: test_dataframe.py

Example 3: test_repeat

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_repeat(self):
        pser = pd.Series(["a", "b", "c"], name="0", index=np.random.rand(3))
        kser = ks.from_pandas(pser)

        self.assert_eq(kser.repeat(3).sort_index(), pser.repeat(3).sort_index())
        self.assert_eq(kser.repeat(0).sort_index(), pser.repeat(0).sort_index())

        self.assertRaises(ValueError, lambda: kser.repeat(-1))
        self.assertRaises(ValueError, lambda: kser.repeat("abc"))

        pdf = pd.DataFrame({"a": ["a", "b", "c"], "rep": [10, 20, 30]}, index=np.random.rand(3))
        kdf = ks.from_pandas(pdf)

        if LooseVersion(pyspark.__version__) < LooseVersion("2.4"):
            self.assertRaises(ValueError, lambda: kdf.a.repeat(kdf.rep))
        else:
            self.assert_eq(kdf.a.repeat(kdf.rep).sort_index(), pdf.a.repeat(pdf.rep).sort_index()) 
Author: databricks, Project: koalas, Lines of code: 19, Source file: test_series.py

Example 4: test_div_zero_and_nan

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_div_zero_and_nan(self):
        pser = pd.Series([100, None, -300, None, 500, -700, np.inf, -np.inf], name="Koalas")
        kser = ks.from_pandas(pser)

        self.assert_eq(repr(pser.div(0)), repr(kser.div(0)))
        self.assert_eq(repr(pser.truediv(0)), repr(kser.truediv(0)))
        self.assert_eq(repr(pser / 0), repr(kser / 0))
        self.assert_eq(repr(pser.div(np.nan)), repr(kser.div(np.nan)))
        self.assert_eq(repr(pser.truediv(np.nan)), repr(kser.truediv(np.nan)))
        self.assert_eq(repr(pser / np.nan), repr(kser / np.nan))

        # floordiv has different behavior in pandas > 1.0.0 when divide by 0
        if LooseVersion(pd.__version__) >= LooseVersion("1.0.0"):
            self.assert_eq(repr(pser.floordiv(0)), repr(kser.floordiv(0)))
            self.assert_eq(repr(pser // 0), repr(kser // 0))
        else:
            result = pd.Series(
                [np.inf, np.nan, -np.inf, np.nan, np.inf, -np.inf, np.inf, -np.inf], name="Koalas"
            )
            self.assert_eq(repr(kser.floordiv(0)), repr(result))
            self.assert_eq(repr(kser // 0), repr(result))
        self.assert_eq(repr(pser.floordiv(np.nan)), repr(kser.floordiv(np.nan))) 
Author: databricks, Project: koalas, Lines of code: 24, Source file: test_series.py

Example 5: apply_async

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def apply_async(self, func, callback=None):
        # Note: the `func` argument is a batch here (BatchedCalls type).
        # See joblib.parallel.Parallel._dispatch
        def run_on_worker_and_fetch_result():
            # TODO: handle possible spark exception here. # pylint: disable=fixme
            rdd = self._spark.sparkContext.parallelize([0], 1) \
                .map(lambda _: cloudpickle.dumps(func()))
            if VersionUtils.majorMinorVersion(pyspark.__version__)[0] < 3:
                ser_res = rdd.collect()[0]
            else:
                ser_res = rdd.collectWithJobGroup(self._job_group, "joblib spark jobs")[0]
            return cloudpickle.loads(ser_res)

        return self._get_pool().apply_async(
            SafeFunction(run_on_worker_and_fetch_result),
            callback=callback
        ) 
Author: joblib, Project: joblib-spark, Lines of code: 19, Source file: backend.py
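The VersionUtils.majorMinorVersion helper used above lives in pyspark.util and parses a version string into its integer (major, minor) components, which makes the "is this Spark 3+?" check robust against patch-level differences. A quick illustration (a sketch, not part of the project above):

from pyspark.util import VersionUtils

major, minor = VersionUtils.majorMinorVersion("2.4.5")
print(major, minor)  # -> 2 4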

Example 6: get_default_conda_env

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`. This Conda environment
             contains the current version of PySpark that is installed on the caller's
             system. ``dev`` versions of PySpark are replaced with stable versions in
             the resulting Conda environment (e.g., if you are running PySpark version
             ``2.4.5.dev0``, invoking this method produces a Conda environment with a
             dependency on PySpark version ``2.4.5``).
    """
    import pyspark
    # Strip the suffix from `dev` versions of PySpark, which are not
    # available for installation from Anaconda or PyPI
    pyspark_version = re.sub(r"(\.?)dev.*", "", pyspark.__version__)

    return _mlflow_conda_env(
        additional_conda_deps=[
            "pyspark={}".format(pyspark_version),
        ],
        additional_pip_deps=None,
        additional_conda_channels=None) 
Author: mlflow, Project: mlflow, Lines of code: 23, Source file: spark.py
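For reference, the re.sub call above turns a development version string into its stable counterpart before pinning the conda dependency; a quick check (illustrative only):

import re

print(re.sub(r"(\.?)dev.*", "", "2.4.5.dev0"))  # -> "2.4.5"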

Example 7: version

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def version(self):
        return parse_version(ps.__version__) 
Author: ibis-project, Project: ibis, Lines of code: 4, Source file: client.py

Example 8: __enter__

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def __enter__(self):
        # import locally to avoid importing tensorflow globally.
        from petastorm.tf_utils import make_petastorm_dataset
        import tensorflow.compat.v1 as tf  # pylint: disable=import-error

        _wait_file_available(self.parquet_file_url_list)
        self.reader = make_batch_reader(self.parquet_file_url_list, **self.petastorm_reader_kwargs)

        # unroll dataset
        dataset = make_petastorm_dataset(self.reader).flat_map(
            tf.data.Dataset.from_tensor_slices)

        # TODO: auto tune best batch size in default case.
        batch_size = self.batch_size or 32
        dataset = dataset.batch(batch_size=batch_size)

        prefetch = self.prefetch

        if prefetch is None:
            if LooseVersion(tf.__version__) >= LooseVersion('1.14'):
                # We can make prefetch optimization
                prefetch = tf.data.experimental.AUTOTUNE
            else:
                prefetch = 1

        dataset = dataset.prefetch(prefetch)

        return dataset 
Author: uber, Project: petastorm, Lines of code: 30, Source file: spark_dataset_converter.py

Example 9: default_session

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def default_session(conf=None):
    if conf is None:
        conf = dict()
    should_use_legacy_ipc = False
    if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15") and LooseVersion(
        pyspark.__version__
    ) < LooseVersion("3.0"):
        conf["spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        conf["spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        conf["spark.mesos.driverEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        conf["spark.kubernetes.driverEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        should_use_legacy_ipc = True

    builder = spark.SparkSession.builder.appName("Koalas")
    for key, value in conf.items():
        builder = builder.config(key, value)
    # Currently, Koalas is dependent on such join due to 'compute.ops_on_diff_frames'
    # configuration. This is needed with Spark 3.0+.
    builder.config("spark.sql.analyzer.failAmbiguousSelfJoin", False)
    session = builder.getOrCreate()

    if not should_use_legacy_ipc:
        is_legacy_ipc_set = any(
            v == "1"
            for v in [
                session.conf.get("spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
                session.conf.get("spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
                session.conf.get("spark.mesos.driverEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
                session.conf.get("spark.kubernetes.driverEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
            ]
        )
        if is_legacy_ipc_set:
            raise RuntimeError(
                "Please explicitly unset 'ARROW_PRE_0_15_IPC_FORMAT' environment variable in "
                "both driver and executor sides. Check your spark.executorEnv.*, "
                "spark.yarn.appMasterEnv.*, spark.mesos.driverEnv.* and "
                "spark.kubernetes.driverEnv.* configurations. It is required to set this "
                "environment variable only when you use pyarrow>=0.15 and pyspark<3.0."
            )
    return session 
Author: databricks, Project: koalas, Lines of code: 42, Source file: utils.py

Example 10: value_counts

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
        if (
            LooseVersion(pyspark.__version__) < LooseVersion("2.4")
            and default_session().conf.get("spark.sql.execution.arrow.enabled") == "true"
            and isinstance(self, MultiIndex)
        ):
            raise RuntimeError(
                "if you're using pyspark < 2.4, set conf "
                "'spark.sql.execution.arrow.enabled' to 'false' "
                "for using this function with MultiIndex"
            )
        return super(MultiIndex, self).value_counts(
            normalize=normalize, sort=sort, ascending=ascending, bins=bins, dropna=dropna
        ) 
Author: databricks, Project: koalas, Lines of code: 16, Source file: indexes.py

Example 11: test_udt

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_udt(self):
        sparse_values = {0: 0.1, 1: 1.1}
        sparse_vector = SparseVector(len(sparse_values), sparse_values)
        pdf = pd.DataFrame({"a": [sparse_vector], "b": [10]})

        if LooseVersion(pyspark.__version__) < LooseVersion("2.4"):
            with self.sql_conf({"spark.sql.execution.arrow.enabled": False}):
                kdf = ks.from_pandas(pdf)
                self.assert_eq(kdf, pdf)
        else:
            kdf = ks.from_pandas(pdf)
            self.assert_eq(kdf, pdf) 
Author: databricks, Project: koalas, Lines of code: 14, Source file: test_dataframe.py

Example 12: test_to_frame

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_to_frame(self):
        pidx = self.pdf.index
        kidx = self.kdf.index

        self.assert_eq(repr(kidx.to_frame()), repr(pidx.to_frame()))
        self.assert_eq(repr(kidx.to_frame(index=False)), repr(pidx.to_frame(index=False)))

        pidx.name = "a"
        kidx.name = "a"

        self.assert_eq(repr(kidx.to_frame()), repr(pidx.to_frame()))
        self.assert_eq(repr(kidx.to_frame(index=False)), repr(pidx.to_frame(index=False)))

        if LooseVersion(pd.__version__) >= LooseVersion("0.24"):
            # The `name` argument is added in pandas 0.24.
            self.assert_eq(repr(kidx.to_frame(name="x")), repr(pidx.to_frame(name="x")))
            self.assert_eq(
                repr(kidx.to_frame(index=False, name="x")),
                repr(pidx.to_frame(index=False, name="x")),
            )

        pidx = self.pdf.set_index("b", append=True).index
        kidx = self.kdf.set_index("b", append=True).index

        self.assert_eq(repr(kidx.to_frame()), repr(pidx.to_frame()))
        self.assert_eq(repr(kidx.to_frame(index=False)), repr(pidx.to_frame(index=False)))

        if LooseVersion(pd.__version__) >= LooseVersion("0.24"):
            # The `name` argument is added in pandas 0.24.
            self.assert_eq(
                repr(kidx.to_frame(name=["x", "y"])), repr(pidx.to_frame(name=["x", "y"]))
            )
            self.assert_eq(
                repr(kidx.to_frame(index=False, name=["x", "y"])),
                repr(pidx.to_frame(index=False, name=["x", "y"])),
            ) 
Author: databricks, Project: koalas, Lines of code: 38, Source file: test_indexes.py

Example 13: test_multi_index_names

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_multi_index_names(self):
        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
        kdf = ks.from_pandas(pdf)

        self.assertEqual(kdf.index.names, pdf.index.names)

        pidx = pdf.index
        kidx = kdf.index
        pidx.names = ["renamed_number", "renamed_color"]
        kidx.names = ["renamed_number", "renamed_color"]
        self.assertEqual(kidx.names, pidx.names)

        pidx.names = ["renamed_number", None]
        kidx.names = ["renamed_number", None]
        self.assertEqual(kidx.names, pidx.names)
        if LooseVersion(pyspark.__version__) < LooseVersion("2.4"):
            # PySpark < 2.4 does not support struct type with arrow enabled.
            with self.sql_conf({"spark.sql.execution.arrow.enabled": False}):
                self.assert_eq(kidx, pidx)
        else:
            self.assert_eq(kidx, pidx)

        with self.assertRaises(PandasNotImplementedError):
            kidx.name
        with self.assertRaises(PandasNotImplementedError):
            kidx.name = "renamed" 
Author: databricks, Project: koalas, Lines of code: 30, Source file: test_indexes.py

Example 14: test_value_counts

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_value_counts(self):
        if LooseVersion(pyspark.__version__) < LooseVersion("2.4"):
            with self.sql_conf({"spark.sql.execution.arrow.enabled": False}):
                self._test_value_counts()
            self.assertRaises(
                RuntimeError,
                lambda: ks.MultiIndex.from_tuples([("x", "a"), ("x", "b")]).value_counts(),
            )
        else:
            self._test_value_counts() 
Author: databricks, Project: koalas, Lines of code: 12, Source file: test_series.py

Example 15: test_to_list

# Module to import: import pyspark [as an alias]
# Alternatively: from pyspark import __version__ [as an alias]
def test_to_list(self):
        if LooseVersion(pd.__version__) >= LooseVersion("0.24.0"):
            self.assertEqual(self.kser.to_list(), self.pser.to_list()) 
Author: databricks, Project: koalas, Lines of code: 5, Source file: test_series.py


Note: The pyspark.__version__ examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or reusing the code; do not reproduce this article without permission.