当前位置: 首页>>代码示例>>Python>>正文


Python dataframe.from_pandas方法代码示例

本文整理汇总了Python中dask.dataframe.from_pandas方法的典型用法代码示例。如果您正苦于以下问题:Python dataframe.from_pandas方法的具体用法?Python dataframe.from_pandas怎么用?Python dataframe.from_pandas使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在dask.dataframe的用法示例。


在下文中一共展示了dataframe.from_pandas方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_lf_applier_pandas_spacy_preprocessor_memoized

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_lf_applier_pandas_spacy_preprocessor_memoized(self) -> None:
        """Run two spaCy-preprocessed labeling functions over a Dask DataFrame.

        Both labeling functions share a single ``SpacyPreprocessor`` whose
        ``memoize`` attribute is switched on. The label matrix returned by
        ``DaskLFApplier`` is compared against ``L_TEXT_EXPECTED``.
        """
        preprocessor = SpacyPreprocessor(text_field="text", doc_field="doc")
        preprocessor.memoize = True

        @labeling_function(pre=[preprocessor])
        def first_is_name(x: DataPoint) -> int:
            # 0 when the first token is tagged as a proper noun, otherwise -1.
            if x.doc[0].pos_ == "PROPN":
                return 0
            return -1

        @labeling_function(pre=[preprocessor])
        def has_verb(x: DataPoint) -> int:
            # 0 when at least one token is tagged as a verb, otherwise -1.
            verb_count = sum(t.pos_ == "VERB" for t in x.doc)
            return 0 if verb_count > 0 else -1

        text_frame = pd.DataFrame({"text": TEXT_DATA})
        dask_frame = dd.from_pandas(text_frame, npartitions=2)
        label_matrix = DaskLFApplier([first_is_name, has_verb]).apply(dask_frame)
        np.testing.assert_equal(label_matrix, L_TEXT_EXPECTED)
开发者ID:snorkel-team,项目名称:snorkel,代码行数:19,代码来源:test_lf_applier.py

示例2: test_to_holomap_dask

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_to_holomap_dask(self):
        """Check ``Dataset.to`` on a Dask-backed dataset grouped into a HoloMap.

        Every curve in the resulting HoloMap must reference ``self.ds`` as its
        dataset, and re-running the curve's pipeline on that dataset must
        reproduce the curve.
        """
        if dd is None:
            raise SkipTest("Dask required to test .to with dask dataframe.")

        dask_frame = dd.from_pandas(self.df, npartitions=2)

        # (column name, display label) for each key dimension.
        labelled_columns = (
            ('a', "The a Column"),
            ('b', "The b Column"),
            ('c', "The c Column"),
            ('d', "The d Column"),
        )
        key_dims = [Dimension(name, label=label) for name, label in labelled_columns]
        dask_dataset = Dataset(dask_frame, kdims=key_dims)

        holomap = dask_dataset.to(Curve, 'a', 'b', groupby=['c'])

        # One HoloMap entry is expected per distinct value of column 'c'.
        for group_value in self.df.c.drop_duplicates():
            element = holomap.data[(group_value,)]

            # Check HoloMap element datasets
            self.assertEqual(element.dataset, self.ds)

            # Execute pipeline
            self.assertEqual(element.pipeline(element.dataset), element)
开发者ID:holoviz,项目名称:holoviews,代码行数:27,代码来源:testdatasetproperty.py

示例3: mock_dask_fit_data

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def mock_dask_fit_data(
    periods=DEF_N,
    start_date=None,
    ids=None,
    embedding_dim=DEF_EMB_DIM,
    seq_length=DEF_SEQ_LENGTH
):
    """Create example fit data as a dask DataFrame.

    The pandas frame returned by ``mock_fit_data`` is converted with
    ``chunksize=periods``. The DataFrame is meant to be partitioned by ID
    (assumes ``mock_fit_data`` emits ``periods`` consecutive rows per ID --
    TODO confirm against that helper).

    Args:
        periods: forwarded to ``mock_fit_data``; also used as the dask
            chunk size.
        start_date: forwarded to ``mock_fit_data``.
        ids: forwarded to ``mock_fit_data``; defaults to ``[0]`` when omitted
            or ``None``.
        embedding_dim: forwarded to ``mock_fit_data``.
        seq_length: forwarded to ``mock_fit_data``.

    Returns:
        The dask DataFrame built from the mocked pandas frame.
    """
    if ids is None:
        # Build the default per call: a list literal in the signature would be
        # one shared object that any callee mutation would leak across calls.
        ids = [0]
    df = mock_fit_data(
        periods=periods,
        start_date=start_date,
        ids=ids,
        embedding_dim=embedding_dim,
        seq_length=seq_length
    )
    ddf = dd.from_pandas(df, chunksize=periods)
    return ddf
开发者ID:octoenergy,项目名称:timeserio,代码行数:22,代码来源:mock.py

示例4: mock_dask_raw_data

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def mock_dask_raw_data(
    periods=DEF_N,
    start_date=None,
    ids=None
):
    """Create example raw data as a dask DataFrame.

    The pandas frame returned by ``mock_raw_data`` is converted with
    ``chunksize=periods``. The DataFrame is meant to be partitioned by ID
    (assumes ``mock_raw_data`` emits ``periods`` consecutive rows per ID --
    TODO confirm against that helper).

    Args:
        periods: forwarded to ``mock_raw_data``; also used as the dask
            chunk size.
        start_date: forwarded to ``mock_raw_data``.
        ids: forwarded to ``mock_raw_data``; defaults to ``[0]`` when omitted
            or ``None``.

    Returns:
        The dask DataFrame built from the mocked pandas frame.
    """
    if ids is None:
        # Build the default per call: a list literal in the signature would be
        # one shared object that any callee mutation would leak across calls.
        ids = [0]
    df = mock_raw_data(
        periods=periods,
        start_date=start_date,
        ids=ids,
    )
    ddf = dd.from_pandas(df, chunksize=periods)
    return ddf
开发者ID:octoenergy,项目名称:timeserio,代码行数:18,代码来源:mock.py

示例5: test_typed_dask_dataframe

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_typed_dask_dataframe(builder):
    """Round-trip a mixed-dtype Dask DataFrame through the builder.

    The source frame has int, float (with NaN), string (with None) and
    datetime columns. The entity built under the name ``"df"`` must match
    the source in content and in per-column dtypes.
    """
    source = pd.DataFrame()
    source["int"] = [1, 2, 3]
    source["float"] = [1.0, 1.5, float("nan")]
    source["str"] = ["red", "blue", None]
    source["time"] = pd.to_datetime(["2011-02-07", "2011-03-17", "2011-04-27"])
    expected_ddf = dd.from_pandas(source, npartitions=1)

    @builder
    @bn.protocol.dask
    def df():
        return expected_ddf

    # Content check: a fresh build, computed and compared to the source frame.
    built_frame = builder.build().get("df").compute()
    assert equal_frame_and_index_content(built_frame, expected_ddf.compute())

    # Dtype check: uses a second, independent build.
    built_dtypes = builder.build().get("df").compute().dtypes.to_dict()
    assert built_dtypes == expected_ddf.compute().dtypes.to_dict()
开发者ID:square,项目名称:bionic,代码行数:22,代码来源:test_protocols.py

示例6: test_incremental_text_pipeline

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_incremental_text_pipeline(container):
    """Fit a HashingVectorizer + Incremental(SGDClassifier) pipeline on text.

    ``container`` selects the input collection: the Dask series is converted
    to a bag when it equals ``"bag"``, otherwise the series is used directly.
    """
    documents = pd.Series(["a list", "of words", "for classification"] * 100)
    X = dd.from_pandas(documents, npartitions=3)
    if container == "bag":
        X = X.to_bag()

    labels = np.array([0, 0, 1] * 100)
    y = da.from_array(labels, chunks=(100,) * 3)

    # The text partitions must line up with the label chunks.
    partition_lengths = tuple(X.map_partitions(len).compute())
    assert partition_lengths == y.chunks[0]

    sgd = SGDClassifier(max_iter=5, tol=1e-3)
    clf = Incremental(sgd, scoring="accuracy", assume_equal_chunks=True)
    vect = dask_ml.feature_extraction.text.HashingVectorizer()
    pipe = make_pipeline(vect, clf)

    pipe.fit(X, y, incremental__classes=[0, 1])
    first_step = pipe.steps[0][1]
    X2 = first_step.transform(X)
    assert hasattr(clf, "coef_")

    # Chunk sizes are resolved explicitly before the shape is compared.
    X2.compute_chunk_sizes()
    expected_shape = (300, vect.n_features)
    assert X2.shape == expected_shape
开发者ID:dask,项目名称:dask-ml,代码行数:24,代码来源:test_incremental.py

示例7: test_grid_search_dask_dataframe

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_grid_search_dask_dataframe():
    """Compare ``GridSearchCV`` and ``dcv.GridSearchCV`` on the iris data.

    The first search is fitted on pandas inputs, the second on the Dask
    equivalents (two partitions each); both must select the same best
    parameters.
    """
    iris = load_iris()

    features_pd = pd.DataFrame(iris.data)
    features_dd = dd.from_pandas(features_pd, npartitions=2)

    target_pd = pd.Series(iris.target)
    target_dd = dd.from_pandas(target_pd, npartitions=2)

    estimator = LogisticRegression(multi_class="auto", solver="lbfgs", max_iter=200)
    grid = {"C": [0.1, 1, 10]}

    pandas_search = GridSearchCV(estimator, grid, cv=5)
    dask_search = dcv.GridSearchCV(estimator, grid, cv=5)
    pandas_search.fit(features_pd, target_pd)
    dask_search.fit(features_dd, target_dd)

    assert pandas_search.best_params_ == dask_search.best_params_
开发者ID:dask,项目名称:dask-ml,代码行数:22,代码来源:test_model_selection.py

示例8: test_frame_strategies

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_frame_strategies(daskify, strategy):
    """Fit ``SimpleImputer`` on a one-column frame and check ``statistics_``.

    ``daskify`` switches the input from pandas to a two-partition Dask frame;
    ``strategy`` is passed straight to the imputer.
    """
    frame = pd.DataFrame({"A": [1, 1, np.nan, np.nan, 2, 2]})
    if daskify:
        frame = dd.from_pandas(frame, 2)

    # Only the "constant" strategy gets an explicit fill value.
    fill_value = 2 if strategy == "constant" else None

    imputer = dask_ml.impute.SimpleImputer(strategy=strategy, fill_value=fill_value)
    imputer.fit(frame)

    # The pandas + "median" combination expects 1.5; all others expect 2.
    pandas_median = strategy == "median" and not daskify
    expected_stat = 1.5 if pandas_median else 2
    expected = pd.Series([expected_stat], index=["A"])
    tm.assert_series_equal(imputer.statistics_, expected, check_dtype=False)
开发者ID:dask,项目名称:dask-ml,代码行数:19,代码来源:test_impute.py

示例9: test_block_transform_multiply

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_block_transform_multiply(self, daskify, validation, factor):
        """Check ``BlockTransformer(multiply)`` against calling ``multiply`` directly.

        Runs on both an array and a frame; with ``daskify`` the inputs are Dask
        collections and the transformed outputs must be Dask collections too.
        ``factor`` is forwarded only when it is truthy.
        """
        array_data = np.arange(100).reshape((25, 4))
        frame_data = pd.DataFrame(array_data).rename(columns=str)
        if daskify:
            array_data = da.from_array(array_data, chunks=(5, 4))
            frame_data = dd.from_pandas(frame_data, npartitions=2)

        # Optional keyword shared by the transformer and the reference call.
        extra_kwargs = {"factor": factor} if factor else {}
        bt = BlockTransformer(multiply, validate=validation, **extra_kwargs)

        if daskify:
            assert dask.is_dask_collection(bt.transform(array_data))
            assert dask.is_dask_collection(bt.transform(frame_data))

        da.utils.assert_eq(bt.transform(array_data), multiply(array_data, **extra_kwargs))
        dd.utils.assert_eq(bt.transform(frame_data), multiply(frame_data, **extra_kwargs))
开发者ID:dask,项目名称:dask-ml,代码行数:21,代码来源:test_block_transformer.py

示例10: test_validate

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_validate(self, mocker, daskify, validate):
        """Check that ``check_array`` is called only when ``validate`` is truthy.

        ``check_array`` is patched in ``dask_ml.preprocessing._block_transformer``
        and the call count is inspected after transforming an array and then
        a frame.
        """
        array_data = np.arange(100).reshape((25, 4))
        frame_data = pd.DataFrame(array_data).rename(columns=str)
        if daskify:
            array_data = da.from_array(array_data, chunks=(5, 4))
            frame_data = dd.from_pandas(frame_data, npartitions=2)

        check_array_mock = mocker.patch("dask_ml.preprocessing._block_transformer.check_array")
        bt = BlockTransformer(lambda x: x, validate=validate)

        if not validate:
            # Validation disabled: neither transform may touch check_array.
            bt.transform(array_data)
            check_array_mock.assert_not_called()
            bt.transform(frame_data)
            check_array_mock.assert_not_called()
            return

        # Validation enabled: exactly one check_array call per transform.
        bt.transform(array_data)
        check_array_mock.assert_called_once()
        check_array_mock.reset_mock()
        bt.transform(frame_data)
        check_array_mock.assert_called_once()
开发者ID:dask,项目名称:dask-ml,代码行数:21,代码来源:test_block_transformer.py

示例11: test_inverse_transform

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_inverse_transform(self):
        """Check that ``OrdinalEncoder.inverse_transform`` undoes ``transform``.

        The round trip is exercised on the transformed frame and on its
        ``.values``; each result must be a Dask collection and equal the input.
        """
        frame = pd.DataFrame(
            {"A": np.arange(10), "B": pd.Categorical(["a"] * 4 + ["b"] * 6)}
        )
        df = dd.from_pandas(frame, npartitions=2)
        enc = dpp.OrdinalEncoder()
        enc.fit(df)

        def roundtrip_frame():
            # transform -> inverse_transform on the frame itself
            return enc.inverse_transform(enc.transform(df))

        def roundtrip_values():
            # transform -> inverse_transform on the transformed frame's .values
            return enc.inverse_transform(enc.transform(df).values)

        assert dask.is_dask_collection(roundtrip_values())
        assert dask.is_dask_collection(roundtrip_frame())

        # Each round trip is asserted twice, mirroring the original test.
        assert_eq_df(df, roundtrip_frame())
        assert_eq_df(df, roundtrip_frame())
        assert_eq_df(df, roundtrip_values())
        assert_eq_df(df, roundtrip_values())
开发者ID:dask,项目名称:dask-ml,代码行数:19,代码来源:test_data.py

示例12: test_categorical

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_categorical(self, categories, transformed, daskify, ordered):
        """Fit ``LabelEncoder`` on a categorical series and check both directions.

        ``daskify`` may be falsy (pandas input), truthy (Dask input) or the
        string ``"unknown"`` (Dask input converted with ``cat.as_unknown()``).
        """
        cat_dtype = pd.api.types.CategoricalDtype(categories=categories, ordered=ordered)
        cat = pd.Series(["a", "b", "a"], dtype=cat_dtype)
        unknown_categories = daskify == "unknown"

        if daskify:
            cat = dd.from_pandas(cat, npartitions=2)
            transformed = da.from_array(transformed, chunks=(2, 1))
            if unknown_categories:
                cat = cat.cat.as_unknown()

        encoder = dpp.LabelEncoder().fit(cat)

        # The dtype comparison is skipped for the "unknown" variant.
        if not unknown_categories:
            assert encoder.dtype_ == cat.dtype
        np.testing.assert_array_equal(encoder.classes_, categories)

        encoded = encoder.transform(cat)
        da.utils.assert_eq(encoded, transformed)

        decoded = encoder.inverse_transform(encoded)
        if daskify:
            # manually set the divisions for the test
            decoded.divisions = (0, 2)
        dd.utils.assert_eq(decoded, cat)
开发者ID:dask,项目名称:dask-ml,代码行数:26,代码来源:test_label.py

示例13: test_use_categorical

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def test_use_categorical(self, daskify):
        """Compare ``dpp.LabelEncoder(use_categorical=False)`` with ``spp.LabelEncoder``.

        Both are fitted on the same categorical series (a two-partition Dask
        series when ``daskify`` is truthy). The fitted estimators must match
        apart from ``dtype_``, which must be ``None`` on the ``dpp`` encoder,
        and their transform / inverse_transform outputs must agree.
        """
        cat_dtype = pd.api.types.CategoricalDtype(["c", "a", "b"])
        data = pd.Series(["b", "c"], dtype=cat_dtype)
        if daskify:
            data = dd.from_pandas(data, npartitions=2)

        dpp_encoder = dpp.LabelEncoder(use_categorical=False).fit(data)
        spp_encoder = spp.LabelEncoder().fit(data)
        assert_estimator_equal(dpp_encoder, spp_encoder, exclude={"dtype_"})
        assert dpp_encoder.dtype_ is None

        da.utils.assert_eq(dpp_encoder.transform(data), spp_encoder.transform(data))

        dpp_codes = dpp_encoder.transform(data)
        spp_codes = spp_encoder.transform(data)
        da.utils.assert_eq(dpp_codes, spp_codes)
        da.utils.assert_eq(
            dpp_encoder.inverse_transform(dpp_codes),
            spp_encoder.inverse_transform(spp_codes),
        )
开发者ID:dask,项目名称:dask-ml,代码行数:18,代码来源:test_label.py

示例14: __init__

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def __init__(
        self,
        pandas_obj,
        npartitions=None,
        dask_threshold=1,
        scheduler="processes",
        progress_bar=True,
        progress_bar_desc=None,
        allow_dask_on_strings=False,
    ):
        """Initialise the transformation and cache pandas/Dask views of the data.

        All arguments are forwarded positionally, in signature order, to the
        parent initialiser. ``npartitions`` is additionally used to build the
        Dask counterpart of ``pandas_obj``.
        """
        parent_args = (
            pandas_obj,
            npartitions,
            dask_threshold,
            scheduler,
            progress_bar,
            progress_bar_desc,
            allow_dask_on_strings,
        )
        super(Transformation, self).__init__(*parent_args)

        # Leading slice of the data, at most _SAMPLE_SIZE rows.
        sample_stop = self._SAMPLE_SIZE
        self._sample_pd = pandas_obj.iloc[:sample_stop]
        # Full data, kept both as the original pandas object and as a Dask collection.
        self._obj_pd = pandas_obj
        self._obj_dd = dd.from_pandas(pandas_obj, npartitions=npartitions)
        row_count = pandas_obj.shape[0]
        self._nrows = row_count
开发者ID:jmcarpenter2,项目名称:swifter,代码行数:19,代码来源:swifter.py

示例15: _dask_apply

# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import from_pandas [as 别名]
def _dask_apply(self, func, *args, **kwds):
        """Apply ``func`` through Dask, falling back to pandas on handled errors.

        A Dask rolling apply is first run on ``self._comparison_pd`` and
        validated against the pandas rolling apply on the same data. The real
        apply then runs on ``self._obj_dd``. If anything in that sequence
        raises one of ``ERRORS_TO_HANDLE``, the pandas object is used instead.
        A progress bar is shown on either path when ``self._progress_bar`` is
        truthy.
        """
        try:
            # check that the dask rolling apply matches the pandas apply
            with suppress_stdout_stderr():
                # "on" and "closed" are left out of the Dask rolling call.
                dask_rolling_kwds = {
                    key: value for key, value in self._rolling_kwds.items() if key not in ["on", "closed"]
                }
                sample_ddf = dd.from_pandas(self._comparison_pd, npartitions=self._npartitions)
                dask_sample = (
                    sample_ddf.rolling(**dask_rolling_kwds)
                    .apply(func, *args, **kwds)
                    .compute(scheduler=self._scheduler)
                )
                pandas_sample = self._comparison_pd.rolling(**self._rolling_kwds).apply(func, *args, **kwds)
                self._validate_apply(
                    dask_sample.equals(pandas_sample),
                    error_message="Dask rolling apply sample does not match pandas rolling apply sample.",
                )

            if not self._progress_bar:
                return self._obj_dd.apply(func, *args, **kwds).compute(scheduler=self._scheduler)
            with TQDMDaskProgressBar(desc=self._progress_bar_desc or "Dask Apply"):
                return self._obj_dd.apply(func, *args, **kwds).compute(scheduler=self._scheduler)
        except ERRORS_TO_HANDLE:
            # Fallback path: plain pandas apply, optionally with a tqdm bar.
            if not self._progress_bar:
                return self._obj_pd.apply(func, *args, **kwds)
            tqdm.pandas(desc=self._progress_bar_desc or "Pandas Apply")
            return self._obj_pd.progress_apply(func, *args, **kwds)
开发者ID:jmcarpenter2,项目名称:swifter,代码行数:27,代码来源:swifter.py


注:本文中的dask.dataframe.from_pandas方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。