本文整理汇总了Python中dask.dataframe.Series方法的典型用法代码示例。如果您正苦于以下问题:Python dataframe.Series方法的具体用法?Python dataframe.Series怎么用?Python dataframe.Series使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 dask.dataframe 的用法示例。
在下文中一共展示了dataframe.Series方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_dask_data
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def extract_dask_data(data):
    """Extract data from dask.Series or dask.DataFrame for predictors.

    Given a distributed dask.DataFrame or dask.Series containing columns or
    names for one or more predictors, this operation returns a single
    dask.DataFrame or dask.Series that can be iterated over.

    Args:
      data: A distributed dask.DataFrame or dask.Series.

    Returns:
      A dask.DataFrame or dask.Series that can be iterated over.
      If the supplied argument is neither a dask.DataFrame nor a dask.Series
      this operation returns it without modification.
    """
    # Anything that is not a recognised dask collection passes straight through.
    if not isinstance(data, allowed_classes):
        return data
    return _construct_dask_df_with_divisions(data)
示例2: _validate_parameters
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def _validate_parameters(self, X, y):
    """Validate fit parameters and inputs.

    Coerces dask dataframes to dask arrays (so errors on unknown chunk
    sizes are raised), runs the standard array checks, and builds the
    scorer.

    Returns:
      The validated ``(X, y, scorer)`` triple.
    """
    max_iter = self.max_iter
    if max_iter is not None and max_iter < 1:
        raise ValueError(
            "Received max_iter={}. max_iter < 1 is not supported".format(
                max_iter
            )
        )
    # Make sure dask arrays are passed so error on unknown chunk size is raised
    if isinstance(X, dd.DataFrame):
        X = X.to_dask_array()
    if isinstance(y, (dd.DataFrame, dd.Series)):
        y = y.to_dask_array()
    check_kwargs = {"accept_unknown_chunks": False,
                    "accept_dask_dataframe": False}
    X = self._check_array(X, **check_kwargs)
    y = self._check_array(y, ensure_2d=False, **check_kwargs)
    scorer = check_scoring(self.estimator, scoring=self.scoring)
    return X, y, scorer
示例3: _check_array
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def _check_array(self, y: Union[ArrayLike, SeriesType]):
    """Coerce *y* to a 1-D target, honouring ``self.use_categorical``."""
    # NOTE(review): the squeeze branch matches dd.Series / pd.DataFrame but
    # not dd.DataFrame / pd.Series -- confirm this asymmetry is intentional.
    if isinstance(y, (dd.Series, pd.DataFrame)):
        y = y.squeeze()

    if y.ndim > 1:
        raise ValueError("Expected a 1-D array or Series.")

    if not self.use_categorical:
        # Non-categorical mode: hand back a dask or numpy array.
        if isinstance(y, dd.Series):
            return y.to_dask_array(lengths=True)
        if isinstance(y, pd.Series):
            return np.asarray(y)
        return y

    if isinstance(y, dd.Series):
        if pd.api.types.is_categorical_dtype(y):
            # TODO(dask-3784): just call y.cat.as_known()
            # https://github.com/dask/dask/issues/3784
            if not y.cat.known:
                y = y.cat.as_known()
        else:
            y = y.to_dask_array(lengths=True)
    return y
示例4: _construct_dask_df_with_divisions
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def _construct_dask_df_with_divisions(df):
    """Construct the new task graph and make a new dask.dataframe around it."""
    divisions = _get_divisions(df)
    # pylint: disable=protected-access
    name = 'csv-index' + df._name
    dsk = {}
    for part in range(df.npartitions):
        dsk[(name, part)] = (_add_to_index, (df._name, part), divisions[part])
    # pylint: enable=protected-access
    from toolz import merge  # pylint: disable=g-import-not-at-top
    graph = merge(dsk, df.dask)
    if isinstance(df, dd.DataFrame):
        return dd.DataFrame(graph, name, df.columns, divisions)
    if isinstance(df, dd.Series):
        return dd.Series(graph, name, df.name, divisions)
示例5: extract_dask_labels
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def extract_dask_labels(labels):
    """Extract data from dask.Series or dask.DataFrame for labels.

    Given a distributed dask.DataFrame or dask.Series containing exactly one
    column or name, this operation returns a single dask.DataFrame or
    dask.Series that can be iterated over.

    Args:
      labels: A distributed dask.DataFrame or dask.Series with exactly one
        column or name.

    Returns:
      A dask.DataFrame or dask.Series that can be iterated over.
      If the supplied argument is neither a dask.DataFrame nor a dask.Series
      this operation returns it without modification.

    Raises:
      ValueError: If the supplied dask.DataFrame contains more than one
        column.
    """
    # BUG FIX: the old code applied ``len(...) > 1`` to ``labels.name`` for a
    # Series as well; a Series name is typically a string, so any label Series
    # with a multi-character name was rejected. A Series is always a single
    # column, so only DataFrames need the column-count check.
    if isinstance(labels, dd.DataFrame) and len(labels.columns) > 1:
        raise ValueError('Only one column for labels is allowed.')
    if isinstance(labels, allowed_classes):
        return _construct_dask_df_with_divisions(labels)
    return labels
示例6: _access
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def _access(data, iloc):
    """Accesses an element from collection, using integer location based indexing.

    Args:
      data: array-like. The collection to access.
      iloc: `int` or `list` of `int`s. Location(s) to access in `collection`.

    Returns:
      The element of `data` found at location(s) `iloc`.
    """
    if HAS_PANDAS:
        import pandas as pd  # pylint: disable=g-import-not-at-top
        # pandas objects use positional indexing through ``.iloc``.
        if isinstance(data, (pd.Series, pd.DataFrame)):
            return data.iloc[iloc]
    return data[iloc]
示例7: setup_train_data_feeder
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def setup_train_data_feeder(
    x, y, n_classes, batch_size=None, shuffle=True, epochs=None):
    """Create data feeder, to sample inputs from dataset.

    If `x` and `y` are iterators, use `StreamingDataFeeder`.

    Args:
      x: numpy, pandas or Dask matrix or iterable.
      y: numpy, pandas or Dask array or iterable.
      n_classes: number of classes.
      batch_size: size to split data into parts. Must be >= 1.
      shuffle: Whether to shuffle the inputs.
      epochs: Number of epochs to run.

    Returns:
      DataFeeder object that returns training data.

    Raises:
      ValueError: if one of `x` and `y` is iterable and the other is not.
    """
    x, y = _data_type_filter(x, y)
    feeder_cls = DataFeeder
    if HAS_DASK:
        # pylint: disable=g-import-not-at-top
        import dask.dataframe as dd
        x_is_dask = isinstance(x, (dd.Series, dd.DataFrame))
        y_is_dask_or_none = y is None or isinstance(y, (dd.Series, dd.DataFrame))
        if x_is_dask and y_is_dask_or_none:
            feeder_cls = DaskDataFeeder
    if _is_iterable(x):
        if y is not None and not _is_iterable(y):
            raise ValueError('Both x and y should be iterators for '
                             'streaming learning to work.')
        return StreamingDataFeeder(x, y, n_classes, batch_size)
    return feeder_cls(
        x, y, n_classes, batch_size, shuffle=shuffle, epochs=epochs)
示例8: is_series
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def is_series(data):
    """Return whether *data* is a pandas, streamz or dask Series."""
    if not check_library(data, ['dask', 'streamz', 'pandas']):
        return False
    if isinstance(data, pd.Series):
        return True
    if check_library(data, 'streamz'):
        import streamz.dataframe as sdf
        # ``sdf.Seriess`` is streamz's streaming-series type, not a typo.
        return isinstance(data, (sdf.Series, sdf.Seriess))
    if check_library(data, 'dask'):
        import dask.dataframe as dd
        return isinstance(data, dd.Series)
    return False
示例9: is_cudf
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def is_cudf(data):
    """Return whether *data* is a cuDF DataFrame or Series.

    Only inspects cuDF types if cudf has already been imported, so this
    never triggers an import of cudf itself.
    """
    if 'cudf' in sys.modules:
        from cudf import DataFrame, Series
        return isinstance(data, (DataFrame, Series))
    # BUG FIX: the old code fell off the end and returned None here;
    # return an explicit bool so callers get a consistent type.
    return False
示例10: is_dask
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def is_dask(data):
    """Return whether *data* is a dask DataFrame or Series."""
    if check_library(data, 'dask'):
        import dask.dataframe as dd
        return isinstance(data, (dd.DataFrame, dd.Series))
    return False
示例11: is_streamz
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def is_streamz(data):
    """Return whether *data* is a streamz DataFrame/Series (plain or streaming)."""
    if not check_library(data, 'streamz'):
        return False
    import streamz.dataframe as sdf
    streamz_types = (sdf.DataFrame, sdf.Series, sdf.DataFrames, sdf.Seriess)
    return sdf and isinstance(data, streamz_types)
示例12: patch
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def patch(name='hvplot', extension='bokeh', logo=False):
    """Patch the plotting API onto dask DataFrame and Series.

    Args:
      name: Attribute name under which the plotting accessor is installed.
      extension: Plotting backend extension to activate.
      logo: Whether to display the backend logo.

    Raises:
      ImportError: If dask.dataframe cannot be imported.
    """
    from . import hvPlotTabular, post_patch
    try:
        import dask.dataframe as dd
    except ImportError as e:
        # BUG FIX: was a bare ``except:`` -- that swallowed unrelated errors
        # (including KeyboardInterrupt) and discarded the original failure.
        # Catch only ImportError and chain the cause for easier debugging.
        raise ImportError('Could not patch plotting API onto dask. '
                          'Dask could not be imported.') from e
    _patch_plot = lambda self: hvPlotTabular(self)
    _patch_plot.__doc__ = hvPlotTabular.__call__.__doc__
    patch_property = property(_patch_plot)
    setattr(dd.DataFrame, name, patch_property)
    setattr(dd.Series, name, patch_property)
    post_patch(extension, logo)
示例13: default
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def default(self, obj):
    """Fallback serializer for objects that are not natively encodable.

    Maps sets/arrays/pandas objects to hashable or list representations,
    then degrades to ``str``/``repr`` for configured types, then ``hash``,
    and finally the object's ``id``.

    Args:
      obj: The object to encode.

    Returns:
      A JSON-friendly or hashable stand-in for *obj*.
    """
    if isinstance(obj, set):
        return hash(frozenset(obj))
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    if pd and isinstance(obj, (pd.Series, pd.DataFrame)):
        return obj.to_csv(header=True).encode('utf-8')
    elif isinstance(obj, self.string_hashable):
        return str(obj)
    elif isinstance(obj, self.repr_hashable):
        return repr(obj)
    try:
        return hash(obj)
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallowed
        # KeyboardInterrupt/SystemExit; unhashable objects still fall
        # back to the object's identity.
        return id(obj)
示例14: is_series
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def is_series(data):
    """
    Checks whether the supplied data is of Series type.
    """
    if pd is not None and isinstance(data, pd.Series):
        return True
    # Only consult dask types when dask.dataframe has already been imported.
    if 'dask.dataframe' in sys.modules:
        import dask.dataframe as dd
        return isinstance(data, dd.Series)
    return False
示例15: applies
# 需要导入模块: from dask import dataframe [as 别名]
# 或者: from dask.dataframe import Series [as 别名]
def applies(cls, obj):
    """Return whether *obj* is a dask DataFrame or Series (when dask is loaded)."""
    if cls.loaded():
        import dask.dataframe as dd
        return isinstance(obj, (dd.DataFrame, dd.Series))
    return False