本文整理汇总了 Python 中 xarray.Dataset.copy 方法的典型用法代码示例。如果您正苦于以下问题：Python Dataset.copy 方法的具体用法？Python Dataset.copy 怎么用？Python Dataset.copy 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 xarray.Dataset 的用法示例。
在下文中一共展示了Dataset.copy方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_roundtrip_strings_with_fill_value
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def test_roundtrip_strings_with_fill_value(self):
    """Round-trip an object-dtype string variable that has a ``_FillValue``.

    Two encodings are exercised: a one-byte string encoding with ``'X'`` as
    fill value, and a NUL character as fill value. For the second case the
    expected dataset is adjusted depending on which backend test class is
    running.
    """
    values = np.array(['ab', 'cdef', np.nan], dtype=object)

    # np.bytes_ is used instead of np.string_: the latter alias was removed
    # in NumPy 2.0, while np.bytes_ is available in every NumPy release.
    encoding = {'_FillValue': np.bytes_('X'), 'dtype': np.dtype('S1')}
    original = Dataset({'x': ('t', values, {}, encoding)})
    expected = original.copy(deep=True)
    expected['x'][:2] = values[:2].astype('S')
    with self.roundtrip(original) as actual:
        self.assertDatasetIdentical(expected, actual)

    original = Dataset({'x': ('t', values, {}, {'_FillValue': '\x00'})})
    if not isinstance(self, Only32BitTypes):
        # these stores can save unicode strings; otherwise ``expected``
        # from the first case (byte strings) is reused
        expected = original.copy(deep=True)
    if isinstance(self, BaseNetCDF4Test):
        # netCDF4 can't keep track of an empty _FillValue for VLEN
        # variables
        expected['x'][-1] = ''
    elif (isinstance(self, (NetCDF3ViaNetCDF4DataTest,
                            NetCDF4ClassicViaNetCDF4DataTest)) or
          (has_netCDF4 and type(self) is GenericNetCDFDataTest)):
        # netCDF4 can't keep track of an empty _FillValue for nc3, either:
        # https://github.com/Unidata/netcdf4-python/issues/273
        expected['x'][-1] = np.bytes_('')
    with self.roundtrip(original) as actual:
        self.assertDatasetIdentical(expected, actual)
示例2: adjust_spatial_attrs_impl
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def adjust_spatial_attrs_impl(ds: xr.Dataset, allow_point: bool) -> xr.Dataset:
    """
    Update the global geospatial attributes of a dataset from its 'lon' and
    'lat' information.

    Attributes reported by ``_get_geo_spatial_cf_attrs_from_var`` with a
    non-None value are written to a copy of the dataset; the input dataset
    object is returned unchanged if nothing needs to be written.

    When all four of the ``geospatial_{lon,lat}_{min,max}`` attributes are
    present, ``geospatial_bounds`` is set to the matching WKT polygon and the
    CRS / vertical attributes, which cannot be derived here, are removed.

    For more information on suggested global attributes see
    `Attribute Convention for Data Discovery
    <http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery>`_

    :param ds: Dataset to adjust
    :param allow_point: Whether to accept single point cells
    :return: Adjusted dataset
    """
    result = ds
    is_copy = False

    for dim in ('lon', 'lat'):
        cf_attrs = _get_geo_spatial_cf_attrs_from_var(result, dim, allow_point=allow_point)
        if not cf_attrs:
            continue
        for name in cf_attrs:
            value = cf_attrs[name]
            if value is None:
                continue
            # Copy lazily: only once there is actually something to write
            if not is_copy:
                result = result.copy()
                is_copy = True
            result.attrs[name] = value

    lon_min = result.attrs.get('geospatial_lon_min')
    lat_min = result.attrs.get('geospatial_lat_min')
    lon_max = result.attrs.get('geospatial_lon_max')
    lat_max = result.attrs.get('geospatial_lat_max')

    if any(bound is None for bound in (lon_min, lat_min, lon_max, lat_max)):
        return result

    if not is_copy:
        result = result.copy()

    # Closed ring: the first corner is repeated as the last one
    ring = (lon_min, lat_min, lon_min, lat_max, lon_max, lat_max, lon_max, lat_min, lon_min, lat_min)
    result.attrs['geospatial_bounds'] = 'POLYGON(({} {}, {} {}, {} {}, {} {}, {} {}))'.format(*ring)

    # Determining the following attributes by introspection is ambiguous in
    # general, so drop them rather than risk keeping out-of-date values.
    stale_keys = ('geospatial_bounds_crs', 'geospatial_bounds_vertical_crs',
                  'geospatial_vertical_min', 'geospatial_vertical_max',
                  'geospatial_vertical_positive', 'geospatial_vertical_units',
                  'geospatial_vertical_resolution')
    for stale in stale_keys:
        result.attrs.pop(stale, None)

    return result
示例3: test_roundtrip_object_dtype
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def test_roundtrip_object_dtype(self):
    """Round-trip a dataset whose variables are object-dtype arrays.

    Covers floats, strings, NaN-containing variants of both, an all-NaN
    array and a scalar NaN.
    """
    nan = np.nan
    variables = {
        'floats': ('a', np.array([0.0, 0.0, 1.0, 2.0, 3.0], dtype=object)),
        'floats_nans': ('a', np.array([nan, nan, 1.0, 2.0, 3.0], dtype=object)),
        'letters': ('b', np.array(['ab', 'cdef', 'g'], dtype=object)),
        'letters_nans': ('b', np.array(['ab', 'cdef', nan], dtype=object)),
        'all_nans': ('c', np.array([nan, nan], dtype=object)),
        'nan': ([], nan),
    }
    original = Dataset(variables)
    expected = original.copy(deep=True)

    if isinstance(self, Only32BitTypes):
        # for netCDF3 tests, expect the results to come back as characters
        for name in ('letters_nans', 'letters'):
            expected[name] = expected[name].astype('S')

    with self.roundtrip(original) as actual:
        try:
            # Ideal case: the store returns exactly the original Dataset
            self.assertDatasetIdentical(expected, actual)
        except AssertionError:
            # More realistic case: most stores use '' for nans in strings.
            # ScipyDataTest, NetCDF3ViaNetCDF4DataTest and NetCDF4DataTest
            # all end up here.
            expected['letters_nans'][-1] = ''
            self.assertDatasetIdentical(expected, actual)
示例4: adjust_temporal_attrs_impl
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def adjust_temporal_attrs_impl(ds: xr.Dataset) -> xr.Dataset:
    """
    Update the global temporal attributes of a dataset from the values
    reported by ``_get_temporal_cf_attrs_from_var``.

    Attributes reported with a value are set on a copy of the dataset;
    attributes reported as None are removed from that copy. If nothing is
    reported, the input dataset object itself is returned.

    For more information on suggested global attributes see
    `Attribute Convention for Data Discovery
    <http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery>`_

    :param ds: Dataset to adjust
    :return: Adjusted dataset
    """
    cf_attrs = _get_temporal_cf_attrs_from_var(ds)
    if not cf_attrs:
        return ds

    adjusted = ds.copy()
    for name in cf_attrs:
        value = cf_attrs[name]
        if value is None:
            # No value could be determined: do not keep a stale attribute
            adjusted.attrs.pop(name, None)
        else:
            adjusted.attrs[name] = value
    return adjusted
示例5: _normalize_dim_order
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def _normalize_dim_order(ds: xr.Dataset) -> xr.Dataset:
    """
    Transpose data variables so that 'time' is the first dimension and
    'lat', 'lon' are the last two.

    The dataset is copied only when at least one variable has to be
    transposed; otherwise the input object is returned as is.

    NOTE(review): relies on ``_swap_pos`` (defined elsewhere) to reorder the
    dimension-name list in place and to return whether a transpose is needed.

    :param ds: Dataset whose data variables are inspected
    :return: Dataset with reordered variables, or ``ds`` itself
    """
    result = ds
    is_copy = False

    for name in ds.data_vars:
        var = result[name]
        dims = list(var.dims)
        rank = len(dims)
        if rank == 0:
            continue

        reorder = False

        if 'time' in dims:
            time_pos = dims.index('time')
            if time_pos > 0:
                reorder = _swap_pos(dims, time_pos, 0)

        if rank >= 2 and 'lat' in dims and 'lon' in dims:
            lat_pos = dims.index('lat')
            if lat_pos != rank - 2:
                reorder = _swap_pos(dims, lat_pos, -2)
            # Look up 'lon' only now: the 'lat' swap may have moved it
            lon_pos = dims.index('lon')
            if lon_pos != rank - 1:
                reorder = _swap_pos(dims, lon_pos, -1)

        if not reorder:
            continue

        if not is_copy:
            result = result.copy()
            is_copy = True
        result[name] = var.transpose(*dims)

    return result
示例6: anomaly_internal
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def anomaly_internal(ds: xr.Dataset,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate the anomaly of a dataset against a reference mean taken from
    the dataset itself.

    The reference is the mean over an optional time slice and/or spatial
    region of ``ds``. Without either restriction the mean of the whole
    dataset is used. The subtraction is applied to the dataset as a whole.

    :param ds: The dataset to calculate anomalies from
    :param time_range: Time range to use for reference data
    :param region: Spatial region to use for reference data
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    reference = ds.copy()

    if time_range:
        reference = subset_temporal(reference, TimeRangeLike.convert(time_range))

    if region:
        reference = subset_spatial(reference, PolygonLike.convert(region))

    with monitor.observing("Calculating anomaly"):
        reference_mean = reference.mean(keep_attrs=True, skipna=True)
        anomaly = ds - reference_mean

    return anomaly
示例7: test_roundtrip_coordinates
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def test_roundtrip_coordinates(self):
    """Round-trip datasets with coordinates and reject a clashing attribute.

    A dataset with coordinates (with and without its data variable) must
    round-trip identically, while a 'coordinates' attribute set on the
    dataset or on a variable must make serialization fail.
    """
    def assert_cannot_serialize(dataset):
        # Saving is expected to fail with 'cannot serialize'
        with self.assertRaisesRegexp(ValueError, 'cannot serialize'):
            with self.roundtrip(dataset):
                pass

    original = Dataset({'foo': ('x', [0, 1])},
                       {'x': [2, 3], 'y': ('a', [42]), 'z': ('x', [4, 5])})
    with self.roundtrip(original) as actual:
        self.assertDatasetIdentical(original, actual)

    coords_only = original.drop('foo')
    with self.roundtrip(coords_only) as actual:
        self.assertDatasetIdentical(coords_only, actual)

    # 'coordinates' as a global attribute
    bad_global = original.copy()
    bad_global.attrs['coordinates'] = 'something random'
    assert_cannot_serialize(bad_global)

    # 'coordinates' as a variable attribute
    bad_variable = original.copy(deep=True)
    bad_variable['foo'].attrs['coordinates'] = 'something random'
    assert_cannot_serialize(bad_variable)
示例8: _normalize_jd2datetime
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def _normalize_jd2datetime(ds: xr.Dataset) -> xr.Dataset:
    """
    Convert the time dimension of the given dataset from Julian date to
    datetime.

    The conversion is applied when the ``units`` or the ``long_name``
    attribute of ``ds.time`` equals 'time in julian days' (ignoring case and
    surrounding whitespace). Previously the normalised ``long_name`` was
    assigned to ``units`` by mistake, so a time axis with matching units but
    any other non-empty long name was silently left unconverted.

    :param ds: Dataset on which to run conversion
    :return: A converted copy of the dataset, or ``ds`` itself when it has no
        ``time`` attribute or the time axis is not marked as Julian days
    """
    julian_marker = 'time in julian days'

    try:
        time = ds.time
    except AttributeError:
        return ds

    # Collect the normalised, non-empty descriptions of the time axis
    labels = []
    for attr_name in ('units', 'long_name'):
        text = getattr(time, attr_name, None)
        if text:
            labels.append(text.lower().strip())

    if julian_marker not in labels:
        return ds

    ds = ds.copy()
    # Decode JD time
    # noinspection PyTypeChecker
    tuples = [jd2gcal(x, 0) for x in ds.time.values]
    # Replace JD time with datetime (only the first three tuple fields are used)
    ds.time.values = [datetime(x[0], x[1], x[2]) for x in tuples]
    # Adjust attributes
    ds.time.attrs['long_name'] = 'time'
    ds.time.attrs['calendar'] = 'standard'
    return ds
示例9: history_no_version
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def history_no_version(ds: xr.Dataset, a=1, b='bilinear'):
    """
    Return a copy of the given dataset.

    :param ds: Dataset to copy
    :param a: Accepted but not used
    :param b: Accepted but not used
    :return: The result of ``ds.copy()``
    """
    return ds.copy()
示例10: history_named_op
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def history_named_op(ds: xr.Dataset, a=1, b='bilinear'):
    """
    Return three independent copies of the given dataset, keyed by name.

    :param ds: Dataset to copy
    :param a: Accepted but not used
    :param b: Accepted but not used
    :return: Dict with keys 'name1', 'name2' and 'name3', each mapped to its
        own ``ds.copy()``
    """
    return {
        'name1': ds.copy(),
        'name2': ds.copy(),
        'name3': ds.copy(),
    }
示例11: history_op
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def history_op(ds: xr.Dataset, a=1, b='bilinear'):
    """
    Return a copy of the given dataset.

    :param ds: Dataset to copy
    :param a: Accepted but not used
    :param b: Accepted but not used
    :return: The result of ``ds.copy()``
    """
    return ds.copy()
示例12: detect_outliers
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def detect_outliers(ds: xr.Dataset,
                    var: VarNamesLike.TYPE,
                    threshold_low: float = 0.05,
                    threshold_high: float = 0.95,
                    quantiles: bool = True,
                    mask: bool = False,
                    monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Detect outliers in the given Dataset.

    When mask=True the input dataset should not contain nan values, otherwise
    all existing nan values will be marked as 'outliers' in the mask data array
    added to the output dataset.

    With ``quantiles=True`` the absolute limits are computed separately for
    every selected variable from the same requested quantiles. Previously the
    limits computed for the first variable overwrote ``threshold_low`` and
    ``threshold_high`` and were then passed on as quantiles for every
    following variable.

    :param ds: The dataset or dataframe for which to do outlier detection
    :param var: Variable or variables in the dataset to which to do outlier
        detection. Note that when multiple variables are selected, absolute
        threshold values might not make much sense. Wild cards can be used to
        select multiple variables matching a pattern.
    :param threshold_low: Values less or equal to this will be removed/masked
    :param threshold_high: Values greater or equal to this will be removed/masked
    :param quantiles: If True, threshold values are treated as quantiles,
        otherwise as absolute values.
    :param mask: If True, an ancillary variable containing flag values for
        outliers will be added to the dataset. Otherwise, outliers will be replaced
        with nan directly in the data variables.
    :param monitor: A progress monitor.
    :return: The dataset with outliers masked or replaced with nan
    """
    ds = DatasetLike.convert(ds)

    # Create a list of variable names on which to perform outlier detection
    # based on the input comma separated list that can contain wildcards
    var_patterns = VarNamesLike.convert(var)
    all_vars = list(ds.data_vars.keys())
    variables = []
    for pattern in var_patterns:
        variables.extend(fnmatch.filter(all_vars, pattern))

    # Work on a copy; three units of progress are reported per variable
    ret_ds = ds.copy()
    with monitor.starting("detect_outliers", total_work=len(variables) * 3):
        for var_name in variables:
            if quantiles:
                # Per-variable limits; the function arguments stay untouched
                # so that every variable uses the requested quantiles.
                with monitor.child(1).observing("quantile low"):
                    low = ret_ds[var_name].quantile(threshold_low)
                with monitor.child(1).observing("quantile high"):
                    high = ret_ds[var_name].quantile(threshold_high)
            else:
                low, high = threshold_low, threshold_high
                monitor.progress(2)

            if not mask:
                # Put nans in the data array for min/max outliers, keeping
                # the variable's attributes
                arr = ret_ds[var_name]
                attrs = arr.attrs
                ret_ds[var_name] = arr.where((arr > low) & (arr < high))
                ret_ds[var_name].attrs = attrs
            else:
                # Create and add a data variable containing the mask for this
                # data variable
                _mask_outliers(ret_ds, var_name, low, high)

            monitor.progress(1)

    return ret_ds
示例13: anomaly_external
# 需要导入模块: from xarray import Dataset [as 别名]
# 或者: from xarray.Dataset import copy [as 别名]
def anomaly_external(ds: xr.Dataset,
                     file: str,
                     transform: str = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly with external reference data, for example, a climatology.
    The given reference dataset is expected to consist of 12 time slices, one
    for each month.

    The returned dataset will contain the variable names found in both - the
    reference and the given dataset. Names found in the given dataset, but not in
    the reference, will be dropped from the resulting dataset. The calculated
    anomaly will be against the corresponding month of the reference data.
    E.g. January against January, etc.

    In case spatial extents differ between the reference and the given dataset,
    the anomaly will be calculated on the intersection.

    :param ds: The dataset to calculate anomalies from
    :param file: Path to reference data file
    :param transform: Apply the given transformation before calculating the anomaly.
        For supported operations see help on 'ds_arithmetics' operation.
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    :raise ValidationError: if ``ds`` has no datetime64[ns] time coordinate,
        is not a monthly dataset, its temporal resolution cannot be
        determined, or the reference dataset lacks a 12-step time coordinate
    """
    # Single definition of the key: a misspelt copy used to raise KeyError
    # while formatting the error message, which hid the real problem.
    resolution_attr = 'time_coverage_resolution'
    not_monthly_msg = ('anomaly_external expects a monthly dataset'
                       ' got: {} instead.')

    # Check if the time coordinate is of dtype datetime
    try:
        if ds.time.dtype != 'datetime64[ns]':
            raise ValidationError('The dataset provided for anomaly calculation'
                                  ' is required to have a time coordinate of'
                                  ' dtype datetime64[ns]. Running the normalize'
                                  ' operation on this dataset might help.')
    except AttributeError:
        raise ValidationError('The dataset provided for anomaly calculation'
                              ' is required to have a time coordinate.')

    try:
        if ds.attrs[resolution_attr] != 'P1M':
            raise ValidationError(not_monthly_msg.format(ds.attrs[resolution_attr]))
    except KeyError:
        # Attribute missing: try to derive it before giving up
        try:
            ds = adjust_temporal_attrs(ds)
            if ds.attrs[resolution_attr] != 'P1M':
                raise ValidationError(not_monthly_msg.format(ds.attrs[resolution_attr]))
        except KeyError:
            raise ValidationError('Could not determine temporal resolution'
                                  ' of the given input dataset.')

    clim = xr.open_dataset(file)
    try:
        if len(clim.time) != 12:
            raise ValidationError('The reference dataset is expected to be a '
                                  'monthly climatology. The provided dataset has'
                                  ' a time dimension with length: {}'.format(len(clim.time)))
    except AttributeError:
        raise ValidationError('The reference dataset is required to '
                              'have a time coordinate.')

    # Only copy when no transform is applied; the transform result replaces
    # the copy anyway.
    ret = ds_arithmetics(ds, transform) if transform else ds.copy()

    # Group by months, subtract the appropriate slice from the reference
    # Note that this requires that 'time' coordinate labels are of type
    # datetime64[ns]
    total_work = 100
    step = 100 / 12

    with monitor.starting('Anomaly', total_work=total_work):
        monitor.progress(work=0)
        kwargs = {'ref': clim, 'monitor': monitor, 'step': step}
        ret = ret.groupby(ds['time.month']).apply(_group_anomaly,
                                                  **kwargs)

    # Running groupby results in a redundant 'month' variable being added to
    # the dataset
    ret = ret.drop('month')
    ret.attrs = ds.attrs
    # The dataset may be cropped
    return adjust_spatial_attrs(ret)