This article collects typical usage examples of the sklearn.utils.Bunch class in Python. If you have been wondering what exactly utils.Bunch does, how to use it, or where to find real-world examples, the curated code samples here may help. You can also explore further examples from the module it lives in, sklearn.utils.
Below, 13 code examples of utils.Bunch are shown, sorted by popularity by default.
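Before the examples, here is a minimal sketch (not taken from any of the projects below) of what sklearn.utils.Bunch itself provides: it is a dict subclass whose keys are also accessible as attributes.

from sklearn.utils import Bunch

b = Bunch(data=[1, 2, 3], target="label")
print(b["data"])   # dict-style access -> [1, 2, 3]
print(b.target)    # attribute-style access -> 'label'
b.extra = 42       # newly set attributes become keys as well
print(b["extra"])  # -> 42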
Example 1: __getitem__
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def __getitem__(self, idx: int):
    # X_wide and X_deep are assumed to be *always* present
    if isinstance(self.X_wide, sparse_matrix):
        X = Bunch(wide=np.array(self.X_wide[idx].todense()).squeeze())
    else:
        X = Bunch(wide=self.X_wide[idx])
    X.deepdense = self.X_deep[idx]
    if self.X_text is not None:
        X.deeptext = self.X_text[idx]
    if self.X_img is not None:
        # if an image dataset is used, make sure it is in the right format to
        # be ingested by the conv layers
        xdi = self.X_img[idx]
        # if int, it must be uint8
        if "int" in str(xdi.dtype) and "uint8" != str(xdi.dtype):
            xdi = xdi.astype("uint8")
        # if float, it must be float32
        if "float" in str(xdi.dtype) and "float32" != str(xdi.dtype):
            xdi = xdi.astype("float32")
        # if there are no transforms, or these do not include ToTensor(),
        # then we need to replicate what ToTensor() does -> transpose axis
        # and normalize if necessary
        if not self.transforms or "ToTensor" not in self.transforms_names:
            xdi = xdi.transpose(2, 0, 1)
            if "int" in str(xdi.dtype):
                xdi = (xdi / xdi.max()).astype("float32")
        # if ToTensor() is included, simply apply transforms
        if "ToTensor" in self.transforms_names:
            xdi = self.transforms(xdi)
        # else apply transforms on the result of calling torch.tensor on
        # xdi after all the previous manipulation
        elif self.transforms:
            xdi = self.transforms(torch.tensor(xdi))
        # fill the Bunch
        X.deepimage = xdi
    if self.Y is not None:
        y = self.Y[idx]
        return X, y
    else:
        return X
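The returned Bunch acts as a lightweight named container for the different model inputs. As a rough illustration (with made-up shapes, not the library's actual data), a sample built this way can be consumed like so:

import numpy as np
from sklearn.utils import Bunch

sample = Bunch(wide=np.zeros(5), deepdense=np.zeros(8))          # hypothetical shapes
sample.deepimage = np.zeros((3, 224, 224), dtype="float32")
for name, component in sample.items():                           # a Bunch is still a dict
    print(name, component.shape)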
Example 2: __getitem__
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def __getitem__(self, idx: int):
    X = Bunch(wide=self.X_wide[idx])
    X.deepdense = self.X_deep[idx]
    X.deeptext = self.X_text[idx]
    X.deepimage = self.X_img[idx]
    y = self.Y[idx]
    return X, y
Example 3: make_calcium_traces
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def make_calcium_traces(
    neuron_ids=('a', 'b', 'c'),
    duration=60.0,
    sampling_rate=30.0,
    oscillation=True,
):
    n_neurons = len(neuron_ids)
    gen_params = dict(
        g=[.95],
        sn=.3,
        T=int(sampling_rate * duration),
        framerate=sampling_rate,
        firerate=.5,
        b=0,
        N=n_neurons,
        seed=13,
    )
    if oscillation:
        make_traces = gen_sinusoidal_data
    else:
        make_traces = gen_data
    traces, _, spikes = map(np.squeeze, make_traces(**gen_params))
    time = np.arange(0, traces.shape[1] / sampling_rate, 1 / sampling_rate)
    traces = pd.DataFrame(traces.T, index=time, columns=neuron_ids)
    spikes = pd.DataFrame(spikes.T, index=time, columns=neuron_ids)
    return Bunch(
        traces=traces,
        spikes=spikes,
    )
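The Bunch here simply pairs two time-aligned DataFrames. The sketch below mimics the returned object with random data (gen_sinusoidal_data / gen_data come from an external dependency, so they are not called here) to show the intended access pattern:

import numpy as np
import pandas as pd
from sklearn.utils import Bunch

time = np.arange(0, 60.0, 1 / 30.0)          # 60 s sampled at 30 Hz
fake = np.random.rand(len(time), 3)          # stand-in for real calcium traces
data = Bunch(
    traces=pd.DataFrame(fake, index=time, columns=list("abc")),
    spikes=pd.DataFrame(fake > 0.95, index=time, columns=list("abc")),
)
print(data.traces.shape, data.spikes.sum().to_dict())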
Example 4: _extend_confusion_matrix
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def _extend_confusion_matrix(*, true_positives, false_positives, true_negatives, false_negatives):
    """Extend the provided confusion matrix counts with additional implied fields.

    Parameters
    ----------
    true_positives, false_positives, true_negatives, false_negatives : int
        The counts appearing in the confusion matrix.

    Returns
    -------
    result : sklearn.utils.Bunch
        Dictionary-like object, with attributes:

        true_positives, false_positives, true_negatives, false_negatives : int
            The provided counts.
        predicted_positives, predicted_negatives, positives, negatives, n : int
            Derived counts.
    """
    return Bunch(
        true_positives=true_positives,
        false_positives=false_positives,
        true_negatives=true_negatives,
        false_negatives=false_negatives,
        predicted_positives=(true_positives + false_positives),
        predicted_negatives=(true_negatives + false_negatives),
        positives=(true_positives + false_negatives),
        negatives=(true_negatives + false_positives),
        n=(true_positives + true_negatives + false_positives + false_negatives),
    )
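Assuming _extend_confusion_matrix above is in scope, a quick call with toy counts shows the derived fields the Bunch exposes:

cm = _extend_confusion_matrix(
    true_positives=40, false_positives=10,
    true_negatives=45, false_negatives=5)
print(cm.predicted_positives)  # 40 + 10 = 50
print(cm.positives)            # 40 + 5  = 45
print(cm.n)                    # 100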
Example 5: _load_data
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def _load_data(dset, dfiles):
    """
    Loads `dfiles` for `dset` and returns a Bunch with data and labels

    Parameters
    ----------
    dset : {'sim', 'digits'}
        Dataset to load
    dfiles : list of str
        Data files in `dset`

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        With keys `data` and `labels`
    """
    dpath = _res_path.format(resource=dset)
    if not op.isdir(dpath):  # should never happen
        raise ValueError('{} is not a valid dataset. If you are receiving '
                         'this error after using snf.datasets.load_simdata() '
                         'or snf.datasets.load_digits() it is possible that '
                         'snfpy was improperly installed. Please check your '
                         'installation and try again.'.format(dset))
    # space- versus comma-delimited files (ugh)
    try:
        data = [np.loadtxt(op.join(dpath, fn)) for fn in dfiles]
    except ValueError:
        data = [np.loadtxt(op.join(dpath, fn), delimiter=',') for fn in dfiles]
    return Bunch(data=data,
                 labels=np.loadtxt(op.join(dpath, 'label.csv')))
Example 6: load_simdata
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def load_simdata():
    """
    Loads "similarity" data with two datatypes

    Returns
    -------
    sim : :obj:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['data', 'labels']
    """
    dfiles = [
        'data1.csv', 'data2.csv'
    ]
    return _load_data('sim', dfiles)
Example 7: load_digits
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def load_digits():
    """
    Loads "digits" dataset with four datatypes

    Returns
    -------
    digits : :obj:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['data', 'labels']
    """
    dfiles = [
        'fourier.csv', 'pixel.csv', 'profile.csv', 'zer.csv'
    ]
    return _load_data('digits', dfiles)
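Both loaders (Examples 6 and 7) return the same Bunch layout produced by _load_data. A rough usage sketch, assuming snfpy is installed and its bundled data files are present:

from snf import datasets

sim = datasets.load_simdata()
print(len(sim.data))        # two data arrays ('sim' has two datatypes)
print(sim.labels.shape)     # one label per sample

digits = datasets.load_digits()
print(len(digits.data))     # four data arrays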
Example 8: __init__
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def __init__(self, dataset_name=None, data=None, target=None, metadata=None, update_hashes=True,
             **kwargs):
    """
    Object representing a dataset.

    Notionally compatible with scikit-learn's Bunch object.

    dataset_name: string (required)
        key to use for this dataset
    data:
        Data (usually np.array or np.ndarray)
    target: np.array
        Either the classification target or the label to be used for each of
        the points in `data`
    metadata: dict
        Data about the object. Key fields include `license_txt` and `descr`
    update_hashes:
        If True, update the data/target hashes in the metadata.
    """
    super().__init__(**kwargs)
    if dataset_name is None:
        if metadata is not None and metadata.get("dataset_name", None) is not None:
            dataset_name = metadata['dataset_name']
        else:
            raise Exception('dataset_name is required')
    if metadata is not None:
        self['metadata'] = metadata
    else:
        self['metadata'] = {}
    self['metadata']['dataset_name'] = dataset_name
    self['data'] = data
    self['target'] = target
    if update_hashes:
        data_hashes = self.get_data_hashes()
        self['metadata'] = {**self['metadata'], **data_hashes}
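This Dataset class layers project-specific bookkeeping on top of a Bunch-like mapping. Purely as an illustration of the resulting shape (not the class's real constructor, which also computes hashes), a plain Bunch with the same keys would look like:

import numpy as np
from sklearn.utils import Bunch

ds_like = Bunch(
    data=np.arange(10),
    target=np.zeros(10),
    metadata={"dataset_name": "toy_example", "descr": "illustrative only"},
)
print(ds_like["metadata"]["dataset_name"], ds_like.target.shape)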
Example 9: named_estimators
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def named_estimators(self):
    """Bunches the estimators by name."""
    return Bunch(**dict(self.estimators))
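The same one-liner can be reproduced outside the class; given a list of (name, estimator) pairs such as an ensemble keeps, Bunch(**dict(...)) makes each estimator reachable by attribute:

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import Bunch

estimators = [("lr", LogisticRegression()), ("tree", DecisionTreeClassifier())]
named = Bunch(**dict(estimators))
print(named.lr)       # attribute access by estimator name
print(named["tree"])  # dict access still works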
Example 10: group_summary
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def group_summary(metric_function, y_true, y_pred, *,
                  sensitive_features,
                  indexed_params=None,
                  **metric_params):
    r"""Apply a metric to each subgroup of a set of data.

    :param metric_function: Function with signature
        ``metric_function(y_true, y_pred, \*\*metric_params)``
    :param y_true: Array of ground-truth values
    :param y_pred: Array of predicted values
    :param sensitive_features: Array indicating the group to which each input value belongs
    :param indexed_params: Names of ``metric_function`` parameters that
        should be split according to ``sensitive_features`` in addition to ``y_true``
        and ``y_pred``. Defaults to ``None`` corresponding to ``{"sample_weight"}``.
    :param \*\*metric_params: Optional arguments to be passed to the ``metric_function``

    :return: Object containing the result of applying ``metric_function`` to the entire dataset
        and to each group identified in ``sensitive_features``
    :rtype: :py:class:`sklearn.utils.Bunch` with the fields ``overall`` and ``by_group``
    """
    _check_array_sizes(y_true, y_pred, 'y_true', 'y_pred')
    _check_array_sizes(y_true, sensitive_features, 'y_true', 'sensitive_features')

    # Make everything a numpy array
    # This allows for fast slicing of the groups
    y_t = _convert_to_ndarray_and_squeeze(y_true)
    y_p = _convert_to_ndarray_and_squeeze(y_pred)
    s_f = _convert_to_ndarray_and_squeeze(sensitive_features)

    # Evaluate the overall metric with the numpy arrays
    # This ensures consistency in how metric_function is called
    checked_args = _check_metric_params(y_t, metric_params, indexed_params)
    result_overall = metric_function(y_t, y_p, **checked_args)

    groups = np.unique(s_f)
    result_by_group = {}
    for group in groups:
        group_indices = (group == s_f)
        result_by_group[group] = metric_function(
            y_t[group_indices], y_p[group_indices],
            **_check_metric_params(y_t, metric_params, indexed_params, group_indices))

    return Bunch(overall=result_overall, by_group=result_by_group)


# This loosely follows the pattern of _check_fit_params in
# sklearn/utils/validation.py
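A hedged usage sketch, assuming group_summary above is in scope together with its private helpers: apply an sklearn metric per sensitive group and read the two Bunch fields.

import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 1, 1, 0, 1, 0])
y_pred = np.array([0, 1, 0, 0, 1, 1])
groups = np.array(["a", "a", "a", "b", "b", "b"])

summary = group_summary(accuracy_score, y_true, y_pred,
                        sensitive_features=groups)
print(summary.overall)    # accuracy over all six samples
print(summary.by_group)   # per-group accuracies keyed by 'a' and 'b'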
Example 11: validation
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def validation(model, criterion, val_loader,
               epoch, step, fh):
    report_epoch = 10
    val_metrics = Metrics()
    with torch.no_grad():
        model.eval()
        vl = val_loader
        tq = tqdm.tqdm(total=(len(vl) * val_loader.batch_size))
        tq.set_description(f'(val) Ep{epoch:>3d}')
        for i, (inputs, targets, labels, names) in enumerate(val_loader):
            inputs = inputs.cuda()
            targets = targets.cuda()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            tq.update(inputs.size(0))
            val_metrics.loss.append(loss.item())
            val_metrics.bce.append(criterion._stash_bce_loss.item())
            val_metrics.jaccard.append(criterion._stash_jaccard.item())
            if i > 0 and i % report_epoch == 0:
                report_metrics = Bunch(
                    epoch=epoch,
                    step=step,
                    val_loss=np.mean(val_metrics.loss[-report_epoch:]),
                    val_bce=np.mean(val_metrics.bce[-report_epoch:]),
                    val_jaccard=np.mean(
                        val_metrics.jaccard[-report_epoch:]),
                )
                tq.set_postfix(
                    loss=f'{report_metrics.val_loss:.5f}',
                    bce=f'{report_metrics.val_bce:.5f}',
                    jaccard=f'{report_metrics.val_jaccard:.5f}')

        # End of epoch
        report_metrics = Bunch(
            epoch=epoch,
            step=step,
            val_loss=np.mean(val_metrics.loss[-report_epoch:]),
            val_bce=np.mean(val_metrics.bce[-report_epoch:]),
            val_jaccard=np.mean(val_metrics.jaccard[-report_epoch:]),
        )
        tq.set_postfix(
            loss=f'{report_metrics.val_loss:.5f}',
            bce=f'{report_metrics.val_bce:.5f}',
            jaccard=f'{report_metrics.val_jaccard:.5f}')
        tq.close()
    return val_metrics
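In this example, Bunch is only used as a throwaway record so the reporting code can read fields by name. The pattern in isolation:

from sklearn.utils import Bunch

report_metrics = Bunch(epoch=3, step=1200, val_loss=0.12345, val_jaccard=0.789)
print(f"Ep{report_metrics.epoch:>3d} "
      f"loss={report_metrics.val_loss:.5f} "
      f"jaccard={report_metrics.val_jaccard:.5f}")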
Example 12: load_nhefs
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def load_nhefs(raw=False, restrict=True):
    """Loads the NHEFS smoking-cessation and weight-loss dataset.

    Data was gathered during an observational study conducted by NHANES
    during the 1970s and 1980s. It follows a cohort of people, some of whom
    decided to quit smoking while others persisted, and records each
    individual's weight gain in order to estimate the causal contribution of
    smoking cessation to weight gain.

    This dataset is used throughout Hernán and Robins' Causal Inference Book.
    https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/
    If used for academic purposes, please consider citing the book:
    Hernán MA, Robins JM (2020). Causal Inference: What If. Boca Raton: Chapman & Hall/CRC.

    Args:
        raw (bool): Whether to return the entire DataFrame and descriptors or not.
            If False, only confounders are used for the data.
            If True, returns a (pd.DataFrame, pd.Series) tuple (data and description).
        restrict (bool): Whether to apply exclusion criteria on missing data or not.
            Note: if False, data will have censored (NaN) outcomes.

    Returns:
        Bunch: dictionary-like object
            attributes are: `X` (covariates), `a` (treatment assignment), `y` (outcome),
            `descriptors` (feature description)
    """
    dir_name = os.path.join(DATA_DIR_NAME, "nhefs")
    data = load_data_file("NHEFS.csv", dir_name)
    descriptors = load_data_file("NHEFS_codebook.csv", dir_name)
    descriptors = descriptors.set_index("Variable name")["Description"]
    if raw:
        return data, descriptors

    confounders = ["active", "age", "education", "exercise", "race",
                   "sex", "smokeintensity", "smokeyrs", "wt71"]
    if restrict:
        restrictions = ["wt82"]
        missing = data[restrictions].isnull().any(axis="columns")
        data = data.loc[~missing]

    a = data.pop("qsmk")
    y = data.pop("wt82_71")
    X = data[confounders]
    descriptors = descriptors[confounders + ["qsmk", "wt82_71"]]

    X = pd.get_dummies(X, columns=["active", "education", "exercise"], drop_first=True)
    X = X.join(X[['age', 'wt71', 'smokeintensity', 'smokeyrs']] ** 2, rsuffix="^2")

    data = Bunch(X=X, a=a, y=y, descriptors=descriptors)
    return data
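A short access sketch for the returned Bunch, assuming the loader above and its bundled CSV files are available:

data = load_nhefs()
print(data.X.shape)              # covariate matrix
print(data.a.value_counts())     # quit-smoking treatment indicator (qsmk)
print(data.y.describe())         # weight change 1971 -> 1982 (wt82_71)
print(data.descriptors.head())   # human-readable feature descriptions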
Example 13: load_acic16
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import Bunch [as alias]
def load_acic16(instance=1, raw=False):
    """Loads a single dataset from the 2016 Atlantic Causal Inference Conference data challenge.

    The dataset is based on real covariates but synthetically simulates the treatment assignment
    and potential outcomes. It therefore also contains sufficient ground truth to evaluate
    the effect estimation of causal models.
    The competition introduced 7700 simulated files (100 instances for each of the 77
    data-generating processes). We provide a smaller sample of one instance from 10
    DGPs. For the full dataset, see the link below to the competition site.

    If used for academic purposes, please consider citing the competition organizers:
    Vincent Dorie, Jennifer Hill, Uri Shalit, Marc Scott, and Dan Cervone. "Automated versus do-it-yourself methods
    for causal inference: Lessons learned from a data analysis competition."
    Statistical Science 34, no. 1 (2019): 43-68.

    Args:
        instance (int): number between 1-10 (inclusive), dataset to load.
        raw (bool): If True, skip the contrast ("dummify") encoding of non-numeric columns and
            return a (pd.DataFrame, pd.DataFrame) tuple (one for covariates and the second with
            treatment assignment, noisy potential outcomes and true potential outcomes).

    Returns:
        Bunch: dictionary-like object
            attributes are: `X` (covariates), `a` (treatment assignment), `y` (outcome),
            `po` (ground truth potential outcomes: `po[0]` potential outcome for controls and
            `po[1]` potential outcome for treated),
            `descriptors` (feature description).

    See Also:
        * `Publication <https://projecteuclid.org/euclid.ss/1555056030>`_
        * `Official competition site <http://jenniferhill7.wixsite.com/acic-2016/competition>`_
        * `Official github with data generating code <https://github.com/vdorie/aciccomp/tree/master/2016>`_
    """
    dir_name = os.path.join(DATA_DIR_NAME, "acic_challenge_2016")
    X = load_data_file("x.csv", dir_name)
    zymu = load_data_file("zymu_{}.csv".format(instance), dir_name)
    if raw:
        return X, zymu

    non_numeric_cols = X.select_dtypes(include=[object]).columns
    X = pd.get_dummies(X, columns=non_numeric_cols, drop_first=True)
    a = zymu["z"].rename("a")

    # # Extract observed outcome:
    y = zymu[["y0", "y1"]]
    y = y.rename(columns=lambda x: int(x.strip("y")))  # remove 'y' prefix to allow lookup
    y = y.lookup(y.index, a)  # Choose the outcome based on the treatment assignment
    y = pd.Series(y, index=a.index)  # `lookup` returns an ndarray; convert back to a Series

    # # Potential outcomes:
    po = zymu[["mu0", "mu1"]]
    po = po.rename(columns=lambda x: x.strip("mu"))

    descriptors = pd.Series(data="No true meaning", index=X.columns)
    data = Bunch(X=X, a=a, y=y, po=po, descriptors=descriptors)
    return data
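And a matching access sketch, again assuming the loader above and its bundled CSV files are available:

data = load_acic16(instance=1)
print(data.X.shape)              # dummified covariates
print(data.a.value_counts())     # simulated treatment assignment
print(data.y.head())             # observed (noisy) outcome, chosen per assignment
print(data.po.head())            # true potential outcomes for control/treated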