當前位置: 首頁>>代碼示例>>Python>>正文


Python utils.Bunch方法代碼示例

本文整理匯總了Python中sklearn.utils.Bunch方法的典型用法代碼示例。如果您正苦於以下問題:Python utils.Bunch方法的具體用法?Python utils.Bunch怎麼用?Python utils.Bunch使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊sklearn.utils的用法示例。


在下文中一共展示了utils.Bunch方法的13個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __getitem__

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def __getitem__(self, idx: int):
        """Assemble the model inputs (and the target, when present) for one sample.

        The returned ``Bunch`` always holds ``wide`` and ``deepdense``;
        ``deeptext`` and ``deepimage`` are added only when ``self.X_text`` /
        ``self.X_img`` are set. If ``self.Y`` is set the result is the tuple
        ``(X, y)``, otherwise just ``X``.
        """
        # The wide and deep-dense components are assumed to be *always* present.
        wide_row = self.X_wide[idx]
        if isinstance(self.X_wide, sparse_matrix):
            # Sparse rows are densified and flattened before being stored.
            wide_row = np.array(wide_row.todense()).squeeze()
        sample = Bunch(wide=wide_row)
        sample.deepdense = self.X_deep[idx]

        if self.X_text is not None:
            sample.deeptext = self.X_text[idx]

        if self.X_img is not None:
            # Bring the image into the format expected by the conv layers.
            image = self.X_img[idx]
            dtype_name = str(image.dtype)
            if "int" in dtype_name and dtype_name != "uint8":
                # any integer type is coerced to uint8
                image = image.astype("uint8")
            elif "float" in dtype_name and dtype_name != "float32":
                # any floating type is coerced to float32
                image = image.astype("float32")

            uses_to_tensor = "ToTensor" in self.transforms_names
            if not self.transforms or not uses_to_tensor:
                # Without ToTensor() we do its job by hand: move channels
                # first and scale integer images into floats.
                image = image.transpose(2, 0, 1)
                if "int" in str(image.dtype):
                    image = (image / image.max()).astype("float32")

            if uses_to_tensor:
                # ToTensor() is part of the pipeline: apply it as is.
                image = self.transforms(image)
            elif self.transforms:
                # Other transforms receive a tensor built from the prepared array.
                image = self.transforms(torch.tensor(image))

            sample.deepimage = image

        if self.Y is None:
            return sample
        return sample, self.Y[idx]
開發者ID:jrzaurin,項目名稱:pytorch-widedeep,代碼行數:42,代碼來源:_wd_dataset.py

示例2: __getitem__

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def __getitem__(self, idx: int):
        """Return all four model components for sample ``idx`` plus its target."""
        components = Bunch(
            wide=self.X_wide[idx],
            deepdense=self.X_deep[idx],
            deeptext=self.X_text[idx],
            deepimage=self.X_img[idx],
        )
        return components, self.Y[idx]
開發者ID:jrzaurin,項目名稱:pytorch-widedeep,代碼行數:10,代碼來源:test_warm_up_routines.py

示例3: make_calcium_traces

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def make_calcium_traces(
    neuron_ids=('a','b','c'),
    duration=60.0,
    sampling_rate=30.0,
    oscillation=True,
):
    """Generate synthetic calcium traces and spikes for a set of neurons.

    Parameters
    ----------
    neuron_ids : sequence
        Labels used as the columns of the returned frames; one trace is
        generated per label.
    duration : float
        Length of the recording. The number of samples requested from the
        generator is ``int(sampling_rate * duration)``.
    sampling_rate : float
        Samples per unit of time; also forwarded to the generator as
        ``framerate``.
    oscillation : bool
        If true the data come from ``gen_sinusoidal_data``, otherwise from
        ``gen_data``.

    Returns
    -------
    sklearn.utils.Bunch
        With keys ``traces`` and ``spikes``, each a ``pandas.DataFrame``
        indexed by sample time with one column per neuron.
    """

    n_neurons = len(neuron_ids)

    gen_params = dict(
        g=[.95],
        sn=.3,
        T=int(sampling_rate*duration),
        framerate=sampling_rate,
        firerate=.5,
        b=0,
        N=n_neurons,
        seed=13,
    )

    make_traces = gen_sinusoidal_data if oscillation else gen_data

    traces, _, spikes = map(np.squeeze, make_traces(**gen_params))

    # `squeeze` collapses the neuron axis when there is a single neuron;
    # restore the (neuron, sample) layout the code below relies on.
    traces = np.reshape(traces, (n_neurons, -1))
    spikes = np.reshape(spikes, (n_neurons, -1))

    # Build the time axis from an integer sample count. A float-step
    # `np.arange(0, stop, step)` may produce one sample too many or too few,
    # which would make the index length disagree with the data.
    time = np.arange(traces.shape[1]) * (1/sampling_rate)

    traces = pd.DataFrame(traces.T, index=time, columns=neuron_ids)
    spikes = pd.DataFrame(spikes.T, index=time, columns=neuron_ids)

    return Bunch(
        traces=traces,
        spikes=spikes,
    )
開發者ID:AllenInstitute,項目名稱:neuroglia,代碼行數:38,代碼來源:synthetic_calcium.py

示例4: _extend_confusion_matrix

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def _extend_confusion_matrix(*, true_positives, false_positives, true_negatives, false_negatives):
    """Augment the four confusion-matrix counts with the totals they imply.

    Parameters
    ----------
    true_positives, false_positives, true_negatives, false_negatives : int
        The four cells of the confusion matrix (keyword-only).

    Returns
    -------
    result : sklearn.utils.Bunch
        Dictionary-like object, with attributes:

        true_positives, false_positives, true_negatives, false_negatives : int
            The counts exactly as passed in.

        predicted_positives, predicted_negatives, positives, negatives, n : int
            Sums derived from the four counts.
    """
    extended = Bunch(
        true_positives=true_positives,
        false_positives=false_positives,
        true_negatives=true_negatives,
        false_negatives=false_negatives,
    )
    # Totals by predicted label.
    extended.predicted_positives = true_positives + false_positives
    extended.predicted_negatives = true_negatives + false_negatives
    # Totals by actual label.
    extended.positives = true_positives + false_negatives
    extended.negatives = true_negatives + false_positives
    # Grand total.
    extended.n = true_positives + true_negatives + false_positives + false_negatives
    return extended
開發者ID:fairlearn,項目名稱:fairlearn,代碼行數:32,代碼來源:_tradeoff_curve_utilities.py

示例5: _load_data

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def _load_data(dset, dfiles):
    """
    Reads the files of a bundled dataset together with its labels

    Parameters
    ----------
    dset : {'sim', 'digits'}
        Name of the dataset; selects the resource directory
    dfiles : list of str
        File names inside that directory to load as data arrays

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        With keys `data` (list of arrays, one per entry of `dfiles`) and
        `labels` (contents of `label.csv`)

    Raises
    ------
    ValueError
        If the resource directory for `dset` does not exist
    """

    dpath = _res_path.format(resource=dset)

    if not op.isdir(dpath):  # should never happen
        message = ('{} is not a valid dataset. If you are receiving '
                   'this error after using snf.datasets.load_simdata() '
                   'or snf.datasets.load_digits() it is possible that '
                   'snfpy was improperly installed. Please check your '
                   'installation and try again.'.format(dset))
        raise ValueError(message)

    paths = [op.join(dpath, fn) for fn in dfiles]

    # files are either whitespace- or comma-delimited: try whitespace for
    # the whole set first and fall back to commas for the whole set
    try:
        arrays = [np.loadtxt(path) for path in paths]
    except ValueError:
        arrays = [np.loadtxt(path, delimiter=',') for path in paths]

    labels = np.loadtxt(op.join(dpath, 'label.csv'))
    return Bunch(data=arrays, labels=labels)
開發者ID:rmarkello,項目名稱:snfpy,代碼行數:36,代碼來源:datasets.py

示例6: load_simdata

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def load_simdata():
    """
    Loads the "similarity" dataset, made of two data files

    Returns
    -------
    sim : :obj:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['data', 'labels']
    """

    return _load_data('sim', ['data1.csv', 'data2.csv'])
開發者ID:rmarkello,項目名稱:snfpy,代碼行數:17,代碼來源:datasets.py

示例7: load_digits

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def load_digits():
    """
    Loads the "digits" dataset, made of four data files

    Returns
    -------
    digits : :obj:`sklearn.utils.Bunch`
        Dictionary-like object with keys ['data', 'labels']
    """

    feature_files = ['fourier.csv', 'pixel.csv', 'profile.csv', 'zer.csv']
    return _load_data('digits', feature_files)
開發者ID:rmarkello,項目名稱:snfpy,代碼行數:17,代碼來源:datasets.py

示例8: __init__

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def __init__(self, dataset_name=None, data=None, target=None, metadata=None, update_hashes=True,
                 **kwargs):
        """
        Object representing a dataset object.
        Notionally compatible with scikit-learn's Bunch object

        dataset_name: string (required)
            Key to use for this dataset. May be omitted only when
            `metadata` already carries a non-None `dataset_name` entry.
        data:
            Data: (usually np.array or np.ndarray)
        target: np.array
            Either classification target or label to be used. for each of the points
            in `data`
        metadata: dict
            Data about the object. Key fields include `license_txt` and `descr`.
            The dict is copied (shallowly); the caller's object is not modified.
        update_hashes:
            If True, update the data/target hashes in the Metadata.

        Raises
        ------
        ValueError
            If no dataset name is given, either directly or through `metadata`.
        """
        super().__init__(**kwargs)

        # Copy so that writing `dataset_name` below does not leak into the
        # dict the caller handed us.
        metadata = {} if metadata is None else {**metadata}

        if dataset_name is None:
            dataset_name = metadata.get("dataset_name")
            if dataset_name is None:
                raise ValueError('dataset_name is required')
        metadata['dataset_name'] = dataset_name

        self['metadata'] = metadata
        self['data'] = data
        self['target'] = target
        if update_hashes:
            # Hashes are computed after data/target are stored on the object.
            data_hashes = self.get_data_hashes()
            self['metadata'] = {**self['metadata'], **data_hashes}
開發者ID:hackalog,項目名稱:cookiecutter-easydata,代碼行數:38,代碼來源:datasets.py

示例9: named_estimators

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def named_estimators(self):
        """ Expose the estimators as a Bunch keyed by their names """
        estimators_by_name = dict(self.estimators)
        return Bunch(**estimators_by_name)
開發者ID:Ibotta,項目名稱:sk-dist,代碼行數:5,代碼來源:postprocessing.py

示例10: group_summary

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def group_summary(metric_function, y_true, y_pred, *,
                  sensitive_features,
                  indexed_params=None,
                  **metric_params):
    r"""Evaluate a metric on the whole dataset and on every subgroup.

    :param metric_function: Callable invoked as
        ``metric_function(y_true, y_pred, \*\*metric_params)``

    :param y_true: Ground-truth values

    :param y_pred: Predicted values

    :param sensitive_features: Group membership of each sample; one metric value is
        produced per distinct value

    :param indexed_params: Names of ``metric_function`` parameters that must be
        sliced per group alongside ``y_true`` and ``y_pred``. ``None`` (the default)
        corresponds to ``{"sample_weight"}``.

    :param \*\*metric_params: Extra keyword arguments forwarded to ``metric_function``

    :return: The metric computed on all samples and on each group found in
        ``sensitive_features``
    :rtype: :py:class:`sklearn.utils.Bunch` with the fields ``overall`` and ``by_group``
    """
    _check_array_sizes(y_true, y_pred, 'y_true', 'y_pred')
    _check_array_sizes(y_true, sensitive_features, 'y_true', 'sensitive_features')

    # Work on squeezed numpy arrays so groups can be selected with boolean masks
    y_t = _convert_to_ndarray_and_squeeze(y_true)
    y_p = _convert_to_ndarray_and_squeeze(y_pred)
    s_f = _convert_to_ndarray_and_squeeze(sensitive_features)

    # The overall value is computed from the same arrays as the per-group
    # values, so metric_function is always called in the same way
    overall = metric_function(
        y_t, y_p, **_check_metric_params(y_t, metric_params, indexed_params))

    def _metric_on(mask):
        # Metric restricted to the samples selected by the boolean mask
        return metric_function(
            y_t[mask], y_p[mask],
            **_check_metric_params(y_t, metric_params, indexed_params, mask))

    by_group = {label: _metric_on(label == s_f) for label in np.unique(s_f)}

    return Bunch(overall=overall, by_group=by_group)


# This loosely follows the pattern of _check_fit_params in
# sklearn/utils/validation.py 
開發者ID:fairlearn,項目名稱:fairlearn,代碼行數:54,代碼來源:_metrics_engine.py

示例11: validation

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def validation(model, criterion, val_loader,
               epoch, step, fh):
    """Run one validation pass over ``val_loader`` and collect batch metrics.

    The model is switched to eval mode and gradients are disabled. Inputs and
    targets are moved to CUDA before the forward pass. After each batch the
    loss, ``criterion._stash_bce_loss`` and ``criterion._stash_jaccard`` are
    appended to the returned metrics object. A progress bar shows the running
    mean of the most recent batches.

    Args:
        model: Module called as ``model(inputs)``.
        criterion: Callable ``criterion(outputs, targets)`` that also exposes
            ``_stash_bce_loss`` and ``_stash_jaccard`` after each call.
        val_loader: Iterable of ``(inputs, targets, labels, names)`` batches
            with ``__len__`` and a ``batch_size`` attribute.
        epoch: Epoch number, shown in the progress-bar description.
        step: Step counter, stored in the per-report summary.
        fh: Accepted for interface compatibility; not used in this function.

    Returns:
        The ``Metrics`` object holding the per-batch ``loss``, ``bce`` and
        ``jaccard`` values.
    """
    report_epoch = 10
    val_metrics = Metrics()

    def _show_recent(progress):
        # Display the mean of each metric over the last `report_epoch` batches.
        report_metrics = Bunch(
            epoch=epoch,
            step=step,
            val_loss=np.mean(val_metrics.loss[-report_epoch:]),
            val_bce=np.mean(val_metrics.bce[-report_epoch:]),
            val_jaccard=np.mean(val_metrics.jaccard[-report_epoch:]),
        )
        progress.set_postfix(
            loss=f'{report_metrics.val_loss:.5f}',
            bce=f'{report_metrics.val_bce:.5f}',
            jaccard=f'{report_metrics.val_jaccard:.5f}')

    with torch.no_grad():
        model.eval()

        # The context manager closes the bar even when a batch raises.
        with tqdm.tqdm(total=(len(val_loader) * val_loader.batch_size)) as tq:
            tq.set_description(f'(val) Ep{epoch:>3d}')
            for i, (inputs, targets, labels, names) in enumerate(val_loader):
                inputs = inputs.cuda()
                targets = targets.cuda()

                outputs = model(inputs)
                loss = criterion(outputs, targets)
                tq.update(inputs.size(0))

                val_metrics.loss.append(loss.item())
                val_metrics.bce.append(criterion._stash_bce_loss.item())
                val_metrics.jaccard.append(criterion._stash_jaccard.item())

                # Refresh the displayed averages every `report_epoch` batches.
                if i > 0 and i % report_epoch == 0:
                    _show_recent(tq)

            # End of epoch
            _show_recent(tq)

    return val_metrics
開發者ID:SpaceNetChallenge,項目名稱:SpaceNet_Off_Nadir_Solutions,代碼行數:55,代碼來源:main.py

示例12: load_nhefs

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def load_nhefs(raw=False, restrict=True):
    """Load the NHEFS smoking-cessation and weight-gain dataset.

    The data were gathered in an observational study conducted by NHANES
    during the 1970s and 1980s. It follows a cohort of people, some of whom
    decided to quit smoking while others kept smoking, and records each
    individual's weight gain in order to estimate the causal contribution of
    smoking cessation to weight gain.

    This dataset is used throughout Hernán and Robins' Causal Inference Book.
     https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/
    If used for academic purposes, please consider citing the book:
     Hernán MA, Robins JM (2020). Causal Inference: What If. Boca Raton: Chapman & Hall/CRC.

    Args:
        raw (bool): If True, return the unprocessed ``(data, descriptors)`` tuple
                    (a pd.DataFrame and a pd.Series) instead of a Bunch.
                    If False, only the confounders are used as covariates.
        restrict (bool): If True, drop the rows whose ``wt82`` value is missing.
                         Note: if False, the data will contain censored (NaN) outcomes.

    Returns:
        Bunch: dictionary-like object
               attributes are: `X` (covariates), `a` (treatment assignment), `y` (outcome),
                               `descriptors` (feature description)
    """
    dir_name = os.path.join(DATA_DIR_NAME, "nhefs")
    data = load_data_file("NHEFS.csv", dir_name)
    codebook = load_data_file("NHEFS_codebook.csv", dir_name)

    # Map every variable name to its textual description.
    descriptors = codebook.set_index("Variable name")["Description"]

    if raw:
        return data, descriptors

    confounders = ["active", "age", "education", "exercise", "race",
                   "sex", "smokeintensity", "smokeyrs", "wt71"]
    squared_terms = ['age', 'wt71', 'smokeintensity', 'smokeyrs']

    if restrict:
        # Keep only the rows in which every restriction column is observed.
        observed = data[["wt82"]].notnull().all(axis="columns")
        data = data.loc[observed]

    a = data.pop("qsmk")      # treatment assignment
    y = data.pop("wt82_71")   # outcome
    X = data[confounders]
    descriptors = descriptors[confounders + ["qsmk", "wt82_71"]]

    # One-hot encode the categorical confounders, then append squared terms.
    X = pd.get_dummies(X, columns=["active", "education", "exercise"], drop_first=True)
    squares = X[squared_terms] ** 2
    X = X.join(squares, rsuffix="^2")

    return Bunch(X=X, a=a, y=y, descriptors=descriptors)
開發者ID:IBM,項目名稱:causallib,代碼行數:55,代碼來源:data_loader.py

示例13: load_acic16

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import Bunch [as 別名]
def load_acic16(instance=1, raw=False):
    """ Loads single dataset from the 2016 Atlantic Causal Inference Conference data challenge.

    The dataset is based on real covariates but synthetically simulates the treatment assignment
    and potential outcomes. It therefore also contains sufficient ground truth to evaluate
    the effect estimation of causal models.
    The competition introduced 7700 simulated files (100 instances for each of the 77
    data-generating-processes). We provide a smaller sample of one instance from 10
    DGPs. For the full dataset, see the link below to the competition site.

    If used for academic purposes, please consider citing the competition organizers:
     Vincent Dorie, Jennifer Hill, Uri Shalit, Marc Scott, and Dan Cervone. "Automated versus do-it-yourself methods
     for causal inference: Lessons learned from a data analysis competition."
     Statistical Science 34, no. 1 (2019): 43-68.

    Args:
        instance (int): number between 1-10 (inclusive), dataset to load.
        raw (bool): If True, skip all processing and return a (pd.DataFrame, pd.DataFrame) tuple
                    (one for covariates and the second with treatment assignment, noisy potential
                    outcomes and true potential outcomes).
                    If False, non-numeric covariate columns are contrast-coded ("dummified").

    Returns:
        Bunch: dictionary-like object
               attributes are: `X` (covariates), `a` (treatment assignment), `y` (outcome),
                               `po` (ground truth potential outcomes; columns are the string labels
                                `"0"` (controls) and `"1"` (treated)),
                               `descriptors` (feature description).

    Raises:
        KeyError: if a treatment value has no matching observed-outcome column.

    See Also:
        * `Publication <https://projecteuclid.org/euclid.ss/1555056030>`_
        * `Official competition site <http://jenniferhill7.wixsite.com/acic-2016/competition>`_
        * `Official github with data generating code <https://github.com/vdorie/aciccomp/tree/master/2016>`_
    """
    dir_name = os.path.join(DATA_DIR_NAME, "acic_challenge_2016")

    X = load_data_file("x.csv", dir_name)
    zymu = load_data_file("zymu_{}.csv".format(instance), dir_name)

    if raw:
        return X, zymu

    non_numeric_cols = X.select_dtypes(include=[object]).columns
    X = pd.get_dummies(X, columns=non_numeric_cols, drop_first=True)

    a = zymu["z"].rename("a")
    # # Extract observed outcome:
    y = zymu[["y0", "y1"]]
    y = y.rename(columns=lambda x: int(x.strip("y")))  # remove 'y' prefix to allow lookup
    # Choose, row by row, the outcome column matching the treatment assignment.
    # `DataFrame.lookup` was removed in pandas 2.0, so index the values by position.
    col_positions = y.columns.get_indexer(a)
    if (col_positions == -1).any():
        raise KeyError("One or more column labels was not found")
    y = y.values[range(len(y)), col_positions]
    y = pd.Series(y, index=a.index)  # positional indexing yields an ndarray, convert back to Series
    # # Potential outcomes:
    po = zymu[["mu0", "mu1"]]
    po = po.rename(columns=lambda x: x.strip("mu"))  # column labels become the strings "0" and "1"

    descriptors = pd.Series(data="No true meaning", index=X.columns)
    data = Bunch(X=X, a=a, y=y, po=po, descriptors=descriptors)
    return data
開發者ID:IBM,項目名稱:causallib,代碼行數:60,代碼來源:data_loader.py


注:本文中的sklearn.utils.Bunch方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。