

Python base.Bunch Method Code Examples

This article collects typical usage examples of the sklearn.datasets.base.Bunch method in Python. If you are wondering how to use base.Bunch, how it is called in practice, or what real-world examples look like, the curated code samples below should help. You can also explore further usage examples from the containing module, sklearn.datasets.base.


The following shows 15 code examples of the base.Bunch method, sorted by popularity by default.
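Before the individual examples, here is a minimal self-contained sketch (an illustration added for this article, not taken from any of the projects below) of what Bunch itself provides: it is a dict subclass whose keys can also be read and written as attributes. In recent scikit-learn releases the class is exposed as sklearn.utils.Bunch; sklearn.datasets.base.Bunch is the older import path used throughout the examples.

# Minimal sketch of Bunch behaviour (illustration only, not from the examples below).
try:
    from sklearn.utils import Bunch          # current scikit-learn releases
except ImportError:
    from sklearn.datasets.base import Bunch  # older releases, as used in the examples

b = Bunch(data=[1, 2, 3], target='demo')
print(b['data'])    # dict-style access: [1, 2, 3]
print(b.target)     # attribute-style access: 'demo'
b.extra = 42        # attribute assignment also creates the dictionary key
print(b['extra'])   # 42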

Example 1: _make_path_events_file_spm_auditory_data

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def _make_path_events_file_spm_auditory_data(spm_auditory_data):
    """
    Accepts data for spm_auditory dataset as Bunch
    and constructs the filepath for its events descriptor file.

    Parameters
    ----------
    spm_auditory_data: Bunch

    Returns
    -------
    events_filepath: string
        Full path to the events.tsv file for spm_auditory dataset.
    """
    events_file_location = os.path.dirname(spm_auditory_data['func'][0])
    events_filename = os.path.basename(events_file_location) + '_events.tsv'
    events_filepath = os.path.join(events_file_location, events_filename)
    return events_filepath 
Developer ID: nilearn, Project: nistats, Lines of code: 19, Source file: datasets.py

Example 2: fetch_asirra

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_asirra(image_count=1000):
    """

    Parameters
    ----------
    image_count : positive integer

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'images', the sample images, 'data', the flattened images,
        'target', the label for the image (0 for cat, 1 for dog),
        and 'DESCR' the full description of the dataset.
    """
    partial_path = check_fetch_asirra()
    m = Memory(cachedir=partial_path, compress=6, verbose=0)
    load_func = m.cache(_fetch_asirra)
    images, target = load_func(partial_path, image_count=image_count)
    return Bunch(data=images.reshape(len(images), -1),
                 images=images, target=target,
                 DESCR="Asirra cats and dogs dataset") 
Developer ID: sklearn-theano, Project: sklearn-theano, Lines of code: 24, Source file: asirra.py

Example 3: load_images

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def load_images(filenames):
    """Load images for image manipulation.

    Parameters
    ----------
    filenames : iterable
         Iterable of filename paths as strings

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'images', the sample images, 'filenames', the file
        names for the images
    """
    # Load image data for each image in the source folder.
    images = [np.array(Image.open(filename, 'r')) for filename in filenames]

    return Bunch(images=images,
                 filenames=filenames) 
Developer ID: sklearn-theano, Project: sklearn-theano, Lines of code: 22, Source file: base.py

Example 4: load_sample_images

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def load_sample_images():
    """Load sample images for image manipulation.
    Loads ``sloth``, ``sloth_closeup``, ``cat_and_dog``.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'images', the sample images, 'filenames', the file
        names for the images, and 'DESCR'
        the full description of the dataset.
    """
    module_path = os.path.join(os.path.dirname(__file__), "images")
    with open(os.path.join(module_path, 'README.txt')) as f:
        descr = f.read()
    filenames = [os.path.join(module_path, filename)
                 for filename in os.listdir(module_path)
                 if filename.endswith(".jpg")]
    # Load image data for each image in the source folder.
    images = [np.array(Image.open(filename, 'r')) for filename in filenames]

    return Bunch(images=images,
                 filenames=filenames,
                 DESCR=descr) 
Developer ID: sklearn-theano, Project: sklearn-theano, Lines of code: 26, Source file: base.py

Example 5: test_bunch_pickle_generated_with_0_16_and_read_with_0_17

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def test_bunch_pickle_generated_with_0_16_and_read_with_0_17():
    bunch = Bunch(key='original')
    # This reproduces a problem when Bunch pickles have been created
    # with scikit-learn 0.16 and are read with 0.17. Basically there
    # is a surprising behaviour because reading bunch.key uses
    # bunch.__dict__ (which is non empty for 0.16 Bunch objects)
    # whereas assigning into bunch.key uses bunch.__setattr__. See
    # https://github.com/scikit-learn/scikit-learn/issues/6196 for
    # more details
    bunch.__dict__['key'] = 'set from __dict__'
    bunch_from_pkl = loads(dumps(bunch))
    # After loading from pickle the __dict__ should have been ignored
    assert_equal(bunch_from_pkl.key, 'original')
    assert_equal(bunch_from_pkl['key'], 'original')
    # Making sure that changing the attr does change the value
    # associated with __getitem__ as well
    bunch_from_pkl.key = 'changed'
    assert_equal(bunch_from_pkl.key, 'changed')
    assert_equal(bunch_from_pkl['key'], 'changed') 
Developer ID: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines of code: 21, Source file: test_base.py

Example 6: _get_cluster_assignments

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def _get_cluster_assignments(dataset_name, url, sep=" ", skip_header=False):
    data_dir = _get_dataset_dir("categorization", verbose=0)
    _fetch_file(url=url,
                 data_dir=data_dir,
                 uncompress=True,
                 move="{0}/{0}.txt".format(dataset_name),
                 verbose=0)
    files = glob.glob(os.path.join(data_dir, dataset_name + "/*.txt"))
    X = []
    y = []
    names = []
    for cluster_id, file_name in enumerate(files):
        with open(file_name) as f:
            lines = f.read().splitlines()[(int(skip_header)):]

            X += [l.split(sep) for l in lines]
            y += [os.path.basename(file_name).split(".")[0]] * len(lines)
    return Bunch(X=np.array(X, dtype="object"), y=np.array(y).astype("object")) 
Developer ID: tombosc, Project: cpae, Lines of code: 20, Source file: utils.py

Example 7: fetch_BLESS

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_BLESS():
    """
    Fetch the BLESS categorization dataset of Baroni and Lenci.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        dictionary-like object. Keys of interest:
        'X': words
        'y': cluster assignment

    References
    ----------
    Baroni et al. "How we BLESSed distributional semantic evaluation", 2011

    Notes
    -----
    Data set includes 200 concrete nouns (100 animate and 100 inanimate nouns)
    from different classes (e.g., tools, clothing, vehicles, animals, etc.).
    """
    return _get_cluster_assignments(dataset_name="EN-BLESS",
                                    url="https://www.dropbox.com/sh/5qbl5cmh17o3eh0/AACyCEqpMktdMI05zwphJRI7a?dl=1") 
Developer ID: tombosc, Project: cpae, Lines of code: 27, Source file: categorization.py

Example 8: fetch_ESSLI_1a

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_ESSLI_1a():
    """
    Fetch ESSLI 1a task categorization dataset.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        dictionary-like object. Keys of interest:
        'X': words
        'y': cluster assignment

    References
    ----------
    Originally published at http://wordspace.collocations.de/doku.php/data:esslli2008:concrete_nouns_categorization.

    Notes
    -----
    The goal of the sub-task is to group concrete nouns into semantic categories.
    The data set consists of 44 concrete nouns, belonging to 6 semantic categories (four animates and two inanimates).
    The nouns are included in the feature norms described in McRae et al. (2005)
    """
    return _get_cluster_assignments(dataset_name="EN-ESSLI-1a",
                                    url="https://www.dropbox.com/sh/h362565r1sk5wii/AADjcdYy3nRo-MjuFUSvb-0ya?dl=1") 
Developer ID: tombosc, Project: cpae, Lines of code: 25, Source file: categorization.py

Example 9: fetch_SCWS

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_SCWS():
    """
    Fetch SCWS dataset for testing similarity (with a context)

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        dictionary-like object. Keys of interest:
        'X': matrix of 2 words per column,
        'y': vector with mean scores,
        'sd': standard deviation of scores

    References
    ----------
    Huang et al., "Improving Word Representations via Global Context and Multiple Word Prototypes", 2012

    Notes
    -----
    TODO
    """
    data = _get_as_pd('https://www.dropbox.com/s/qgqj366lzzzj1ua/preproc_SCWS.txt?dl=1', 'similarity', header=None, sep="\t")
    X = data.values[:, 0:2].astype("object")
    mean = data.values[:, 2].astype(float)
    sd = np.std(data.values[:, 3:14].astype(float), axis=1).flatten()
    return Bunch(X=X, y=mean, sd=sd) 
Developer ID: tombosc, Project: cpae, Lines of code: 27, Source file: similarity.py

Example 10: json2bunch

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def json2bunch(fName):   # load the data; the reading code inside this function needs adjusting for other storage formats
    infoDic=[]
    f=open(fName)
    jsonDecodes=json.load(f)
    j=0
    for info in jsonDecodes:
        condiKeys=info['detail_info'].keys()
        requiredKeys=('price','overall_rating','service_rating','facility_rating','hygiene_rating','image_num','comment_num','favorite_num')
        if all(key in condiKeys for key in requiredKeys):  # extract an entry only when every required key has data, otherwise skip it
            if 50<float(info['detail_info']['price'])<1000:  # keep only entries within this price range
                j+=1
                infoDic.append([info['location']['lat'],info['location']['lng'],info['detail_info']['price'],info['detail_info']['overall_rating'],info['detail_info']['service_rating'],info['detail_info']['facility_rating'],info['detail_info']['hygiene_rating'],info['detail_info']['image_num'],info['detail_info']['comment_num'],info['detail_info']['favorite_num'],info['detail_info']['checkin_num'],info['name']])
    print('.....................................',j)

    data=np.array([(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7],v[8],v[9],v[10]) for v in infoDic],dtype='float')  # explanatory variables (features)
    targetInfo=np.array([v[11] for v in infoDic])  # target variable (labels)
    dataBunch=base.Bunch(DESCR=r'info of poi',data=data,feature_names=['lat','lng','price','overall_rating','service_rating','facility_rating','hygiene_rating','image_num','comment_num','favorite_num','checkin_num'],target=targetInfo,target_names=['price','name'])  # build a Bunch in scikit-learn's dataset storage format
    return dataBunch  # return the data as a Bunch
Developer ID: richieBao, Project: python-urbanPlanning, Lines of code: 21, Source file: poiRegression.py
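The Bunch returned by json2bunch follows the data/target/feature_names/DESCR layout used by scikit-learn's built-in datasets, so it can be fed directly to an estimator. The short sketch below is an illustration with made-up toy values, not code from the original project; the target here is made numeric so the Bunch can drive a simple regression.

# Hypothetical usage sketch (toy values, not from the original project):
# a Bunch with the data/target/feature_names/DESCR layout plugs straight
# into a scikit-learn estimator.
import numpy as np
from sklearn.utils import Bunch            # sklearn.datasets.base.Bunch in older releases
from sklearn.linear_model import LinearRegression

demo = Bunch(
    DESCR='toy POI-style dataset',
    data=np.array([[39.91, 116.40, 4.5],
                   [39.92, 116.41, 3.8],
                   [39.93, 116.42, 4.1]]),
    feature_names=['lat', 'lng', 'overall_rating'],
    target=np.array([320.0, 210.0, 260.0]),   # e.g. price per night
)

model = LinearRegression().fit(demo.data, demo.target)
print(model.predict(demo.data))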

Example 11: jsonDataFilter

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def jsonDataFilter(fileInfo):   # load the data; the reading code inside this function needs adjusting for other storage formats
    rootPath=list(fileInfo.keys())  # root directory of the data files to be read
#    print(rootPath)
    dataName=flatten_lst(list(fileInfo.values()))  # list of data file names to be read
#    print(dataName)
    coodiDic=[]
    for fName in dataName:  # read each JSON file and keep the needed fields; here: POI latitude/longitude and top-level industry category. Note the Baidu coordinate system is used, not converted to WGS84.
        f=open(os.path.join(rootPath[0],fName))
        jsonDecodes=json.load(f)
        coodiDic.append([(coordi['location']['lat'],coordi['location']['lng'],fName[:-5]) for coordi in jsonDecodes])
        coodiDic=flatten_lst(coodiDic)  # the loaded data are nested several levels deep and need flattening
#    print(coodiDic)
    data=np.array([(v[0],v[1]) for v in coodiDic])  # latitude/longitude information
    targetNames=np.array([v[2] for v in coodiDic])  # top-level category names
#    print(data)
#    print(targetNames)
    class_label=LabelEncoder()  # encode the top-level category names as integers
    targetLabel=class_label.fit_transform(targetNames)
    class_mapping=[(idx,label) for idx,label in enumerate(class_label.classes_)]  # mapping between integer codes and category names
#    print(class_mapping)
    dataBunch=base.Bunch(DESCR=r'spatial points datasets of poi',data=data,feature_names=["XCoordinate","yCoordinate"],target=targetLabel,target_names=class_mapping)  # build a Bunch in scikit-learn's dataset storage format
    return dataBunch,class_mapping  # return the Bunch data and the category-name mapping list
Developer ID: richieBao, Project: python-urbanPlanning, Lines of code: 24, Source file: poiStructure.py

Example 12: test_bunch_pickle_generated_with_0_16_and_read_with_0_17

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def test_bunch_pickle_generated_with_0_16_and_read_with_0_17():
    bunch = Bunch(key='original')
    # This reproduces a problem when Bunch pickles have been created
    # with scikit-learn 0.16 and are read with 0.17. Basically there
    # is a surprising behaviour because reading bunch.key uses
    # bunch.__dict__ (which is non empty for 0.16 Bunch objects)
    # whereas assigning into bunch.key uses bunch.__setattr__. See
    # https://github.com/scikit-learn/scikit-learn/issues/6196 for
    # more details
    bunch.__dict__['key'] = 'set from __dict__'
    bunch_from_pkl = loads(dumps(bunch))
    # After loading from pickle the __dict__ should have been ignored
    assert_equal(bunch_from_pkl.key, 'original')
    assert_equal(bunch_from_pkl['key'], 'original')
    # Making sure that changing the attr does change the value
    # associated with __getitem__ as well
    bunch_from_pkl.key = 'changed'
    assert_equal(bunch_from_pkl.key, 'changed')
    assert_equal(bunch_from_pkl['key'], 'changed') 
Developer ID: alvarobartt, Project: twitter-stock-recommendation, Lines of code: 21, Source file: test_base.py

Example 13: fetch_localizer_first_level

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_localizer_first_level(data_dir=None, verbose=1):
    """ Download a first-level localizer fMRI dataset

    Parameters
    ----------
    data_dir: string
        directory where data should be downloaded and unpacked.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        dictionary-like object, with the keys:
        epi_img: the input 4D image
        events: a tsv file describing the paradigm
    """
    url = 'https://osf.io/2bqxn/download'
    epi_img = 'sub-12069_task-localizer_space-MNI305.nii.gz'
    events = 'sub-12069_task-localizer_events.tsv'
    opts = {'uncompress': True}
    options = ('epi_img', 'events')
    dir_ = 'localizer_first_level'
    filenames = [(os.path.join(dir_, name), url, opts)
                 for name in [epi_img, events]]

    dataset_name = 'localizer_first_level'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_files(data_dir, filenames, verbose=verbose)

    params = dict(list(zip(options, files)))
    return Bunch(**params) 
Developer ID: nilearn, Project: nistats, Lines of code: 33, Source file: datasets.py

Example 14: fetch_spm_auditory

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id='sub001', verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a specified
        location. If the data are already present there, they are simply
        globbed from this directory.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download:
        http://www.fil.ion.ucl.ac.uk/spm/data/auditory/

    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)
    if not os.path.exists(subject_dir):
        _download_spm_auditory_data(data_dir, subject_dir, subject_id)
    spm_auditory_data = _prepare_downloaded_spm_auditory_data(subject_dir)
    try:
        spm_auditory_data['events']
    except KeyError:
        events_filepath = _make_path_events_file_spm_auditory_data(
            spm_auditory_data)
        if not os.path.isfile(events_filepath):
            _make_events_file_spm_auditory_data(events_filepath)
        spm_auditory_data['events'] = events_filepath
    return spm_auditory_data 
Developer ID: nilearn, Project: nistats, Lines of code: 41, Source file: datasets.py

Example 15: _glob_spm_multimodal_fmri_data

# Required imports: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def _glob_spm_multimodal_fmri_data(subject_dir):
    """glob data from subject_dir."""
    _subject_data = {'slice_order': 'descending'}

    for session in range(1, 3):
        # glob func data for session
        _subject_data = _get_func_data_spm_multimodal(subject_dir,
                                                      session,
                                                      _subject_data)
        if not _subject_data:
            return None
        # glob trials .mat file
        _subject_data = _get_session_trials_spm_multimodal(subject_dir,
                                                           session,
                                                           _subject_data)
        if not _subject_data:
            return None
        try:
            events = _make_events_file_spm_multimodal_fmri(_subject_data,
                                                           session)
        except MatReadError as mat_err:
            warnings.warn(
                '{}. An events.tsv file '
                'cannot be generated'.format(str(mat_err)))
        else:
            events_filepath = _make_events_filepath_spm_multimodal_fmri(
                _subject_data, session)
            events.to_csv(events_filepath, sep='\t', index=False)
            _subject_data['events{}'.format(session)] = events_filepath

    # glob for anat data
    _subject_data = _get_anatomical_data_spm_multimodal(subject_dir,
                                                        _subject_data)
    if not _subject_data:
        return None

    return Bunch(**_subject_data) 
Developer ID: nilearn, Project: nistats, Lines of code: 39, Source file: datasets.py


Note: The sklearn.datasets.base.Bunch examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. Please refer to each project's license before distributing or using the code, and do not reproduce this article without permission.