This article collects typical usage examples of Python's sklearn.datasets.base.Bunch. If you have been wondering what base.Bunch does, how to use it, or where to find working examples, the curated code samples below may help. You can also explore the other members of the sklearn.datasets.base module for further context.
The following presents 15 code examples of base.Bunch, sorted by popularity by default.
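As background, Bunch is the small container that all of these examples build or consume: a dictionary whose keys can also be read as attributes. A minimal sketch of that behaviour (nothing project-specific is assumed here; note that recent scikit-learn releases expose the class as sklearn.utils.Bunch instead):

from sklearn.datasets.base import Bunch  # in recent scikit-learn releases the class lives in sklearn.utils instead
import numpy as np

# A Bunch is built exactly like a dict ...
dataset = Bunch(data=np.arange(6).reshape(3, 2),
                target=np.array([0, 1, 0]),
                DESCR="toy dataset")

# ... and each key can be read either as an item or as an attribute.
assert dataset['data'] is dataset.data
print(dataset.target, dataset.DESCR)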
Example 1: _make_path_events_file_spm_auditory_data
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def _make_path_events_file_spm_auditory_data(spm_auditory_data):
"""
Accepts data for spm_auditory dataset as Bunch
and constructs the filepath for its events descriptor file.
Parameters
----------
spm_auditory_data: Bunch
Returns
-------
events_filepath: string
Full path to the events.tsv file for spm_auditory dataset.
"""
events_file_location = os.path.dirname(spm_auditory_data['func'][0])
events_filename = os.path.basename(events_file_location) + '_events.tsv'
events_filepath = os.path.join(events_file_location, events_filename)
return events_filepath
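To make the behaviour concrete, here is a small hedged sketch with a made-up path; the real paths come from fetch_spm_auditory (Example 14):

from sklearn.datasets.base import Bunch

# Hypothetical layout: 'func' holds the paths to the functional images.
spm_auditory_data = Bunch(func=['/tmp/spm_auditory/sub001/fM00223/fM00223_004.img'])
print(_make_path_events_file_spm_auditory_data(spm_auditory_data))
# -> /tmp/spm_auditory/sub001/fM00223/fM00223_events.tsv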
Example 2: fetch_asirra
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_asirra(image_count=1000):
"""
Parameters
----------
image_count : positive integer
Returns
-------
data : Bunch
Dictionary-like object with the following attributes :
'images', the sample images, 'data', the flattened images,
'target', the label for the image (0 for cat, 1 for dog),
and 'DESCR' the full description of the dataset.
"""
partial_path = check_fetch_asirra()
m = Memory(cachedir=partial_path, compress=6, verbose=0)
load_func = m.cache(_fetch_asirra)
images, target = load_func(partial_path, image_count=image_count)
return Bunch(data=images.reshape(len(images), -1),
images=images, target=target,
DESCR="Asirra cats and dogs dataset")
Example 3: load_images
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def load_images(filenames):
"""Load images for image manipulation.
Parameters
----------
filenames : iterable
Iterable of filename paths as strings
Returns
-------
data : Bunch
Dictionary-like object with the following attributes :
'images', the sample images, 'filenames', the file
names for the images
"""
# Load image data for each image in the source folder.
images = [np.array(Image.open(filename, 'r')) for filename in filenames]
return Bunch(images=images,
filenames=filenames)
Example 4: load_sample_images
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def load_sample_images():
"""Load sample images for image manipulation.
Loads ``sloth``, ``sloth_closeup``, ``cat_and_dog``.
Returns
-------
data : Bunch
Dictionary-like object with the following attributes :
'images', the sample images, 'filenames', the file
names for the images, and 'DESCR'
the full description of the dataset.
"""
module_path = os.path.join(os.path.dirname(__file__), "images")
with open(os.path.join(module_path, 'README.txt')) as f:
descr = f.read()
filenames = [os.path.join(module_path, filename)
for filename in os.listdir(module_path)
if filename.endswith(".jpg")]
# Load image data for each image in the source folder.
images = [np.array(Image.open(filename, 'r')) for filename in filenames]
return Bunch(images=images,
filenames=filenames,
DESCR=descr)
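A short consumption sketch, assuming the module's bundled images/ folder (README.txt plus .jpg files) is present; the shape shown in the comment is illustrative only:

import os

sample_images = load_sample_images()
print(sample_images.DESCR[:200])                 # start of the bundled README
for name, img in zip(sample_images.filenames, sample_images.images):
    print(os.path.basename(name), img.shape)     # e.g. sloth.jpg (height, width, 3)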
Example 5: test_bunch_pickle_generated_with_0_16_and_read_with_0_17
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def test_bunch_pickle_generated_with_0_16_and_read_with_0_17():
bunch = Bunch(key='original')
# This reproduces a problem when Bunch pickles have been created
# with scikit-learn 0.16 and are read with 0.17. Basically there
# is a surprising behaviour because reading bunch.key uses
# bunch.__dict__ (which is non empty for 0.16 Bunch objects)
# whereas assigning into bunch.key uses bunch.__setattr__. See
# https://github.com/scikit-learn/scikit-learn/issues/6196 for
# more details
bunch.__dict__['key'] = 'set from __dict__'
bunch_from_pkl = loads(dumps(bunch))
# After loading from pickle the __dict__ should have been ignored
assert_equal(bunch_from_pkl.key, 'original')
assert_equal(bunch_from_pkl['key'], 'original')
# Making sure that changing the attr does change the value
# associated with __getitem__ as well
bunch_from_pkl.key = 'changed'
assert_equal(bunch_from_pkl.key, 'changed')
assert_equal(bunch_from_pkl['key'], 'changed')
Example 6: _get_cluster_assignments
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def _get_cluster_assignments(dataset_name, url, sep=" ", skip_header=False):
data_dir = _get_dataset_dir("categorization", verbose=0)
_fetch_file(url=url,
data_dir=data_dir,
uncompress=True,
move="{0}/{0}.txt".format(dataset_name),
verbose=0)
files = glob.glob(os.path.join(data_dir, dataset_name + "/*.txt"))
X = []
y = []
names = []
for cluster_id, file_name in enumerate(files):
with open(file_name) as f:
lines = f.read().splitlines()[(int(skip_header)):]
X += [l.split(sep) for l in lines]
y += [os.path.basename(file_name).split(".")[0]] * len(lines)
return Bunch(X=np.array(X, dtype="object"), y=np.array(y).astype("object"))
Example 7: fetch_BLESS
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_BLESS():
"""
    Fetch the BLESS categorization dataset of Baroni and Lenci
    Returns
    -------
data : sklearn.datasets.base.Bunch
dictionary-like object. Keys of interest:
'X': words
'y': cluster assignment
References
----------
    Baroni, M. and Lenci, A. "How we BLESSed distributional semantic evaluation", 2011
Notes
-----
Data set includes 200 concrete nouns (100 animate and 100 inanimate nouns)
from different classes (e.g., tools, clothing, vehicles, animals, etc.).
"""
return _get_cluster_assignments(dataset_name="EN-BLESS",
url="https://www.dropbox.com/sh/5qbl5cmh17o3eh0/AACyCEqpMktdMI05zwphJRI7a?dl=1")
Example 8: fetch_ESSLI_1a
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_ESSLI_1a():
"""
Fetch ESSLI 1a task categorization dataset.
Returns
-------
data : sklearn.datasets.base.Bunch
dictionary-like object. Keys of interest:
'X': words
'y': cluster assignment
References
----------
Originally published at http://wordspace.collocations.de/doku.php/data:esslli2008:concrete_nouns_categorization.
Notes
-----
The goal of the sub-task is to group concrete nouns into semantic categories.
The data set consists of 44 concrete nouns, belonging to 6 semantic categories (four animates and two inanimates).
The nouns are included in the feature norms described in McRae et al. (2005)
"""
return _get_cluster_assignments(dataset_name="EN-ESSLI-1a",
url="https://www.dropbox.com/sh/h362565r1sk5wii/AADjcdYy3nRo-MjuFUSvb-0ya?dl=1")
Example 9: fetch_SCWS
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_SCWS():
"""
Fetch SCWS dataset for testing similarity (with a context)
Returns
-------
data : sklearn.datasets.base.Bunch
dictionary-like object. Keys of interest:
        'X': matrix with two words per row,
'y': vector with mean scores,
'sd': standard deviation of scores
References
----------
Huang et al., "Improving Word Representations via Global Context and Multiple Word Prototypes", 2012
Notes
-----
TODO
"""
    data = _get_as_pd('https://www.dropbox.com/s/qgqj366lzzzj1ua/preproc_SCWS.txt?dl=1',
                      'similarity', header=None, sep="\t")
    X = data.values[:, 0:2].astype("object")
    mean = data.values[:, 2].astype(float)
    sd = np.std(data.values[:, 3:14].astype(float), axis=1).flatten()
    return Bunch(X=X, y=mean, sd=sd)
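A hedged consumption sketch; the dataset is downloaded on first use, and the rank-correlation part is shown only in outline with a hypothetical model_similarity function:

scws = fetch_SCWS()
print(scws.X[0], scws.y[0], scws.sd[0])   # first word pair, mean human score, score standard deviation

# Typical use: rank-correlate model similarities against the human means.
# 'model_similarity' is a hypothetical callable returning one score per word pair.
# from scipy.stats import spearmanr
# predicted = [model_similarity(w1, w2) for w1, w2 in scws.X]
# rho, _ = spearmanr(predicted, scws.y)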
Example 10: json2bunch
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def json2bunch(fName):  # Reads the input data; adjust the parsing below to match how your data is stored
    infoDic=[]
    f=open(fName)
    jsonDecodes=json.load(f)
    j=0
    for info in jsonDecodes:
        condiKeys=info['detail_info'].keys()
        # Only keep a record when every required key is present; otherwise skip it
        if 'price' in condiKeys and 'overall_rating' in condiKeys and 'service_rating' in condiKeys and 'facility_rating' in condiKeys and 'hygiene_rating' in condiKeys and 'image_num' in condiKeys and 'comment_num' in condiKeys and 'favorite_num' in condiKeys:
            if 50<float(info['detail_info']['price'])<1000:  # keep only records whose price lies in the 50-1000 range
                j+=1
                infoDic.append([info['location']['lat'],info['location']['lng'],info['detail_info']['price'],info['detail_info']['overall_rating'],info['detail_info']['service_rating'],info['detail_info']['facility_rating'],info['detail_info']['hygiene_rating'],info['detail_info']['image_num'],info['detail_info']['comment_num'],info['detail_info']['favorite_num'],info['detail_info']['checkin_num'],info['name']])
            else:pass
        else:pass
    print('.....................................',j)
    data=np.array([(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7],v[8],v[9],v[10]) for v in infoDic],dtype='float')  # explanatory variables (features)
    targetInfo=np.array([v[11] for v in infoDic])  # target variable (labels)
    dataBunch=base.Bunch(DESCR=r'info of poi',data=data,feature_names=['lat','lng','price','overall_rating','service_rating','facility_rating','hygiene_rating','image_num','comment_num','favorite_num','checkin_num'],target=targetInfo,target_names=['price','name'])  # assemble the data into an sklearn Bunch
    return dataBunch  # return the data in Bunch format
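A hedged usage sketch; 'poi_hotels.json' is a hypothetical input file, and the snippet only shows how the returned Bunch plugs into scikit-learn estimators:

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

poi = json2bunch('poi_hotels.json')                 # hypothetical input file
X_scaled = StandardScaler().fit_transform(poi.data)
labels = KMeans(n_clusters=5, n_init=10).fit_predict(X_scaled)
print(poi.feature_names)
print(labels[:20])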
Example 11: jsonDataFilter
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def jsonDataFilter(fileInfo):  # Reads the input data; adjust the parsing below to match how your data is stored
    rootPath=list(fileInfo.keys())  # root directory of the data files to read
    # print(rootPath)
    dataName=flatten_lst(list(fileInfo.values()))  # list of data file names to read
    # print(dataName)
    coodiDic=[]
    # Read each JSON file and store the needed fields in a list. This experiment uses each POI's
    # latitude/longitude and its top-level category name; note the coordinates use the Baidu
    # coordinate system and are not converted to WGS84.
    for fName in dataName:
        f=open(os.path.join(rootPath[0],fName))
        jsonDecodes=json.load(f)
        coodiDic.append([(coordi['location']['lat'],coordi['location']['lng'],fName[:-5]) for coordi in jsonDecodes])
    coodiDic=flatten_lst(coodiDic)  # the parsed data is nested, so flatten it
    # print(coodiDic)
    data=np.array([(v[0],v[1]) for v in coodiDic])  # latitude/longitude
    targetNames=np.array([v[2] for v in coodiDic])  # top-level category
    # print(data)
    # print(targetNames)
    class_label=LabelEncoder()  # encode the top-level category names as integers
    targetLabel=class_label.fit_transform(targetNames)
    class_mapping=[(idx,label) for idx,label in enumerate(class_label.classes_)]  # mapping between category names and their integer codes
    # print(class_mapping)
    dataBunch=base.Bunch(DESCR=r'spatial points datasets of poi',data=data,feature_names=["XCoordinate","yCoordinate"],target=targetLabel,target_names=class_mapping)  # assemble the data into an sklearn Bunch
    return dataBunch,class_mapping  # return the Bunch and the category-name mapping
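A hedged usage sketch; the directory and file names below are made up, but the structure matches what the function expects (one root directory mapped to per-category JSON files):

fileInfo = {'./poi_data': ['delicacy.json', 'hotel.json', 'shopping.json']}  # hypothetical input
poiBunch, class_mapping = jsonDataFilter(fileInfo)
print(poiBunch.data.shape)      # (n_points, 2): one lat/lng pair per POI
print(class_mapping)            # e.g. [(0, 'delicacy'), (1, 'hotel'), (2, 'shopping')]

# Recover the category name behind each integer label.
idx_to_name = dict(class_mapping)
print([idx_to_name[t] for t in poiBunch.target[:5]])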
Example 12: test_bunch_pickle_generated_with_0_16_and_read_with_0_17
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def test_bunch_pickle_generated_with_0_16_and_read_with_0_17():
bunch = Bunch(key='original')
# This reproduces a problem when Bunch pickles have been created
# with scikit-learn 0.16 and are read with 0.17. Basically there
    # is a surprising behaviour because reading bunch.key uses
# bunch.__dict__ (which is non empty for 0.16 Bunch objects)
# whereas assigning into bunch.key uses bunch.__setattr__. See
# https://github.com/scikit-learn/scikit-learn/issues/6196 for
# more details
bunch.__dict__['key'] = 'set from __dict__'
bunch_from_pkl = loads(dumps(bunch))
# After loading from pickle the __dict__ should have been ignored
assert_equal(bunch_from_pkl.key, 'original')
assert_equal(bunch_from_pkl['key'], 'original')
# Making sure that changing the attr does change the value
# associated with __getitem__ as well
bunch_from_pkl.key = 'changed'
assert_equal(bunch_from_pkl.key, 'changed')
assert_equal(bunch_from_pkl['key'], 'changed')
Example 13: fetch_localizer_first_level
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_localizer_first_level(data_dir=None, verbose=1):
""" Download a first-level localizer fMRI dataset
Parameters
----------
data_dir: string
directory where data should be downloaded and unpacked.
Returns
-------
data: sklearn.datasets.base.Bunch
dictionary-like object, with the keys:
epi_img: the input 4D image
        events: a tsv file describing the paradigm
"""
url = 'https://osf.io/2bqxn/download'
epi_img = 'sub-12069_task-localizer_space-MNI305.nii.gz'
events = 'sub-12069_task-localizer_events.tsv'
opts = {'uncompress': True}
options = ('epi_img', 'events')
dir_ = 'localizer_first_level'
filenames = [(os.path.join(dir_, name), url, opts)
for name in [epi_img, events]]
dataset_name = 'localizer_first_level'
data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
verbose=verbose)
files = _fetch_files(data_dir, filenames, verbose=verbose)
params = dict(list(zip(options, files)))
return Bunch(**params)
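A hedged usage sketch; the data is downloaded on first call, and pandas is used here only to illustrate reading the events file:

import pandas as pd

localizer = fetch_localizer_first_level()
print(localizer.epi_img)                          # path to the 4D EPI image
events = pd.read_csv(localizer.events, sep='\t')
print(events.head())                              # typically onset / duration / trial_type columns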
Example 14: fetch_spm_auditory
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
subject_id='sub001', verbose=1):
"""Function to fetch SPM auditory single-subject data.
Parameters
----------
data_dir: string
Path of the data directory. Used to force data storage in a specified
location. If the data is already present there, then will simply
glob it.
Returns
-------
data: sklearn.datasets.base.Bunch
Dictionary-like object, the interest attributes are:
- 'func': string list. Paths to functional images
- 'anat': string list. Path to anat image
References
----------
:download:
http://www.fil.ion.ucl.ac.uk/spm/data/auditory/
"""
data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
verbose=verbose)
subject_dir = os.path.join(data_dir, subject_id)
if not os.path.exists(subject_dir):
_download_spm_auditory_data(data_dir, subject_dir, subject_id)
spm_auditory_data = _prepare_downloaded_spm_auditory_data(subject_dir)
try:
spm_auditory_data['events']
except KeyError:
events_filepath = _make_path_events_file_spm_auditory_data(
spm_auditory_data)
if not os.path.isfile(events_filepath):
_make_events_file_spm_auditory_data(events_filepath)
spm_auditory_data['events'] = events_filepath
return spm_auditory_data
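A hedged usage sketch; the SPM auditory data is downloaded on the first call, after which the returned Bunch simply points at the local files:

subject_data = fetch_spm_auditory()
print(len(subject_data.func))    # paths to the functional volumes
print(subject_data.anat)         # path to the anatomical image
print(subject_data.events)       # path to the events.tsv file created above if it was missing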
Example 15: _glob_spm_multimodal_fmri_data
# Required import: from sklearn.datasets import base [as alias]
# Or: from sklearn.datasets.base import Bunch [as alias]
def _glob_spm_multimodal_fmri_data(subject_dir):
"""glob data from subject_dir."""
_subject_data = {'slice_order': 'descending'}
for session in range(1, 3):
# glob func data for session
_subject_data = _get_func_data_spm_multimodal(subject_dir,
session,
_subject_data)
if not _subject_data:
return None
# glob trials .mat file
_subject_data = _get_session_trials_spm_multimodal(subject_dir,
session,
_subject_data)
if not _subject_data:
return None
try:
events = _make_events_file_spm_multimodal_fmri(_subject_data,
session)
except MatReadError as mat_err:
warnings.warn(
'{}. An events.tsv file '
'cannot be generated'.format(str(mat_err)))
else:
events_filepath = _make_events_filepath_spm_multimodal_fmri(
_subject_data, session)
events.to_csv(events_filepath, sep='\t', index=False)
_subject_data['events{}'.format(session)] = events_filepath
# glob for anat data
_subject_data = _get_anatomical_data_spm_multimodal(subject_dir,
_subject_data)
if not _subject_data:
return None
return Bunch(**_subject_data)