This page collects typical usage examples of the Python method sklearn.datasets.fetch_mldata. If you are unsure what datasets.fetch_mldata does or how to use it, the curated code samples below may help. You can also explore further usage examples from the containing module, sklearn.datasets.
The following shows 15 code examples of datasets.fetch_mldata, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
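Note before the examples: fetch_mldata depended on the now-defunct mldata.org repository; it was deprecated in scikit-learn 0.20 and removed in 0.22, so the snippets below are mainly of historical interest. In current scikit-learn, fetch_openml is the replacement. A minimal sketch of loading the same MNIST data the modern way (OpenML serves it as 'mnist_784', and the labels arrive as strings rather than floats):

from sklearn.datasets import fetch_openml

# OpenML's 'mnist_784' corresponds to the old 'MNIST original' dataset
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data                 # shape (70000, 784)
y = mnist.target.astype(int)   # labels arrive as strings such as '5'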
Example 1: load_data
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def load_data(dtype=np.float32, order='F'):
    """Load the data, then cache and memmap the train/test split"""
    ######################################################################
    # Load dataset
    safe_print("Loading dataset...")
    data = fetch_mldata('MNIST original')
    X = check_array(data['data'], dtype=dtype, order=order)
    y = data["target"]

    # Normalize features
    X = X / 255

    # Create train-test split (as [Joachims, 2006])
    safe_print("Creating train-test split...")
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    return X_train, X_test, y_train, y_test
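A quick usage sketch for the helper above (safe_print is the source repo's own printing utility and check_array comes from sklearn.utils; the shapes follow the classic 60000/10000 MNIST split):

X_train, X_test, y_train, y_test = load_data()
print(X_train.shape, X_test.shape)  # (60000, 784) (10000, 784)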
Example 2: test_download
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def test_download(tmpdata):
    """Test that fetch_mldata is able to download and cache a data set."""
    _urlopen_ref = datasets.mldata.urlopen
    datasets.mldata.urlopen = mock_mldata_urlopen({
        'mock': {
            'label': sp.ones((150,)),
            'data': sp.ones((150, 4)),
        },
    })
    try:
        mock = assert_warns(DeprecationWarning, fetch_mldata,
                            'mock', data_home=tmpdata)
        for n in ["COL_NAMES", "DESCR", "target", "data"]:
            assert_in(n, mock)
        assert_equal(mock.target.shape, (150,))
        assert_equal(mock.data.shape, (150, 4))

        assert_raises(datasets.mldata.HTTPError,
                      assert_warns, DeprecationWarning,
                      fetch_mldata, 'not_existing_name')
    finally:
        datasets.mldata.urlopen = _urlopen_ref
Example 3: test_fetch_one_column
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def test_fetch_one_column(tmpdata):
    _urlopen_ref = datasets.mldata.urlopen
    try:
        dataname = 'onecol'
        # create fake data set in cache
        x = sp.arange(6).reshape(2, 3)
        datasets.mldata.urlopen = mock_mldata_urlopen({dataname: {'x': x}})

        dset = fetch_mldata(dataname, data_home=tmpdata)
        for n in ["COL_NAMES", "DESCR", "data"]:
            assert_in(n, dset)
        assert_not_in("target", dset)

        assert_equal(dset.data.shape, (2, 3))
        assert_array_equal(dset.data, x)

        # transposing the data array
        dset = fetch_mldata(dataname, transpose_data=False, data_home=tmpdata)
        assert_equal(dset.data.shape, (3, 2))
    finally:
        datasets.mldata.urlopen = _urlopen_ref
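Both tests follow the same save/patch/restore discipline around datasets.mldata.urlopen so that no real network access happens. With pytest, the same idea can be written more compactly through the monkeypatch fixture, which undoes the patch automatically; a sketch under the same assumptions as above (mock_mldata_urlopen is sklearn's internal test helper, tmpdata the same fixture the examples use):

def test_fetch_one_column_monkeypatched(monkeypatch, tmpdata):
    x = sp.arange(6).reshape(2, 3)
    monkeypatch.setattr(datasets.mldata, 'urlopen',
                        mock_mldata_urlopen({'onecol': {'x': x}}))
    dset = fetch_mldata('onecol', data_home=tmpdata)
    assert dset.data.shape == (2, 3)  # no finally block needed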
Example 4: mnist
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def mnist(missingness="mcar", thr=0.2):
    """ Loads corrupted MNIST

    Parameters
    ----------
    missingness: ('mcar', 'mar', 'mnar')
        Type of missingness you want in your dataset
    thr: float between [0, 1]
        Percentage of missing data in generated data

    Returns
    -------
    dict with keys "X" (corrupted data) and "Y" (targets)
    """
    from sklearn.datasets import fetch_mldata

    dataset = fetch_mldata('MNIST original')
    corruptor = Corruptor(dataset.data, thr=thr)
    data = getattr(corruptor, missingness)()
    return {"X": data, "Y": dataset.target}
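A usage sketch, assuming a Corruptor class as referenced in the snippet (it is external to this excerpt and takes the data plus a thr ratio):

result = mnist(missingness="mcar", thr=0.2)
X, Y = result["X"], result["Y"]  # X has roughly 20% of entries set to missing, Y holds the digit labels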
Example 5: train
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def train(self, n_epochs, batch_size=128, save_interval=50):
    mnist = fetch_mldata('MNIST original')

    X = mnist.data
    y = mnist.target

    # Rescale to [-1, 1]
    X = (X.astype(np.float32) - 127.5) / 127.5

    for epoch in range(n_epochs):
        # Select a random batch of images
        idx = np.random.randint(0, X.shape[0], batch_size)
        imgs = X[idx]

        # Train the autoencoder
        loss, _ = self.autoencoder.train_on_batch(imgs, imgs)

        # Display the progress
        print("%d [D loss: %f]" % (epoch, loss))

        # If at save interval => save generated image samples
        if epoch % save_interval == 0:
            self.save_imgs(epoch, X)
Example 6: test_download
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def test_download():
    """Test that fetch_mldata is able to download and cache a data set."""
    _urlopen_ref = datasets.mldata.urlopen
    datasets.mldata.urlopen = mock_mldata_urlopen({
        'mock': {
            'label': sp.ones((150,)),
            'data': sp.ones((150, 4)),
        },
    })
    try:
        mock = fetch_mldata('mock', data_home=tmpdir)
        for n in ["COL_NAMES", "DESCR", "target", "data"]:
            assert_in(n, mock)
        assert_equal(mock.target.shape, (150,))
        assert_equal(mock.data.shape, (150, 4))

        assert_raises(datasets.mldata.HTTPError,
                      fetch_mldata, 'not_existing_name')
    finally:
        datasets.mldata.urlopen = _urlopen_ref
Example 7: test_fetch_one_column
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def test_fetch_one_column():
    _urlopen_ref = datasets.mldata.urlopen
    try:
        dataname = 'onecol'
        # create fake data set in cache
        x = sp.arange(6).reshape(2, 3)
        datasets.mldata.urlopen = mock_mldata_urlopen({dataname: {'x': x}})

        dset = fetch_mldata(dataname, data_home=tmpdir)
        for n in ["COL_NAMES", "DESCR", "data"]:
            assert_in(n, dset)
        assert_not_in("target", dset)

        assert_equal(dset.data.shape, (2, 3))
        assert_array_equal(dset.data, x)

        # transposing the data array
        dset = fetch_mldata(dataname, transpose_data=False, data_home=tmpdir)
        assert_equal(dset.data.shape, (3, 2))
    finally:
        datasets.mldata.urlopen = _urlopen_ref
Example 8: get_mnist
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def get_mnist():
    """ Gets MNIST dataset """
    np.random.seed(1234)  # set seed for deterministic ordering
    data_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    data_path = os.path.join(data_path, '../../data')
    mnist = fetch_mldata('MNIST original', data_home=data_path)
    p = np.random.permutation(mnist.data.shape[0])
    X = mnist.data[p].astype(np.float32) * 0.02
    Y = mnist.target[p]
    return X, Y
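Note the * 0.02 scaling: raw MNIST pixels lie in [0, 255], so the features end up in roughly [0, 5.1] rather than the more common [0, 1]. Usage is straightforward:

X, Y = get_mnist()
print(X.shape, X.max())  # (70000, 784), approximately 5.1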
Example 9: get_mnist
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def get_mnist():
    np.random.seed(1234)  # set seed for deterministic ordering
    data_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    data_path = os.path.join(data_path, '../../data')
    mnist = fetch_mldata('MNIST original', data_home=data_path)
    p = np.random.permutation(mnist.data.shape[0])
    X = mnist.data[p].astype(np.float32) * 0.02
    Y = mnist.target[p]
    return X, Y
Example 10: MNIST_dataload
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def MNIST_dataload():
    from sklearn.datasets import fetch_mldata
    import numpy as np

    mnist = fetch_mldata('MNIST original')
    Data = mnist.data
    label = mnist.target
    return Data, label
Example 11: load_data_target
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def load_data_target(name):
    """
    Loads data and target given the name of the dataset.
    """
    if name == "Boston":
        data = load_boston()
    elif name == "Housing":
        data = fetch_california_housing()
        dataset_size = 1000  # this is necessary so that SVR does not slow down too much
        data["data"] = data["data"][:dataset_size]
        data["target"] = data["target"][:dataset_size]
    elif name == "digits":
        data = load_digits()
    elif name == "Climate Model Crashes":
        try:
            data = fetch_mldata("climate-model-simulation-crashes")
        except HTTPError:
            url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00252/pop_failures.dat"
            # decode the raw bytes, drop the header row and any blank lines
            data = urlopen(url).read().decode('utf-8').split('\n')[1:]
            data = [[float(v) for v in d.split()] for d in data if d]
            samples = np.array(data)
            data = dict()
            data["data"] = samples[:, :-1]
            data["target"] = np.array(samples[:, -1], dtype=np.int)
    else:
        raise ValueError("dataset not supported.")
    return data["data"], data["target"]
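A usage sketch for the loader above; the name must match one of the four literals, e.g. the bundled digits set (no download needed):

X, y = load_data_target("digits")
print(X.shape, y.shape)  # (1797, 64) (1797,)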
Example 12: training_data
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def training_data():
    """Get the `MNIST original` training data."""
    _np.random.seed(1)
    permutation = _np.random.permutation(range(60000))
    mnist = _fetch_mldata('MNIST original',
                          data_home=_os.path.join(_DATA_FOLDER,
                                                  'MNIST_original'))
    return (mnist.data[:60000, :][permutation, :].reshape((60000, 1, 28, 28)).astype('float32'),
            mnist.target[:60000][permutation].reshape((60000, 1)).astype('float32'))
Example 13: test_data
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def test_data():
    """Get the `MNIST original` test data."""
    mnist = _fetch_mldata('MNIST original',
                          data_home=_os.path.join(_DATA_FOLDER,
                                                  'MNIST_original'))
    return (mnist.data[60000:, :].reshape((10000, 1, 28, 28)).astype('float32'),
            mnist.target[60000:].reshape((10000, 1)).astype('float32'))
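Together, the two helpers above reproduce the canonical 60000/10000 MNIST split, reshaped into (N, 1, 28, 28) image tensors:

X_train, y_train = training_data()
X_test, y_test = test_data()
print(X_train.shape, X_test.shape)  # (60000, 1, 28, 28) (10000, 1, 28, 28)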
Example 14: main
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def main():
    from sklearn.datasets import load_digits, fetch_mldata

    SMALL_MNIST = False

    if SMALL_MNIST:
        mnist_digits = load_digits()
        n_input = np.prod(mnist_digits.images.shape[1:])
        n_images = len(mnist_digits.images)  # 1797
        data_images = mnist_digits.images.reshape(n_images, -1) / 16.  # -> 1797 x 64
        data_targets = mnist_digits.target
        # im_size_x, im_size_y = 8, 8
    else:
        mnist_digits = fetch_mldata('MNIST original')
        n_input = np.prod(mnist_digits.data.shape[1:])
        data_images = mnist_digits.data / 255.  # -> 70000 x 784
        data_targets = mnist_digits.target
        # im_size_x, im_size_y = 28, 28

    n_hidden, n_output = 5, 10
    nn = NeuralNetworkClassifier(n_input, n_hidden, n_output)
    weight_shapes = nn.get_weights_shapes()
    weights = []
    for weight_shape in weight_shapes:
        weights.append(np.random.randn(*weight_shape))
    nn.set_weights(*weights)
    score = nn.score(data_images, data_targets)
    print("Score is:", score)
Example 15: __init__
# Required imports: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import fetch_mldata [as alias]
def __init__(self, traj, parameters):
    super().__init__(traj)

    if parameters.use_small_mnist:
        # 8 x 8 images
        mnist_digits = load_digits()
        n_input = np.prod(mnist_digits.images.shape[1:])
        n_images = len(mnist_digits.images)  # 1797
        data_images = mnist_digits.images.reshape(n_images, -1) / 16.  # -> 1797 x 64
        data_targets = mnist_digits.target
    else:
        # 28 x 28 images
        mnist_digits = fetch_mldata('MNIST original')
        n_input = np.prod(mnist_digits.data.shape[1:])
        data_images = mnist_digits.data / 255.  # -> 70000 x 784
        n_images = len(data_images)
        data_targets = mnist_digits.target

    self.n_images = n_images
    self.data_images, self.data_targets = data_images, data_targets

    seed = parameters.seed
    n_hidden = parameters.n_hidden
    seed = np.uint32(seed)
    self.random_state = np.random.RandomState(seed=seed)

    n_output = 10  # This is always true for MNIST
    self.nn = NeuralNetworkClassifier(n_input, n_hidden, n_output)
    self.random_state = np.random.RandomState(seed=seed)

    # create_individual can be called because __init__ is complete except for traj initialization
    indiv_dict = self.create_individual()
    for key, val in indiv_dict.items():
        traj.individual.f_add_parameter(key, val)
    traj.individual.f_add_parameter('seed', seed)