This article collects typical usage examples of Python's sklearn.datasets.fetch_mldata function. If you have been wondering what exactly fetch_mldata does, how to use it, or want to see it in real code, the curated samples below should help.
Below are 15 code examples of the fetch_mldata function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
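Before the examples, here is a minimal sketch of the basic call. The data_home argument is an optional cache directory (the path below is a hypothetical choice); note also that fetch_mldata depended on the now-defunct mldata.org service and was deprecated in scikit-learn 0.20 and removed in later releases, where fetch_openml('mnist_784') is the usual replacement:

from sklearn.datasets import fetch_mldata

# Downloads on the first call, then reads from the local cache directory.
mnist = fetch_mldata('MNIST original', data_home='./mldata_cache')  # hypothetical cache path
X, y = mnist.data, mnist.target  # X: (70000, 784) uint8 pixels, y: (70000,) float labels
print(X.shape, y.shape)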
Example 1: __init__

def __init__(self, use_mnist=False):
    self.use_mnist = use_mnist
    if self.use_mnist:
        # fetch_mldata takes no `subset` argument; fetch once and split at the
        # conventional 60,000/10,000 train/test boundary of 'MNIST original'.
        mnist = fetch_mldata('MNIST original')
        self.mnist_digits_train = mnist.data[:60000], mnist.target[:60000]
        self.mnist_digits_test = mnist.data[60000:], mnist.target[60000:]
    else:
        self.digits = load_digits()
        self.X = self.digits.data
        self.y = self.digits.target
    self.best_f1_score = 0
    self.best_score = 0
"""
Example 2: testScript

def testScript():
    print "\n---> Started Logistic Regression - Iris dataset - Own function - k class...\n"
    attributes, outcomes = getDataFromFile("../Data/iriskc.data.shuffled")
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    attributes, outcomes = min_max_scaler.fit_transform(np.array(attributes)), np.array(outcomes)
    #attributes, outcomes = np.array(attributes), np.array(outcomes)
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=750, threshold=0.005, ownFunction=True)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr + 1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))

    print "---> Started Logistic Regression - Iris dataset - Inbuilt function - k class...\n"
    attributes, outcomes = getDataFromFile("../Data/iriskc.data.shuffled")
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    attributes, outcomes = min_max_scaler.fit_transform(np.array(attributes)), np.array(outcomes)
    #attributes, outcomes = np.array(attributes), np.array(outcomes)
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=750, threshold=0.005, ownFunction=False)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr + 1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))

    print "---> Started Logistic Regression - Digits dataset - Own function - k class...\n"
    mnist = datasets.fetch_mldata('MNIST original')
    X, y = mnist.data / 255., mnist.target
    attributes = X[:20000]
    outcomes = y[:20000]
    #print list(set(outcomes))
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=100, threshold=0.005, ownFunction=True)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr + 1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))

    print "---> Started Logistic Regression - Digits dataset - Inbuilt function - k class...\n"
    mnist = datasets.fetch_mldata('MNIST original')
    X, y = mnist.data / 255., mnist.target
    attributes = X[:20000]
    outcomes = y[:20000]
    #print list(set(outcomes))
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=100, threshold=0.005, ownFunction=False)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr + 1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))
Example 3: main

def main():
    files = [
        join(SCRIPT_DIR, "train_x.npy"),
        join(SCRIPT_DIR, "train_y.npy"),
        join(SCRIPT_DIR, "validate_x.npy"),
        join(SCRIPT_DIR, "validate_y.npy"),
        join(SCRIPT_DIR, "test_x.npy"),
        join(SCRIPT_DIR, "test_y.npy")
    ]
    if all(exists(fname) and stat(fname).st_size > 100 for fname in files):
        print("Already downloaded. Skipping")
    else:
        mnist = fetch_mldata('MNIST original')
        np.random.seed(1234)
        data = mnist.data
        target = mnist.target
        indices = np.arange(len(data))
        np.random.shuffle(indices)
        data = data[indices]
        target = target[indices]
        train_x, train_y = data[:-10000].astype(np.float32) / 255.0, target[:-10000].astype(np.int32)
        test_x, test_y = data[-10000:].astype(np.float32) / 255.0, target[-10000:].astype(np.int32)
        np.save(join(SCRIPT_DIR, "train_x.npy"), train_x[:int(0.9 * train_x.shape[0])])
        np.save(join(SCRIPT_DIR, "train_y.npy"), train_y[:int(0.9 * train_y.shape[0])])
        np.save(join(SCRIPT_DIR, "validate_x.npy"), train_x[int(0.9 * train_x.shape[0]):])
        np.save(join(SCRIPT_DIR, "validate_y.npy"), train_y[int(0.9 * train_y.shape[0]):])
        np.save(join(SCRIPT_DIR, "test_x.npy"), test_x)
        np.save(join(SCRIPT_DIR, "test_y.npy"), test_y)
    print("Done.")
Example 4: prepare_dataset

def prepare_dataset():
    print('load MNIST dataset')
    mnist = fetch_mldata('MNIST original')
    mnist['data'] = mnist['data'].astype(np.float32)
    mnist['data'] /= 255
    mnist['target'] = mnist['target'].astype(np.int32)
    return mnist
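A minimal usage sketch for prepare_dataset(), assuming the conventional ordering of 'MNIST original' (60,000 training samples followed by 10,000 test samples):

import numpy as np

mnist = prepare_dataset()
x_train, x_test = np.split(mnist['data'], [60000])
y_train, y_test = np.split(mnist['target'], [60000])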
Example 5: load

def load(config, test=False):
    """Load MNIST dataset using scikit-learn. Returns a dict with the
    following entries:
        - images: n x 28 x 28 array
        - data: n x 784 array
        - target: n array
    """
    dataset = fetch_mldata('mnist-original')
    X, y = dataset.data, dataset.target
    X = X.astype(np.float32) / 255.0
    if test:
        idx_start, idx_end = config['test_set']
    else:
        idx_start, idx_end = config['train_set']
    X, y = shuffle(X, y, random_state=42)
    X = X[idx_start:idx_end]
    y = y[idx_start:idx_end]
    return {
        'images': X.reshape(-1, 28, 28),
        'data': X,
        'target': y,
    }
Example 6: get_mnist

def get_mnist(start=None, end=None, random=False, num=None):
    mnist = fetch_mldata('MNIST original', data_home='~/diss/mnist')
    # `random` defaults to False, so test its truth value; checking
    # `random is not None` would also pass for the default and leave
    # `idx` unbound.
    if random and num is not None:
        idx = np.random.choice(range(mnist.data.shape[0]), num)
    elif start is not None and end is not None:
        idx = range(start, end)
    else:
        raise ValueError("pass either start/end or random=True with num")
    return mnist.data[idx], mnist.target[idx]
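Hypothetical calls to the helper above, for illustration:

X_rand, y_rand = get_mnist(random=True, num=1000)  # 1,000 samples drawn at random
X_head, y_head = get_mnist(start=0, end=5000)      # the first 5,000 samples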
Example 7: main

def main(description, gpu, output):
    logging.basicConfig(level=logging.INFO)
    logging.info('fetch MNIST dataset')
    mnist = fetch_mldata(description)
    mnist.data = mnist.data.astype(numpy.float32)
    mnist.data /= 255
    mnist.target = mnist.target.astype(numpy.int32)
    data_train, data_test, target_train, target_test = train_test_split(mnist.data, mnist.target)
    data = data_train, data_test
    target = target_train, target_test
    start_time = time.time()
    if gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(gpu).use()
        logging.info("Using gpu device {}".format(gpu))
    else:
        logging.info("Not using gpu device")
    mlp = MLP(data=data, target=target, gpu=gpu)
    mlp.train_and_test(n_epoch=1)
    end_time = time.time()
    logging.info("time = {} min".format((end_time - start_time) / 60.0))
    logging.info('saving trained mlp into {}'.format(output))
    with open(output, 'wb') as fp:
        pickle.dump(mlp, fp)
Example 8: test_classifier_chain_vs_independent_models

def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models.
    yeast = fetch_mldata('yeast')
    X = yeast['data']
    Y = yeast['target'].transpose().toarray()
    X_train = X[:2000, :]
    X_test = X[2000:, :]
    Y_train = Y[:2000, :]
    Y_test = Y[2000:, :]
    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)
    chain = ClassifierChain(LogisticRegression(),
                            order=np.array([0, 2, 4, 6, 8, 10,
                                            12, 1, 3, 5, 7, 9,
                                            11, 13]))
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)
    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr))
Example 9: load_script

def load_script(script_vars):
    def define(var_name, fun, overwrite=False):
        if var_name in script_vars and not overwrite:
            print('%s is already defined' % var_name)
            return script_vars[var_name]
        else:
            print('computing variable %s' % var_name)
            value = fun()
            script_vars[var_name] = value
            globals()[var_name] = value
            return value

    print(globals().keys())
    custom_data_home = "/home/stefan2/mnistdata"
    define('mnist', lambda: fetch_mldata('MNIST original', data_home=custom_data_home))
    data = mnist.data.astype(float)  # [0:100,:]  # convert to float
    labels = mnist.target  # [0:100]
    n, m = data.shape
    print("num data points %s" % n)
    # run the method with successive orthogonalization
    for j in range(0, 50):
        print("iteration: " + str(j))
        res = find_dominant_directions(data)
        plot_vector_png("pattern_" + str(j), res)
        for i in range(0, n):
            v = data[i, :]
            proj = np.reshape(v, (1, m)).dot(np.reshape(res, (m, 1)))[0, 0]
            data[i, :] = v - proj * res
Example 10: test_configs

def test_configs():
    from sklearn import datasets
    from datetime import datetime
    import sys
    import os
    import logging

    log = logging.getLogger()
    handler = logging.StreamHandler(sys.stdout)
    fmt = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', '%Y-%m-%d %H:%M:%S')
    handler.setFormatter(fmt)
    log.addHandler(handler)
    log.setLevel(logging.DEBUG)
    custom_data_home = os.getcwd() + '/sk_data'
    digits = datasets.fetch_mldata('MNIST original', data_home=custom_data_home)
    X = np.asarray(digits.data, 'float32')
    # images = [imresize(im.reshape(28, 28), (32, 32)) for im in X]
    # X = np.vstack([im.flatten() for im in images])
    X[X < 128] = 0
    X[X >= 128] = 1
    X /= 256.  # note: after binarization this leaves values of 0 and 1/256
    models = []
    for w_sigma in [.1, .5, 1, 2, 5]:
        for sparsity in [.001, .01, .05, .1, .5]:
            log.info('Building RBM_dl:\n w_sigma=%s\n sparsity=%s' % (w_sigma, sparsity))
            model = ConvRBM((28, 28), 40, w_size=11, n_iter=3, verbose=True, w_sigma=w_sigma, sparsity=sparsity)
            model.fit(X)
            models.append({
                'model': model,
                'w_sigma': w_sigma,
                'sparsity': sparsity,
            })
    log.info('Done')
    return models
Example 11: run

def run(data_path):
    print "Reading the dataset:", data_path
    ## http://continuum.io/blog/wiserf-use-cases-and-benchmarks
    mnist = fetch_mldata('MNIST original')
    # Define training and testing sets
    inds = arange(len(mnist.data))
    test_i = random.sample(xrange(len(inds)), int(0.1 * len(inds)))
    train_i = numpy.delete(inds, test_i)
    X_train = mnist.data[train_i].astype(numpy.double)
    y_train = mnist.target[train_i].astype(numpy.double)
    X_test = mnist.data[test_i].astype(numpy.double)
    y_test = mnist.target[test_i].astype(numpy.double)
    # Truncate the data
    X_digits, y_digits = shuffle(X_train, y_train)
    X_digits_train = X_digits[:1000]
    y_digits_train = y_digits[:1000]
    X_digits_valid = X_digits[1000:2000]
    y_digits_valid = y_digits[1000:2000]
    X_digits_test = X_digits[2000:3000]
    y_digits_test = y_digits[2000:3000]
    knn_digits = KNeighborsClassifier(n_neighbors=10)
    knn_digits.fit(X_digits_train, y_digits_train)
    print "KNN validation accuracy on MNIST digits: ",
    print knn_digits.score(X_digits_valid, y_digits_valid)
Example 12: main

def main():
    print '... get mnist data'
    mnist = fetch_mldata('MNIST original', data_home='.')
    fig, axes = plt.subplots(5, 3, figsize=(6, 8))
    data = mnist.data[[0, 7000, 14000, 21000, 28000]]
    print '... start training'
    for i, (axrow, img) in enumerate(zip(axes, data)):
        img = img.reshape(28, 28)
        img = (img >= 128).astype(int)
        corrupted = get_corrupted_input(img, 0.05)
        mrf = MRF(corrupted)
        if i == 0:
            axes[i][0].set_title('元画像')      # "original image"
            axes[i][1].set_title('ノイズあり')  # "with noise"
            axes[i][2].set_title('ノイズ除去')  # "denoised"
        axes[i][0].imshow(img, cmap=cm.Greys_r)
        axes[i][1].imshow(corrupted, cmap=cm.Greys_r)
        axes[i][2].imshow(mrf.denoised, cmap=cm.Greys_r)
        for ax in axrow:
            ax.xaxis.set_visible(False)
            ax.yaxis.set_visible(False)
    plt.show()
Example 13: load

def load(train_n, test_n):
    mnist = fetch_mldata('MNIST original', data_home='.')
    mnist.data = mnist.data.astype(np.float32) / 256.0
    mnist.target = mnist.target.astype(np.int32)
    N = len(mnist.data)
    order = np.random.permutation(N)
    train = {i: [] for i in range(10)}
    test = {i: [] for i in range(10)}
    train_m = math.ceil(train_n / 10)
    train_sum = 0
    test_m = math.ceil(test_n / 10)
    test_sum = 0
    for i in range(N):
        x = mnist.data[order[i]]
        y = mnist.target[order[i]]
        if train_sum < train_n and len(train[y]) < train_m:
            train[y].append(x)
            train_sum += 1
        # elif (rather than a second if) keeps train and test disjoint
        elif test_sum < test_n and len(test[y]) < test_m:
            test[y].append(x)
            test_sum += 1
    return train, test
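A hypothetical call to this loader, requesting a class-balanced 1,000/100 split (train[d] and test[d] hold the images for digit d):

train, test = load(1000, 100)
print(len(train[0]), len(test[0]))  # 100 training and 10 test images per class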
Example 14: download__by_category

def download__by_category():
    mnist = fetch_mldata('MNIST original')
    # mnist.data = random.sample(mnist.data, 1000)
    # mnist.target = random.sample(mnist.target, 1000)
    # mnist.data: (70000, 784), mnist.target: (70000,)
    trainX, trainY = mnist.data[:-10000], mnist.target[:-10000]
    testX, testY = mnist.data[-10000:], mnist.target[-10000:]
    if not exists('train'):
        os.makedirs('train')
    x = {i: [] for i in range(10)}
    for i in range(len(trainY)):
        x[trainY[i]].append(trainX[i])
    for i in range(10):
        # pickle files must be opened in binary mode
        cPickle.dump(x[i], open(join('train', '{}.pkl'.format(i)), 'wb'))
    if not exists('test'):
        os.makedirs('test')
    x = {i: [] for i in range(10)}
    for i in range(len(testY)):
        x[testY[i]].append(testX[i])
    for i in range(10):
        cPickle.dump(x[i], open(join('test', '{}.pkl'.format(i)), 'wb'))
Example 15: make_data

def make_data(N):
    print("fetch MNIST dataset")
    mnist = fetch_mldata('MNIST original', data_home='.')
    mnist.data = mnist.data.astype(np.float32)
    mnist.data /= 255
    mnist.target = mnist.target.astype(np.int32)
    # build one-hot y labels
    mnist_target = np.zeros((mnist.target.shape[0], 10))
    for index, num in enumerate(mnist.target):
        mnist_target[index][num] = 1.
    # print(mnist_target)
    # shuffle
    index = random.sample(range(mnist.target.shape[0]), mnist.target.shape[0])
    tmp_target = [mnist_target[i] for i in index]
    tmp_data = [mnist.data[i] for i in index]
    # print("N : ", len(tmp_target))
    # print("tmp_target : ", tmp_target)
    x_train, x_test = np.split(tmp_data, [N])
    y_train, y_test = np.split(tmp_target, [N])
    return [x_train, x_test, y_train, y_test]
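A hypothetical call, using the conventional 60,000-sample training split:

x_train, x_test, y_train, y_test = make_data(60000)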