This article collects typical usage examples of the ClassificationDataSet class from pybrain.datasets in Python. If you are wondering what ClassificationDataSet does, how to use it, or what real code that uses it looks like, the curated class examples below should help.
The following sections present 15 code examples of the ClassificationDataSet class, ordered roughly by popularity.
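Before the examples, here is a minimal, self-contained sketch of the typical ClassificationDataSet workflow: build the dataset, add samples, split off a test set, convert the targets to one-of-many encoding, then train and score a network. The sample values and layer sizes are made up for illustration and are not taken from any of the examples that follow.

from pybrain.datasets import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer
from pybrain.utilities import percentError

# A tiny two-feature, two-class dataset (values are illustrative only).
ds = ClassificationDataSet(2, 1, nb_classes=2, class_labels=['neg', 'pos'])
ds.addSample([0.1, 0.2], [0])
ds.addSample([0.2, 0.1], [0])
ds.addSample([0.8, 0.9], [1])
ds.addSample([0.9, 0.8], [1])

# Hold out 25% of the samples and expand targets to one-of-many encoding.
tstdata, trndata = ds.splitWithProportion(0.25)
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()

# Train a small softmax network and report the percent error on the held-out split.
net = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=trndata)
trainer.trainEpochs(10)
print(percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class']))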
Example 1: batch_classify
def batch_classify(self, samples):
    ds = ClassificationDataSet(len(self._fx))
    for sample in samples:
        fvec = [sample[l] for l in self._fx]
        ds.addSample(fvec, [0])
    results = self._trainer.testOnClassData(ds)
    return [self._rmap[r] for r in results]
Example 2: generate_data
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    #means = [(-1, 0), (2, 4), (3, 1)]
    #cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    #minX, maxX = means[0][0], means[0][0]
    #minY, maxY = means[0][1], means[0][1]
    #print minX, maxX , minY, maxY
    # #for i in range(n):
    # for klass in range(CLASSES):
    #     features = multivariate_normal(means[klass], cov[klass])
    #     #print means[klass], cov[klass]
    #     #print features
    #     x, y = features
    #     minX, maxX = min(minX, x), max(maxX, x)
    #     minY, maxY = min(minY, y), max(maxY, y)
    #     alldata.addSample(features, [klass])
    #print alldata
    alldata.addSample([0, 0], [0])
    alldata.addSample([0, 1], [1])
    alldata.addSample([1, 0], [1])
    alldata.addSample([1, 1], [0])
    return {'minX': 0, 'maxX': 1,
            'minY': 0, 'maxY': 1, 'd': alldata}
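As a usage sketch (not part of the original example), the XOR dataset returned by generate_data can be converted and trained on directly; the hidden-layer size and epoch count below are arbitrary illustrative choices.

from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer

result = generate_data()
xor_data = result['d']
xor_data._convertToOneOfMany()
net = buildNetwork(xor_data.indim, 4, xor_data.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(net, dataset=xor_data)
trainer.trainEpochs(100)
print(net.activate([1, 0]))  # softmax outputs per class; the argmax is the predicted class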
Example 3: __init__
class NeuralNetLearner:
    def __init__(self):
        self.bunch = load_digits()
        self.X = np.asarray(self.bunch.data, 'float32')
        self.Y = np.asarray(self.bunch.target, 'float32')
        #self.X, self.Y = nudge_dataset(self.X, self.bunch.target)
        self.X = (self.X - np.min(self.X, 0)) / (np.max(self.X, 0) + 0.0001)  # 0-1 scaling
        self.ds = ClassificationDataSet(64, nb_classes=10, class_labels=self.bunch.target_names)
        for (x, y) in zip(self.X, self.Y):
            self.ds.addSample(x, y)
        self.test_data, self.train_data = self.ds.splitWithProportion(0.3)
        self.network = buildNetwork(64, 10, 1)

    def get_datasets(self):
        return self.train_data, self.test_data

    def activate(self, x):
        # return the activation so callers such as fitness_func can use it
        return self.network.activate(x.tolist())

    def fitness_func(self, x):
        if not (x.size == 64):
            print("Bad input vector: ", x)
            return
        sum_of_squared_error = 0
        for (input, target) in self.ds:
            # pass the array directly; activate() handles the tolist() conversion
            sum_of_squared_error += (target - self.activate(input))
        return (sum_of_squared_error / len(self.ds))

    def get_weights(self):
        return
Example 4: read_data
def read_data(filename):
    """
    See http://www.pybrain.org/docs/api/datasets/classificationdataset.html

    Reads a (naive) csv file of data and converts it into
    a ClassificationDataSet. 'Naive' in this case means
    the data can be parsed by splitting on commas - i.e.,
    no quotations or escapes. I picked this file format
    because it should be trivial to convert all our data into it.

    Raises an exception when an IO error occurs.

    Parameters:
        filename - The name of the file containing the data.
    """
    data_file = open(filename, "r")
    data_lines = [line.split(',') for line in data_file.readlines()]
    data_file.close()
    features = [[float(f) for f in line[0:-1]] for line in data_lines]
    classes = [[int(line[-1])] for line in data_lines]
    # Workaround to make classifications zero-based
    class_min = min([c[0] for c in classes])
    for i in range(len(classes)):
        classes[i][0] -= class_min
    data_set = ClassificationDataSet(len(features[0]))
    for feature_vector, classification in zip(features, classes):
        data_set.addSample(feature_vector, classification)
    return data_set
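For illustration only (the file name and values below are invented, not from the original source), a "naive" CSV accepted by read_data puts the integer class label in the last column:

# hypothetical contents of data.csv:
#   5.1,3.5,1.4,0.2,1
#   4.9,3.0,1.4,0.2,1
#   6.3,3.3,6.0,2.5,3
ds = read_data("data.csv")
print(len(ds), ds.indim)  # 3 samples with 4 input features; labels are shifted to start at 0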
Example 5: build_dataset
def build_dataset(data_pair):
    inputs, classes = data_pair
    ds = ClassificationDataSet(256)
    data = zip(inputs, classes)
    for (inp, c) in data:
        ds.appendLinked(inp, [c])
    return ds
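A short usage sketch (not part of the original excerpt): build_dataset expects a pair of parallel sequences, 256-dimensional feature vectors and their integer class labels.

inputs = [[0.0] * 256, [1.0] * 256]  # made-up feature vectors
classes = [0, 1]                     # made-up class labels
ds = build_dataset((inputs, classes))
print(len(ds))  # -> 2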
Example 6: getdata
def getdata(do_preprocessing, full_data):
    '''
    Fetch and format the match data according to the given flags.

    do_preprocessing: bool: true if the data should be preprocessed
    full_data: bool: false if the minimal data should be used
    '''
    print("fetching data ...")
    if full_data == 0:
        fn = getMinimalDatafromMatch
    else:
        fn = getBasicDatafromMatch
    if globals.use_saved_data:
        try:
            with open('processed_data%d' % full_data) as outfile:
                data = json.load(outfile)
        except IOError:
            matches = Match.objects.all()
            data = list(map(lambda x: (fn(x, do_preprocessing, False), x.won), matches))
            data += list(map(lambda x: (fn(x, do_preprocessing, True), not x.won), matches))
            with open('processed_data%d' % full_data, 'w') as outfile:
                json.dump(data, outfile)
    else:
        matches = Match.objects.all()
        data = list(map(lambda x: (fn(x, do_preprocessing, False), x.won), matches))
        data += list(map(lambda x: (fn(x, do_preprocessing, True), not x.won), matches))
        with open('processed_data%d' % full_data, 'w') as outfile:
            json.dump(data, outfile)
    all_data = None
    for input, won in data:
        if all_data is None:
            all_data = ClassificationDataSet(len(input), 1, nb_classes=2)
        all_data.addSample(input, int(won))
    return all_data
Example 7: NeuralNetwork
class NeuralNetwork(BaseWorkflow):

    def __init__(self, purpose='train', num_inputs=None, num_outputs=None, classes=None, class_labels=None):
        super(NeuralNetwork, self).__init__()
        self.purpose = purpose
        self.data_path = self.config.neural_net.get(self.purpose, None)
        self.file_name = 'neural_net'
        self.all_data = ClassificationDataSet(num_inputs,
                                              num_outputs,
                                              nb_classes=classes,
                                              class_labels=class_labels)
        self.train = None
        self.test = None
        self.neural_network = None
        self.train_result = None
        self.test_result = None
        self.cross_validation_result = None
    def process(self):
        self.prepare_train_test()
        self.build_network()
        trainer = self.train_network(dataset=self.train)
        self.score_train_test(trainer=trainer)
        self.cross_validate(dataset=self.all_data)

    def add_sample(self, correlogram_matrix=None, target=None, sample_path=None):
        self.all_data.addSample(correlogram_matrix, target)
        logger.info('sample added from {sample_path}'.format(sample_path=sample_path))

    def prepare_train_test(self):
        self.test, self.train = self.all_data.splitWithProportion(0.25)

    def build_network(self):
        # feed-forward network with one hidden layer of 7 units
        self.neural_network = buildNetwork(self.train.indim, 7, self.train.outdim, outclass=SoftmaxLayer)

    def train_network(self, dataset=None):
        starter_trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        starter_trainer.trainUntilConvergence(validationProportion=0.25, maxEpochs=100)
        return starter_trainer

    def score_train_test(self, trainer=None):
        self.test_result = percentError(trainer.testOnClassData(dataset=self.test), self.test['class'])
        logger.info('test error result: {result}'.format(result=self.test_result))
        self.train_result = percentError(trainer.testOnClassData(dataset=self.train), self.train['class'])
        logger.info('train error result: {result}'.format(result=self.train_result))

    def cross_validate(self, dataset=None):
        trainer = BackpropTrainer(self.neural_network, dataset=dataset, momentum=0.1, verbose=True, weightdecay=0.01)
        validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
        mean_validation_result = validator.validate()
        self.cross_validation_result = mean_validation_result
        logger.info('cross val result: {result}'.format(result=self.cross_validation_result))

    @staticmethod
    def save_network_to_xml(net=None, file_name=None):
        NetworkWriter.writeToFile(net, file_name)

    @staticmethod
    def read_network_from_xml(file_name=None):
        return NetworkReader.readFrom(file_name)
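A hedged driver sketch for the class above. Everything here is hypothetical: the input dimension, the class labels, the output file name, and the load_correlograms loader are stand-ins, not part of the original code.

nn = NeuralNetwork(purpose='train', num_inputs=64, num_outputs=1,
                   classes=4, class_labels=['a', 'b', 'c', 'd'])
for matrix, label, path in load_correlograms():  # hypothetical loader yielding (vector, class, path)
    nn.add_sample(correlogram_matrix=matrix, target=label, sample_path=path)
nn.process()
NeuralNetwork.save_network_to_xml(net=nn.neural_network, file_name='neural_net.xml')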
Example 8: importFromCSV
def importFromCSV(self, fileName, numInputs, numClasses):
    """
    Reads in a CSV file and converts it to the pybrain
    ClassificationDataSet structure so it can be used with the
    library's neural net classes.

    The last column of each row is expected to contain the class
    label. An optional first line beginning with '!labels:' can
    supply the class label names.
    """
    dataSet = None
    dataFile = open(fileName)
    line = dataFile.readline()
    data = [str(x) for x in line.strip().split(',') if x != '']
    if data[0] == '!labels:':
        labels = data[1:]
        dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses, class_labels=labels)
        line = dataFile.readline()
    else:
        dataSet = ClassificationDataSet(numInputs, nb_classes=numClasses)
    while line != '':
        data = [float(x) for x in line.strip().split(',') if x != '']
        inputData = data[:numInputs]
        outputData = data[-1:]
        dataSet.addSample(inputData, outputData)
        line = dataFile.readline()
    dataFile.close()
    return dataSet
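For illustration (the file name and numbers are invented), a CSV accepted by importFromCSV might look like the comment below; importer stands for whatever object defines the method.

# hypothetical contents of shapes.csv:
#   !labels:,circle,square
#   0.12,0.85,0.33,0
#   0.91,0.04,0.56,1
ds = importer.importFromCSV("shapes.csv", numInputs=3, numClasses=2)
print(len(ds))  # -> 2 samples with 3 inputs each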
Example 9: load_data
def load_data(filename):
    """
    load dataset for classification
    """
    assert os.path.exists(filename) == True
    dat = scipy.io.loadmat(filename)
    inputs = dat['inputs']
    #print len(inputs)
    targets = dat['targets']
    #print len(targets)
    assert len(inputs) == len(targets)
    global alldata
    global indim
    global outdim
    indim = len(inputs[0])
    outdim = 1
    #print indim
    alldata = ClassificationDataSet(indim, outdim, nb_classes=8)
    alldata.setField('input', inputs)
    alldata.setField('target', targets)
    assert len(alldata['input']) == len(alldata['target'])
    print(type(alldata))
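A hedged round-trip sketch: the file name and random data are made up, and it assumes the call runs in the same module as load_data so the module-level globals it sets are visible.

import numpy as np
import scipy.io

# write a small .mat file with the 'inputs' and 'targets' keys load_data expects
scipy.io.savemat('toy.mat', {'inputs': np.random.rand(10, 5),
                             'targets': np.random.randint(0, 8, (10, 1))})
load_data('toy.mat')
print(indim, outdim, len(alldata))  # globals set by load_data: 5 1 10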
Example 10: test
def test(self, filename, classes, trainer, net):
    testLabels = []
    # load test data (the initial dataset is immediately replaced by loaddata)
    tstdata = ClassificationDataSet(103, 1, nb_classes=classes)
    tstdata = self.loaddata(filename, classes)
    testLabels = tstdata['target']
    # convert targets to one-of-many encoding, as the trained network expects
    tstdata._convertToOneOfMany()
    # using numpy array
    output = np.array([net.activate(x) for x, _ in tstdata])
    output = output.argmax(axis=1)
    print(output)
    print("on test data", percentError(output, tstdata['class']))
    for i, l in enumerate(output):
        print(l, '->', testLabels[i][0])
    # alternate version - using the activateOnDataset function
    out = net.activateOnDataset(tstdata).argmax(axis=1)
    print(out)
    return percentError(out, tstdata['class'])
Example 11: trainModel
def trainModel(self):
    self.finalDataSet = np.c_[self.flattenNumericalData, self.flattenCategoryData, self.flattenTargetDataConverted]
    self.finalHeaderSet = self.flattenNumericalHeader + self.flattenCategoryHeader + self.flattenTargetHeader
    self.nattributes = self.flattenNumericalData.shape[1] + self.flattenCategoryData.shape[1]
    ds = ClassificationDataSet(self.nattributes, 1, nb_classes=self.nbClasses)
    for rowData in self.finalDataSet:
        target = rowData[-1]
        variables = rowData[0:-1]
        ds.addSample(variables, target)
    self.testDataSet, self.trainDataSet = ds.splitWithProportion(0.25)
    self.testDataSet._convertToOneOfMany()
    self.trainDataSet._convertToOneOfMany()
    print(self.testDataSet)
    print(self.trainDataSet)
    self.net = buildNetwork(self.nattributes, self.nhiddenNerons, self.noutput, hiddenclass=TanhLayer, outclass=SigmoidLayer, bias=True)
    self.trainer = BackpropTrainer(self.net, self.trainDataSet, learningrate=0.001, momentum=0.99)
    begin0 = time.time()
    # self.trainer.trainUntilConvergence(verbose=True, dataset=ds, validationProportion=0.25, maxEpochs=10)
    for i in range(10):
        begin = time.time()
        self.trainer.trainEpochs(10)
        end = time.time()
        print('iteration ', i, ' takes ', end - begin, 'seconds')
    end0 = time.time()
    print('total time consumed: ', end0 - begin0)
Example 12: run_nn_fold
def run_nn_fold(training_data, test_data):
    test_features, ignore, featureMap, labels, labelMap = fs.mutualinfo(training_data)
    input_len = len(test_features[0])
    num_classes = len(labelMap.keys())
    train_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    for i in range(len(test_features)):
        train_ds.addSample(tuple(test_features[i]), (labels[i]))
    train_ds._convertToOneOfMany()
    net = buildNetwork(train_ds.indim, 2, train_ds.outdim, bias=True, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, train_ds, verbose=True)
    print("training until convergence...")
    trainer.trainUntilConvergence(maxEpochs=100)
    print("done. testing...")
    test_ds = ClassificationDataSet(input_len, 1, nb_classes=num_classes)
    labels = []
    for tweetinfo in test_data:
        featuresFound = tweetinfo["Features"]
        label = tweetinfo["Answer"]
        labels.append(label)
        features = [0] * len(featureMap.keys())
        for feat in featuresFound:
            if feat in featureMap:
                features[featureMap[feat]] = 1
        test_ds.addSample(tuple(features), (labelMap[label]))
    test_ds._convertToOneOfMany()
    tstresult = percentError(trainer.testOnClassData(
        dataset=test_ds), test_ds['class'])
    print(tstresult)
Example 13: classifer
def classifer(labels, data):
    """ data in format (value, label)
    """
    clsff = ClassificationDataSet(2, class_labels=labels)
    for d in data:
        clsff.appendLinked(d[0], d[1])
    clsff.calculateStatistics()
Example 14: createnetwork
def createnetwork(n_hoglist, n_classlist, n_classnum, n_hiddensize=100):
    n_inputdim = len(n_hoglist[0])
    n_alldata = ClassificationDataSet(n_inputdim, 1, nb_classes=n_classnum)
    for i in range(len(n_hoglist)):
        n_input = n_hoglist[i]
        n_class = n_classlist[i]
        n_alldata.addSample(n_input, [n_class])
    n_tstdata, n_trndata = n_alldata.splitWithProportion(0.25)
    n_trndata._convertToOneOfMany()
    n_tstdata._convertToOneOfMany()

    print("Number of training patterns: ", len(n_trndata))
    print("Input and output dimensions: ", n_trndata.indim, n_trndata.outdim)
    print("First sample (input, target, class):")
    print(n_trndata['input'][0], n_trndata['target'][0], n_trndata['class'][0])

    n_fnn = buildNetwork(n_trndata.indim, n_hiddensize, n_trndata.outdim, outclass=SoftmaxLayer)
    n_trainer = BackpropTrainer(n_fnn, dataset=n_trndata, momentum=0.1, verbose=True, weightdecay=0.01)

    # keep training until the test error (in percent) drops to 0.1 or below
    n_result = 1
    while n_result > 0.1:
        print(n_result)
        n_trainer.trainEpochs(1)
        n_trnresult = percentError(n_trainer.testOnClassData(),
                                   n_trndata['class'])
        n_tstresult = percentError(n_trainer.testOnClassData(
            dataset=n_tstdata), n_tstdata['class'])
        print("epoch: %4d" % n_trainer.totalepochs,
              " train error: %5.2f%%" % n_trnresult,
              " test error: %5.2f%%" % n_tstresult)
        n_result = n_tstresult
Example 15: build_dataset
def build_dataset(
    mongo_collection, patch_size=IMG_SIZE, orig_size=IMG_SIZE, nb_classes=2, edgedetect=True, transform=True
):
    # deprecated
    if edgedetect:
        import cv2
    from pybrain.datasets import SupervisedDataSet, ClassificationDataSet

    patch_size = min(patch_size, orig_size)
    trim = round((orig_size - patch_size) / 2)
    # ds = SupervisedDataSet(patch_size**2, 1)
    ds = ClassificationDataSet(patch_size ** 2, target=1, nb_classes=nb_classes)
    cursor = list(mongo_collection.find())
    for one_image in cursor:
        # convert from binary to numpy array and transform
        img_array = np.fromstring(one_image["image"], dtype="uint8")
        if edgedetect:
            img_array = cv2.Canny(img_array, 150, 200)
        img_crop = img_array.reshape(orig_size, orig_size)[trim:(trim + patch_size), trim:(trim + patch_size)]
        classification = float(one_image["class"])
        if transform:
            transformed = transform_img(img_crop.ravel(), patch_size)
        else:
            transformed = [img_crop.ravel()]
        for one_img in transformed:
            ds.addSample(one_img.ravel(), classification)
    print("New dataset contains %d images (%d positive)." % (len(ds), sum(ds["target"])))
    return ds