本文整理汇总了Python中samples.loadDataFile函数的典型用法代码示例。如果您正苦于以下问题：Python loadDataFile函数的具体用法？Python loadDataFile怎么用？Python loadDataFile使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了loadDataFile函数的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: readDigitData
def readDigitData(trainingSize=100, testSize=100):
    """Load the digit data sets and extract basic features from each of them.

    Args:
        trainingSize: value passed to the loaders for the training files.
        testSize: value passed to the loaders for the test files.
            (The validation files are loaded with ``TEST_SET_SIZE``.)

    Returns:
        The 9-tuple ``(trainingData, trainingLabels, validationData,
        validationLabels, rawTrainingData, rawValidationData, testData,
        testLabels, rawTestData)``.

    Raises:
        Exception: any error raised during feature extraction, re-raised
            after its traceback has been reported through ``display``.
    """
    rootdata = "digitdata/"

    # loading digits data
    rawTrainingData = samples.loadDataFile(
        rootdata + "trainingimages", trainingSize, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
    )
    trainingLabels = samples.loadLabelsFile(rootdata + "traininglabels", trainingSize)
    rawValidationData = samples.loadDataFile(
        rootdata + "validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
    )
    validationLabels = samples.loadLabelsFile(rootdata + "validationlabels", TEST_SET_SIZE)
    # Build the test paths from rootdata too, like the other two splits.
    rawTestData = samples.loadDataFile(
        rootdata + "testimages", testSize, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
    )
    testLabels = samples.loadLabelsFile(rootdata + "testlabels", testSize)

    try:
        print("Extracting features...")
        featureFunction = dataClassifier.basicFeatureExtractorDigit
        trainingData = [featureFunction(datum) for datum in rawTrainingData]
        validationData = [featureFunction(datum) for datum in rawValidationData]
        testData = [featureFunction(datum) for datum in rawTestData]
    except Exception:
        display("An exception was raised while extracting basic features: \n %s" % getExceptionTraceBack())
        # Falling through would only fail later on unbound feature lists,
        # hiding the real cause; surface the original error instead.
        raise

    return (
        trainingData,
        trainingLabels,
        validationData,
        validationLabels,
        rawTrainingData,
        rawValidationData,
        testData,
        testLabels,
        rawTestData,
    )
示例2: get_neuron_training_data
def get_neuron_training_data():
    """Load the digit training set as a binary "label equals 3" problem.

    The number of examples requested is the module-level
    ``num_train_examples``.

    Returns:
        A tuple ``(training_data, featurized_training_data, training_labels)``:
        the raw loaded items, a NumPy array built from
        ``dcu.simple_image_featurization`` applied to each item, and the
        result of comparing the label array with 3.
    """
    training_data = samples.loadDataFile("digitdata/trainingimages", num_train_examples, 28, 28)
    training_labels = np.array(samples.loadLabelsFile("digitdata/traininglabels", num_train_examples))
    training_labels = training_labels == 3
    # A list comprehension (not map) so np.array always receives a real
    # sequence, even where map() returns a lazy iterator.
    featurized_training_data = np.array(
        [dcu.simple_image_featurization(datum) for datum in training_data]
    )
    return training_data, featurized_training_data, training_labels
示例3: get_neuron_test_data
def get_neuron_test_data(num_test_examples=1000):
    """Load the digit test set as a binary "label equals 3" problem.

    Args:
        num_test_examples: value passed to both loaders; defaults to the
            1000 that was previously hard-coded in two places.

    Returns:
        A tuple ``(test_data, featurized_test_data, test_labels)``: the raw
        loaded items, a NumPy array built from
        ``dcu.simple_image_featurization`` applied to each item, and the
        result of comparing the label array with 3.
    """
    test_data = samples.loadDataFile("digitdata/testimages", num_test_examples, 28, 28)
    test_labels = np.array(samples.loadLabelsFile("digitdata/testlabels", num_test_examples))
    test_labels = test_labels == 3
    # A list comprehension (not map) so np.array always receives a real
    # sequence, even where map() returns a lazy iterator.
    featurized_test_data = np.array(
        [dcu.simple_image_featurization(datum) for datum in test_data]
    )
    return test_data, featurized_test_data, test_labels
示例4: runClassifier
def runClassifier(args, options):
    """Load a data set, train the classifier, and print timing and accuracy.

    Args:
        args: dict providing 'classifier' (must offer ``train`` and
            ``classify``) and 'printImage' (forwarded to ``analysis``).
        options: object with the attributes ``training`` and ``test``
            (sample sizes handed to ``randomSample``), ``data`` ("faces"
            selects the face files, anything else the digit files) and
            ``classifier`` ("linear_svm" selects the HOG feature functions).
    """
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    useFaces = options.data == "faces"
    if useFaces:
        print("loading face data set")
        width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        trainingFiles = ("facedata/facedatatrain", "facedata/facedatatrainlabels")
        validationFiles = ("facedata/facedatavalidation", "facedata/facedatavalidationlabels")
        testFiles = ("facedata/facedatatest", "facedata/facedatatestlabels")
    else:
        print("loading digit data set")
        width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        trainingFiles = ("digitdata/trainingimages", "digitdata/traininglabels")
        validationFiles = ("digitdata/validationimages", "digitdata/validationlabels")
        testFiles = ("digitdata/testimages", "digitdata/testlabels")

    def loadSplit(files):
        # Data first, then labels, for one (data path, labels path) pair.
        dataPath, labelsPath = files
        return samples.loadDataFile(dataPath, width, height), samples.loadLabelsFile(labelsPath)

    rawTrainingData, trainingLabels = loadSplit(trainingFiles)
    rawValidationData, validationLabels = loadSplit(validationFiles)
    rawTestData, testLabels = loadSplit(testFiles)
    # Only the training and test splits are subsampled; validation is used whole.
    rawTrainingData, trainingLabels = randomSample(rawTrainingData, trainingLabels, numTraining)
    rawTestData, testLabels = randomSample(rawTestData, testLabels, numTest)

    print("Extracting features...")
    useHog = options.classifier == "linear_svm"
    if useHog:
        featureFunction = HogFeatureFaceImg if useFaces else HogFeatureImgDigit
    else:
        featureFunction = enhancedFeatureExtractorFace if useFaces else enhancedFeatureExtractorDigit

    def extract(rawData):
        features = [featureFunction(datum) for datum in rawData]
        # The linear SVM path hands the classifier a transposed array.
        return np.array(features).transpose() if useHog else features

    trainingData = extract(rawTrainingData)
    validationData = extract(rawValidationData)
    testData = extract(rawTestData)

    def reportAccuracy(guesses, labels):
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty label set would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    print("Training...")
    start = timeit.default_timer()
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    stop = timeit.default_timer()
    print("%s %s" % (stop - start, " s"))

    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)

    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
示例5: runClassifier
def runClassifier(args, options):
    """Train a classifier on the face or digit data and report its accuracy.

    Args:
        args: dict providing 'featureFunction' (applied to every raw datum),
            'classifier' (must offer ``train`` and ``classify``) and
            'printImage' (used to display feature sets).
        options: object with the attributes ``training``, ``data``,
            ``odds``, ``classifier``, ``label1`` and ``label2``.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    if options.data == "faces":
        width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        trainingFiles = ("facedata/facedatatrain", "facedata/facedatatrainlabels")
        # Validation reads its own files; loading "facedatatrain" again
        # would validate on examples the classifier was trained on.
        validationFiles = ("facedata/facedatavalidation", "facedata/facedatavalidationlabels")
        testFiles = ("facedata/facedatatest", "facedata/facedatatestlabels")
    else:
        width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        trainingFiles = ("digitdata/trainingimages", "digitdata/traininglabels")
        validationFiles = ("digitdata/validationimages", "digitdata/validationlabels")
        testFiles = ("digitdata/testimages", "digitdata/testlabels")

    def loadSplit(files, count):
        # Data first, then labels, both with the same count.
        dataPath, labelsPath = files
        return (samples.loadDataFile(dataPath, count, width, height),
                samples.loadLabelsFile(labelsPath, count))

    rawTrainingData, trainingLabels = loadSplit(trainingFiles, numTraining)
    rawValidationData, validationLabels = loadSplit(validationFiles, TEST_SET_SIZE)
    rawTestData, testLabels = loadSplit(testFiles, TEST_SET_SIZE)

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    def reportAccuracy(guesses, labels):
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty label set would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    # Logical "and" (not bitwise "&") so any truthy options.odds works.
    if options.odds and options.classifier != "mostFrequent":
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        if options.classifier in ("naiveBayes", "nb"):
            string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)
        print(string3)
        printImage(features_odds)
示例6: runClassifier
def runClassifier(args, options):
    """Train a classifier on the face or digit data and report its accuracy.

    Args:
        args: dict providing 'featureFunction' (applied to every raw datum),
            'classifier' (must offer ``train`` and ``classify``) and
            'printImage' (forwarded to ``analysis``).
        options: object with the attributes ``training`` and ``data``.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    if options.data == "faces":
        width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        prefix = "facedata/"
        # Validation reads its own files; loading "facedatatrain" again
        # would validate on examples the classifier was trained on.
        names = [
            ("facedatatrain", "facedatatrainlabels", numTraining),
            ("facedatavalidation", "facedatavalidationlabels", TEST_SET_SIZE),
            ("facedatatest", "facedatatestlabels", TEST_SET_SIZE),
        ]
    else:
        width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        prefix = "digitdata/"
        names = [
            ("trainingimages", "traininglabels", numTraining),
            ("validationimages", "validationlabels", TEST_SET_SIZE),
            ("testimages", "testlabels", TEST_SET_SIZE),
        ]

    # Each split: data first, then labels, both with the same count.
    splits = []
    for dataName, labelsName, count in names:
        rawData = samples.loadDataFile(prefix + dataName, count, width, height)
        labels = samples.loadLabelsFile(prefix + labelsName, count)
        splits.append((rawData, labels))
    rawTrainingData, trainingLabels = splits[0]
    rawValidationData, validationLabels = splits[1]
    rawTestData, testLabels = splits[2]

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    def reportAccuracy(guesses, labels):
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty label set would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
示例7: runClassifier
def runClassifier(args, options):
    """Train a classifier on pacman or digit data and report its accuracy.

    Args:
        args: dict providing 'featureFunction', 'classifier' and
            'printImage'; for pacman data it may also provide
            'agentToClone', 'trainingData' and 'validationData'.
        options: object with the attributes ``training``, ``test``,
            ``data`` ("pacman" selects saved games, anything else the digit
            files), ``odds``, ``weights``, ``classifier``, ``label1`` and
            ``label2``.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingPath, validationPath, testPath = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(agentToClone, (None, None, None))
        # Fallback chain: the agent's saved games, then an explicit path in
        # args, then the 'ContestAgent' entry (looked up only when needed).
        trainingPath = trainingPath or args.get('trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationPath = validationPath or args.get('validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testPath = testPath or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(trainingPath, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(validationPath, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testPath, numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    def reportAccuracy(guesses, labels):
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty label set would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    # Logical "and" (not bitwise "&") so any truthy option value works.
    if options.odds and options.classifier in ("naiveBayes", "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        # Only naive Bayes reaches this point, so only its heading is needed.
        print("=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2))
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for label in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(label)
            print("=== Features with high weight for label %d ===" % label)
            printImage(features_weights)
示例8: writeLabeledData
import numpy as np
def writeLabeledData(prefix, labeled_data):
    """Write (datum, label) pairs to ``<prefix>images`` and ``<prefix>labels``.

    Every datum and every label is written as ``str(value)`` followed by a
    newline, one per line, in input order. Both files are created
    (overwriting any existing content) even when there are no pairs.

    Args:
        prefix: path prefix; "images" and "labels" are appended to it.
        labeled_data: iterable of ``(datum, label)`` pairs.
    """
    pairs = list(labeled_data)
    if pairs:
        datums, labels = zip(*pairs)
    else:
        # zip(*[]) produces nothing to unpack, so handle "no data" explicitly.
        datums, labels = (), ()

    # The with-blocks close the files; no explicit close() is needed.
    with open(prefix + "images", 'w') as f:
        f.writelines(str(datum) + "\n" for datum in datums)
    with open(prefix + "labels", 'w') as f:
        f.writelines(str(label) + "\n" for label in labels)
# Load the three digit splits: 5000 training items, 1000 validation items
# and 1000 test items, each together with its labels.
rawTrainingData = samples.loadDataFile("digitdata/trainingimages", 5000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", 5000)
rawValidationData = samples.loadDataFile("digitdata/validationimages", 1000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 1000)
rawTestData = samples.loadDataFile("digitdata/testimages", 1000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
testLabels = samples.loadLabelsFile("digitdata/testlabels", 1000)
# Pool all splits into one data sequence and one label sequence.
# NOTE(review): "+" presumably concatenates lists here; confirm the loaders' return types.
all_data = rawTrainingData + rawValidationData + rawTestData
all_labels = trainingLabels + validationLabels + testLabels
# Pair every datum with its label.
# NOTE(review): len() below needs zip() to return a list (Python 2 behaviour).
labeled_data = zip(all_data, all_labels)
# Random ordering of the indices 0 .. len(labeled_data) - 1.
perm = np.random.permutation(len(labeled_data))
# NOTE(review): the excerpt ends here; nothing visible fills permuted_data.
permuted_data = []
示例9: runClassifier
def runClassifier(args, options):
    """Train a classifier on the face or digit data; print timing and accuracy.

    Args:
        args: dict providing 'featureFunction' (applied to every raw datum),
            'classifier' (must offer ``train`` and ``classify``) and
            'printImage' (used to display feature sets).
        options: object with the attributes ``training``, ``test``,
            ``data``, ``odds``, ``weights``, ``classifier``, ``label1`` and
            ``label2``. ``classifier`` is compared with the module-level
            constants ``NB`` and ``PT``.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data == "faces":
        width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        trainingFiles = ("facedata/facedatatrain", "facedata/facedatatrainlabels")
        # Validation reads its own files; loading "facedatatrain" again
        # would validate on examples the classifier was trained on.
        validationFiles = ("facedata/facedatavalidation", "facedata/facedatavalidationlabels")
        testFiles = ("facedata/facedatatest", "facedata/facedatatestlabels")
    else:
        width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        trainingFiles = ("digitdata/trainingimages", "digitdata/traininglabels")
        validationFiles = ("digitdata/validationimages", "digitdata/validationlabels")
        testFiles = ("digitdata/testimages", "digitdata/testlabels")

    def loadSplit(files, count):
        # Data first, then labels, both with the same count.
        dataPath, labelsPath = files
        return (samples.loadDataFile(dataPath, count, width, height),
                samples.loadLabelsFile(labelsPath, count))

    rawTrainingData, trainingLabels = loadSplit(trainingFiles, numTraining)
    rawValidationData, validationLabels = loadSplit(validationFiles, numTest)
    rawTestData, testLabels = loadSplit(testFiles, numTest)

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    def reportAccuracy(heading, guesses, labels):
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty label set would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s %s correct out of %s (%.1f%%)." % (heading, correct, total, percent))

    # Conduct training and testing
    print("Start training...")
    start = time.time()
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    elapsed = time.time() - start
    print("Traning time: " + str(elapsed))

    print("Start validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy("Validation result: ", guesses, validationLabels)
    print("Start testing...")
    guesses = classifier.classify(testData)
    reportAccuracy("Testing result: ", guesses, testLabels)
    #analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    # Logical "and" (not bitwise "&") so any truthy option value works.
    if options.odds and options.classifier == NB:
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        # Only the NB classifier reaches this point, so only its heading is needed.
        print("=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2))
        printImage(features_odds)

    if options.weights and options.classifier == PT:
        for label in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(label)
            print("=== Features with high weight for label %d ===" % label)
            printImage(features_weights)
示例10: runClassifier
def runClassifier(args, options):
    """Train a classifier on the face or digit data and report its accuracy.

    Args:
        args: dict providing 'featureFunction' (applied to every raw datum),
            'classifier' (must offer ``train`` and ``classify``) and
            'printImage' (used to display feature sets).
        options: dict with the keys 'train', 'data', 'odds', 'classifier',
            'class1' and 'class2'.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options['train']
    if options['data'] == "faces":
        width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        trainingFiles = ("facedata/facedatatrain", "facedata/facedatatrainlabels")
        # Validation reads its own files; loading "facedatatrain" again
        # would validate on examples the classifier was trained on.
        validationFiles = ("facedata/facedatavalidation", "facedata/facedatavalidationlabels")
        testFiles = ("facedata/facedatatest", "facedata/facedatatestlabels")
    else:
        width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        trainingFiles = ("digitdata/trainingimages", "digitdata/traininglabels")
        validationFiles = ("digitdata/validationimages", "digitdata/validationlabels")
        testFiles = ("digitdata/testimages", "digitdata/testlabels")

    def loadSplit(files, count):
        # Data first, then labels, both with the same count.
        dataPath, labelsPath = files
        return (samples.loadDataFile(dataPath, count, width, height),
                samples.loadLabelsFile(labelsPath, count))

    rawTrainingData, trainingLabels = loadSplit(trainingFiles, numTraining)
    rawValidationData, validationLabels = loadSplit(validationFiles, TEST_SET_SIZE)
    rawTestData, testLabels = loadSplit(testFiles, TEST_SET_SIZE)

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    def reportAccuracy(guesses, labels):
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty label set would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    util.pause()
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    # Logical "and" (not bitwise "&") so any truthy options['odds'] works.
    if options['odds'] and options['classifier'] != "mostfrequent":
        class1, class2 = options['class1'], options['class2']
        features_class1, features_class2, features_odds = classifier.findHighOddsFeatures(class1, class2)
        if options['classifier'] == "naivebayes":
            string1 = "=== Features with max P(F_i = 1 | class = %d) ===" % class1
            string2 = "=== Features with max P(F_i = 1 | class = %d) ===" % class2
            string3 = "=== Features with highest odd ratio of class %d over class %d ===" % (class1, class2)
        else:
            string1 = "=== Features with largest weight for class %d ===" % class1
            string2 = "=== Features with largest weight for class %d ===" % class2
            string3 = "=== Features with for which weight(class %d)-weight(class %d) is biggest ===" % (class1, class2)

        # Show each heading followed by the matching feature set.
        for heading, features in ((string1, features_class1),
                                  (string2, features_class2),
                                  (string3, features_odds)):
            print(heading)
            printImage(features)
示例11:
"""This file is in Beta and is not the real autograder."""
import data_classification_utils as dcu
import samples
import numpy as np
# Load one item from the digit training images.
# NOTE(review): the trailing 28, 28 are presumably width and height; confirm against samples.loadDataFile.
training_data = samples.loadDataFile("digitdata/trainingimages", 1, 28, 28)
# Featurize that first item; the result is compared with `expected` below.
features = dcu.simple_image_featurization(training_data[0])
expected = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 2., 2., 2., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 2., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 2., 2., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 2., 2., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 
2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 2., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 2., 2., 2., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 2., 2., 2., 2., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 2., 2., 2., 2., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 2., 2., 2., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 2., 2., 2., 2., 2., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 2., 2., 2., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
# Report the computed features when they differ from the expected vector.
if not np.array_equal(features, expected):
    print("Error, featurization is incorrect. You reported: ")
    print(features)
示例12: runClassifier
def runClassifier(args, options):
    """Train a classifier on the face or digit data and report its accuracy.

    Args:
        args: dict providing 'featureFunction' (applied to every raw datum),
            'classifier' (must offer ``train`` and ``classify``) and
            'printImage' (used to display feature sets).
        options: object with the attributes ``training``, ``test``,
            ``data``, ``odds``, ``weights``, ``classifier``, ``label1`` and
            ``label2``.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data == "faces":
        width, height = FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        trainingFiles = ("facedata/facedatatrain", "facedata/facedatatrainlabels")
        # Validation reads its own files; loading "facedatatrain" again
        # would validate on examples the classifier was trained on.
        validationFiles = ("facedata/facedatavalidation", "facedata/facedatavalidationlabels")
        testFiles = ("facedata/facedatatest", "facedata/facedatatestlabels")
    else:
        width, height = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        trainingFiles = ("digitdata/trainingimages", "digitdata/traininglabels")
        validationFiles = ("digitdata/validationimages", "digitdata/validationlabels")
        testFiles = ("digitdata/testimages", "digitdata/testlabels")

    def loadSplit(files, count):
        # Data first, then labels, both with the same count.
        dataPath, labelsPath = files
        return (samples.loadDataFile(dataPath, count, width, height),
                samples.loadLabelsFile(labelsPath, count))

    rawTrainingData, trainingLabels = loadSplit(trainingFiles, numTraining)
    rawValidationData, validationLabels = loadSplit(validationFiles, numTest)
    rawTestData, testLabels = loadSplit(testFiles, numTest)

    # Extract features
    # (Per the original debugging notes: the raw data is a list of Datum and
    # the extracted features are a list of Counter.)
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    def reportAccuracy(guesses, labels):
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty label set would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)
    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    # Logical "and" (not bitwise "&") so any truthy option value works.
    if options.odds and options.classifier in ("naiveBayes", "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        # Only naive Bayes reaches this point, so only its heading is needed.
        print("=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2))
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for label in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(label)
            print("=== Features with high weight for label %d ===" % label)
            printImage(features_weights)