本文整理匯總了Python中shogun.Features.CombinedFeatures類的典型用法代碼示例。如果您正苦於以下問題：Python CombinedFeatures類的具體用法？Python CombinedFeatures怎麼用？Python CombinedFeatures使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了CombinedFeatures類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: evaluate
def evaluate(options, svm, kernel, features, motifs):
    """Evaluate query examples using a trained kernel.

    Motif occurrences are looked up for ``options.query`` /
    ``options.qgff``. One feature object per motif plus one feature object
    built from the motif positions are collected in a CombinedFeatures
    container. ``kernel`` is then re-initialised with ``features`` on the
    left-hand side and the query features on the right-hand side before
    ``svm.classify()`` is called.

    Arguments:
    options -- object providing ``window_width``, ``query`` and ``qgff``
    svm -- classifier whose ``classify()`` output is reported
    kernel -- kernel to re-initialise (presumably the one ``svm`` was
              trained with -- confirm against the caller)
    features -- left-hand side features passed to ``kernel.init``
    motifs -- re-iterable collection of motif names

    Return:
    String with one ``<gene>\\t<positive|negative>\\t<output>`` line per
    classified example. The outputs are also printed to stdout as a
    tab-separated ``#example / output / split`` table.

    Raises:
    AssertionError -- if the query results lack an entry for any motif
    """
    finder_settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH, options.window_width)
    query = MotifFinder(finder_settings=finder_settings)
    query.setFastaFile(options.query)
    query.setMotifs(options.qgff)
    qmotifs, qpositions = query.getResults()
    feats_query = CombinedFeatures()
    wds_svm = EasySVM.EasySVM(kirmes_ini.WDS_KERNEL_PARAMETERS)

    # Checked explicitly instead of with `assert`, so the validation still
    # runs under `python -O`. AssertionError is kept as the exception type
    # for callers that already catch it.
    missing = set(motifs).difference(qmotifs.keys())
    if missing:
        print("The motif positions in the query sequence are incomplete, there are no positions for:")
        print(missing)
        raise AssertionError(
            "no query positions for motifs: " + ", ".join(str(m) for m in missing))

    # One string-feature object per motif, in the order given by `motifs`.
    for motif in motifs:
        feats_query.append_feature_obj(wds_svm.createFeatures(qmotifs[motif]))

    # The positions are transposed before being turned into features.
    query_positions = array(qpositions, dtype=float64).T
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    feats_query.append_feature_obj(rbf_svm.createFeatures(query_positions))

    kernel.init(features, feats_query)
    out = svm.classify().get_labels()
    qgenes = query.getGenes()

    print("#example\toutput\tsplit")
    lines = []
    for i, score in enumerate(out):
        classif = "\tpositive\t" if score >= 0 else "\tnegative\t"
        lines.append(qgenes[i] + classif + str(score) + "\n")
        print(str(i) + "\t" + str(score) + "\t0")
    # Joined once instead of growing a string inside the loop.
    return "".join(lines)
示例2: get_weighted_spectrum_kernel
def get_weighted_spectrum_kernel(subfeats_list, options):
    """Build a uniformly weighted combined spectrum kernel.

    One sub-kernel is created for every k in
    ``options.kmerlen .. options.kmerlen2`` (inclusive):
    CommWordStringKernel for k <= 8, CommUlongStringKernel otherwise.
    All entries of ``subfeats_list`` are collected in one CombinedFeatures
    object, which is used for both sides of ``kernel.init``.

    Arguments:
    subfeats_list -- list of sub-feature objects
    options -- object containing option data (``kmerlen``, ``kmerlen2``)

    Return:
    The initialised CombinedKernel. The CombinedFeatures object is not
    returned.
    """
    combined_feats = CombinedFeatures()
    for sub in subfeats_list:
        combined_feats.append_feature_obj(sub)

    combined_kernel = CombinedKernel()
    count = 0
    k = options.kmerlen
    while k <= options.kmerlen2:
        # The kernel class switches from the Word to the Ulong variant
        # above k = 8.
        kernel_class = CommWordStringKernel if k <= 8 else CommUlongStringKernel
        combined_kernel.append_kernel(kernel_class(10, False))
        count += 1
        k += 1

    combined_kernel.init(combined_feats, combined_feats)

    # Every sub-kernel gets the same weight, 1 / (number of sub-kernels).
    uniform = [1 / float(count)] * count
    combined_kernel.set_subkernel_weights(numpy.array(uniform, numpy.dtype('float64')))
    return combined_kernel
示例3: create_combined_wd_features
def create_combined_wd_features(instances, feat_type):
    """
    Create a combined WD feature object with one sub-feature per column.

    Each instance is indexed by column. For every column index of the
    first instance, that column is cut out of all instances, passed to
    get_wd_features together with feat_type, and the result is appended
    to a CombinedFeatures container. A warning is printed for every
    sequence whose length differs from the first sequence of its column.
    """
    column_count = len(instances[0])

    combined = CombinedFeatures()
    for col in range(column_count):
        # Cut column `col` out of all instances.
        column = [row[col] for row in instances]

        reference_len = len(column[0])
        for seq in column:
            if len(seq) == reference_len:
                continue
            print("warning, seq lengths differ %s %s in %s num_feat %s"
                  % (len(seq), reference_len, col, column_count))

        combined.append_feature_obj(get_wd_features(column, feat_type))

    return combined
示例4: evaluation_cross_validation_multiclass_storage
def evaluation_cross_validation_multiclass_storage(traindat=traindat, label_traindat=label_traindat):
    """Cross-validate a multiclass MKL machine and read back stored results.

    The same RealFeatures object is appended three times to a
    CombinedFeatures container, matched by three Gaussian kernels
    (second constructor argument 0.1, 1 and 2). Cross-validation uses
    stratified 5-fold splitting, 3 runs and MulticlassAccuracy as the
    criterion. A CrossValidationMulticlassStorage with an F1Measure binary
    evaluation is attached as output.

    Returns the pair (get_fold_ROC(0, 0, 0),
    get_fold_evaluation_result(0, 0, 0, 0)) read from that storage.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage, CrossValidationMulticlassStorage
    from shogun.Evaluation import MulticlassAccuracy, F1Measure
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import MulticlassLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import MKLMulticlass
    from shogun.Mathematics import Statistics, MSG_DEBUG

    # Training data: the combined features hold the same data three times.
    base_features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    for _ in range(3):
        comb_features.append_feature_obj(base_features)
    labels = MulticlassLabels(label_traindat)

    # Kernel: three Gaussians that differ only in their second argument.
    kernel = CombinedKernel()
    for width in (0.1, 1, 2):
        kernel.append_kernel(GaussianKernel(10, width))

    # The kernel is passed to the constructor and set again explicitly,
    # exactly as in the original example.
    svm = MKLMulticlass(1.0, kernel, labels)
    svm.set_kernel(kernel)

    # Stratified splitting into 5 folds, with accuracy as the criterion.
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)
    evaluation_criterium = MulticlassAccuracy()

    cross_validation = CrossValidation(
        svm, comb_features, labels, splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # The storage output collects per-fold results for a binary F1 measure.
    multiclass_storage = CrossValidationMulticlassStorage()
    multiclass_storage.append_binary_evaluation(F1Measure())
    cross_validation.add_cross_validation_output(multiclass_storage)
    cross_validation.set_num_runs(3)

    # Run cross-validation; only the storage contents are used afterwards.
    cross_validation.evaluate()

    # NOTE(review): the indices are presumably (run, fold, class) plus an
    # evaluation index for the second call -- confirm against the Shogun docs.
    fold_roc = multiclass_storage.get_fold_ROC(0, 0, 0)
    fold_eval = multiclass_storage.get_fold_evaluation_result(0, 0, 0, 0)
    return fold_roc, fold_eval
示例5: predict
def predict(self, seq, chunk_size=int(10e6)):
    """
    Predict on a whole contig, processing it in chunks of chunk_size.

    For every chunk, test features are requested from each entry of
    self.sensors, self.kernel is initialised with self.svs against those
    features, and self.svm.apply() provides the labels. The collected
    labels are padded with -42 on the left (-self.window[0] entries) and
    on the right (self.window[1] entries). The padded list is asserted to
    be as long as seq.

    Progress messages go to stderr; the running output length is printed
    to stdout.
    """
    total_len = len(seq)
    num_chunks = int(numpy.ceil(float(total_len) / float(chunk_size)))
    assert num_chunks > 0

    sys.stderr.write("number of chunks for contig: %i\n" % num_chunks)

    predictions = []
    begin, end = 0, min(chunk_size, total_len)

    # Iterate over the chunks.
    for chunk_idx in range(num_chunks):
        sys.stderr.write("processing chunk #%i\n" % chunk_idx)

        assert begin < end
        chunk = seq[begin:end]

        assert len(self.sensors) > 0
        test_feats = CombinedFeatures()
        for sensor in self.sensors:
            test_feats.append_feature_obj(sensor.get_test_features(chunk, self.window))

        sys.stderr.write("initialising kernel...")
        self.kernel.init(self.svs, test_feats)
        sys.stderr.write("..done\n")

        self.svm.set_kernel(self.kernel)
        labels = self.svm.apply()

        # Labels are read one by one (the original author's workaround
        # for a problem with get_labels()).
        chunk_out = []
        for label_idx in range(0, labels.get_num_labels()):
            chunk_out.append(labels.get_label(label_idx))
        assert len(chunk_out) > 0
        predictions.extend(chunk_out)

        print("len out %s" % len(predictions))

        # Advance to the next chunk.
        begin = end
        end = min(end + chunk_size, total_len)

    # Pad both ends with the placeholder value -42, then concatenate.
    left_pad = (-self.window[0]) * [-42]
    right_pad = self.window[1] * [-42]
    padded = left_pad + predictions + right_pad

    assert len(padded) == len(seq)
    return padded
示例6: training_run
def training_run(options):
    """Conduct a training run and write the requested HTML report.

    Motifs are located in the positive and negative FASTA/GFF inputs.
    One WeightedDegreePositionStringKernel per motif (motifs in sorted
    order) plus one GaussianKernel on the motif positions are combined
    and used to train a LibSVM. Depending on ``options.contrib``,
    ``options.logos`` and ``options.query``, the corresponding report
    sections are produced and handed to ``htmlize``.

    Arguments:
    options -- object providing ``window_width``, ``replace``,
               ``positives``, ``pgff``, ``negatives``, ``ngff``,
               ``output_path``, ``contrib``, ``logos``, ``query`` and
               ``output_html``

    Return:
    None. Results are communicated through ``htmlize`` and through
    whatever the report helpers write below ``options.output_path``.
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH, options.window_width, options.replace)

    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()

    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)

    # The example count is taken from an arbitrary motif entry.
    # list(...) keeps this working when values() returns a view rather
    # than a list.
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])

    # Creating kernel objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    # sorted() yields a new sorted list regardless of what keys() returns.
    motifs = sorted(pmotifs.keys())

    # Adding k-mer kernels: positives first, then negatives, per motif.
    for motif in motifs:
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams["degree"])
        # The same shift value is used at every sequence position.
        wds_kernel.set_shifts(
            wds_kparams["shift"] * ones(wds_kparams["seqlength"], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    # Position kernel over the transposed position matrix.
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)

    # Labels: +1 for every positive example, -1 for every negative one.
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)

    rbf_kernel = GaussianKernel(
        position_features, position_features, kirmes_ini.RBF_KERNEL_PARAMETERS["width"])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)

    # Training
    svm.train()

    # makedirs (not mkdir) so an output path with missing parent
    # directories is created instead of raising OSError.
    if not os.path.exists(options.output_path):
        os.makedirs(options.output_path)

    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)
示例7: evaluation_cross_validation_mkl_weight_storage
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    """Cross-validate a binary MKL machine while recording the MKL weights.

    The same RealFeatures object is appended three times to a
    CombinedFeatures container, matched by three Gaussian kernels
    (second constructor argument 0.1, 1 and 2). Cross-validation uses
    stratified 5-fold splitting, 3 runs and accuracy as the criterion.
    A CrossValidationMKLStorage output is attached and its weights are
    fetched after evaluation.

    Nothing is returned.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import LibSVM, MKLClassification
    from shogun.Mathematics import Statistics

    # Training data: the combined features hold the same data three times.
    base_features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    for _ in range(3):
        comb_features.append_feature_obj(base_features)
    labels = BinaryLabels(label_traindat)

    # Kernel: three Gaussians that differ only in their second argument.
    kernel = CombinedKernel()
    for width in (0.1, 1, 2):
        kernel.append_kernel(GaussianKernel(10, width))

    # MKL on top of LibSVM. Interleaved optimisation is switched off; the
    # original author attributes this to a memory bug.
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # Stratified splitting into 5 folds, with accuracy as the criterion.
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    cross_validation = CrossValidation(
        svm, comb_features, labels, splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # The storage output records the MKL weights during cross-validation.
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    # Run cross-validation, then fetch the recorded weights.
    cross_validation.evaluate()
    mkl_storage.get_mkl_weights()
示例8: statistics_linear_time_mmd_kernel_choice
def statistics_linear_time_mmd_kernel_choice():
    """Learn kernel weights for a linear-time MMD over Gaussian kernels.

    Data comes from gen_data.create_mean_data(n, dim, difference) with
    n=50000, dim=5, difference=2. X and Y are concatenated along axis 1
    into one matrix, and that matrix is wrapped once per kernel.
    Fifteen Gaussian kernels are combined, their weights are optimised
    through LinearTimeMMD, and the weights plus the index of the largest
    one are printed. Nothing is returned.
    """
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Statistics import LinearTimeMMD
    from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN

    # Note that the linear time statistic is designed for much larger
    # datasets.
    n = 50000
    dim = 5
    difference = 2

    # The original author describes the data as standard normal, with a
    # mean shift of `difference` in one dimension of Y.
    (X, Y) = gen_data.create_mean_data(n, dim, difference)

    # The MMD object receives both samples as a single feature object.
    Z = concatenate((X, Y), axis=1)

    print("dimension means of X %s" % ([mean(row) for row in X],))
    print("dimension means of Y %s" % ([mean(row) for row in Y],))

    # Candidate kernels: Gaussians with sigma = 2^-5, ..., 2^9.
    sigmas = array([2 ** exponent for exponent in range(-5, 10)])
    # The value handed to GaussianKernel is 2 * sigma^2.
    shogun_sigmas = array([s * s * 2 for s in sigmas])

    kernel = CombinedKernel()
    features = CombinedFeatures()

    # Every kernel gets its own RealFeatures wrapper around the same Z.
    for width in shogun_sigmas:
        kernel.append_kernel(GaussianKernel(10, width))
        features.append_feature_obj(RealFeatures(Z))

    mmd = LinearTimeMMD(kernel, features, n)

    print("start learning kernel weights")
    mmd.set_opt_regularization_eps(10E-5)
    mmd.set_opt_low_cut(10E-5)
    mmd.set_opt_max_iterations(1000)
    mmd.set_opt_epsilon(10E-7)
    mmd.optimize_kernel_weights()

    weights = kernel.get_subkernel_weights()
    print("learned weights: %s" % (weights,))
    print("index of max weight %s" % (weights.argmax(),))
示例9: statistics_linear_time_mmd_kernel_choice
def statistics_linear_time_mmd_kernel_choice():
    """Learn kernel weights for a linear-time MMD over Gaussian kernels.

    Data comes from DataGenerator.generate_mean_data(n, dim, difference)
    with n=50000, dim=5, difference=2, and is wrapped once per kernel.
    Fifteen Gaussian kernels are combined, their weights are optimised
    through LinearTimeMMD, and the weights plus the index of the largest
    one are printed. Nothing is returned.
    """
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Features import DataGenerator
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Statistics import LinearTimeMMD
    from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN

    # Note that the linear time statistic is designed for much larger
    # datasets.
    n = 50000
    dim = 5
    difference = 2

    # In practice the generator could be replaced by a method that
    # obtains data from a stream (original author's note).
    data = DataGenerator.generate_mean_data(n, dim, difference)

    # mean() is applied to each whole slice, so one number is printed per
    # sample. The slices take columns [0, n) and [n, 2n] of `data`.
    x_part = data.T[0:n].T
    y_part = data.T[n:2*n+1].T
    print("dimension means of X %s" % (mean(x_part),))
    print("dimension means of Y %s" % (mean(y_part),))

    # Candidate kernels: Gaussians with sigma = 2^-5, ..., 2^9.
    sigmas = array([2 ** exponent for exponent in range(-5, 10)])
    # The value handed to GaussianKernel is 2 * sigma^2.
    shogun_sigmas = array([s * s * 2 for s in sigmas])

    kernel = CombinedKernel()
    features = CombinedFeatures()

    # Every kernel gets its own RealFeatures wrapper around the same data.
    for width in shogun_sigmas:
        kernel.append_kernel(GaussianKernel(10, width))
        features.append_feature_obj(RealFeatures(data))

    mmd = LinearTimeMMD(kernel, features, n)

    print("start learning kernel weights")
    mmd.set_opt_regularization_eps(10E-5)
    mmd.set_opt_low_cut(10E-5)
    mmd.set_opt_max_iterations(1000)
    mmd.set_opt_epsilon(10E-7)
    mmd.optimize_kernel_weights()

    weights = kernel.get_subkernel_weights()
    print("learned weights: %s" % (weights,))
    print("index of max weight %s" % (weights.argmax(),))
示例10: create_promoter_features
def create_promoter_features(data, param):
    """
    Create combined promoter features from three sequence regions.

    split_data_promoter divides data into a (center, left, right) triple.
    The center part becomes DNA StringCharFeatures, while the left and
    right parts go through get_spectrum_features. The three feature
    objects are appended in the order center, left, right.

    @param data: input handed to split_data_promoter
    @param param: mapping providing "center_offset" and "center_pos"
    @return: CombinedFeatures holding the three sub-features
    """
    print("creating promoter features")

    regions = split_data_promoter(data, param["center_offset"], param["center_pos"])
    (center, left, right) = regions

    # Base features for the three regions.
    center_feat = StringCharFeatures(DNA)
    center_feat.set_features(center)
    left_feat = get_spectrum_features(left)
    right_feat = get_spectrum_features(right)

    # Combined container; the append order is center, left, right.
    combined = CombinedFeatures()
    for part in (center_feat, left_feat, right_feat):
        combined.append_feature_obj(part)

    return combined
示例11: _predict
def _predict(self, predictor, examples, task_name):
    """
    Make predictions on examples using a trained predictor.

    Features for the examples are appended once per sub-kernel. Each
    sub-kernel's normalizer is told that all right-hand side examples
    belong to task_id. The kernel held by the SVM is then re-initialised
    against the new features before classifying.

    @param predictor: trained predictor, unpacked as
                      (task_id, combined_kernel, svm, param)
    @type predictor: tuple
    @param examples: list of examples
    @type examples: list<object>
    @param task_name: task name (not used by this method)
    @type task_name: str
    @return: labels obtained from svm.classify()
    """
    (task_id, combined_kernel, svm, param) = predictor

    # Shogun features for the examples, shared by all sub-kernels.
    shared_feat = shogun_factory.create_features(examples, param)

    # Every right-hand side example is assigned to the same task.
    rhs_tasks = [task_id] * len(examples)

    combined_feat = CombinedFeatures()
    num_subkernels = combined_kernel.get_num_subkernels()
    for kernel_idx in range(num_subkernels):
        combined_feat.append_feature_obj(shared_feat)

        # Fetch the normalizer and cast it with the dedicated SWIG helper
        # so that the task vector can be set on it.
        generic_normalizer = combined_kernel.get_kernel(kernel_idx).get_normalizer()
        task_normalizer = KernelNormalizerToMultitaskKernelMaskPairNormalizer(generic_normalizer)
        task_normalizer.set_task_vector_rhs(rhs_tasks)

    # Re-initialise the SVM's own kernel, keeping its left-hand side.
    svm_kernel = svm.get_kernel()
    svm_kernel.init(svm_kernel.get_lhs(), combined_feat)

    # Predict.
    return svm.classify().get_labels()
示例12: construct_features
def construct_features(features):
    """
    Build combined features from whole instances and their two halves.

    Three sub-features are appended in this order: WD features over the
    complete instances, order-3 spectrum features over the first 15
    positions of each instance, and order-3 spectrum features over the
    remainder. Returns the CombinedFeatures container.
    """
    whole = list(features)
    left_part = [inst[:15] for inst in features]
    right_part = [inst[15:] for inst in features]

    sub_features = (
        get_wd_features(whole),
        get_spectrum_features(left_part, order=3),
        get_spectrum_features(right_part, order=3),
    )

    combined = CombinedFeatures()
    for sub in sub_features:
        combined.append_feature_obj(sub)

    return combined
示例13: create_combined_spectrum_features
def create_combined_spectrum_features(instances, feat_type):
    """
    Create a combined spectrum feature object with one sub-feature per column.

    Each instance is indexed by column. For every column index of the
    first instance, that column is cut out of all instances, passed to
    get_spectrum_features together with feat_type, and the result is
    appended to a CombinedFeatures container, which is returned.
    """
    num_features = len(instances[0])

    # Construct combined features.
    feat = CombinedFeatures()

    for idx in range(num_features):
        # Cut column idx.
        data = [instance[idx] for instance in instances]

        tmp_feat = get_spectrum_features(data, feat_type)
        # append_feature_obj is the CombinedFeatures method used
        # everywhere else in this file. The previous call,
        # append_features, matches no other usage here.
        feat.append_feature_obj(tmp_feat)

    return feat
示例14: mkl_binclass_modular
def mkl_binclass_modular (train_data, testdata, train_labels, test_labels, d1, d2):
    """Train a two-kernel MKL classifier and return its test accuracy.

    The combined kernel consists of a precomputed Gaussian kernel matrix
    (second constructor argument d1), wrapped in a CustomKernel, and a
    regular GaussianKernel (second constructor argument d2). After
    training on train_data / train_labels, the same pair of kernels is
    rebuilt for train-vs-test and used for prediction.

    Arguments:
    train_data -- training data handed to RealFeatures
    testdata -- test data handed to RealFeatures
    train_labels -- training labels handed to Labels
    test_labels -- sequence of expected test labels, compared to +/-1.0
    d1 -- second argument of the precomputed Gaussian kernel
    d2 -- second argument of the Gaussian kernel evaluated on the fly

    Return:
    Fraction of test examples whose thresholded output equals the
    corresponding entry of test_labels.
    """
    # Create the Gaussian train/test kernel matrices that are wrapped as
    # custom kernels below.
    tfeats = RealFeatures(train_data)
    tkernel = GaussianKernel(128, d1)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    # The parameter is named `testdata`. The original body read the
    # undefined name `test_data` here and further below.
    pfeats = RealFeatures(testdata)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # Create combined train features.
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(train_data))

    # And the corresponding combined kernel.
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(GaussianKernel(128, d2))
    kernel.init(feats_train, feats_train)

    # Train MKL.
    labels = Labels(train_labels)
    mkl = MKLClassification()

    # Interleaved optimisation is disabled; per the original author this
    # avoids using svmlight.
    mkl.set_interleaved_optimization_enabled(0)

    # Which norm to use for MKL.
    mkl.set_mkl_norm(2)

    # Set cost (neg, pos).
    mkl.set_C(1, 1)

    # Set kernel and labels.
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # Train.
    mkl.train()

    # Test: create combined test features.
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(testdata))

    # And the corresponding combined kernel.
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(GaussianKernel(128, d2))
    kernel.init(feats_train, feats_pred)

    # And classify.
    mkl.set_kernel(kernel)
    output = mkl.apply().get_labels()
    output = [1.0 if i > 0 else -1.0 for i in output]

    # Element-wise comparison that does not depend on test_labels being a
    # numpy array (a list == list comparison yields a single bool).
    num_correct = sum(1 for predicted, expected in zip(output, test_labels)
                      if predicted == expected)
    accu = num_correct / float(len(output))

    return accu
示例15: init_weighted_spectrum_kernel
def init_weighted_spectrum_kernel(kern, subfeats_list_lhs, subfeats_list_rhs):
    """Initialise a weighted spectrum kernel (wrapper function).

    The sub-features of each side are collected in their own
    CombinedFeatures container, and ``kern.init`` is called with the
    left-hand and right-hand containers. Nothing is returned.
    """
    lhs_container, rhs_container = CombinedFeatures(), CombinedFeatures()

    sides = (
        (lhs_container, subfeats_list_lhs),
        (rhs_container, subfeats_list_rhs),
    )
    for container, subfeats_list in sides:
        for sub in subfeats_list:
            container.append_feature_obj(sub)

    kern.init(lhs_container, rhs_container)