本文整理汇总了Python中shogun.Features.RealFeatures.add_subset方法的典型用法代码示例。如果您正苦于以下问题:Python RealFeatures.add_subset方法的具体用法?Python RealFeatures.add_subset怎么用?Python RealFeatures.add_subset使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类shogun.Features.RealFeatures
的用法示例。
在下文中一共展示了RealFeatures.add_subset方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: normally
# 需要导入模块: from shogun.Features import RealFeatures [as 别名]
# 或者: from shogun.Features.RealFeatures import add_subset [as 别名]
# use data generator class to produce example data
data=DataGenerator.generate_mean_data(m,dim,difference)
# create shogun feature representation
features=RealFeatures(data)
# compute median data distance in order to use for Gaussian kernel width
# 0.5*median_distance normally (factor two in Gaussian kernel)
# However, shoguns kernel width is different to usual parametrization
# Therefore 0.5*2*median_distance^2
# Use a subset of data for that, only 200 elements. Median is stable
# Using all distances here would blow up memory
subset=Math.randperm_vec(features.get_num_vectors())
subset=subset[0:200]
features.add_subset(subset)
dist=EuclideanDistance(features, features)
distances=dist.get_distance_matrix()
features.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma=median_distance**2
print "median distance for Gaussian kernel:", sigma
kernel=GaussianKernel(10,sigma)
# use biased statistic
mmd=LinearTimeMMD(kernel,features, m)
# sample alternative distribution
alt_samples=zeros(num_null_samples)
for i in range(len(alt_samples)):
data=DataGenerator.generate_mean_data(m,dim,difference)
示例2: statistics_linear_time_mmd
# 需要导入模块: from shogun.Features import RealFeatures [as 别名]
# 或者: from shogun.Features.RealFeatures import add_subset [as 别名]
def statistics_linear_time_mmd ():
from shogun.Features import RealFeatures
from shogun.Features import DataGenerator
from shogun.Kernel import GaussianKernel
from shogun.Statistics import LinearTimeMMD
from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN
from shogun.Distance import EuclideanDistance
from shogun.Mathematics import Statistics, Math
# note that the linear time statistic is designed for much larger datasets
n=10000
dim=2
difference=0.5
# use data generator class to produce example data
# in pratice, this generate data function could be replaced by a method
# that obtains data from a stream
data=DataGenerator.generate_mean_data(n,dim,difference)
print "dimension means of X", mean(data.T[0:n].T)
print "dimension means of Y", mean(data.T[n:2*n+1].T)
# create shogun feature representation
features=RealFeatures(data)
# compute median data distance in order to use for Gaussian kernel width
# 0.5*median_distance normally (factor two in Gaussian kernel)
# However, shoguns kernel width is different to usual parametrization
# Therefore 0.5*2*median_distance^2
# Use a subset of data for that, only 200 elements. Median is stable
# Using all distances here would blow up memory
subset=Math.randperm_vec(features.get_num_vectors())
subset=subset[0:200]
features.add_subset(subset)
dist=EuclideanDistance(features, features)
distances=dist.get_distance_matrix()
features.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma=median_distance**2
print "median distance for Gaussian kernel:", sigma
kernel=GaussianKernel(10,sigma)
mmd=LinearTimeMMD(kernel,features, n)
# perform test: compute p-value and test if null-hypothesis is rejected for
# a test level of 0.05
statistic=mmd.compute_statistic()
print "test statistic:", statistic
# do the same thing using two different way to approximate null-dstribution
# bootstrapping and gaussian approximation (ony for really large samples)
alpha=0.05
print "computing p-value using bootstrapping"
mmd.set_null_approximation_method(BOOTSTRAP)
mmd.set_bootstrap_iterations(50) # normally, far more iterations are needed
p_value=mmd.compute_p_value(statistic)
print "p_value:", p_value
print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
print "computing p-value using gaussian approximation"
mmd.set_null_approximation_method(MMD1_GAUSSIAN)
p_value=mmd.compute_p_value(statistic)
print "p_value:", p_value
print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
# sample from null distribution (these may be plotted or whatsoever)
# mean should be close to zero, variance stronly depends on data/kernel
mmd.set_null_approximation_method(BOOTSTRAP)
mmd.set_bootstrap_iterations(10) # normally, far more iterations are needed
null_samples=mmd.bootstrap_null()
print "null mean:", mean(null_samples)
print "null variance:", var(null_samples)
示例3: hsic_graphical
# 需要导入模块: from shogun.Features import RealFeatures [as 别名]
# 或者: from shogun.Features.RealFeatures import add_subset [as 别名]
def hsic_graphical():
# parameters, change to get different results
m=250
difference=3
# setting the angle lower makes a harder test
angle=pi/30
# number of samples taken from null and alternative distribution
num_null_samples=500
# use data generator class to produce example data
data=DataGenerator.generate_sym_mix_gauss(m,difference,angle)
# create shogun feature representation
features_x=RealFeatures(array([data[0]]))
features_y=RealFeatures(array([data[1]]))
# compute median data distance in order to use for Gaussian kernel width
# 0.5*median_distance normally (factor two in Gaussian kernel)
# However, shoguns kernel width is different to usual parametrization
# Therefore 0.5*2*median_distance^2
# Use a subset of data for that, only 200 elements. Median is stable
subset=int32(array([x for x in range(features_x.get_num_vectors())])) # numpy
subset=random.permutation(subset) # numpy permutation
subset=subset[0:200]
features_x.add_subset(subset)
dist=EuclideanDistance(features_x, features_x)
distances=dist.get_distance_matrix()
features_x.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma_x=median_distance**2
features_y.add_subset(subset)
dist=EuclideanDistance(features_y, features_y)
distances=dist.get_distance_matrix()
features_y.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma_y=median_distance**2
print "median distance for Gaussian kernel on x:", sigma_x
print "median distance for Gaussian kernel on y:", sigma_y
kernel_x=GaussianKernel(10,sigma_x)
kernel_y=GaussianKernel(10,sigma_y)
# create hsic instance. Note that this is a convienience constructor which copies
# feature data. features_x and features_y are not these used in hsic.
# This is only for user-friendlyness. Usually, its ok to do this.
# Below, the alternative distribution is sampled, which means
# that new feature objects have to be created in each iteration (slow)
# However, normally, the alternative distribution is not sampled
hsic=HSIC(kernel_x,kernel_y,features_x,features_y)
# sample alternative distribution
alt_samples=zeros(num_null_samples)
for i in range(len(alt_samples)):
data=DataGenerator.generate_sym_mix_gauss(m,difference,angle)
features_x.set_feature_matrix(array([data[0]]))
features_y.set_feature_matrix(array([data[1]]))
# re-create hsic instance everytime since feature objects are copied due to
# useage of convienience constructor
hsic=HSIC(kernel_x,kernel_y,features_x,features_y)
alt_samples[i]=hsic.compute_statistic()
# sample from null distribution
# bootstrapping, biased statistic
hsic.set_null_approximation_method(BOOTSTRAP)
hsic.set_bootstrap_iterations(num_null_samples)
null_samples_boot=hsic.bootstrap_null()
# fit gamma distribution, biased statistic
hsic.set_null_approximation_method(HSIC_GAMMA)
gamma_params=hsic.fit_null_gamma()
# sample gamma with parameters
null_samples_gamma=array([gamma(gamma_params[0], gamma_params[1]) for _ in range(num_null_samples)])
# plot
figure()
# plot data x and y
subplot(2,2,1)
gca().xaxis.set_major_locator( MaxNLocator(nbins = 4) ) # reduce number of x-ticks
gca().yaxis.set_major_locator( MaxNLocator(nbins = 4) ) # reduce number of x-ticks
grid(True)
plot(data[0], data[1], 'o')
title('Data, rotation=$\pi$/'+str(1/angle*pi)+'\nm='+str(m))
xlabel('$x$')
ylabel('$y$')
# compute threshold for test level
alpha=0.05
null_samples_boot.sort()
null_samples_gamma.sort()
thresh_boot=null_samples_boot[floor(len(null_samples_boot)*(1-alpha))];
thresh_gamma=null_samples_gamma[floor(len(null_samples_gamma)*(1-alpha))];
type_one_error_boot=sum(null_samples_boot<thresh_boot)/float(num_null_samples)
type_one_error_gamma=sum(null_samples_gamma<thresh_boot)/float(num_null_samples)
# plot alternative distribution with threshold
subplot(2,2,2)
#.........这里部分代码省略.........
示例4: normally
# 需要导入模块: from shogun.Features import RealFeatures [as 别名]
# 或者: from shogun.Features.RealFeatures import add_subset [as 别名]
# use data generator class to produce example data
data=DataGenerator.generate_sym_mix_gauss(m,difference,angle)
# create shogun feature representation
features_x=RealFeatures(array([data[0]]))
features_y=RealFeatures(array([data[1]]))
# compute median data distance in order to use for Gaussian kernel width
# 0.5*median_distance normally (factor two in Gaussian kernel)
# However, shoguns kernel width is different to usual parametrization
# Therefore 0.5*2*median_distance^2
# Use a subset of data for that, only 200 elements. Median is stable
subset=Math.randperm_vec(features_x.get_num_vectors())
subset=subset[0:200]
features_x.add_subset(subset)
dist=EuclideanDistance(features_x, features_x)
distances=dist.get_distance_matrix()
features_x.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma_x=median_distance**2
features_y.add_subset(subset)
dist=EuclideanDistance(features_y, features_y)
distances=dist.get_distance_matrix()
features_y.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma_y=median_distance**2
print "median distance for Gaussian kernel on x:", sigma_x
print "median distance for Gaussian kernel on y:", sigma_y
kernel_x=GaussianKernel(10,sigma_x)
kernel_y=GaussianKernel(10,sigma_y)
示例5: statistics_hsic
# 需要导入模块: from shogun.Features import RealFeatures [as 别名]
# 或者: from shogun.Features.RealFeatures import add_subset [as 别名]
def statistics_hsic (n, difference, angle):
from shogun.Features import RealFeatures
from shogun.Features import DataGenerator
from shogun.Kernel import GaussianKernel
from shogun.Statistics import HSIC
from shogun.Statistics import BOOTSTRAP, HSIC_GAMMA
from shogun.Distance import EuclideanDistance
from shogun.Mathematics import Math, Statistics, IntVector
# init seed for reproducability
Math.init_random(1)
# note that the HSIC has to store kernel matrices
# which upper bounds the sample size
# use data generator class to produce example data
data=DataGenerator.generate_sym_mix_gauss(n,difference,angle)
#plot(data[0], data[1], 'x');show()
# create shogun feature representation
features_x=RealFeatures(array([data[0]]))
features_y=RealFeatures(array([data[1]]))
# compute median data distance in order to use for Gaussian kernel width
# 0.5*median_distance normally (factor two in Gaussian kernel)
# However, shoguns kernel width is different to usual parametrization
# Therefore 0.5*2*median_distance^2
# Use a subset of data for that, only 200 elements. Median is stable
subset=IntVector.randperm_vec(features_x.get_num_vectors())
subset=subset[0:200]
features_x.add_subset(subset)
dist=EuclideanDistance(features_x, features_x)
distances=dist.get_distance_matrix()
features_x.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma_x=median_distance**2
features_y.add_subset(subset)
dist=EuclideanDistance(features_y, features_y)
distances=dist.get_distance_matrix()
features_y.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma_y=median_distance**2
#print "median distance for Gaussian kernel on x:", sigma_x
#print "median distance for Gaussian kernel on y:", sigma_y
kernel_x=GaussianKernel(10,sigma_x)
kernel_y=GaussianKernel(10,sigma_y)
hsic=HSIC(kernel_x,kernel_y,features_x,features_y)
# perform test: compute p-value and test if null-hypothesis is rejected for
# a test level of 0.05 using different methods to approximate
# null-distribution
statistic=hsic.compute_statistic()
#print "HSIC:", statistic
alpha=0.05
#print "computing p-value using bootstrapping"
hsic.set_null_approximation_method(BOOTSTRAP)
# normally, at least 250 iterations should be done, but that takes long
hsic.set_bootstrap_iterations(100)
# bootstrapping allows usage of unbiased or biased statistic
p_value_boot=hsic.compute_p_value(statistic)
thresh_boot=hsic.compute_threshold(alpha)
#print "p_value:", p_value_boot
#print "threshold for 0.05 alpha:", thresh_boot
#print "p_value <", alpha, ", i.e. test sais p and q are dependend:", p_value_boot<alpha
#print "computing p-value using gamma method"
hsic.set_null_approximation_method(HSIC_GAMMA)
p_value_gamma=hsic.compute_p_value(statistic)
thresh_gamma=hsic.compute_threshold(alpha)
#print "p_value:", p_value_gamma
#print "threshold for 0.05 alpha:", thresh_gamma
#print "p_value <", alpha, ", i.e. test sais p and q are dependend::", p_value_gamma<alpha
# sample from null distribution (these may be plotted or whatsoever)
# mean should be close to zero, variance stronly depends on data/kernel
# bootstrapping, biased statistic
#print "sampling null distribution using bootstrapping"
hsic.set_null_approximation_method(BOOTSTRAP)
hsic.set_bootstrap_iterations(100)
null_samples=hsic.bootstrap_null()
#print "null mean:", mean(null_samples)
#print "null variance:", var(null_samples)
#hist(null_samples, 100); show()
return p_value_boot, thresh_boot, p_value_gamma, thresh_gamma, statistic, null_samples
示例6: statistics_quadratic_time_mmd
# 需要导入模块: from shogun.Features import RealFeatures [as 别名]
# 或者: from shogun.Features.RealFeatures import add_subset [as 别名]
def statistics_quadratic_time_mmd ():
from shogun.Features import RealFeatures
from shogun.Features import DataGenerator
from shogun.Kernel import GaussianKernel
from shogun.Statistics import QuadraticTimeMMD
from shogun.Statistics import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
from shogun.Distance import EuclideanDistance
from shogun.Mathematics import Statistics, Math
# note that the quadratic time mmd has to store kernel matrices
# which upper bounds the sample size
n=500
dim=2
difference=0.5
# use data generator class to produce example data
data=DataGenerator.generate_mean_data(n,dim,difference)
print "dimension means of X", mean(data.T[0:n].T)
print "dimension means of Y", mean(data.T[n:2*n+1].T)
# create shogun feature representation
features=RealFeatures(data)
# compute median data distance in order to use for Gaussian kernel width
# 0.5*median_distance normally (factor two in Gaussian kernel)
# However, shoguns kernel width is different to usual parametrization
# Therefore 0.5*2*median_distance^2
# Use a subset of data for that, only 200 elements. Median is stable
subset=Math.randperm_vec(features.get_num_vectors())
subset=subset[0:200]
features.add_subset(subset)
dist=EuclideanDistance(features, features)
distances=dist.get_distance_matrix()
features.remove_subset()
median_distance=Statistics.matrix_median(distances, True)
sigma=median_distance**2
print "median distance for Gaussian kernel:", sigma
kernel=GaussianKernel(10,sigma)
mmd=QuadraticTimeMMD(kernel,features, n)
# perform test: compute p-value and test if null-hypothesis is rejected for
# a test level of 0.05 using different methods to approximate
# null-distribution
statistic=mmd.compute_statistic()
alpha=0.05
print "computing p-value using bootstrapping"
mmd.set_null_approximation_method(BOOTSTRAP)
# normally, at least 250 iterations should be done, but that takes long
mmd.set_bootstrap_iterations(10)
# bootstrapping allows usage of unbiased or biased statistic
mmd.set_statistic_type(UNBIASED)
p_value=mmd.compute_p_value(statistic)
print "p_value:", p_value
print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
# only can do this if SHOGUN was compiled with LAPACK so check
if "sample_null_spectrum" in dir(QuadraticTimeMMD):
print "computing p-value using spectrum method"
mmd.set_null_approximation_method(MMD2_SPECTRUM)
# normally, at least 250 iterations should be done, but that takes long
mmd.set_num_samples_sepctrum(50)
mmd.set_num_eigenvalues_spectrum(n-10)
# spectrum method computes p-value for biased statistics only
mmd.set_statistic_type(BIASED)
p_value=mmd.compute_p_value(statistic)
print "p_value:", p_value
print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
print "computing p-value using gamma method"
mmd.set_null_approximation_method(MMD2_GAMMA)
# gamma method computes p-value for biased statistics only
mmd.set_statistic_type(BIASED)
p_value=mmd.compute_p_value(statistic)
print "p_value:", p_value
print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
# sample from null distribution (these may be plotted or whatsoever)
# mean should be close to zero, variance stronly depends on data/kernel
# bootstrapping, biased statistic
print "sampling null distribution using bootstrapping"
mmd.set_null_approximation_method(BOOTSTRAP)
mmd.set_statistic_type(BIASED)
mmd.set_bootstrap_iterations(10)
null_samples=mmd.bootstrap_null()
print "null mean:", mean(null_samples)
print "null variance:", var(null_samples)
# sample from null distribution (these may be plotted or whatsoever)
# mean should be close to zero, variance stronly depends on data/kernel
# spectrum, biased statistic
print "sampling null distribution using spectrum method"
mmd.set_null_approximation_method(MMD2_SPECTRUM)
mmd.set_statistic_type(BIASED)
# 200 samples using 100 eigenvalues
null_samples=mmd.sample_null_spectrum(50,10)
print "null mean:", mean(null_samples)
print "null variance:", var(null_samples)