This article collects typical usage examples of the Python method sklearn.neighbors.KernelDensity.sample. If you are wondering what KernelDensity.sample does, how it is called, and what real code that uses it looks like, the curated examples below should help. You can also read further about the class that defines the method, sklearn.neighbors.KernelDensity.
The following shows 14 code examples of KernelDensity.sample, ordered by popularity by default. You can upvote the examples you find useful; your votes help surface better Python code examples.
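Before the examples, here is a minimal, self-contained sketch of the basic fit-then-sample workflow. It is not taken from any of the projects below; the bandwidth and data are purely illustrative:
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X = rng.randn(200, 2)                                   # 200 points in 2 dimensions
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)
new_points = kde.sample(n_samples=5, random_state=0)    # array of shape (5, 2)
log_density = kde.score_samples(new_points)             # log-pdf at those points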
Example 1: test_kernel_density_sampling
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a tophat sample
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1))
Example 2: XSampleBW
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
class XSampleBW(XSample):

    def __init__(self, N, sampfun, comm=MPI.COMM_WORLD):
        super(XSampleBW, self).__init__(N, sampfun, comm)
        self.I = (-1.5, 1.5)  # avoiding spurious bumps in the tails
        self.h_crit = critical_bandwidth(self.data, self.I)
        #print_all_ranks(self.comm, "self.h_crit = {}".format(self.h_crit))
        self.var = np.var(self.data)
        self.kde_h_crit = KernelDensity(kernel='gaussian', bandwidth=self.h_crit).fit(self.data.reshape(-1, 1))

    @property
    def statistic(self):
        return self.h_crit

    def resampled_statistic_below_scaled_statistic(self, lambda_scale):
        '''
            P(h_{crit}^* <= \lambda*h_{crit})
             = P(KDE(X^*, \lambda*h_{crit}) is unimodal)
        '''
        return self.is_unimodal_resample(lambda_scale)

    def is_unimodal_resample(self, lambda_val):
        data = self.kde_h_crit.sample(self.N).reshape(-1)/np.sqrt(1+self.h_crit**2/self.var)
        #print "np.var(data)/self.var = {}".format(np.var(data)/self.var)
        return is_unimodal_kde(self.h_crit*lambda_val, data, self.I)

    def probability_of_unimodal_above(self, lambda_val, gamma):
        return self.prob_resampled_statistic_below_bound_above_gamma(lambda_val, gamma)
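The division by np.sqrt(1 + self.h_crit**2/self.var) in is_unimodal_resample is the usual variance correction for the smoothed bootstrap: drawing from a Gaussian KDE with bandwidth h adds the kernel variance h**2 on top of the sample variance, and the rescaling shrinks the resample back to roughly the original variance. A minimal numerical check of this effect, not taken from the project above:
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
data = rng.randn(5000)
h = 0.5
var = np.var(data)
kde = KernelDensity(kernel='gaussian', bandwidth=h).fit(data.reshape(-1, 1))
resample = kde.sample(len(data), random_state=0).ravel()
print(np.var(resample) / var)                           # roughly 1 + h**2/var
print(np.var(resample / np.sqrt(1 + h**2/var)) / var)   # roughly 1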
Example 3: kde_fit_quantiles
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def kde_fit_quantiles(rtquants, nsamples=1000, bw=.1):
    """ takes quantile estimates and fits cumulative density function
    returns samples to pass to sns.kdeplot()
    """
    kdefit = KernelDensity(kernel='gaussian', bandwidth=bw).fit(rtquants)
    samples = kdefit.sample(n_samples=nsamples).flatten()
    return samples
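A hypothetical call (not part of the original project) might look like the following; note that scikit-learn expects a 2-D array, so one-dimensional quantile estimates need to be reshaped into a column vector first:
import numpy as np
rt_quantiles = np.array([0.35, 0.42, 0.48, 0.53, 0.61]).reshape(-1, 1)
samples = kde_fit_quantiles(rt_quantiles, nsamples=500, bw=.1)
# samples can then be handed to e.g. seaborn: sns.kdeplot(samples)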
Example 4: test_kde_sample_weights
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_kde_sample_weights():
    n_samples = 400
    size_test = 20
    weights_neutral = np.full(n_samples, 3.)
    for d in [1, 2, 10]:
        rng = np.random.RandomState(0)
        X = rng.rand(n_samples, d)
        weights = 1 + (10 * X.sum(axis=1)).astype(np.int8)
        X_repetitions = np.repeat(X, weights, axis=0)
        n_samples_test = size_test // d
        test_points = rng.rand(n_samples_test, d)
        for algorithm in ['auto', 'ball_tree', 'kd_tree']:
            for metric in ['euclidean', 'minkowski', 'manhattan',
                           'chebyshev']:
                if algorithm != 'kd_tree' or metric in KDTree.valid_metrics:
                    kde = KernelDensity(algorithm=algorithm, metric=metric)

                    # Test that adding a constant sample weight has no effect
                    kde.fit(X, sample_weight=weights_neutral)
                    scores_const_weight = kde.score_samples(test_points)
                    sample_const_weight = kde.sample(random_state=1234)
                    kde.fit(X)
                    scores_no_weight = kde.score_samples(test_points)
                    sample_no_weight = kde.sample(random_state=1234)
                    assert_allclose(scores_const_weight, scores_no_weight)
                    assert_allclose(sample_const_weight, sample_no_weight)

                    # Test equivalence between sampling and (integer) weights
                    kde.fit(X, sample_weight=weights)
                    scores_weight = kde.score_samples(test_points)
                    sample_weight = kde.sample(random_state=1234)
                    kde.fit(X_repetitions)
                    scores_ref_sampling = kde.score_samples(test_points)
                    sample_ref_sampling = kde.sample(random_state=1234)
                    assert_allclose(scores_weight, scores_ref_sampling)
                    assert_allclose(sample_weight, sample_ref_sampling)

                    # Test that sample weights has a non-trivial effect
                    diff = np.max(np.abs(scores_no_weight - scores_weight))
                    assert diff > 0.001

                    # Test invariance with respect to arbitrary scaling
                    scale_factor = rng.rand()
                    kde.fit(X, sample_weight=(scale_factor * weights))
                    scores_scaled_weight = kde.score_samples(test_points)
                    assert_allclose(scores_scaled_weight, scores_weight)
Example 5: kde3d
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def kde3d(x, y, z, data_point):
    values = np.vstack([x, y, z]).T
    # Use grid search cross-validation to optimize the bandwidth
    # params = {'bandwidth': np.logspace(-1, 1, 20)}
    kde = KernelDensity(bandwidth=0.3)
    kde.fit(values)
    kde_coords = kde.sample(10000)
    log_pdf = kde.score_samples(kde_coords)
    percentile = np.sum(log_pdf < kde.score(data_point))/10000.
    return (percentile)
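The function estimates, by Monte Carlo over 10000 points drawn from the fitted KDE, what fraction of the density mass falls below the density at data_point. A hypothetical call (not from the original project); kde.score expects a 2-D array, so the query point is passed with shape (1, 3):
import numpy as np
rng = np.random.RandomState(0)
x, y, z = rng.randn(3, 500)
pct = kde3d(x, y, z, np.array([[0.0, 0.0, 0.0]]))
print(pct)   # fraction of sampled points with lower log-density than the query point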
Example 6: pval_silverman
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def pval_silverman(data, I='auto', N_bootstrap=1000, comm=MPI.COMM_WORLD):
    I = get_I(data, I)
    data = comm.bcast(data)
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: is_unimodal_kde(
        h_crit, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    smaller_equal_crit_bandwidth = bootstrap(resamp_fun, N_bootstrap, dtype=np.bool_,
                                             comm=comm)
    return np.mean(~smaller_equal_crit_bandwidth)
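A hypothetical call to this Silverman-style bootstrap test, assuming the module's own helpers (get_I, critical_bandwidth, is_unimodal_kde, bootstrap) and mpi4py are available alongside it; the bimodal sample is illustrative only:
import numpy as np
rng = np.random.RandomState(0)
data = np.hstack([rng.randn(200) - 2, rng.randn(200) + 2])   # clearly bimodal
pval = pval_silverman(data, I='auto', N_bootstrap=1000)
print(pval)   # a small p-value suggests rejecting unimodality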
Example 7: downsample
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def downsample(self, X, n):
    # we've already fit()ted, but we're worried that our X is so
    # large our classifier will be too slow in practice. we can
    # downsample by running a kde on X and sampling from it (this
    # will be slow, but happens only once), and then using those
    # points as the new X.
    if len(X) < n:
        return X
    kde = KernelDensity()
    kde.fit(X)
    return kde.sample(n)
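The same idea as a standalone sketch (an illustration, not the original class): replace a large training set with a smaller KDE-smoothed surrogate before handing it to an expensive estimator.
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X_big = rng.randn(100000, 5)                  # too large for the downstream model
kde = KernelDensity().fit(X_big)              # default gaussian kernel, bandwidth=1.0
X_small = kde.sample(2000, random_state=0)    # KDE-smoothed stand-in for X_big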
Example 8: colorKDE
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
class colorKDE(object):

    def __init__(self, data=np.array([])):
        self.data = data

    def runKDE(self, bandwidth=0.2, use_opt=False):
        '''
        Generate the KDE and run with the given bandwidth.
        If use_opt is specified, runCVSearch must have been run already.
        '''
        if use_opt:
            self.kde = KernelDensity(bandwidth=self.optimal_bandwidth)
        else:
            self.kde = KernelDensity(bandwidth=bandwidth)
        self.kde.fit(self.data)

    def runCVSearch(self, search_range=np.linspace(0.01, 1.0, 50), folds=20):
        self.grid = GridSearchCV(KernelDensity(), {'bandwidth': search_range},
                                 cv=folds)
        self.grid.fit(self.data)
        self.optimal_bandwidth = self.grid.best_params_['bandwidth']
        print('Optimal bandwidth: ' + str(self.optimal_bandwidth))

    def score_samples(self, x):
        '''
        Replicate score_samples functionality so both saves
        can be treated the same.
        '''
        return self.kde.score_samples(x)

    def sample(self, n_samples):
        '''
        Replicate sample functionality so both saves
        can be treated the same.
        '''
        return self.kde.sample(n_samples=n_samples)

    def save(self, filename, full=True):
        '''
        Save current state of the object.
        If full is False, only save self.kde.
        '''
        if full:
            # save the entire object, including data
            pickle.dump(self, open(filename, 'wb'), protocol=-1)
        else:
            # only save the .kde object
            pickle.dump(self.kde, open(filename, 'wb'), protocol=-1)
Example 9: test_silverman_adaptive_resampling
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_silverman_adaptive_resampling(data, alpha, I='auto',
                                       N_bootstrap_max=10000, comm=MPI.COMM_WORLD):
    data = comm.bcast(data)
    I = get_I(data, I)
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: not is_unimodal_kde(
        h_crit, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    try:
        return float(probability_above(resamp_fun, alpha, max_samp=N_bootstrap_max, comm=comm,
                                       batch=100, bound_significance=0.05,
                                       exception_at_max_samp=True, printing=False))
    except MaxSampExceededException:
        return alpha
Example 10: pval_calibrated_bandwidth
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def pval_calibrated_bandwidth(data, alpha_cal, null, I='auto',
                              N_bootstrap=1000, comm=MPI.COMM_WORLD,
                              calibration_file=None):
    '''
        NB!: Test is only calibrated to correct level for alpha_cal.
    '''
    data = comm.bcast(data)
    I = get_I(data, I)
    try:
        lambda_alpha = load_lambda('bw_ad', null, alpha_cal, calibration_file)
    except KeyError:
        lambda_alpha = load_lambda('bw', null, alpha_cal, calibration_file)
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: is_unimodal_kde(
        h_crit*lambda_alpha, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    smaller_equal_crit_bandwidth = bootstrap(resamp_fun, N_bootstrap, dtype=np.bool_, comm=comm)
    return np.mean(~smaller_equal_crit_bandwidth)
Example 11: XSampleFMBW
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
class XSampleFMBW(XSampleBW):

    def __init__(self, N, comm=MPI.COMM_SELF):
        self.comm = comm
        self.rank = self.comm.Get_rank()
        self.I = (-1.5, a+1)  # CHECK: Is appropriate bound? OK.
        self.lamtol = 0
        self.mtol = mtol
        self.N = N
        if self.rank == 0:
            N1 = binom.rvs(N, 2.0/3)
            #print "N1 = {}".format(N1)
            N2 = N - N1
            data = np.hstack([np.random.randn(N1), np.random.randn(N2)+a])
        else:
            data = None
        data = self.comm.bcast(data)
        self.data = data
        self.var = np.var(data)
        self.h_crit = fisher_marron_critical_bandwidth(data, self.lamtol, self.mtol, self.I)
        #print_all_ranks(self.comm, "self.h_crit = {}".format(self.h_crit))
        self.kde_h_crit = KernelDensity(kernel='gaussian', bandwidth=self.h_crit).fit(data.reshape(-1, 1))

    def is_unimodal_resample(self, lambda_val):
        data = self.kde_h_crit.sample(self.N).reshape(-1)/np.sqrt(1+self.h_crit**2/self.var)
        #print "np.var(data)/self.var = {}".format(np.var(data)/self.var)
        return is_unimodal_kde_fm(self.h_crit*lambda_val, data, self.lamtol, self.mtol, self.I)

    def probability_of_unimodal_above(self, lambda_val, gamma):
        '''
            G_n(\lambda) = P(\hat h_{crit}^*/\hat h_{crit} <= \lambda)
                         = P(\hat h_{crit}^* <= \lambda*\hat h_{crit})
                         = P(KDE(X^*, \lambda*\hat h_{crit}) is unimodal)
        '''
        # print "bootstrapping 1000 samples at rank {}:".format(self.rank)
        # smaller_equal_crit_bandwidth = bootstrap(lambda: self.is_unimodal_resample(lambda_val), 1000, dtype=np.bool_)
        # pval = np.mean(~smaller_equal_crit_bandwidth)
        # print "result at rank {}: pval = {}".format(self.rank, pval)+"\n"+"-"*20
        return probability_above(lambda: self.is_unimodal_resample(lambda_val),
                                 gamma, max_samp=20000, comm=self.comm, batch=20)
Example 12: test_calibrated_bandwidth_adaptive_resampling
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_calibrated_bandwidth_adaptive_resampling(data, alpha, null, I='auto',
                                                  N_bootstrap_max=10000, comm=MPI.COMM_WORLD,
                                                  calibration_file=None):
    data = comm.bcast(data)
    I = get_I(data, I)
    try:
        lambda_alpha = load_lambda('bw_ad', null, alpha, calibration_file)
        # loading lambda computed with adaptive probabilistic bisection search
    except KeyError:
        lambda_alpha = load_lambda('bw', null, alpha, calibration_file)
        # loading lambda computed with probabilistic bisection search
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: not is_unimodal_kde(
        h_crit*lambda_alpha, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    try:
        return float(probability_above(resamp_fun, alpha, max_samp=N_bootstrap_max, comm=comm,
                                       batch=100, bound_significance=0.05,
                                       exception_at_max_samp=True, printing=False))
    except MaxSampExceededException:
        return alpha
Example 13: sklearn_log_density
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def sklearn_log_density(sample_points, evaluation_points):
    """
    Estimate the log probability density function from which a set of sample
    points was drawn and return the estimated density at the evaluation points.

    *sample_points* is an [n x m] matrix.

    *evaluation_points* is the set of points at which to evaluate the kde.

    Note: if any dimension has all points equal then the entire distribution
    is treated as a dirac distribution with infinite density at each point.
    This makes the entropy calculation better behaved (narrowing the
    distribution increases the entropy) but is not so useful in other contexts.
    Other packages will (correctly) ignore dimensions of width zero.
    """
    # Ugly hack warning: if *evaluation_points* is an integer, then sample
    # that many points from the kde and return the log density at each
    # sampled point.  Since the code that uses this is looking only at
    # the mean log density, it doesn't need the sample points themselves.
    # This interface should be considered internal to the entropy module
    # and not used by outside functions.  If you need it externally, then
    # restructure the api so that the function always returns both the
    # points and the density, as well as any other function (such as the
    # density function and the sister function scipy_stats_density) so
    # that all share the new interface.
    from sklearn.neighbors import KernelDensity

    # Standardize data so we can use spherical kernels and uniform bandwidth
    data, mu, sigma = standardize(sample_points)
    # Note that sigma will be zero for dimensions w_o where all points are equal.
    # With P(w) = P(w, w_o) / P(w_o | w) and P(w_o) = 1 for all points in
    # the set, then P(w) = P(w, w_o) and we can ignore the zero dimensions.
    # However, as another ugly hack, we want the differential entropy to go
    # to -inf as the distribution narrows, so pretend that P = 0 everywhere.
    # Uncomment the following line to return the sample probability instead.
    ## sigma[sigma == 0.] = 1.

    # Silverman bandwidth estimator
    n, d = sample_points.shape
    bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))

    #print("starting grid search for bandwidth over %d points"%n)
    #from sklearn.grid_search import GridSearchCV
    #from numpy import logspace
    #params = {'bandwidth': logspace(-1, 1, 20)}
    #fitter = GridSearchCV(KernelDensity(), params)
    #fitter.fit(data)
    #kde = fitter.best_estimator_
    #print("best bandwidth: {0}".format(kde.bandwidth))
    #import time; T0 = time.time()
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                        rtol=1e-6, atol=1e-6)
    kde.fit(data)

    if isinstance(evaluation_points, int):
        # For generated points, they already follow the distribution
        points = kde.sample(n)
    elif evaluation_points is not None:
        # Standardize evaluation points to match sample distribution.
        # Note: for dimensions where all sample points are equal, sigma
        # has been artificially set equal to one. This means that the
        # evaluation points which do not match the sample value will
        # use the simple differences for the z-score rather than
        # pushing them out to plus/minus infinity.
        points = (evaluation_points - mu)/(sigma + (sigma == 0.))
    else:
        points = sample_points

    # Evaluate pdf, scaling the resulting density by sigma to correct the area.
    # If sigma is zero, return entropy as -inf; this seems to not be the
    # case for discrete distributions (consider Bernoulli with p=1, q=0,
    # => H = -p log p - q log q = 0), so need to do something else, both
    # for the kde and for the entropy calculation.
    with np.errstate(divide='ignore'):
        log_pdf = kde.score_samples(points) - np.sum(np.log(sigma))
    return log_pdf
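A hypothetical usage sketch (assuming the module's standardize helper is importable next to sklearn_log_density); the second call exploits the documented integer hack to get a quick differential-entropy estimate:
import numpy as np
rng = np.random.RandomState(0)
sample_points = rng.randn(1000, 2)
grid = rng.randn(50, 2)
log_pdf = sklearn_log_density(sample_points, grid)            # log-density at the grid points
entropy = -np.mean(sklearn_log_density(sample_points, 1000))  # integer => evaluate at resampled points
print(entropy)   # roughly the entropy of a 2-D standard normal (~2.84 nats)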
Example 14: get_standart_deviation
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
result, stats = get_standart_deviation(delta, PHAT_targets_valid[:, 0], method="full")
print(result)

full_set = np.hstack((PHAT_features_train, PHAT_targets_train))
# bring all magnitudes to redshift range
rescaled_set = np.copy(full_set)
rescaled_set[:, 0:-1] = rescaled_set[:, 0:-1]  # *feature_av
rescaled_set[:, -1] = rescaled_set[:, -1]

# Draw a sample set every time
kde = KernelDensity(bandwidth=0.001)
kde.fit(rescaled_set)
for i in range(500, 9000, 2000):
    aug_data = kde.sample(i)
    # aug_data = np.vstack((aug_data, full_set))
    # initialize predictor
    tree_para = {"min_samples_leaf": 5}
    clf = AdaBoostRegressor(DecisionTreeRegressor(**tree_para), loss="exponential", n_estimators=20)
    # fit predictor
    clf.fit(aug_data[:, 0:-1], aug_data[:, -1])
    predicted_aug = clf.predict(PHAT_features_valid)
    # collect stats
    delta_aug = predicted_aug - PHAT_targets_valid[:, 0]
    feature_imp_aug = clf.feature_importances_
    result_aug, stats_aug = get_standart_deviation(delta_aug, PHAT_targets_valid[:, 0], method="full")