

Python KernelDensity.sample Method Code Examples

This article collects typical usage examples of the sklearn.neighbors.KernelDensity.sample method in Python. If you have been wondering what KernelDensity.sample does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of the enclosing class, sklearn.neighbors.KernelDensity.


The following presents 14 code examples of the KernelDensity.sample method, sorted by popularity by default. You can upvote the examples you find useful; your ratings help the system recommend better Python code samples.
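
Before diving into the examples, here is a minimal usage sketch (the data and bandwidth below are made up for illustration and are not taken from any of the projects that follow). Note that sample() is only implemented for the 'gaussian' and 'tophat' kernels, as Example 1 verifies.

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.random.RandomState(42).randn(200, 2)  # toy 2-D data
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)
new_points = kde.sample(n_samples=10, random_state=0)
print(new_points.shape)  # (10, 2): samples keep the dimensionality of X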

Example 1: test_kernel_density_sampling

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a sample from the KDE
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1))
Author: BasilBeirouti, Project: scikit-learn, Lines: 34, Source: test_kde.py

Example 2: XSampleBW

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
class XSampleBW(XSample):

    def __init__(self, N, sampfun, comm=MPI.COMM_WORLD):
        super(XSampleBW, self).__init__(N, sampfun, comm)
        self.I = (-1.5, 1.5)  # avoiding spurious bumps in the tails
        self.h_crit = critical_bandwidth(self.data, self.I)
        #print_all_ranks(self.comm, "self.h_crit = {}".format(self.h_crit))
        self.var = np.var(self.data)
        self.kde_h_crit = KernelDensity(kernel='gaussian', bandwidth=self.h_crit).fit(self.data.reshape(-1, 1))

    @property
    def statistic(self):
        return self.h_crit

    def resampled_statistic_below_scaled_statistic(self, lambda_scale):
        '''
            P( h_{crit}^* <= \lambda*h_{crit})
                = P(KDE(X^*, \lambda* h_{crit}) is unimodal)
        '''
        return self.is_unimodal_resample(lambda_scale)

    def is_unimodal_resample(self, lambda_val):
        data = self.kde_h_crit.sample(self.N).reshape(-1)/np.sqrt(1+self.h_crit**2/self.var)
        #print "np.var(data)/self.var = {}".format(np.var(data)/self.var)
        return is_unimodal_kde(self.h_crit*lambda_val, data, self.I)

    def probability_of_unimodal_above(self, lambda_val, gamma):
        return self.prob_resampled_statistic_below_bound_above_gamma(lambda_val, gamma)
Author: kjohnsson, Project: modality, Lines: 30, Source: bandwidth.py

Example 3: kde_fit_quantiles

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def kde_fit_quantiles(rtquants, nsamples=1000, bw=.1):
    """ takes quantile estimates and fits cumulative density function
    returns samples to pass to sns.kdeplot()
    """
    kdefit = KernelDensity(kernel='gaussian', bandwidth=bw).fit(rtquants)
    samples = kdefit.sample(n_samples=nsamples).flatten()
    return samples
Author: dunovank, Project: radd_kd, Lines: 9, Source: analyze.py
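
A hypothetical call of kde_fit_quantiles (the quantile values below are invented): KernelDensity.fit expects a 2-D array, so the quantiles are passed as a column vector.

import numpy as np
rt_quantiles = np.array([0.31, 0.38, 0.45, 0.52, 0.64]).reshape(-1, 1)
samples = kde_fit_quantiles(rt_quantiles, nsamples=500, bw=.1)
# samples is a flat array of 500 draws, ready for sns.kdeplot(samples)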

Example 4: test_kde_sample_weights

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_kde_sample_weights():
    n_samples = 400
    size_test = 20
    weights_neutral = np.full(n_samples, 3.)
    for d in [1, 2, 10]:
        rng = np.random.RandomState(0)
        X = rng.rand(n_samples, d)
        weights = 1 + (10 * X.sum(axis=1)).astype(np.int8)
        X_repetitions = np.repeat(X, weights, axis=0)
        n_samples_test = size_test // d
        test_points = rng.rand(n_samples_test, d)
        for algorithm in ['auto', 'ball_tree', 'kd_tree']:
            for metric in ['euclidean', 'minkowski', 'manhattan',
                           'chebyshev']:
                if algorithm != 'kd_tree' or metric in KDTree.valid_metrics:
                    kde = KernelDensity(algorithm=algorithm, metric=metric)

                    # Test that adding a constant sample weight has no effect
                    kde.fit(X, sample_weight=weights_neutral)
                    scores_const_weight = kde.score_samples(test_points)
                    sample_const_weight = kde.sample(random_state=1234)
                    kde.fit(X)
                    scores_no_weight = kde.score_samples(test_points)
                    sample_no_weight = kde.sample(random_state=1234)
                    assert_allclose(scores_const_weight, scores_no_weight)
                    assert_allclose(sample_const_weight, sample_no_weight)

                    # Test equivalence between sampling and (integer) weights
                    kde.fit(X, sample_weight=weights)
                    scores_weight = kde.score_samples(test_points)
                    sample_weight = kde.sample(random_state=1234)
                    kde.fit(X_repetitions)
                    scores_ref_sampling = kde.score_samples(test_points)
                    sample_ref_sampling = kde.sample(random_state=1234)
                    assert_allclose(scores_weight, scores_ref_sampling)
                    assert_allclose(sample_weight, sample_ref_sampling)

                    # Test that sample weights have a non-trivial effect
                    diff = np.max(np.abs(scores_no_weight - scores_weight))
                    assert diff > 0.001

                    # Test invariance with respect to arbitrary scaling
                    scale_factor = rng.rand()
                    kde.fit(X, sample_weight=(scale_factor * weights))
                    scores_scaled_weight = kde.score_samples(test_points)
                    assert_allclose(scores_scaled_weight, scores_weight)
Author: neverlanding, Project: scikit-learn, Lines: 48, Source: test_kde.py

Example 5: kde3d

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def kde3d(x, y, z, data_point):
    values = np.vstack([x, y, z]).T
    # Use grid search cross-validation to optimize the bandwidth
    # params = {'bandwidth': np.logspace(-1, 1, 20)}
    kde = KernelDensity(bandwidth=0.3)
    kde.fit(values)
    kde_coords = kde.sample(10000)
    log_pdf = kde.score_samples(kde_coords)
    percentile = np.sum(log_pdf < kde.score(data_point))/10000.
    return (percentile)
Author: scplbl, Project: singlEpoClass, Lines: 12, Source: survivalFunc.py
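
A hypothetical call of kde3d (the shapes are assumptions inferred from the code): x, y and z are equal-length 1-D arrays, and data_point must be 2-D with shape (1, 3), since kde.score() expects an (n_samples, n_features) array.

import numpy as np
rng = np.random.RandomState(0)
x, y, z = rng.randn(3, 500)
p = kde3d(x, y, z, np.array([[0.0, 0.0, 0.0]]))
print(p)  # fraction of the 10000 KDE samples with lower density than data_point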

Example 6: pval_silverman

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def pval_silverman(data, I='auto', N_bootstrap=1000, comm=MPI.COMM_WORLD):
    I = get_I(data, I)
    data = comm.bcast(data)
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: is_unimodal_kde(
        h_crit, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    smaller_equal_crit_bandwidth = bootstrap(resamp_fun, N_bootstrap, dtype=np.bool_,
                                             comm=comm)
    return np.mean(~smaller_equal_crit_bandwidth)
Author: kjohnsson, Project: modality, Lines: 13, Source: resampling_tests.py
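
This example, like Examples 2 and 9-12 from the same project, divides the KDE samples by sqrt(1 + h_crit**2/var). That is the standard smoothed-bootstrap variance correction: drawing from a Gaussian KDE adds the kernel variance h**2 on top of the data variance, and the rescaling removes it again. A minimal sketch with made-up data that verifies this:

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
data = rng.randn(10000)
h = 0.5
kde = KernelDensity(kernel='gaussian', bandwidth=h).fit(data.reshape(-1, 1))
raw = kde.sample(100000, random_state=0).ravel()
rescaled = raw / np.sqrt(1 + h**2 / np.var(data))
# np.var(raw) is close to np.var(data) + h**2, while np.var(rescaled)
# is close to np.var(data) again.
print(np.var(data), np.var(raw), np.var(rescaled))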

Example 7: downsample

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
    def downsample(self, X, n):
        # We've already fit()ted, but we're worried that our X is so
        # large our classifier will be too slow in practice. We can
        # downsample by running a KDE on X and sampling from it (this
        # will be slow, but happens only once), and then use those
        # points as the new X.
        if len(X) < n:
            return X
        kde = KernelDensity()
        kde.fit(X)
        return kde.sample(n)
Author: caovanloi, Project: AEDensityEstimation, Lines: 13, Source: BaseOneClass.py
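
A standalone sketch of the same idea with assumed data (the original is a method, so this just inlines the KDE-resampling step):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X_big = rng.randn(50000, 3)
kde = KernelDensity().fit(X_big)  # defaults: gaussian kernel, bandwidth=1.0
X_small = kde.sample(1000, random_state=0)
print(X_small.shape)  # (1000, 3)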

Example 8: colorKDE

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
class colorKDE(object):
	def __init__(self,data=np.array([])):
		self.data = data
	
		
	def runKDE(self,bandwidth=0.2,use_opt=False):
		'''
		Generate the KDE and fit it with the given bandwidth
		
		If use_opt is specified, runCVSearch must have been run already
		'''
		if use_opt:
			self.kde = KernelDensity(bandwidth=self.optimal_bandwidth)
		else:
			self.kde = KernelDensity(bandwidth=bandwidth)
		
		self.kde.fit(self.data)
		
	def runCVSearch(self,search_range=np.linspace(0.01,1.0,50),folds=20):
		self.grid = GridSearchCV(KernelDensity(),{'bandwidth':search_range},\
			cv=folds)
		self.grid.fit(self.data)
		self.optimal_bandwidth=self.grid.best_params_['bandwidth']
		print('Optimal bandwidth: ' + str(self.optimal_bandwidth))
		
	def score_samples(self,x):
		'''
		Replicate score_samples functionality so both saved
		versions (full object or bare KDE) can be treated the same
		'''
		return self.kde.score_samples(x)
		
	def sample(self,n_samples):
		'''
		Replicate sample functionality so both saved
		versions can be treated the same
		'''
		return self.kde.sample(n_samples=n_samples)
		
	
	def save(self,filename,full=True):
		'''
		Save current state of the object
		
		If full is false, only save self.kde
		'''
		if full:
			#save the entire object, including data
			pickle.dump(self,open(filename,'wb'),protocol=-1)
			
		else:
			#only save the .kde object
			pickle.dump(self.kde,open(filename,'wb'),protocol=-1)
Author: zachjennings, Project: gc_select, Lines: 55, Source: colorKDE.py
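
A hypothetical usage sketch for colorKDE (the data are made up, and the class additionally needs numpy, pickle and GridSearchCV imported):

import numpy as np
colors = np.random.RandomState(1).rand(300, 2)  # e.g. two photometric colors
k = colorKDE(colors)
k.runCVSearch(folds=5)   # sets k.optimal_bandwidth via cross-validated grid search
k.runKDE(use_opt=True)
draws = k.sample(100)    # 100 points drawn from the fitted KDE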

Example 9: test_silverman_adaptive_resampling

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_silverman_adaptive_resampling(data, alpha, I='auto',
                                       N_bootstrap_max=10000, comm=MPI.COMM_WORLD):
    data = comm.bcast(data)
    I = get_I(data, I)
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: not is_unimodal_kde(
        h_crit, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    try:
        return float(probability_above(resamp_fun, alpha, max_samp=N_bootstrap_max, comm=comm,
                     batch=100, bound_significance=0.05, exception_at_max_samp=True,
                     printing=False))
    except MaxSampExceededException:
        return alpha
Author: kjohnsson, Project: modality, Lines: 17, Source: resampling_tests.py

Example 10: pval_calibrated_bandwidth

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def pval_calibrated_bandwidth(data, alpha_cal, null, I='auto',
                              N_bootstrap=1000, comm=MPI.COMM_WORLD,
                              calibration_file=None):
    '''
        NB!: Test is only calibrated to correct level for alpha_cal.
    '''
    data = comm.bcast(data)
    I = get_I(data, I)
    try:
        lambda_alpha = load_lambda('bw_ad', null, alpha_cal, calibration_file)
    except KeyError:
        lambda_alpha = load_lambda('bw', null, alpha_cal, calibration_file)
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: is_unimodal_kde(
        h_crit*lambda_alpha, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    smaller_equal_crit_bandwidth = bootstrap(resamp_fun, N_bootstrap, dtype=np.bool_, comm=comm)
    return np.mean(~smaller_equal_crit_bandwidth)
Author: kjohnsson, Project: modality, Lines: 21, Source: resampling_tests.py

Example 11: XSampleFMBW

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
    class XSampleFMBW(XSampleBW):

        def __init__(self, N, comm=MPI.COMM_SELF):
            self.comm = comm
            self.rank = self.comm.Get_rank()
            self.I = (-1.5, a+1)  # CHECK: Is appropriate bound? OK.
            self.lamtol = 0
            self.mtol = mtol
            self.N = N
            if self.rank == 0:
                N1 = binom.rvs(N, 2.0/3)
                #print "N1 = {}".format(N1)
                N2 = N - N1
                data = np.hstack([np.random.randn(N1), np.random.randn(N2)+a])
            else:
                data = None
            data = self.comm.bcast(data)
            self.data = data
            self.var = np.var(data)
            self.h_crit = fisher_marron_critical_bandwidth(data, self.lamtol, self.mtol, self.I)
            #print_all_ranks(self.comm, "self.h_crit = {}".format(self.h_crit))
            self.kde_h_crit = KernelDensity(kernel='gaussian', bandwidth=self.h_crit).fit(data.reshape(-1, 1))

        def is_unimodal_resample(self, lambda_val):
            data = self.kde_h_crit.sample(self.N).reshape(-1)/np.sqrt(1+self.h_crit**2/self.var)
            #print "np.var(data)/self.var = {}".format(np.var(data)/self.var)
            return is_unimodal_kde_fm(self.h_crit*lambda_val, data, self.lamtol, self.mtol, self.I)

        def probability_of_unimodal_above(self, lambda_val, gamma):
            '''
                G_n(\lambda) = P(\hat h_{crit}^*/\hat h_{crit} <= \lambda)
                             = P(\hat h_{crit}^* <= \lambda*\hat h_{crit})
                             = P(KDE(X^*, \lambda*\hat h_{crit}) is unimodal)
            '''
            # print "bootstrapping 1000 samples at rank {}:".format(self.rank)
            # smaller_equal_crit_bandwidth = bootstrap(lambda: self.is_unimodal_resample(lambda_val), 1000, dtype=np.bool_)
            # pval = np.mean(~smaller_equal_crit_bandwidth)
            # print "result at rank {}: pval = {}".format(self.rank, pval)+"\n"+"-"*20
            return probability_above(lambda: self.is_unimodal_resample(lambda_val),
                                     gamma, max_samp=20000, comm=self.comm, batch=20)
Author: kjohnsson, Project: modality, Lines: 42, Source: bandwidth.py

Example 12: test_calibrated_bandwidth_adaptive_resampling

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def test_calibrated_bandwidth_adaptive_resampling(data, alpha, null, I='auto',
                                                  N_bootstrap_max=10000, comm=MPI.COMM_WORLD,
                                                  calibration_file=None):
    data = comm.bcast(data)
    I = get_I(data, I)
    try:
        lambda_alpha = load_lambda('bw_ad', null, alpha, calibration_file)
           # loading lambda computed with adaptive probabilistic bisection search
    except KeyError:
        lambda_alpha = load_lambda('bw', null, alpha, calibration_file)
           # loading lambda computed with probabilistic bisection search
    h_crit = critical_bandwidth(data, I)
    var_data = np.var(data)
    KDE_h_crit = KernelDensity(kernel='gaussian', bandwidth=h_crit).fit(data.reshape(-1, 1))
    resamp_fun = lambda: not is_unimodal_kde(
        h_crit*lambda_alpha, KDE_h_crit.sample(len(data)).ravel()/np.sqrt(1+h_crit**2/var_data), I)
    try:
        return float(probability_above(resamp_fun, alpha, max_samp=N_bootstrap_max, comm=comm,
                     batch=100, bound_significance=0.05, exception_at_max_samp=True,
                     printing=False))
    except MaxSampExceededException:
        return alpha
Author: kjohnsson, Project: modality, Lines: 24, Source: resampling_tests.py

Example 13: sklearn_log_density

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
def sklearn_log_density(sample_points, evaluation_points):
    """
    Estimate the log probability density function from which a set of sample
    points was drawn and return the estimated density at the evaluation points.

    *sample_points* is an [n x m] matrix.

    *evaluation_points* is the set of points at which to evaluate the kde.

    Note: if any dimension has all points equal then the entire distribution
    is treated as a dirac distribution with infinite density at each point.
    This makes the entropy calculation better behaved (narrowing the
    distribution increases the entropy) but is not so useful in other contexts.
    Other packages will (correctly) ignore dimensions of width zero.
    """
    # Ugly hack warning: if *evaluation_points* is an integer, then sample
    # that many points from the kde and return the log density at each
    # sampled point.  Since the code that uses this is looking only at
    # the mean log density, it doesn't need the sample points themselves.
    # This interface should be considered internal to the entropy module
    # and not used by outside functions.  If you need it externally, then
    # restructure the api so that the function always returns both the
    # points and the density, as well as any other function (such as the
    # density function and the sister function scipy_stats_density) so
    # that all share the new interface.

    from sklearn.neighbors import KernelDensity

    # Standardize data so we can use spherical kernels and uniform bandwidth
    data, mu, sigma = standardize(sample_points)

    # Note that sigma will be zero for dimensions w_o where all points are equal.
    # With P(w) = P(w, w_o) / P(w_o | w) and P(w_o) = 1 for all points in
    # the set, then P(w) = P(w, w_o) and we can ignore the zero dimensions.
    # However, as another ugly hack, we want the differential entropy to go
    # to -inf as the distribution narrows, so pretend that P = 0 everywhere.
    # Uncomment the following line to return the sample probability instead.
    ## sigma[sigma == 0.] = 1.

    # Silverman bandwidth estimator
    n, d = sample_points.shape
    bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))

    #print("starting grid search for bandwidth over %d points"%n)
    #from sklearn.grid_search import GridSearchCV
    #from numpy import logspace
    #params = {'bandwidth': logspace(-1, 1, 20)}
    #fitter = GridSearchCV(KernelDensity(), params)
    #fitter.fit(data)
    #kde = fitter.best_estimator_
    #print("best bandwidth: {0}".format(kde.bandwidth))
    #import time; T0 = time.time()
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                        rtol=1e-6, atol=1e-6)
    kde.fit(data)

    if isinstance(evaluation_points, int):
        # For generated points, they already follow the distribution
        points = kde.sample(n)
    elif evaluation_points is not None:
        # Standardized evaluation points to match sample distribution
        # Note: for dimensions where all sample points are equal, sigma
        # has been artificially set equal to one.  This means that the
        # evaluation points which do not match the sample value will
        # use the simple differences for the z-score rather than
        # pushing them out to plus/minus infinity.
        points = (evaluation_points - mu)/(sigma + (sigma == 0.))
    else:
        points = sample_points

    # Evaluate pdf, scaling the resulting density by sigma to correct the area.
    # If sigma is zero, return entropy as -inf;  this seems to not be the
    # case for discrete distributions (consider Bernoulli with p=1, q=0,
    #  => H = -p log p - q log q = 0), so need to do something else, both
    # for the kde and for the entropy calculation.
    with np.errstate(divide='ignore'):
        log_pdf = kde.score_samples(points) - np.sum(np.log(sigma))

    return log_pdf
Author: bumps, Project: bumps, Lines: 81, Source: entropy.py
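
A quick numeric check of the Silverman rule-of-thumb bandwidth used above (a sketch, not part of the bumps module): for n = 1000 standardized points in d = 2 dimensions,

n, d = 1000, 2
bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))
print(bandwidth)  # ~0.316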

Example 14: get_standart_deviation

# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import sample [as alias]
result, stats = get_standart_deviation(delta, PHAT_targets_valid[:, 0], method="full")
print(result)

full_set = np.hstack((PHAT_features_train, PHAT_targets_train))

# bring all magnitudes to redshift range
rescaled_set = np.copy(full_set)
rescaled_set[:, 0:-1] = rescaled_set[:, 0:-1]  # *feature_av
rescaled_set[:, -1] = rescaled_set[:, -1]

# Draw a sample set every time
kde = KernelDensity(bandwidth=0.001)
kde.fit(rescaled_set)
for i in range(500, 9000, 2000):
    aug_data = kde.sample(i)
    # aug_data = np.vstack((aug_data, full_set))

    # initialize predictor
    tree_para = {"min_samples_leaf": 5}
    clf = AdaBoostRegressor(DecisionTreeRegressor(**tree_para), loss="exponential", n_estimators=20)

    # fit predictor
    clf.fit(aug_data[:, 0:-1], aug_data[:, -1])
    predicted_aug = clf.predict(PHAT_features_valid)

    # collect stats
    delta_aug = predicted_aug - PHAT_targets_valid[:, 0]
    feature_imp_aug = clf.feature_importances_

    result_aug, stats_aug = get_standart_deviation(delta_aug, PHAT_targets_valid[:, 0], method="full")
Author: MaxiMaerz, Project: Masterthesis, Lines: 32, Source: PHAT1_best.py


Note: The sklearn.neighbors.KernelDensity.sample examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and distribution and use are subject to each project's license. Please do not reproduce without permission.