This article collects typical usage examples of the Python method sklearn.neighbors.KernelDensity.score_samples. If you have been wondering what KernelDensity.score_samples does, how to use it, or what working examples look like, the curated examples below may help. You can also explore further usage of its containing class, sklearn.neighbors.KernelDensity.
The following 15 code examples of KernelDensity.score_samples are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: cistrans
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def cistrans(args):
cob = co.COB(args.cob)
    if args.out is None:
args.out = '{}_cistrans'.format(cob.name)
# np.newaxis adds an empty axis in that position of the slice
# the sklearn module requires the values to be in the rows:
# http://scikit-learn.org/stable/auto_examples/neighbors/plot_kde_1d.html
cis = cob.coex \
.score[cob.coex.distance <= args.cis_distance]\
.values[:,np.newaxis]
trans = cob.coex\
.score[np.isinf(cob.coex.distance)]\
.values[:,np.newaxis]
X_plot = np.linspace(-10,10,1000)[:,np.newaxis]
print(
'Found {:,} cis interactions and {:,} trans interactions'.format(
cis.shape[0],
trans.shape[0]
))
# Fit the kernel
kd=KernelDensity(bandwidth=0.2)
kd.fit(cis)
cis_kde = np.exp(kd.score_samples(X_plot))
plt.fill(X_plot,cis_kde,alpha=0.5,label='Cis Interactions')
# Fit the trans
kd.fit(trans[0:50000])
trans_kde = np.exp(kd.score_samples(X_plot))
plt.fill(X_plot,trans_kde,alpha=0.5,label='Trans Interactions')
plt.legend()
plt.title('Cis vs Trans Density: {}'.format(cob.name))
# Calculate the mann whitney U test
u,pval = sp.stats.mannwhitneyu(cis[:,0],trans[:,0])
print('P-val: {}'.format(pval))
plt.savefig(args.out+'.png')
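The reshape via np.newaxis above is the key idiom: scikit-learn expects one sample per row, so a 1-D vector of scores must become an (n, 1) column before fitting. A minimal self-contained sketch of the same fit-and-score cycle, with random data standing in for the co-expression scores:

import numpy as np
from sklearn.neighbors import KernelDensity

scores = np.random.normal(size=500)       # stand-in for cob.coex.score values
X = scores[:, np.newaxis]                 # shape (500, 1): one sample per row
grid = np.linspace(-10, 10, 1000)[:, np.newaxis]
kd = KernelDensity(bandwidth=0.2).fit(X)
density = np.exp(kd.score_samples(grid))  # score_samples returns log-density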
Example 2: figure_6_14
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def figure_6_14():
"""Reproduces figure 6.14 in ESLii displaying a density estimate for sbp
levels in chd/no-chd groups using a Gaussian kernel density estimate
"""
sa = eslii.read_sa_heart_data()
sbp = sa["sbp"]
    sbp_chd = sa[sa["chd"] == 1]["sbp"].sort_values()
    sbp_no_chd = sa[sa["chd"] == 0]["sbp"].sort_values()
    kde_chd = KernelDensity(kernel='gaussian', bandwidth=7.5).fit(
        sbp_chd.values.reshape(-1, 1))
    chd_log_dens = kde_chd.score_samples(sbp_chd.values.reshape(-1, 1))
plt.subplot(121)
plt.plot(sbp_chd, np.exp(chd_log_dens), label="CHD")
    kde_no_chd = KernelDensity(kernel='gaussian', bandwidth=7.5).fit(
        sbp_no_chd.values.reshape(-1, 1))
    no_chd_log_dens = kde_no_chd.score_samples(
        sbp_no_chd.values.reshape(-1, 1))
plt.plot(sbp_no_chd, np.exp(no_chd_log_dens), label="no CHD")
plt.legend(loc='best')
sbp_range = np.linspace(min(sbp), max(sbp), 100).reshape((100, 1))
chd_dens = np.exp(kde_chd.score_samples(sbp_range))
no_chd_dens = np.exp(kde_no_chd.score_samples(sbp_range))
p_chd = float(len(sbp_chd))/(len(sbp_chd) + len(sbp_no_chd))
posterior_est = [p_chd * chd_dens[i] /
(p_chd * chd_dens[i] + (1 - p_chd) * no_chd_dens[i])
for i in range(len(sbp_range))]
plt.subplot(122)
plt.plot(sbp_range, posterior_est)
plt.show()
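The list comprehension at the end is Bayes' rule applied with the two class-conditional density estimates. Since chd_dens and no_chd_dens are NumPy arrays, the same posterior can be computed vectorized; a sketch using the names defined above:

# P(chd | sbp) via Bayes' rule, elementwise over the evaluation grid
posterior_est = p_chd * chd_dens / (p_chd * chd_dens + (1 - p_chd) * no_chd_dens)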
Example 3: plot_samples
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def plot_samples(self, folder = '', title = ''):
print("Saving plots...")
numbins = int(8 * math.log(len(self.samples[self.store_list[0]])))
for p in self.store_list:
if p == 'D':
continue
samples = np.array([self.samples[p]]).T
a = np.min(samples)
b = np.max(samples)
band = 0.1 * (b-a + 0.001)
kde = KD(kernel='gaussian', bandwidth=band).fit(samples)
        n, bins, patches = plt.hist(self.samples[p], numbins, density=True)
log_dens = kde.score_samples(np.array([bins]).T)
plt.plot(bins, np.exp(log_dens), 'r-')
MAP = self.get_MAP(kde, a, b)
self.params['MAP'][p] = MAP
        plt.plot([MAP], np.exp(kde.score_samples(np.array([[MAP]]))), 'go')
plt.title(title + " MAP estimate: " + str(MAP))
plt.ylabel("Posterior(" + p + ")")
plt.xlabel(p)
x1,x2,y1,y2 = plt.axis()
plt.axis((-3,3,y1,y2))
if p == 'L' or p == 'T':
plt.axis((0,1,y1,y2))
plt.savefig(folder + p + "_" + title)
plt.clf()
self.params['MAP']['D'] = self.params['D']
if not self.bkt:
print("Working on difficulty params...")
p = 'D'
data = np.array(self.samples[p])
for j in range(self.data['num_problems']):
samples = np.array([data[:,j]]).T
#print samples
a = np.min(samples)
b = np.max(samples)
band = 0.1 * (b-a + 0.001)
kde = KD(kernel='gaussian', bandwidth=band).fit(samples)
            n, bins, patches = plt.hist(samples, numbins, density=True)
log_dens = kde.score_samples(np.array([bins]).T)
plt.plot(bins, np.exp(log_dens), 'r-')
MAP = self.get_MAP(kde, a, b)
self.params['MAP']['D'][j] = MAP
            plt.plot([MAP], np.exp(kde.score_samples(np.array([[MAP]]))), 'go')
plt.title(title + " MAP estimate: " + str(MAP))
plt.ylabel("Posterior(" + p + ")")
plt.xlabel("Problem " + str(j))
x1,x2,y1,y2 = plt.axis()
plt.axis((-3,3,y1,y2))
plt.savefig(folder + "Difficulty/problem" + str(j) + "_" + title)
plt.clf()
print("Plots saved!")
Example 4: TwoClassKDE
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
class TwoClassKDE(object):
"""Class for Kernel Density Estimator on two labels. Likelihood ratio at a point is ratio of class-1 likelihood estimate to class-0 likelihood estimate, times the class odds, where this is calculated as the posterior mean estimate under Beta(1, 1) prior, given the observations. If no points are observed for one of the classes, a default (improper) uniform prior is assumed for that class. """
def __init__(self, **kwargs):
"""Takes same parameters as KernelDensity estimator."""
self.kde0 = KernelDensity(**kwargs)
self.kde1 = KernelDensity(**kwargs)
def fit(self, X, y):
"""Fits KDE models on the data. X is array of data points, y is array of 0-1 labels."""
y = np.asarray(y, dtype = int)
self.n0, self.n1 = (y == 0).sum(), (y == 1).sum()
assert (self.n0 + self.n1 == len(y)), "y must be vector of 1's and 0's."
X0, X1 = X[y == 0], X[y == 1]
if (self.n0 > 0):
self.kde0.fit(X0)
if (self.n1 > 0):
self.kde1.fit(X1)
def fit_with_optimal_bandwidth(self, X, y, gridsize = 101, dynamic_range = 100, cv = 10, verbose = 0, n_jobs = 1):
"""Determines optimal bandwidth using the following strategy: For each subset (0 or 1) of the dataset, 1) set b = 1.06 * sigma * n^(-1/5), the Silverman's rule of thumb estimate for the optimal bandwidth. sigma is the sample standard deviation of the samples after zero-centering the columns (note: ideally each column will have comparable variance), 2) set up a grid (of size gridsize) of bandwidth values to try, ranging from b / alpha to b * alpha in geometric progression, where alpha = sqrt(dynamic_range), 3) compute average likelihood of the estimator on the data using cv-fold cross-validation, 4) select the bandwidth with the highest likelihood."""
y = np.asarray(y, dtype = int)
self.n0, self.n1 = (y == 0).sum(), (y == 1).sum()
assert (self.n0 + self.n1 == len(y)), "y must be vector of 1's and 0's."
X0, X1 = X[y == 0], X[y == 1]
if (self.n0 > 0):
log_b0 = np.log(1.06) + np.log((X0 - X0.mean(axis = 0)).std()) - 0.2 * np.log(self.n0)
            bandwidths0 = np.exp(np.linspace(log_b0 - 0.5 * np.log(dynamic_range),
                                             log_b0 + 0.5 * np.log(dynamic_range),
                                             gridsize))
            grid0 = GridSearchCV(self.kde0, {'bandwidth': bandwidths0},
                                 cv=cv, verbose=verbose, n_jobs=n_jobs)
grid0.fit(X0)
self.kde0 = grid0.best_estimator_
if (self.n1 > 0):
log_b1 = np.log(1.06) + np.log((X1 - X1.mean(axis = 0)).std()) - 0.2 * np.log(self.n1)
            bandwidths1 = np.exp(np.linspace(log_b1 - 0.5 * np.log(dynamic_range),
                                             log_b1 + 0.5 * np.log(dynamic_range),
                                             gridsize))
            grid1 = GridSearchCV(self.kde1, {'bandwidth': bandwidths1},
                                 cv=cv, verbose=verbose, n_jobs=n_jobs)
grid1.fit(X1)
self.kde1 = grid1.best_estimator_
def get_params(self, **kwargs):
return self.kde0.get_params(**kwargs)
def set_params(self, **params):
self.kde0.set_params(**params)
self.kde1.set_params(**params)
return self
def score_samples(self, X):
"""Evaluate the density model on the data. Returns vector of log-likelihood ratios of class 1 over class 0."""
p1_est = (self.n1 + 1) / (self.n0 + self.n1 + 2)
class_log_odds = np.log(p1_est) - np.log(1 - p1_est)
scores0 = self.kde0.score_samples(X) if (self.n0 > 0) else np.zeros(len(X), dtype = float)
scores1 = self.kde1.score_samples(X) if (self.n1 > 0) else np.zeros(len(X), dtype = float)
return scores1 - scores0 + class_log_odds
def score(self, X, y = None):
"""Compute the overall log-likelihood ratio under the model."""
return self.score_samples(X).sum()
def predict_proba(self, X):
"""Probability estimates."""
scores = self.score_samples(X)
p0s = 1 / (1 + np.exp(scores))
return np.array([p0s, 1 - p0s]).transpose()
def predict_log_proba(self, X):
"""Log of probability estimates."""
return np.log(self.predict_proba(X))
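A quick usage sketch for the class above on synthetic 1-D data; the data, kernel, and bandwidth are illustrative assumptions, not taken from the original project:

import numpy as np

rng = np.random.RandomState(0)
X = np.concatenate([rng.normal(-1, 1, size=(100, 1)),
                    rng.normal(+1, 1, size=(100, 1))])
y = np.array([0] * 100 + [1] * 100)
clf = TwoClassKDE(kernel='gaussian', bandwidth=0.5)  # kwargs go to KernelDensity
clf.fit(X, y)
log_ratios = clf.score_samples(X)  # log-likelihood ratio of class 1 over class 0
probs = clf.predict_proba(X)       # columns: P(y=0 | x), P(y=1 | x)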
Example 5: pdf_estimate
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def pdf_estimate(images, labels, W, method, t):
"""
    Uses kernel density estimation to compute the pdf of neural activation data.
Args:
images (numpy array): input images
labels (numpy array): input labels associated with the neuron activations
W (numpy array): weights of the hidden neurons
method (str): method to approximate the pdf
        t (float): temperature of the softmax when the network was trained
returns:
(list of regressor or kde objects): list of marginal pdfs
(regressor or kde object): pdf
(numpy array): labels of the data points used to compute the pdf (useful to compute prior)
"""
classes = np.unique(labels)
n_classes = len(np.unique(labels))
n_trials = len(labels)
""" computes the activation of the hidden neurons for the given input images """
activ = ex.propagate_layerwise(images, W, t=t)
n_subsample = 1000 #number of data points to use to compute the pdf in the 'subsample' and 'fit' methods
subsample_idx = np.random.choice(n_trials, size=n_subsample, replace=False)
activ_subs = activ[subsample_idx, :]
    n_train_fit = 500 #number of data points to use to fit the pdf in the 'fit' method
train_fit_idx = np.random.choice(n_trials, size=n_train_fit, replace=False)
activ_fit = activ[train_fit_idx, :]
if method=='full':
pdf_labels = np.copy(labels)
pdf_evidence = KernelDensity(bandwidth=5e-1, kernel='gaussian', rtol=1e-100).fit(activ)
pdf_marginals = []
for c in classes:
pdf_marginals.append(KernelDensity(bandwidth=5e-1, kernel='gaussian', rtol=1e-100).fit(activ[pdf_labels==c]))
if method=='subsample':
pdf_labels = labels[subsample_idx]
pdf_evidence = KernelDensity(bandwidth=5e-1, kernel='gaussian', rtol=1e-100).fit(activ_subs)
pdf_marginals = []
for c in classes:
pdf_marginals.append(KernelDensity(bandwidth=5e-1, kernel='gaussian', rtol=1e-100).fit(activ_subs[pdf_labels==c]))
if method=='fit':
pdf_labels = labels[subsample_idx]
pdf_evidence_full = KernelDensity(bandwidth=5e-1, kernel='gaussian', rtol=1e-100).fit(activ_subs)
pdf_evidence = KNeighborsRegressor().fit(activ_fit, pdf_evidence_full.score_samples(activ_fit))
pdf_marginals = []
for c in classes:
pdf_marginal_full = KernelDensity(bandwidth=5e-1, kernel='gaussian', rtol=1e-100).fit(activ_subs[pdf_labels==c])
pdf_marginals.append(KNeighborsRegressor().fit(activ_fit, pdf_marginal_full.score_samples(activ_fit)))
return pdf_marginals, pdf_evidence, pdf_labels
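The returned marginals and labels can be combined with Bayes' rule to score new activations. A hedged follow-up sketch (the helper below is hypothetical and assumes the 'full' or 'subsample' branches, where each marginal is a KernelDensity whose score_samples returns log-densities):

def class_posteriors(activ_new, pdf_marginals, pdf_labels, classes):
    # hypothetical helper: P(c | activ) is proportional to P(activ | c) * P(c)
    priors = np.array([np.mean(pdf_labels == c) for c in classes])
    log_liks = np.array([pdf.score_samples(activ_new) for pdf in pdf_marginals])
    joint = np.exp(log_liks) * priors[:, np.newaxis]   # shape (n_classes, n_new)
    return (joint / joint.sum(axis=0)).T               # rows sum to 1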
Example 6: initialize_optimization_plot
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def initialize_optimization_plot(self):
if self.dataset is None:
print("Set data first before initializing plot options!")
return
if self.parameters is None:
print("Parameter needs to be set!")
return
ab_list = [
AB_INDICES['A-A'],
AB_INDICES['C-C'],
AB_INDICES['E-R'],
AB_INDICES['R-E'],
AB_INDICES['K-E'],
AB_INDICES['E-E'],
AB_INDICES['K-K'],
AB_INDICES['K-R'],
AB_INDICES['V-I'],
AB_INDICES['I-L'],
AB_INDICES['S-T'],
AB_INDICES['S-S'],
AB_INDICES['K-P'],
AB_INDICES['N-N'],
AB_INDICES['W-W'],
AB_INDICES['G-F']
]
couplings_contacts, couplings_noncontacts, avg_lambda_pair = self.dataset.get_decoy_set(size=self.size_evaluationset)
self.evaluation_set['contact'] = np.array(couplings_contacts).transpose()
self.evaluation_set['bg'] = np.array(couplings_noncontacts).transpose()
bandwidth = 0.01
self.evaluation_set_kde = {}
self.evaluation_set_kde['x_grid'] = np.linspace(-0.5, 0.5, 500)
self.evaluation_set_kde['contact'] = {}
self.evaluation_set_kde['bg'] = {}
# kernel density estimate for couplings wijab
for ab in ab_list:
kde_contact = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(self.evaluation_set['contact'][ab].reshape(-1, 1))
kde_bg = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(self.evaluation_set['bg'][ab].reshape(-1, 1))
### add empirical distribution for example data points
self.evaluation_set_kde['contact'][ab] = np.exp(kde_contact.score_samples(self.evaluation_set_kde['x_grid'].reshape(-1, 1)))
self.evaluation_set_kde['bg'][ab] = np.exp(kde_bg.score_samples(self.evaluation_set_kde['x_grid'].reshape(-1, 1)))
#sample points according to regularizer
std_dev = np.sqrt(1.0/avg_lambda_pair)
regularizer = np.random.normal(scale=std_dev, size=10000)
kde_reg = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(regularizer.reshape(-1, 1))
self.evaluation_set_kde['regularizer'] = np.exp(kde_reg.score_samples(self.evaluation_set_kde['x_grid'].reshape(-1, 1)))
Example 7: test1
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def test1():
X = [[1], [2], [4], [3], [2], [8], [8], [9], [10], [12], [11], [9]]
kde = KernelDensity(kernel='gaussian', bandwidth=0.4).fit(X)
    # score_samples returns log-density; exponentiate to recover densities
    scores = np.exp(kde.score_samples(X))
    print(scores)
Y = [[1], [2], [2], [1], [5], [6], [6], [7], [9], [10], [8], [7]]
    density = np.exp(kde.score_samples(Y))
    print(density)
Example 8: get_P_binary_v_tot
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def get_P_binary_v_tot(proj_sep, delta_v_tot, num_sys=100000):
""" This function calculates the probability of a
random star having the observed proper motion
Parameters
----------
proj_sep : float
Projected separation between two stars
delta_v_tot : float
Total velocity difference between two stars
Returns
-------
P(proj_sep, delta_v_tot) : float
Probability that angular separation, pm+RV difference
is due to a genuine binary
"""
# Catalog check
global binary_set
if binary_set is None:
generate_binary_set(num_sys=num_sys)
    # Use a KDE (tophat kernel) fit on the binary set
global binary_v_tot_kde
# We work in log space for the set of binaries
if binary_v_tot_kde is None:
kwargs = {'kernel':'tophat'}
binary_v_tot_kde = KernelDensity(bandwidth=0.1, **kwargs)
binary_v_tot_kde.fit( np.array([np.log10(binary_set['proj_sep']), np.log10(binary_set['delta_v_tot'])]).T )
if isinstance(delta_v_tot, np.ndarray) and isinstance(proj_sep, np.ndarray):
values = np.array([np.log10(proj_sep), np.log10(delta_v_tot)]).T
prob_binary = np.exp(binary_v_tot_kde.score_samples(values))
elif isinstance(delta_v_tot, np.ndarray):
values = np.array([np.log10(proj_sep)*np.ones(len(delta_v_tot)), np.log10(delta_v_tot)]).T
prob_binary = np.exp(binary_v_tot_kde.score_samples(values))
else:
        prob_binary = np.exp(binary_v_tot_kde.score_samples(
            np.array([[np.log10(proj_sep), np.log10(delta_v_tot)]])))
# Convert back from log10-space to linear-space
# the log(10) terms convert from log10 to ln
prob_binary = prob_binary / (proj_sep*np.log(10.)) / (delta_v_tot*np.log(10.))
return prob_binary
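The final division is a change of variables: the KDE was fit in log10 space, and if y = log10(x) then f_X(x) = f_Y(log10 x) / (x ln 10), applied once per dimension. A small self-contained check of that Jacobian correction in one dimension:

import numpy as np
from sklearn.neighbors import KernelDensity

x = np.random.lognormal(mean=0.0, sigma=0.5, size=5000)    # positive data
kde = KernelDensity(bandwidth=0.05).fit(np.log10(x)[:, np.newaxis])
x_eval = np.linspace(0.2, 5.0, 50)
dens_log10 = np.exp(kde.score_samples(np.log10(x_eval)[:, np.newaxis]))
dens_linear = dens_log10 / (x_eval * np.log(10.0))         # integrates to ~1 over x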
Example 9: plot_agglomerative
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def plot_agglomerative():
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
m = 16
k = 3
X, y = make_blobs(n_samples= m, n_features=2, centers=k, cluster_std=1.3, random_state = 2255)
agg = AgglomerativeClustering(n_clusters=3)
eps = X.std() / 2.
x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
gridpoints = np.c_[xx.ravel().reshape(-1, 1), yy.ravel().reshape(-1, 1)]
ax = plt.gca()
for i, x in enumerate(X):
ax.text(x[0] + .1, x[1], "%d" % i, horizontalalignment='left', verticalalignment='center')
ax.scatter(X[:, 0], X[:, 1], s=20, c='grey')
ax.set_xticks(())
ax.set_yticks(())
for i in range((m-1)):
agg.n_clusters = X.shape[0] - i
agg.fit(X)
bins = np.bincount(agg.labels_)
for cluster in range(agg.n_clusters):
if bins[cluster] > 1:
points = X[agg.labels_ == cluster]
other_points = X[agg.labels_ != cluster]
kde = KernelDensity(bandwidth= 0.9).fit(points)
scores = kde.score_samples(gridpoints)
score_inside = np.min(kde.score_samples(points))
score_outside = np.max(kde.score_samples(other_points))
levels = .80 * score_inside + .20 * score_outside
ax.contour(xx, yy, scores.reshape(100, 100), levels=[levels],
colors='k', linestyles='solid', linewidths=0.8)
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
Author: ktzioumis | Project: dsc-3-35-07-hierarchical-agglomerative-clustering-lab-online-ds-pt-112618 | Lines: 50 | Source: plot_agg.py
Example 10: kde_sklearn
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def kde_sklearn(data, grid, **kwargs):
"""
Kernel Density Estimation with Scikit-learn
Parameters
----------
data : numpy.array
Data points used to compute a density estimator. It
has `n x p` dimensions, representing n points and p
variables.
grid : numpy.array
        Data points at which the density will be estimated. It
has `m x p` dimensions, representing m points and p
variables.
Returns
-------
out : numpy.array
Density estimate. Has `m x 1` dimensions
"""
kde_skl = KernelDensity(**kwargs)
kde_skl.fit(data)
# score_samples() returns the log-likelihood of the samples
log_pdf = kde_skl.score_samples(grid)
return np.exp(log_pdf)
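A usage sketch for kde_sklearn; the data and bandwidth below are illustrative assumptions:

import numpy as np

data = np.random.normal(size=(200, 1))         # n x p sample matrix
grid = np.linspace(-4, 4, 100)[:, np.newaxis]  # m x p evaluation points
dens = kde_sklearn(data, grid, kernel='gaussian', bandwidth=0.3)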
Example 11: draw_posterior_kld_hist
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def draw_posterior_kld_hist(X_kld, X_vae, f_name, bins=25):
"""
Plot KDE-smoothed histograms.
"""
import matplotlib.pyplot as plt
# make a figure and configure an axis
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlabel('Posterior KLd Density')
ax.set_title('Posterior KLds: Over-regularized vs. Standard')
    # ax.hold was removed from matplotlib; overlaying plots is now the default
for (X, style, label) in [(X_kld, '-', 'ORK'), (X_vae, '--', 'VAR')]:
X_samp = X.ravel()[:,np.newaxis]
X_min = np.min(X_samp)
X_max = np.max(X_samp)
X_range = X_max - X_min
sigma = X_range / float(bins)
plot_min = X_min - (X_range/4.0)
plot_max = X_max + (X_range/4.0)
plot_X = np.linspace(plot_min, plot_max, 1000)[:,np.newaxis]
# make a kernel density estimator for the data in X
kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
ax.plot(plot_X, np.exp(kde.score_samples(plot_X)), linestyle=style, label=label)
ax.legend()
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                format='pdf', transparent=False,
                bbox_inches=None, pad_inches=0.1)
plt.close(fig)
return
Example 12: find_kernel
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def find_kernel(data, numgrid = 1000, bw = 0.002):
Xtrain = data[:,0:2]
    ytrain = data[:, 2]
# Set up the data grid for the contour plot
    xgrid = np.linspace(-74.1, -73.65, numgrid)
    ygrid = np.linspace(40.5, 40.8, numgrid)
X, Y = np.meshgrid(xgrid, ygrid)
xy = np.vstack([Y.ravel(), X.ravel()]).T
# Plot map of with distributions of each species
fig = plt.figure()
# construct a kernel density estimate of the distribution
kde = KernelDensity(bandwidth=bw,
kernel='gaussian')
    kde.fit(Xtrain)  # KernelDensity.fit ignores labels; the density is over Xtrain
    # evaluate the estimated log-density on the grid and exponentiate
Z = np.exp(kde.score_samples(xy))
Z = Z.reshape(X.shape)
# plot contours of the density
levels = np.linspace(0, Z.max(), 25)
plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
plt.title('BK CRIME')
plt.show()
return Z
Example 13: sklearn_density
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def sklearn_density(sample_points, evaluation_points):
"""
Estimate the probability density function from which a set of sample
points was drawn and return the estimated density at the evaluation points.
"""
from sklearn.neighbors import KernelDensity
# Silverman bandwidth estimator
n, d = sample_points.shape
bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))
# Standardize data so that we can use uniform bandwidth.
# Note that we will need to scale the resulting density by sigma to
# correct the area.
mu, sigma = mean(sample_points, axis=0), std(sample_points, axis=0)
data, points = (sample_points - mu)/sigma, (evaluation_points - mu)/sigma
#print("starting grid search for bandwidth over %d points"%n)
#from sklearn.grid_search import GridSearchCV
#from numpy import logspace
#params = {'bandwidth': logspace(-1, 1, 20)}
#fitter = GridSearchCV(KernelDensity(), params)
#fitter.fit(data)
#kde = fitter.best_estimator_
#print("best bandwidth: {0}".format(kde.bandwidth))
#import time; T0 = time.time()
kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
rtol=1e-6, atol=1e-6)
#print("T:%6.3f fitting"%(time.time()-T0))
kde.fit(data)
#print("T:%6.3f estimating"%(time.time()-T0))
log_pdf = kde.score_samples(points)
#print("T:%6.3f done"%(time.time()-T0))
return exp(log_pdf)/np.prod(sigma) # undo the x scaling on the data points
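For intuition on the Silverman-style bandwidth rule used above: with n = 1000 standardized 1-D samples it gives (1000 * 3 / 4) ** (-1 / 5), roughly 0.27. A one-line check:

n, d = 1000, 1
bandwidth = (n * (d + 2) / 4.) ** (-1. / (d + 4))  # ~0.266 on standardized data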
Example 14: sklearn_kde
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def sklearn_kde(data, points):
from sklearn.neighbors import KernelDensity
# Silverman bandwidth estimator
n, d = data.shape
bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))
# standardize data so that we can use uniform bandwidth
mu, sigma = mean(data, axis=0), std(data, axis=0)
data, points = (data - mu)/sigma, (points - mu)/sigma
#print("starting grid search for bandwidth over %d points"%n)
#from sklearn.grid_search import GridSearchCV
#from numpy import logspace
#params = {'bandwidth': logspace(-1, 1, 20)}
#fitter = GridSearchCV(KernelDensity(), params)
#fitter.fit(data)
#kde = fitter.best_estimator_
#print("best bandwidth: {0}".format(kde.bandwidth))
#import time; T0 = time.time()
kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
rtol=1e-6, atol=1e-6)
#print("T:%6.3f fitting"%(time.time()-T0))
kde.fit(data)
#print("T:%6.3f estimating"%(time.time()-T0))
log_pdf = kde.score_samples(points)
#print("T:%6.3f done"%(time.time()-T0))
return exp(log_pdf)
Example 15: get_density_based_best_sample
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import score_samples [as alias]
def get_density_based_best_sample(X, known_votes, possibilities):
    total_votes = sum(len(votes) for votes in known_votes)
    print(total_votes)
X = X.toarray()
current_vectors = numpy.copy(X)
#print 'X', X
#print 'known_votes ', known_votes
original_docs = len(X)
possibilities = set([x[0] for x in possibilities])
#print possibilities
for i, sample in enumerate(known_votes):
for k in range(len(sample)):
current_vectors = numpy.append(current_vectors, [X[i]], axis=0)
#print 'current_vectors ', current_vectors, len(current_vectors)
#assert current_vectors != X
model = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(current_vectors)
scores = model.score_samples(X)
if (total_votes % 3):
#Explore low density regions
sorted_scores = sorted(enumerate(scores), key = lambda x: x[1], reverse=True)
else:
#Exploit high density regions 1 times out of 3
sorted_scores = sorted(enumerate(scores), key = lambda x: x[1])
#print sorted_scores
for i in range(original_docs):
if sorted_scores[i][0] in possibilities:
#print sorted_scores[i][0]
return sorted_scores[i][0]
return None
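A hedged usage sketch for the sampler above; the sparse matrix, vote lists, and candidate pairs are invented purely to show the expected shapes:

import numpy
from scipy.sparse import csr_matrix

X = csr_matrix(numpy.random.rand(5, 3))            # 5 documents, 3 features
known_votes = [[1], [], [1, 0], [], []]            # votes collected so far, per doc
possibilities = [(1, None), (3, None), (4, None)]  # (doc_index, ...) candidates
next_doc = get_density_based_best_sample(X, known_votes, possibilities)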