This article collects typical usage examples of the KernelDensity class from Python's sklearn.neighbors module. If you have been wondering what KernelDensity is for, how to use it, or what it looks like in practice, the curated class examples below may help.
Fifteen code examples of the KernelDensity class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
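The examples are excerpts from larger projects, so module-level imports are omitted; unless stated otherwise they assume numpy as np, pandas as pd, matplotlib.pyplot as plt, and from sklearn.neighbors import KernelDensity, plus whatever project-specific helpers they call. As a minimal, self-contained sketch of the pattern every example builds on (fit on an (n_samples, n_features) array, then exponentiate the log-density that score_samples returns):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(0)
x = rng.normal(size=(200, 1))           # training data: (n_samples, n_features)
kde = KernelDensity(kernel='gaussian', bandwidth=0.4).fit(x)
grid = np.linspace(-4, 4, 100)[:, np.newaxis]
pdf = np.exp(kde.score_samples(grid))   # score_samples returns log-density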
Example 1: test2
def test2():
    # three overlapping blobs of points on [0, 10]
    arr = np.concatenate((np.linspace(0, 10, 10), np.linspace(2, 4, 10), np.linspace(7, 10, 10)))[:, np.newaxis]
    kde = KernelDensity(kernel='gaussian', bandwidth=0.75).fit(arr)
    X = np.linspace(0, 10, 1000)[:, np.newaxis]
    log_dens = kde.score_samples(X)
    plt.plot(X, log_dens)  # note: this plots the log-density; use np.exp(log_dens) for the PDF
    plt.show()
Example 2: kdescatter
def kdescatter(xs, ys, log_color=False, atol=1e-4, rtol=1e-4,
               n_jobs=1, n_samp_scaling=100, n_samp_tuning=1000, ax=None,
               **kwargs):
    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt
    kwargs.setdefault('linewidths', 0)
    kwargs.setdefault('s', 20)
    kwargs.setdefault('cmap', 'winter')

    X = np.asarray([xs, ys]).T
    n = X.shape[0]
    # the median pairwise squared distance sets the scale for candidate bandwidths
    samp_X = X[np.random.choice(n, min(n_samp_scaling, n), replace=False)]
    median_sqdist = np.median(euclidean_distances(samp_X, squared=True))
    bws = np.logspace(-2, 2, num=10) * np.sqrt(median_sqdist)
    # pick the bandwidth by cross-validated log-likelihood on a subsample
    est = GridSearchCV(KernelDensity(), {'bandwidth': bws}, n_jobs=n_jobs)
    est.fit(X[np.random.choice(n, min(n_samp_tuning, n), replace=False)])
    bw = est.best_params_['bandwidth']

    kde = KernelDensity(bandwidth=bw)
    kde.fit(X)
    densities = kde.score_samples(X)
    if not log_color:
        np.exp(densities, out=densities)
    ax.scatter(xs, ys, c=densities, **kwargs)
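Two design choices here are worth noting: the median pairwise squared distance gives a characteristic length scale for the data, so the logarithmically spaced bandwidth candidates bracket that scale by two orders of magnitude in each direction, and GridSearchCV then picks the candidate that maximizes cross-validated log-likelihood. Both steps run on random subsamples (n_samp_scaling and n_samp_tuning points) so the tuning cost stays flat as the scatter plot grows.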
Example 3: max_prob
def max_prob(df):
    """For each row of df, return the value where the KDE of the row's
    non-null entries peaks (an estimate of the row's mode)."""
    df_tmp = df.copy()
    arr = []
    for ind in df_tmp.index:
        row = df_tmp.loc[ind]
        d = row.dropna().values
        if len(d) == 0:
            arr.append(np.nan)
            continue
        x_grid = np.linspace(d.min(), d.max(), 50).reshape(-1, 1)
        d = d.reshape(-1, 1)
        kde = KernelDensity().fit(d)
        log_dens = kde.score_samples(x_grid)
        vals = np.exp(log_dens).round(4)
        centre = x_grid[vals.argmax()][0]
        # TODO first element adds unnecessary decimal places (use decimal places class to fix)
        arr.append(round(centre, 4))
    return arr
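A quick, made-up illustration of max_prob (toy DataFrame, not from the original project): each returned value sits where that row's entries cluster most densely.

import pandas as pd
df = pd.DataFrame({'a': [1.0, 5.0], 'b': [1.1, np.nan],
                   'c': [0.9, 5.2], 'd': [8.0, 4.8]})
print(max_prob(df))  # roughly [1.0, 5.0]: the dense cluster in each row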
Example 4: surface_density
def surface_density(c, bandwidth=0.2, grid_step=0.02):
    """
    Given particle positions as a coordinate object, compute the
    surface density using a kernel density estimate.
    """
    if not HAS_SKLEARN:
        raise ImportError("scikit-learn is required to use this function.")

    xgrid = np.arange(2., 9.+0.1, grid_step)      # deg
    ygrid = np.arange(26.5, 33.5+0.1, grid_step)  # deg
    meshies = np.meshgrid(xgrid, ygrid)
    # list(...) because np.vstack no longer accepts a bare map iterator
    grid = np.vstack(list(map(np.ravel, meshies))).T

    x = c.l.degree
    y = c.b.degree
    skypos = np.vstack((x, y)).T

    kde = KernelDensity(bandwidth=bandwidth, kernel='epanechnikov')
    kde.fit(skypos)

    # the Epanechnikov kernel has compact support, so the density is exactly
    # zero far from any particle and log_dens is -inf there
    dens = np.exp(kde.score_samples(grid)).reshape(meshies[0].shape)
    log_dens = np.log10(dens)
    return grid, log_dens
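surface_density expects c to expose Galactic longitude and latitude as c.l.degree and c.b.degree, which matches an astropy SkyCoord in the 'galactic' frame; a usage sketch under that assumption (and assuming the module's HAS_SKLEARN flag is set):

import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord

# fake particle positions inside the grid window used above
l = np.random.uniform(2., 9., 500)
b = np.random.uniform(26.5, 33.5, 500)
c = SkyCoord(l=l * u.deg, b=b * u.deg, frame='galactic')
grid, log_dens = surface_density(c, bandwidth=0.2)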
Example 5: plot_sklearn_kde
def plot_sklearn_kde(df, support, column='AirTime', bins=50):
    """
    Plots a KDE and a histogram using sklearn.KernelDensity.
    Uses Gaussian kernels.
    The optimal bandwidth is calculated according to Silverman's rule of thumb.

    Parameters
    ----------
    df: A pandas.DataFrame
    support: A 1-d numpy array.
             Input data points for the probability density function.
    column: The df column to plot.
    bins: The number of histogram bins.

    Returns
    -------
    A matplotlib.axes.Axes instance.
    """
    bw = get_silverman_bandwidth(df, column)
    kde = KernelDensity(kernel='gaussian', bandwidth=bw)
    # .values: slicing a pandas Series with np.newaxis is no longer supported
    x = df[column].values
    kde.fit(x[:, np.newaxis])
    y = kde.score_samples(support[:, np.newaxis])

    fig, ax = plt.subplots(figsize=(8, 5))
    # density=True replaces the normed argument removed in matplotlib 3.1
    ax.hist(np.ravel(x), bins=bins, alpha=0.5, color=sns.xkcd_rgb["denim blue"], density=True)
    ax.plot(support, np.exp(y))
    ax.set_xlabel(column, fontsize=14)
    ax.set_ylabel('Density', fontsize=14)
    ax.set_title('Kernel Density Plot', fontsize=14)
    sns.despine(ax=ax, offset=5, trim=True)
    return ax
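get_silverman_bandwidth is not shown on this page. A minimal sketch of what such a helper might compute, assuming the common form of Silverman's rule of thumb, bw = 0.9 * min(std, IQR/1.34) * n^(-1/5):

def get_silverman_bandwidth(df, column):
    # hypothetical helper: the real project's implementation is not shown here
    x = df[column].dropna().values
    n = len(x)
    iqr = np.subtract(*np.percentile(x, [75, 25]))  # interquartile range
    return 0.9 * min(x.std(ddof=1), iqr / 1.34) * n ** (-0.2)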
Example 6: kde_opt4
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt4 model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            BGK10_output = kdeBGK10(X_feat)
            if BGK10_output is None:
                # fall back to scipy's gaussian_kde with a rescaled Scott bandwidth
                kde = gaussian_kde(X_feat, "scott")
                kde = gaussian_kde(X_feat, kde.factor * 0.741379)
                y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
            else:
                bandwidth, mesh, density = BGK10_output
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(X_feat[:, np.newaxis])
                y_test_pred_i *= np.exp(kde.score_samples(df_cell_test_feats_kde[feat].values[:, np.newaxis]))
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
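Multiplying the per-feature densities treats the features as conditionally independent given the class: effectively a naive-Bayes classifier whose class-conditional densities are one-dimensional KDEs. kdeBGK10 is not shown on this page; judging by how its output is unpacked, it supplies a data-driven bandwidth (the mesh and density it also returns are unused here), with scipy's gaussian_kde as the fallback when it fails. Note that for one-dimensional data the manhattan and euclidean metrics coincide, so metric='manhattan' is harmless.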
Example 7: kde_sklearn
def kde_sklearn(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scikit-learn

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde_skl = KernelDensity(**kwargs)
    kde_skl.fit(data)
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(grid)
    return np.exp(log_pdf)
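A minimal call with made-up 2-D data (keyword arguments such as bandwidth are forwarded straight to KernelDensity):

rng = np.random.default_rng(0)
data = rng.normal(size=(500, 2))
xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50))
grid = np.column_stack([xx.ravel(), yy.ravel()])
dens = kde_sklearn(data, grid, bandwidth=0.5)   # shape (2500,)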
Example 8: draw_posterior_kld_hist
def draw_posterior_kld_hist(X_kld, X_vae, f_name, bins=25):
    """
    Plot KDE-smoothed histograms.
    """
    import matplotlib.pyplot as plt
    # make a figure and configure an axis
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel('Posterior KLd Density')
    ax.set_title('Posterior KLds: Over-regularized vs. Standard')
    # (ax.hold was removed in matplotlib 3.0; repeated plot calls accumulate by default)
    for (X, style, label) in [(X_kld, '-', 'ORK'), (X_vae, '--', 'VAR')]:
        X_samp = X.ravel()[:, np.newaxis]
        X_min = np.min(X_samp)
        X_max = np.max(X_samp)
        X_range = X_max - X_min
        sigma = X_range / float(bins)
        plot_min = X_min - (X_range / 4.0)
        plot_max = X_max + (X_range / 4.0)
        plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
        # make a kernel density estimator for the data in X
        kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
        ax.plot(plot_X, np.exp(kde.score_samples(plot_X)), linestyle=style, label=label)
    ax.legend()
    # the papertype and frameon arguments were removed from savefig in matplotlib 3.x
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format='pdf',
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return
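Note the bandwidth choice: setting sigma to X_range / bins makes the KDE's smoothing scale match that of a histogram with `bins` bins over the same data, which is why `bins` is the function's only tuning knob. The same convention appears in Examples 11 and 12 below.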
Example 9: kde_sklearn
def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
    """Kernel Density Estimation with Scikit-learn"""
    kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
    kde_skl.fit(x[:, np.newaxis])
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis])
    return np.exp(log_pdf)
Example 10: pdf
def pdf(self, token, years, bandwidth=5):
    """
    Estimate a density function from a token's rank series.

    Args:
        token (str)
        years (range)

    Returns: OrderedDict {year: density}
    """
    series = self.series(token)
    # weight each year by its (rounded) frequency, by repeating the row
    data = []
    for year, wpm in series.items():
        data += [year] * round(wpm)
    data = np.array(data)[:, np.newaxis]
    pdf = KernelDensity(bandwidth=bandwidth).fit(data)
    samples = OrderedDict()
    for year in years:
        # score_samples expects a 2-d array and returns log-density;
        # the original called pdf.score(year), which errors on a bare scalar
        samples[year] = np.exp(pdf.score_samples([[year]])[0])
    return samples
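Since scikit-learn 0.20, KernelDensity.fit accepts a sample_weight argument, so the repeat-each-year trick above can be avoided entirely; a sketch under the same {year: wpm} mapping (equivalent up to the rounding):

# weighted fit without materializing repeated rows (scikit-learn >= 0.20)
years_arr = np.array(list(series.keys()), dtype=float)[:, np.newaxis]
weights = np.array(list(series.values()), dtype=float)  # weights must be positive
pdf = KernelDensity(bandwidth=bandwidth).fit(years_arr, sample_weight=weights)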
Example 11: plot_kde_histogram2
def plot_kde_histogram2(X1, X2, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X1/X2. Assume data is 1D.
    """
    import matplotlib.pyplot as plt
    # make a figure and configure an axis
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for (X, style) in [(X1, '-'), (X2, '--')]:
        X_samp = X.ravel()[:, np.newaxis]
        X_min = np.min(X_samp)
        X_max = np.max(X_samp)
        X_range = X_max - X_min
        sigma = X_range / float(bins)
        plot_min = X_min - (X_range / 3.0)
        plot_max = X_max + (X_range / 3.0)
        plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
        # make a kernel density estimator for the data in X
        kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
        ax.plot(plot_X, np.exp(kde.score_samples(plot_X)), linestyle=style)
    # papertype and frameon were removed from savefig in matplotlib 3.x
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return
Example 12: plot_kde_histogram
def plot_kde_histogram(X, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X. Assume data is univariate.
    """
    import matplotlib.pyplot as plt
    # subsample very large inputs to keep the KDE fit cheap
    X = X.ravel()
    np.random.shuffle(X)
    X = X[0:min(X.shape[0], 1000000)]
    X_samp = X[:, np.newaxis]
    X_min = np.min(X_samp)
    X_max = np.max(X_samp)
    X_range = X_max - X_min
    sigma = X_range / float(bins)
    plot_min = X_min - (X_range / 3.0)
    plot_max = X_max + (X_range / 3.0)
    plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
    # make a kernel density estimator for the data in X
    kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
    # make a figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(plot_X, np.exp(kde.score_samples(plot_X)))
    # papertype and frameon were removed from savefig in matplotlib 3.x
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return
Example 13: find_kernel
def find_kernel(data, numgrid=1000, bw=0.002):
    Xtrain = data[:, 0:2]
    ytrain = data[:, 2]  # labels; unused below, since KernelDensity is unsupervised

    # Set up the data grid for the contour plot
    xgrid = np.linspace(-74.1, -73.65, numgrid)  # numgrid is the count, not a keyword
    ygrid = np.linspace(40.5, 40.8, numgrid)
    X, Y = np.meshgrid(xgrid, ygrid)
    xy = np.vstack([Y.ravel(), X.ravel()]).T

    # Plot a map of the estimated density
    fig = plt.figure()

    # construct a kernel density estimate of the distribution
    kde = KernelDensity(bandwidth=bw, kernel='gaussian')
    kde.fit(Xtrain)  # fit(X, y) would simply ignore y

    Z = np.exp(kde.score_samples(xy))
    Z = Z.reshape(X.shape)

    # plot contours of the density
    levels = np.linspace(0, Z.max(), 25)
    plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
    plt.title('BK CRIME')
    plt.show()
    return Z
Example 14: KDE_plt
def KDE_plt(categories, inter_arrivals):
    KDEs = []
    for i in range(0, len(categories)):
        X = np.asarray(extract_cat_samples(inter_arrivals, categories, i))  # single inter-arrivals in a category
        #X = np_matrix(categories[i][0])  # for avg(inter-arrival)/person in a category
        kde = KernelDensity(kernel='gaussian', bandwidth=4).fit(X)
        KDEs.append(kde)  # to use for prob_return()
        max_sample = max_interarrival_mean(categories, inter_arrivals, i)
        X_plot = np.linspace(0, 1.5 * max_sample, 2000)[:, np.newaxis]
        log_dens = kde.score_samples(X_plot)
        plt.figure(i)
        plt.plot(X_plot[:, 0], np.exp(log_dens), '-', label="kernel = '{0}'".format('gaussian'))
        # density=True replaces the normed argument removed in matplotlib 3.1;
        # alpha runs from 0 (transparent) to 1 (opaque)
        plt.hist(combine_inner_lists(extract_cat_samples(inter_arrivals, categories, i)),
                 bins=40, density=True, color="cyan", alpha=.3, label="histogram")
        plt.xlabel("inter-arrival time (days)")
        plt.ylabel("PDF")
        plt.legend()
        save_as = './app/static/img/cat_result/kde/kdeplt_cat' + str(i) + '.png'  # dump result into the kde folder
        plt.savefig(save_as)
        plt.show(block=False)
        plt.close(plt.figure(i))
    return KDEs
Example 15: test_kernel_density_sampling
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a tophat sample
        # (pass bandwidth by keyword; recent scikit-learn makes it keyword-only)
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range: query the drawn samples
        # (querying X against itself would give all-zero distances)
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(samp, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1))
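As this test documents, KernelDensity.sample is implemented only for the 'gaussian' and 'tophat' kernels; the other kernels raise NotImplementedError, so code that needs draws from, say, an exponential-kernel fit has to roll its own sampler.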