This article collects typical code examples of the Python method sklearn.neighbors.KernelDensity.fit. If you have been wondering what KernelDensity.fit does, how to use it, or what it looks like in practice, the curated examples below should help. You can also read more about its parent class, sklearn.neighbors.KernelDensity.
Fifteen code examples of KernelDensity.fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
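All fifteen examples share the same basic pattern: KernelDensity.fit takes an array of shape (n_samples, n_features) and returns the fitted estimator, after which score_samples returns log-densities. A minimal sketch of that pattern (data and bandwidth are purely illustrative):

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.random.RandomState(0).normal(size=(100, 1))  # (n_samples, n_features)
kde = KernelDensity(kernel='gaussian', bandwidth=0.5)
kde.fit(X)  # fit returns the estimator itself
log_dens = kde.score_samples(np.linspace(-3, 3, 50)[:, np.newaxis])
density = np.exp(log_dens)  # score_samples returns log-density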
Example 1: EstimateDensity
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def EstimateDensity(self, name, df, histogram, f, s, ax):
    # if the desired output is a histogram
    if histogram:
        finRes = []
        lab = []
        for i in range(5):
            res = np.array(df[df[f] == i][s])
            if res.shape[0] > 0:
                finRes.append(res)
                lab.append(name[0] + ' = ' + str(i))
        pl.hist(finRes, bins=2, density=True, histtype='bar', label=lab)
    # if the desired output is a simple line plot of the KDE
    else:
        for i in range(5):
            res = np.array(df[df[f] == i][s])
            if res.shape[0] > 0:
                res = res.reshape(res.shape[0], 1)
                X_plot = np.array(np.linspace(-1, 5, 20)).reshape(20, 1)
                kde = KernelDensity(kernel='exponential', bandwidth=0.05)
                kde.fit(res)
                log_dens = kde.score_samples(X_plot)
                ax.plot(X_plot, np.exp(log_dens), label=name[0] + ' = ' + str(i))
    ax.legend()
    ax.set_title(name[1] + " distribution for changing " + name[0])
Example 2: plot_sklearn_kde
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def plot_sklearn_kde(df, support, column='AirTime', bins=50):
    """
    Plots a KDE and a histogram using sklearn.neighbors.KernelDensity.
    Uses Gaussian kernels.
    The optimal bandwidth is calculated according to Silverman's rule of thumb.

    Parameters
    ----------
    df: A pandas.DataFrame
    support: A 1-d numpy array.
             Input data points for the probability density function.

    Returns
    -------
    A matplotlib.axes.Axes instance.
    """
    bw = get_silverman_bandwidth(df, column)
    kde = KernelDensity(kernel='gaussian', bandwidth=bw)
    x = df[column].values
    kde.fit(x[:, np.newaxis])
    y = kde.score_samples(support[:, np.newaxis])

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.hist(np.ravel(x), bins=bins, alpha=0.5, color=sns.xkcd_rgb["denim blue"], density=True)
    ax.plot(support, np.exp(y))
    ax.set_xlabel(column, fontsize=14)
    ax.set_ylabel('Density', fontsize=14)
    ax.set_title('Kernel Density Plot', fontsize=14)
    sns.despine(ax=ax, offset=5, trim=True)
    return ax
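The helper get_silverman_bandwidth is not shown in this excerpt; a minimal sketch consistent with the docstring (the name and signature are taken from the call above, the body is an assumption) could be:

import numpy as np

def get_silverman_bandwidth(df, column):
    # Hypothetical implementation of Silverman's rule of thumb:
    # bw = 0.9 * min(std, IQR/1.34) * n**(-1/5)
    x = df[column].dropna().values
    n = len(x)
    iqr = np.subtract(*np.percentile(x, [75, 25]))
    sigma = min(np.std(x, ddof=1), iqr / 1.34)
    return 0.9 * sigma * n ** (-0.2)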
Example 3: kde_opt4
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt4 model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            BGK10_output = kdeBGK10(X_feat)
            if BGK10_output is None:
                kde = gaussian_kde(X_feat, "scott")
                kde = gaussian_kde(X_feat, kde.factor * 0.741379)
                y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
            else:
                bandwidth, mesh, density = BGK10_output
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(X_feat[:, np.newaxis])
                y_test_pred_i *= np.exp(kde.score_samples(df_cell_test_feats_kde[feat].values[:, np.newaxis]))
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
Example 4: kde_sklearn
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def kde_sklearn(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scikit-learn

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate evaluated at each grid point; a 1-d
        array with `m` elements.
    """
    kde_skl = KernelDensity(**kwargs)
    kde_skl.fit(data)
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(grid)
    return np.exp(log_pdf)
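For example, with illustrative 2-d data:

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
data = rng.normal(size=(500, 2))           # n x p training sample
grid = rng.uniform(-3, 3, size=(100, 2))   # m x p evaluation points
dens = kde_sklearn(data, grid, kernel='gaussian', bandwidth=0.5)
print(dens.shape)  # (100,) -- one density value per grid point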
Example 5: surface_density
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def surface_density(c, bandwidth=0.2, grid_step=0.02):
    """
    Given particle positions as a coordinate object, compute the
    surface density using a kernel density estimate.
    """
    if not HAS_SKLEARN:
        raise ImportError("scikit-learn is required to use this function.")

    xgrid = np.arange(2., 9. + 0.1, grid_step)  # deg
    ygrid = np.arange(26.5, 33.5 + 0.1, grid_step)  # deg
    shp = (xgrid.size, ygrid.size)
    meshies = np.meshgrid(xgrid, ygrid)
    grid = np.vstack(list(map(np.ravel, meshies))).T

    x = c.l.degree
    y = c.b.degree
    skypos = np.vstack((x, y)).T

    kde = KernelDensity(bandwidth=bandwidth, kernel='epanechnikov')
    kde.fit(skypos)

    dens = np.exp(kde.score_samples(grid)).reshape(meshies[0].shape)
    log_dens = np.log10(dens)
    return grid, log_dens
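A hypothetical call, assuming HAS_SKLEARN is True and c is an astropy Galactic-frame coordinate object (so that c.l.degree and c.b.degree exist); the coordinates are made up to fall inside the hard-coded grid:

import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord

rng = np.random.RandomState(42)
c = SkyCoord(l=rng.uniform(2., 9., 1000) * u.deg,
             b=rng.uniform(26.5, 33.5, 1000) * u.deg,
             frame='galactic')
grid, log_dens = surface_density(c, bandwidth=0.2)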
Example 6: kdescatter
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def kdescatter(xs, ys, log_color=False, atol=1e-4, rtol=1e-4,
               n_jobs=1, n_samp_scaling=100, n_samp_tuning=1000, ax=None,
               **kwargs):
    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt
    kwargs.setdefault('linewidths', 0)
    kwargs.setdefault('s', 20)
    kwargs.setdefault('cmap', 'winter')

    X = np.asarray([xs, ys]).T
    n = X.shape[0]
    samp_X = X[np.random.choice(n, min(n_samp_scaling, n), replace=False)]
    median_sqdist = np.median(euclidean_distances(samp_X, squared=True))
    bws = np.logspace(-2, 2, num=10) * np.sqrt(median_sqdist)
    est = GridSearchCV(KernelDensity(), {'bandwidth': bws}, n_jobs=n_jobs)
    est.fit(X[np.random.choice(n, min(n_samp_tuning, n), replace=False)])
    bw = est.best_params_['bandwidth']

    kde = KernelDensity(bandwidth=bw)
    kde.fit(X)
    densities = kde.score_samples(X)
    if not log_color:
        np.exp(densities, out=densities)
    ax.scatter(xs, ys, c=densities, **kwargs)
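A minimal driver (illustrative data), assuming euclidean_distances comes from sklearn.metrics.pairwise and GridSearchCV from sklearn.model_selection in the module where kdescatter is defined:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(1)
xs = rng.normal(size=2000)
ys = xs + 0.5 * rng.normal(size=2000)
kdescatter(xs, ys)  # colors each point by its estimated local density
plt.show()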
Example 7: sklearn_kde
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def sklearn_kde(data, points):
    from sklearn.neighbors import KernelDensity

    # Silverman bandwidth estimator
    n, d = data.shape
    bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))

    # standardize data so that we can use uniform bandwidth
    mu, sigma = mean(data, axis=0), std(data, axis=0)
    data, points = (data - mu)/sigma, (points - mu)/sigma

    #print("starting grid search for bandwidth over %d points"%n)
    #from sklearn.grid_search import GridSearchCV
    #from numpy import logspace
    #params = {'bandwidth': logspace(-1, 1, 20)}
    #fitter = GridSearchCV(KernelDensity(), params)
    #fitter.fit(data)
    #kde = fitter.best_estimator_
    #print("best bandwidth: {0}".format(kde.bandwidth))
    #import time; T0 = time.time()
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                        rtol=1e-6, atol=1e-6)
    #print("T:%6.3f fitting"%(time.time()-T0))
    kde.fit(data)
    #print("T:%6.3f estimating"%(time.time()-T0))
    log_pdf = kde.score_samples(points)
    #print("T:%6.3f done"%(time.time()-T0))
    return exp(log_pdf)
Example 8: find_kernel
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def find_kernel(data, numgrid=1000, bw=0.002):
    Xtrain = data[:, 0:2]
    ytrain = data[:, 2]

    # Set up the data grid for the contour plot
    xgrid = np.linspace(-74.1, -73.65, numgrid)
    ygrid = np.linspace(40.5, 40.8, numgrid)
    X, Y = np.meshgrid(xgrid, ygrid)
    xy = np.vstack([Y.ravel(), X.ravel()]).T

    # Plot map with the distribution of each species
    fig = plt.figure()

    # construct a kernel density estimate of the distribution
    kde = KernelDensity(bandwidth=bw, kernel='gaussian')
    kde.fit(Xtrain, y=ytrain)  # y is accepted but ignored by KernelDensity.fit

    # evaluate only on the land: -9999 indicates ocean
    Z = np.exp(kde.score_samples(xy))
    Z = Z.reshape(X.shape)

    # plot contours of the density
    levels = np.linspace(0, Z.max(), 25)
    plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
    plt.title('BK CRIME')
    plt.show()
    return Z
Example 9: sklearn_density
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def sklearn_density(sample_points, evaluation_points):
    """
    Estimate the probability density function from which a set of sample
    points was drawn and return the estimated density at the evaluation points.
    """
    from sklearn.neighbors import KernelDensity

    # Silverman bandwidth estimator
    n, d = sample_points.shape
    bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))

    # Standardize data so that we can use uniform bandwidth.
    # Note that we will need to scale the resulting density by sigma to
    # correct the area.
    mu, sigma = mean(sample_points, axis=0), std(sample_points, axis=0)
    data, points = (sample_points - mu)/sigma, (evaluation_points - mu)/sigma

    #print("starting grid search for bandwidth over %d points"%n)
    #from sklearn.grid_search import GridSearchCV
    #from numpy import logspace
    #params = {'bandwidth': logspace(-1, 1, 20)}
    #fitter = GridSearchCV(KernelDensity(), params)
    #fitter.fit(data)
    #kde = fitter.best_estimator_
    #print("best bandwidth: {0}".format(kde.bandwidth))
    #import time; T0 = time.time()
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                        rtol=1e-6, atol=1e-6)
    #print("T:%6.3f fitting"%(time.time()-T0))
    kde.fit(data)
    #print("T:%6.3f estimating"%(time.time()-T0))
    log_pdf = kde.score_samples(points)
    #print("T:%6.3f done"%(time.time()-T0))
    return exp(log_pdf)/np.prod(sigma)  # undo the x scaling on the data points
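A quick sanity check with illustrative data (this assumes the module does `from numpy import mean, std, exp`, as the bare calls above imply); dividing by prod(sigma) puts the density back in the original units so it integrates to 1:

import numpy as np

rng = np.random.RandomState(0)
sample = rng.normal(loc=[0., 5.], scale=[1., 10.], size=(2000, 2))
eval_pts = np.array([[0., 5.], [1., 15.]])
print(sklearn_density(sample, eval_pts))  # density is highest at the mean [0, 5]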
Example 10: cistrans
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def cistrans(args):
    cob = co.COB(args.cob)
    if args.out is None:
        args.out = '{}_cistrans'.format(cob.name)
    # np.newaxis adds an empty axis in that position of the slice;
    # the sklearn module requires the values to be in the rows:
    # http://scikit-learn.org/stable/auto_examples/neighbors/plot_kde_1d.html
    cis = cob.coex \
        .score[cob.coex.distance <= args.cis_distance] \
        .values[:, np.newaxis]
    trans = cob.coex \
        .score[np.isinf(cob.coex.distance)] \
        .values[:, np.newaxis]
    X_plot = np.linspace(-10, 10, 1000)[:, np.newaxis]
    print(
        'Found {:,} cis interactions and {:,} trans interactions'.format(
            cis.shape[0],
            trans.shape[0]
        ))

    # Fit the kernel on the cis scores
    kd = KernelDensity(bandwidth=0.2)
    kd.fit(cis)
    cis_kde = np.exp(kd.score_samples(X_plot))
    plt.fill(X_plot, cis_kde, alpha=0.5, label='Cis Interactions')

    # Fit the trans scores (subsampled for speed)
    kd.fit(trans[0:50000])
    trans_kde = np.exp(kd.score_samples(X_plot))
    plt.fill(X_plot, trans_kde, alpha=0.5, label='Trans Interactions')
    plt.legend()
    plt.title('Cis vs Trans Density: {}'.format(cob.name))

    # Calculate the Mann-Whitney U test
    u, pval = sp.stats.mannwhitneyu(cis[:, 0], trans[:, 0])
    print('P-val: {}'.format(pval))
    plt.savefig(args.out + '.png')
Example 11: kde_sklearn
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
    """Kernel Density Estimation with Scikit-learn"""
    kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
    kde_skl.fit(x[:, np.newaxis])
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis])
    return np.exp(log_pdf)
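For instance, to compare two bandwidths on the same grid (illustrative bimodal data):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
x = np.concatenate([rng.normal(0, 1, 300), rng.normal(5, 1, 300)])
x_grid = np.linspace(-4, 9, 500)
pdf_smooth = kde_sklearn(x, x_grid, bandwidth=0.75)  # smoother estimate
pdf_spiky = kde_sklearn(x, x_grid, bandwidth=0.10)   # spikier estimate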
Example 12: set_plx_kde
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def set_plx_kde(t, bandwidth=0.3, method='sklearn_kde'):
    """ Set the plx_kde

    Parameters
    ----------
    t : ndarray float
        Catalog of parallax measures (units: mas)
    bandwidth : float
        Bandwidth for gaussian_kde (optional, 0.01 recommended)
    method : string
        Method for density determination (options: scipy_kde, sklearn_kde, blocks)
    """
    global plx_kde

    if method == 'scipy_kde':
        if plx_kde is None:
            # We are only going to allow parallaxes above some minimum value
            if bandwidth is None:
                plx_kde = gaussian_kde(t['plx'][t['plx'] > 0.0])
            else:
                plx_kde = gaussian_kde(t['plx'][t['plx'] > 0.0], bw_method=bandwidth)
    elif method == 'sklearn_kde':
        if plx_kde is None:
            kwargs = {'kernel': 'tophat'}
            if bandwidth is None:
                plx_kde = KernelDensity(**kwargs)
            else:
                plx_kde = KernelDensity(bandwidth=bandwidth, **kwargs)
            if c.kde_subset:
                plx_ran = np.copy(t['plx'][t['plx'] > 0.0])
                np.random.shuffle(plx_ran)
                plx_kde.fit(plx_ran[0:5000, np.newaxis])
            else:
                plx_kde.fit(t['plx'][t['plx'] > 0.0][:, np.newaxis])
    elif method == 'blocks':
        global plx_bins_blocks
        global plx_hist_blocks

        # Set up Bayesian Blocks
        print("Calculating Bayesian Blocks...")
        nbins = np.min([len(t), 40000])
        bins = bayesian_blocks(t['plx'][t['plx'] > 0.0][0:nbins])
        hist, bins = np.histogram(t['plx'][t['plx'] > 0.0][0:nbins], bins=bins, density=True)

        # Pad with zeros
        plx_bins_blocks = np.append(-1.0e100, bins)
        hist_pad = np.append(0.0, hist)
        plx_hist_blocks = np.append(hist_pad, 0.0)
        print("Bayesian Blocks set.")
    else:
        print("You must include a valid method")
        print("Options: scipy_kde, sklearn_kde, or blocks")
        return
Example 13: estimate_density
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def estimate_density(city):
    """Return a Gaussian KDE of venues in `city`."""
    kde = KernelDensity(bandwidth=175, rtol=1e-4)
    surround = xp.build_surrounding(DB.venue, city, likes=-1, checkins=1)
    kde.fit(surround.venues[:, :2])
    max_density = approximate_maximum_density(kde, surround.venues[:, :2])
    # pylint: disable=E1101
    return lambda xy: np.exp(kde.score_samples(xy))/max_density
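approximate_maximum_density is defined elsewhere in the project; a plausible stand-in (an assumption, not the original implementation) simply scores the training points and takes the maximum:

import numpy as np

def approximate_maximum_density(kde, points):
    # Hypothetical: approximate the KDE maximum by evaluating at the
    # training points themselves; the true mode may lie between them.
    return np.exp(kde.score_samples(points)).max()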
Example 14: train_patient_flow_estimator
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def train_patient_flow_estimator(df, bandwidth=1.0):
    """Train density estimator based on patient metric"""
    X = df.drop(['ADMIT_DATE'], axis=1).values
    estimator = KernelDensity(bandwidth=bandwidth,
                              kernel='gaussian',
                              metric='pyfunc',
                              metric_params={'func': patient_metric})
    estimator.fit(X)
    return estimator
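patient_metric is user-supplied and not part of the excerpt; with metric='pyfunc', the underlying BallTree expects a function taking two 1-d feature arrays and returning a float. A purely hypothetical placeholder (not the project's actual metric):

import numpy as np

def patient_metric(a, b):
    # Hypothetical placeholder: plain Euclidean distance between
    # two patient feature vectors.
    return np.sqrt(np.sum((a - b) ** 2))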
Example 15: CrossValidationScore
# Required import: from sklearn.neighbors import KernelDensity [as alias]
# Or: from sklearn.neighbors.KernelDensity import fit [as alias]
def CrossValidationScore(Xs, h, kernel='gaussian'):
    """Mean leave-one-out log-likelihood of a KDE with bandwidth h."""
    kde = KernelDensity(bandwidth=h, kernel=kernel)
    ret = 0.
    for i in range(len(Xs)):
        # hold out sample i and fit on all remaining samples
        x = np.concatenate([Xs[0:i], Xs[i+1:]])
        kde.fit(x)
        ret += kde.score_samples(Xs[i].reshape(1, -1))[0]
    ret /= (1. * len(Xs))
    return ret
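A sketch of how this score could drive bandwidth selection, picking the h that maximizes the mean leave-one-out log-likelihood (data and candidate grid are illustrative):

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
Xs = rng.normal(size=(200, 1))
bandwidths = np.logspace(-1.5, 0.5, 20)
scores = [CrossValidationScore(Xs, h) for h in bandwidths]
best_h = bandwidths[int(np.argmax(scores))]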