This page collects typical code examples of the Python class sklearn.neighbors.kde.KernelDensity, gathered from open-source projects: how it is imported, how it is configured, and what it is used for. You can also explore the containing module, sklearn.neighbors.kde, for more context. Note that this module path was deprecated in scikit-learn 0.22 and removed in 0.24; in current versions, import KernelDensity directly from sklearn.neighbors.
Six code examples of kde.KernelDensity are shown below, ordered by popularity.
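Before the examples, a minimal sketch of the KernelDensity API itself: fit takes an array of shape (n_samples, n_features), score_samples returns the log-density at query points, and sample draws new points from the fitted density. The data below is synthetic.

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X = rng.normal(loc=0.0, scale=1.0, size=(200, 1))  # synthetic training data

kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)
log_density = kde.score_samples(X)           # log p(x) at each training point
new_points = kde.sample(5, random_state=0)   # 5 draws from the fitted density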
Example 1: get_dist
# Required import: from sklearn.neighbors import kde [as alias]
# Or: from sklearn.neighbors.kde import KernelDensity [as alias]
import numpy as np
from sklearn import svm
from sklearn.neighbors import KernelDensity

# C is a configuration object from the source project that holds the
# kernel/bandwidth and one-class-SVM settings.

def get_dist(data_list, method):
    Xnumpy = np.asarray(data_list)
    X = Xnumpy.reshape(-1, 1)
    dist = None
    if method == "raw":
        dist = data_list  # raw column data
    elif method == "kd":
        # Fit a KDE and return the log-density of every sample.
        kde = KernelDensity(
            kernel=C.kd["kernel"],
            bandwidth=C.kd["bandwidth"]
        ).fit(X)
        dist = kde.score_samples(X)
    elif method == "odsvm":
        svmachine = svm.OneClassSVM(
            nu=C.odsvm["nu"],
            kernel=C.odsvm["kernel"],
            gamma=C.odsvm["gamma"]
        )
        # Note: fit() returns the fitted estimator itself, not scores.
        dist = svmachine.fit(X)
    return dist
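A usage sketch for get_dist; the C object below is a hypothetical stand-in for the source project's configuration module (its structure is inferred from the accesses C.kd[...] and C.odsvm[...] above):

from types import SimpleNamespace

# Hypothetical stand-in for the project's config module C.
C = SimpleNamespace(
    kd={"kernel": "gaussian", "bandwidth": 0.5},
    odsvm={"nu": 0.1, "kernel": "rbf", "gamma": 0.1},
)

log_densities = get_dist([1.0, 1.2, 0.9, 5.0, 1.1], method="kd")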
Example 2: plot_kernel_density
# Required import: from sklearn.neighbors import kde [as alias]
# Or: from sklearn.neighbors.kde import KernelDensity [as alias]
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

# utils is a helper module from the source project; check_col validates
# the input column.

def plot_kernel_density(col, verbose=True):
    """Plots the kernel density function of a column.

    From:
    https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/

    Parameters
    ----------
    col : np.ndarray
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the plot
    """
    # TODO address passing an entire matrix
    # TODO respect missing_val
    # TODO what does n do?
    col = utils.check_col(col)
    x_grid = np.linspace(min(col), max(col), 1000)
    # Select the bandwidth by 20-fold cross-validation over a small grid.
    grid = GridSearchCV(KernelDensity(),
                        {'bandwidth': np.linspace(0.1, 1.0, 30)}, cv=20)
    grid.fit(col[:, None])
    kde = grid.best_estimator_
    # score_samples returns log-density; exponentiate to get the PDF.
    pdf = np.exp(kde.score_samples(x_grid[:, None]))
    fig, ax = plt.subplots()
    ax.plot(x_grid, pdf, linewidth=3, alpha=0.5,
            label='bw=%.2f' % kde.bandwidth)
    # normed= was removed in Matplotlib 3.1; density= is the replacement.
    ax.hist(col, 30, fc='gray', histtype='stepfilled', alpha=0.3,
            density=True)
    ax.legend(loc='upper left')
    ax.set_xlim(min(col), max(col))
    if verbose:
        plt.show()
    return fig
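The cross-validated bandwidth selection is useful on its own; a standalone sketch of the same idea on synthetic data (GridSearchCV uses KernelDensity's built-in score, the total log-likelihood, to rank candidates):

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

col = np.random.RandomState(0).normal(size=300)
grid = GridSearchCV(KernelDensity(),
                    {'bandwidth': np.linspace(0.1, 1.0, 30)}, cv=20)
grid.fit(col[:, None])
print(grid.best_params_['bandwidth'])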
Example 3: get_numerical_signature
# Required import: from sklearn.neighbors import kde [as alias]
# Or: from sklearn.neighbors.kde import KernelDensity [as alias]
import numpy as np
from sklearn.neighbors import KernelDensity

# C is a configuration object from the source project (kernel/bandwidth).

def get_numerical_signature(values, S):
    '''
    Learns a distribution of the values,
    then generates a sample of size S.
    '''
    # Transform data to a numpy column vector
    Xnumpy = np.asarray(values)
    X = Xnumpy.reshape(-1, 1)
    # Learn the kernel density estimate
    kde = KernelDensity(
        kernel=C.kd["kernel"],
        bandwidth=C.kd["bandwidth"]
    ).fit(X)
    # Draw S samples, one at a time
    sig_v = [kde.sample()[0][0] for x in range(S)]
    return sig_v
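The per-sample loop above works but makes S separate calls; sample can draw all S points at once. A self-contained sketch with made-up data:

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.asarray([3.1, 2.9, 3.0, 3.2, 2.8]).reshape(-1, 1)  # made-up values
kde = KernelDensity(kernel="gaussian", bandwidth=0.3).fit(X)
S = 10
sig_v = kde.sample(S, random_state=0)[:, 0].tolist()  # S draws in one call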
Example 4: calculate_kde
# Required import: from sklearn.neighbors import kde [as alias]
# Or: from sklearn.neighbors.kde import KernelDensity [as alias]
import numpy as np
import pandas as pd
from sklearn.neighbors import KernelDensity

# WeightedKernelDensityEstimation is a helper defined elsewhere in the
# source project.

def calculate_kde(points, df_osm_built, df_osm_pois=None, bandwidth=400,
                  X_weights=None, pois_weight=9, log_weight=True):
    """
    Evaluate the probability density function using Kernel Density
    Estimation of input geo-localized data.
    The KDE's bandwidth stands for walkable distances.
    If input weights are given, a Weighted Kernel Density Estimation is
    carried out.

    Parameters
    ----------
    points : geopandas.GeoSeries
        reference points to calculate indices
    df_osm_built : geopandas.GeoDataFrame
        data frame containing the buildings' geometries
    df_osm_pois : geopandas.GeoDataFrame
        data frame containing the geometries of the points of interest
    bandwidth : int
        bandwidth value to be employed in the Kernel Density Estimation
    X_weights : pandas.Series
        indicates the weight for each input building (e.g. surface)
    pois_weight : int
        weight assigned to points of interest
    log_weight : bool
        if indicated, applies a log transformation to input weight values

    Returns
    -------
    pandas.Series
    """
    # X_b: building centroids
    X_b = [[p.x, p.y] for p in df_osm_built.geometry.centroid.values]
    # X_p: points-of-interest centroids
    if df_osm_pois is None:
        X_p = []
    else:
        X_p = [[p.x, p.y] for p in df_osm_pois.geometry.centroid.values]
    # X: full array
    X = np.array(X_b + X_p)
    # Points where the probability density function will be evaluated
    Y = np.array([[p.x, p.y] for p in points.values])
    if X_weights is not None:  # Weighted Kernel Density Estimation
        # Buildings' weights + POIs' weight
        X_W = np.concatenate([X_weights.values,
                              np.repeat([pois_weight], len(X_p))])
        if log_weight:  # Apply logarithm
            X_W = np.log(X_W)
        PDF = WeightedKernelDensityEstimation(X, X_W, bandwidth, Y)
        return pd.Series(PDF / PDF.max())
    else:  # Kernel Density Estimation (scikit-learn)
        kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
        # scikit-learn returns the results in the form log(density)
        PDF = np.exp(kde.score_samples(Y))
        return pd.Series(PDF / PDF.max())
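Since scikit-learn 0.20, KernelDensity.fit also accepts a sample_weight argument, so the weighted branch could be expressed with scikit-learn alone rather than the project's helper; a sketch of that alternative (coordinates and weights are made up):

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.array([[0.0, 0.0], [100.0, 50.0], [120.0, 60.0]])  # input points
w = np.array([10.0, 1.0, 5.0])                            # per-point weights
Y = np.array([[10.0, 10.0], [110.0, 55.0]])               # evaluation points

kde = KernelDensity(kernel='gaussian', bandwidth=400).fit(X, sample_weight=w)
PDF = np.exp(kde.score_samples(Y))   # score_samples returns log(density)
PDF = PDF / PDF.max()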
Example 5: _evaluate_vec
# Required import: from sklearn.neighbors import kde [as alias]
# Or: from sklearn.neighbors.kde import KernelDensity [as alias]
import logging
import numpy as np
from sklearn.neighbors import KernelDensity

def _evaluate_vec(self, opts, step, real_points,
                  fake_points, validation_fake_points, prefix=''):
    """Compute the average log-likelihood and the Coverage metric.

    The Coverage metric is defined in the arXiv paper. It counts the mass
    of true data covered by the 95% quantile of the model density.
    """
    # Estimate the density with KDE; start from a heuristic bandwidth
    # equal to the median distance between consecutive fake points.
    dist = fake_points[:-1] - fake_points[1:]
    dist = dist * dist
    dist = np.sqrt(np.sum(dist, axis=(1, 2, 3)))
    bandwidth = np.median(dist)
    num_real = len(real_points)
    num_fake = len(fake_points)
    if validation_fake_points is not None:
        # Refine the bandwidth over a grid of 14 values spanning
        # bandwidth * 2**-7 .. bandwidth * 2**6, keeping the one that
        # maximizes the validation log-likelihood.
        max_score = -1000000.
        num_val = len(validation_fake_points)
        b_grid = bandwidth * (2. ** (np.arange(14) - 7.))
        for _bandwidth in b_grid:
            kde = KernelDensity(kernel='gaussian', bandwidth=_bandwidth)
            kde.fit(np.reshape(fake_points, [num_fake, -1]))
            score = np.mean(kde.score_samples(
                np.reshape(validation_fake_points, [num_val, -1])))
            if score > max_score:
                # logging.debug("Updating bandwidth to %.4f"
                #               " with likelihood %.2f" % (_bandwidth, score))
                bandwidth = _bandwidth
                max_score = score
    kde = KernelDensity(kernel='gaussian',
                        bandwidth=bandwidth)
    kde.fit(np.reshape(fake_points, [num_fake, -1]))
    # Computing Coverage; refer to Section 4.3 of the arXiv paper.
    model_log_density = kde.score_samples(
        np.reshape(fake_points, [num_fake, -1]))
    # np.percentile(a, 10) returns t s.t. np.mean(a <= t) = 0.1
    threshold = np.percentile(model_log_density, 5)
    real_points_log_density = kde.score_samples(
        np.reshape(real_points, [num_real, -1]))
    ratio_not_covered = np.mean(real_points_log_density <= threshold)
    log_p = np.mean(real_points_log_density)
    C = 1. - ratio_not_covered
    logging.info('Evaluating: log_p=%.3f, C=%.3f' % (log_p, C))
    return log_p, C
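The Coverage computation itself is independent of the surrounding GAN code; a toy sketch of the same logic on synthetic 2-D data:

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
fake = rng.normal(size=(500, 2))   # stand-in for model samples
real = rng.normal(size=(500, 2))   # stand-in for true data

kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(fake)
# Threshold at the 5th percentile of the model log-density, i.e. the
# boundary of the model's 95% quantile region.
threshold = np.percentile(kde.score_samples(fake), 5)
C = 1. - np.mean(kde.score_samples(real) <= threshold)  # coverage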
Example 6: get_outlier_from_KernelDensityStuff
# Required import: from sklearn.neighbors import kde [as alias]
# Or: from sklearn.neighbors.kde import KernelDensity [as alias]
import numpy as np
import pandas as pd

def get_outlier_from_KernelDensityStuff(df, column="", binning="",
                                        threshold=6, method="gaussian",
                                        sort_by=""):
    # The original author aliases KernelDensity with Harry Potter names;
    # they are kept here as written. (In scikit-learn >= 0.24 use:
    # from sklearn.neighbors import KernelDensity)
    from sklearn.neighbors.kde import KernelDensity as harry
    if column == "":
        print("I need a column")
        quit()
    out_df = pd.DataFrame(data=None, columns=df.columns)
    if binning != "":
        for potter in df[binning].unique():
            tdf = df[df[binning] == potter]
            if sort_by != "":
                tdf = tdf.sort_values(sort_by)
            if column == "deriv_ksn":
                tdf["deriv_ksn"] = pd.Series(
                    np.abs(tdf.ksn.diff() / tdf.chi.diff()), index=tdf.index)
                tdf["deriv_ksn"].iloc[0] = 0
            print(tdf.shape[0])
            dumbledore = np.copy(tdf[column].values.reshape((-1, 1)))
            severus = harry(kernel=method).fit(dumbledore)
            # score_samples returns log-density; rows whose absolute
            # log-density reaches the threshold are flagged as outliers.
            snake = np.abs(severus.score_samples(dumbledore))
            McGonagal = []
            for gobelins in snake:
                if gobelins < threshold:
                    McGonagal.append(False)
                else:
                    McGonagal.append(True)
            aCat = tdf[McGonagal]
            out_df = pd.concat([out_df, aCat])
    return out_df
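A usage sketch with a toy DataFrame; the column and bin names below are hypothetical:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame({
    "basin": np.repeat(["A", "B"], 50),              # hypothetical bin column
    "ksn": rng.normal(loc=100, scale=10, size=100),  # hypothetical values
})
outliers = get_outlier_from_KernelDensityStuff(df, column="ksn",
                                               binning="basin", threshold=6)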