Python kde.KernelDensity方法代码示例

本文整理汇总了Python中sklearn.neighbors.kde.KernelDensity方法的典型用法代码示例。如果您正苦于以下问题：Python kde.KernelDensity方法的具体用法？Python kde.KernelDensity怎么用？Python kde.KernelDensity使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.neighbors.kde的用法示例。


示例1: get_dist

# 需要导入模块: from sklearn.neighbors import kde [as 别名]
# 或者: from sklearn.neighbors.kde import KernelDensity [as 别名]
def get_dist(data_list, method):
    """Return a representation of ``data_list`` selected by ``method``.

    Parameters
    ----------
    data_list : sequence of numbers
        Column values. They are converted to a single-feature ``(n, 1)``
        array before being handed to an estimator.
    method : str
        * ``"raw"``   -- return ``data_list`` unchanged.
        * ``"kd"``    -- fit a ``KernelDensity`` on the data and return the
          log-density of every input sample (``score_samples``).
        * ``"odsvm"`` -- fit a ``svm.OneClassSVM`` on the data and return the
          result of ``fit``, i.e. the fitted estimator itself.

    Returns
    -------
    object or None
        See ``method``; ``None`` when ``method`` is not one of the above.

    NOTE(review): the constructor arguments of both estimators were cut off
    in the text this function was recovered from, so library defaults are
    used here -- confirm the intended kernel/bandwidth/nu/gamma settings.
    """
    X = np.asarray(data_list).reshape(-1, 1)

    if method == "raw":
        return data_list  # raw column data
    if method == "kd":
        kde = KernelDensity().fit(X)
        return kde.score_samples(X)
    if method == "odsvm":
        return svm.OneClassSVM().fit(X)
    return None

示例2: plot_kernel_density

# 需要导入模块: from sklearn.neighbors import kde [as 别名]
# 或者: from sklearn.neighbors.kde import KernelDensity [as 别名]
def plot_kernel_density(col, verbose=True):
    """Plots kernel density function of column

    The bandwidth of the kernel density estimate is selected by a 20-fold
    cross-validated grid search over 30 evenly spaced values in [0.1, 1.0].
    The estimated density is drawn on top of a 30-bin normalized histogram
    of the column.

    Parameters
    ----------
    col : np.ndarray
        Column to plot (passed through ``utils.check_col`` first).
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot
    """
    # address pass entire matrix
    # TODO respect missing_val
    # TODO what does n do?
    col = utils.check_col(col)
    lo, hi = min(col), max(col)
    x_grid = np.linspace(lo, hi, 1000)

    # 20-fold cross-validation over candidate bandwidths
    grid = GridSearchCV(
        KernelDensity(),
        {'bandwidth': np.linspace(0.1, 1.0, 30)},
        cv=20,
    )
    grid.fit(col[:, None])

    kde = grid.best_estimator_
    # score_samples returns log-density; exponentiate to get the pdf
    pdf = np.exp(kde.score_samples(x_grid[:, None]))

    fig, ax = plt.subplots()
    ax.plot(x_grid, pdf, linewidth=3, alpha=0.5,
            label='bw=%.2f' % kde.bandwidth)
    # `density=True` is the replacement for the removed `normed=True` keyword
    ax.hist(col, 30, fc='gray', histtype='stepfilled', alpha=0.3,
            density=True)
    ax.legend(loc='upper left')
    ax.set_xlim(lo, hi)
    if verbose:
        plt.show()
    return fig

示例3: get_numerical_signature

# 需要导入模块: from sklearn.neighbors import kde [as 别名]
# 或者: from sklearn.neighbors.kde import KernelDensity [as 别名]
def get_numerical_signature(values, S):
    """
    Learns a distribution of the values
    Then generates a sample of size S

    Parameters
    ----------
    values : sequence of numbers
        Observations; reshaped to a single-feature ``(n, 1)`` array.
    S : int
        Number of values to draw from the fitted density.

    Returns
    -------
    list
        ``S`` values sampled from the kernel density estimate (empty when
        ``S`` is not positive).

    NOTE(review): the ``KernelDensity`` constructor arguments were cut off
    in the text this function was recovered from, so library defaults are
    used here -- confirm the intended kernel and bandwidth.
    """
    # Transform data to numpy array
    X = np.asarray(values).reshape(-1, 1)
    # Learn kernel
    kde = KernelDensity().fit(X)
    if S <= 0:
        return []
    # One batched draw of shape (S, 1) instead of S single-sample calls
    return list(kde.sample(S)[:, 0])

示例4: calculate_kde

# 需要导入模块: from sklearn.neighbors import kde [as 别名]
# 或者: from sklearn.neighbors.kde import KernelDensity [as 别名]
def calculate_kde(points, df_osm_built, df_osm_pois=None, bandwidth=400, X_weights=None, pois_weight=9, log_weight=True):
    """
    Evaluate the probability density function using Kernel Density Estimation of input geo-localized data
    KDE's bandwidth stands for walkable distances
    If input weights are given, a Weighted Kernel Density Estimation is carried out

    Parameters
    ----------
    points : geopandas.GeoSeries
        reference points to calculate indices
    df_osm_built : geopandas.GeoDataFrame
        data frame containing the building's geometries
    df_osm_pois : geopandas.GeoDataFrame
        data frame containing the points' of interest geometries
    bandwidth: int
        bandwidth value to be employed on the Kernel Density Estimation
    X_weights : pandas.Series
        indicates the weight for each input building (e.g. surface)
    pois_weight : int
        weight assigned to points of interest
    log_weight : bool
        if indicated, applies a log transformation to input weight values

    Returns
    ----------
    pandas.Series
        density evaluated at each reference point, divided by its maximum
    """
    # X_b : Buildings array (centroid coordinates)
    X_b = [[p.x, p.y] for p in df_osm_built.geometry.centroid.values]
    # X_p : Points array (empty when no points of interest are supplied)
    if df_osm_pois is None:
        X_p = []
    else:
        X_p = [[p.x, p.y] for p in df_osm_pois.geometry.centroid.values]
    # X : Full array
    X = np.array(X_b + X_p)

    # Points where the probability density function will be evaluated
    Y = np.array([[p.x, p.y] for p in points.values])

    if X_weights is not None:  # Weighted Kernel Density Estimation
        # Building's weight + POIs weight
        X_W = np.concatenate([X_weights.values, np.repeat([pois_weight], len(X_p))])

        if log_weight:  # Apply logarithm
            X_W = np.log(X_W)

        PDF = WeightedKernelDensityEstimation(X, X_W, bandwidth, Y)
    else:  # Kernel Density Estimation
        kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
        # Sklearn returns the results in the form log(density)
        PDF = np.exp(kde.score_samples(Y))

    return pd.Series(PDF / PDF.max())

示例5: _evaluate_vec

# 需要导入模块: from sklearn.neighbors import kde [as 别名]
# 或者: from sklearn.neighbors.kde import KernelDensity [as 别名]
def _evaluate_vec(self, opts, step, real_points,
                  fake_points, validation_fake_points, prefix=''):
    """Compute the average log-likelihood and the Coverage metric.

    Coverage metric is defined in arXiv paper. It counts a mass of true
    data covered by the 95% quantile of the model density.

    A Gaussian KDE is fitted on the flattened ``fake_points``. The starting
    bandwidth is the median Euclidean distance between consecutive fake
    points. When ``validation_fake_points`` is given, the bandwidth is
    instead picked from 14 power-of-two multiples (2**-7 .. 2**6) of that
    median, keeping the one with the highest mean log-likelihood on the
    validation points.

    Args:
        opts, step, prefix: not used by this method.
        real_points: array of real samples; flattened per sample.
        fake_points: array of model samples. The distance computation sums
            over axes 1, 2 and 3, so it needs at least four axes.
        validation_fake_points: held-out model samples for bandwidth
            selection, or None to skip the selection.

    Returns:
        Tuple ``(log_p, C)``: mean KDE log-density of the real points and
        the fraction of real points whose log-density exceeds the 5th
        percentile of the model samples' log-density.
    """
    # Estimating density with KDE
    dist = fake_points[:-1] - fake_points[1:]
    dist = dist * dist
    dist = np.sqrt(np.sum(dist, axis=(1, 2, 3)))
    bandwidth = np.median(dist)
    num_real = len(real_points)
    num_fake = len(fake_points)
    # Flattened once; reused for every fit and for scoring below
    fake_flat = np.reshape(fake_points, [num_fake, -1])
    if validation_fake_points is not None:
        max_score = -1000000.
        num_val = len(validation_fake_points)
        val_flat = np.reshape(validation_fake_points, [num_val, -1])
        b_grid = bandwidth * (2. ** (np.arange(14) - 7.))
        for _bandwidth in b_grid:
            kde = KernelDensity(kernel='gaussian', bandwidth=_bandwidth)
            kde.fit(fake_flat)
            score = np.mean(kde.score_samples(val_flat))
            if score > max_score:
                bandwidth = _bandwidth
                max_score = score
    # Refit with the selected bandwidth
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth)
    kde.fit(fake_flat)

    # Computing Coverage, refer to Section 4.3 of arxiv paper
    model_log_density = kde.score_samples(fake_flat)
    # np.percentile(a, 10) returns t s.t. np.mean( a <= t ) = 0.1
    threshold = np.percentile(model_log_density, 5)
    real_points_log_density = kde.score_samples(
        np.reshape(real_points, [num_real, -1]))
    ratio_not_covered = np.mean(real_points_log_density <= threshold)

    log_p = np.mean(real_points_log_density)
    C = 1. - ratio_not_covered

    logging.info('Evaluating: log_p=%.3f, C=%.3f', log_p, C)
    return log_p, C

示例6: get_outlier_from_KernelDensityStuff

# 需要导入模块: from sklearn.neighbors import kde [as 别名]
# 或者: from sklearn.neighbors.kde import KernelDensity [as 别名]
def get_outlier_from_KernelDensityStuff(df, column = "", binning = "", threshold = 6, method = "gaussian", sort_by = ""):
    """Select rows of ``df`` flagged by a per-bin kernel density estimate.

    For every distinct value of ``df[binning]`` a ``KernelDensity`` with
    kernel ``method`` is fitted on that bin's ``column`` values, and the
    rows whose absolute log-density reaches ``threshold`` are collected.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data.
    column : str
        Column to analyse. The special name ``"deriv_ksn"`` is computed on
        the fly as ``|diff(ksn) / diff(chi)|`` per bin, with its first
        element set to 0.
    binning : str
        Column whose unique values define the bins. When empty, nothing is
        processed and an empty frame is returned.
    threshold : number
        Cut-off applied to the absolute log-density.
    method : str
        Kernel name passed to ``KernelDensity``.
    sort_by : str
        Optional column used to sort each bin before processing.

    Returns
    -------
    pandas.DataFrame
        The selected rows of all bins; an empty frame with the columns of
        ``df`` when nothing is selected or an argument is missing.

    NOTE(review): the body of the threshold test was lost in the text this
    function was recovered from. Rows with |log-density| >= threshold are
    kept here (low-density points, matching the "outlier" in the function
    name) -- confirm against the original implementation.
    """
    out_df = pd.DataFrame(data = None, columns = df.columns)

    if column == "":
        print("I need a column")
        return out_df
    if binning == "":
        # No un-binned mode exists; nothing to do.
        return out_df

    # `sklearn.neighbors.kde` no longer exists; import from the public path.
    from sklearn.neighbors import KernelDensity

    selected = []
    for bin_value in df[binning].unique():
        # Work on a copy so adding a column never writes into a view of `df`
        tdf = df[df[binning] == bin_value].copy()
        if sort_by != "":
            tdf = tdf.sort_values(sort_by)

        if column == "deriv_ksn":
            deriv = np.abs(tdf.ksn.diff() / tdf.chi.diff())
            deriv.iloc[0] = 0  # first diff is undefined
            tdf["deriv_ksn"] = deriv

        values = tdf[column].values.reshape((-1, 1))
        kde = KernelDensity(kernel = method).fit(values)
        abs_log_density = np.abs(kde.score_samples(values))

        selected.append(tdf[abs_log_density >= threshold])

    if not selected:
        return out_df
    return pd.concat(selected)
