当前位置: 首页>>代码示例>>Python>>正文


Python KernelDensity.fit方法代码示例

本文整理汇总了Python中sklearn.neighbors.KernelDensity.fit方法的典型用法代码示例。如果您正苦于以下问题:Python KernelDensity.fit方法的具体用法?Python KernelDensity.fit怎么用?Python KernelDensity.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.neighbors.KernelDensity的用法示例。


在下文中一共展示了KernelDensity.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: EstimateDensity

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
 def EstimateDensity(self, name, df, histogram, f, s, ax):
     """Plot how column ``s`` is distributed for each value 0..4 of column ``f``.

     Parameters
     ----------
     name : sequence of two str
         ``name[0]`` is used in the legend labels, ``name[1]`` in the title.
     df : pandas.DataFrame
         Table holding the columns ``f`` and ``s``.
     histogram : bool
         If true, draw one normalised 2-bin bar histogram per group with
         ``pl.hist``; otherwise fit an exponential-kernel KDE per group
         and draw the curves on ``ax``.
     f : column label
         Grouping column; rows are selected where ``df[f] == i``.
     s : column label
         Column whose values are plotted.
     ax : axes-like object
         Receives the KDE curves, the legend and the title.
     """
     # Collect (label, values) for every group that has at least one row.
     groups = []
     for i in range(5):
         res = np.array(df[df[f] == i][s])
         if res.shape[0] > 0:
             groups.append((name[0] + ' = ' + str(i), res))

     if histogram:
         # NOTE(review): this draws through ``pl`` rather than ``ax``;
         # presumably ``ax`` is the current axes -- confirm with the caller.
         # ``density=True`` replaces the removed ``normed=True`` keyword.
         pl.hist([res for _, res in groups], bins=2, density=True,
                 histtype='bar', label=[lab for lab, _ in groups])
     else:
         # The evaluation grid is the same for every group: build it once.
         X_plot = np.linspace(-1, 5, 20).reshape(20, 1)
         for lab, res in groups:
             kde = KernelDensity(kernel='exponential', bandwidth=0.05)
             # KernelDensity expects a 2-D (n_samples, n_features) array.
             kde.fit(res.reshape(res.shape[0], 1))
             log_dens = kde.score_samples(X_plot)
             ax.plot(X_plot, np.exp(log_dens), label=lab)
     ax.legend()
     ax.set_title(name[1] + "  distrubution for changing  " + name[0])
开发者ID:ugur47,项目名称:AllState_Purchase_Prediction_Kaggle_Challange,代码行数:27,代码来源:reporting.py

示例2: plot_sklearn_kde

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def plot_sklearn_kde(df, support, column='AirTime', bins=50):
    """
    Plots a KDE and a histogram using sklearn.KernelDensity.
    Uses Gaussian kernels.
    The bandwidth is obtained from ``get_silverman_bandwidth(df, column)``.

    Parameters
    ----------
    df: A pandas.DataFrame
    support: A 1-d numpy array.
             Input data points for the probability density function.
    column: Label of the column of ``df`` to plot. Defaults to 'AirTime'.
    bins: Forwarded to ``Axes.hist`` as the ``bins`` argument.

    Returns
    -------
    A matplotlib.axes.Axes instance.
    """

    bw = get_silverman_bandwidth(df, column)

    kde = KernelDensity(kernel='gaussian', bandwidth=bw)

    # Convert to a plain ndarray first: indexing a pandas Series with
    # ``[:, np.newaxis]`` is not supported by current pandas releases.
    x = np.asarray(df[column])

    # KernelDensity wants 2-D (n_samples, n_features) input.
    kde.fit(x[:, np.newaxis])
    y = kde.score_samples(support[:, np.newaxis])

    fig, ax = plt.subplots(figsize=(8, 5))
    # ``density=True`` replaces the removed ``normed=True`` keyword.
    ax.hist(np.ravel(x), bins=bins, alpha=0.5,
            color=sns.xkcd_rgb["denim blue"], density=True)
    # score_samples returns log-densities, so exponentiate before plotting.
    ax.plot(support, np.exp(y))
    ax.set_xlabel(column, fontsize=14)
    ax.set_ylabel('Density', fontsize=14)
    ax.set_title('Kernel Density Plot', fontsize=14)
    sns.despine(ax=ax, offset=5, trim=True)

    return ax
开发者ID:nwngeek212,项目名称:UnsupervisedLearning,代码行数:37,代码来源:helper.py

示例3: kde_opt4

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    """Score test rows per class as a product of 1-D kernel density estimates.

    For every class index ``i`` in ``range(n_class)`` and every prepared
    feature, a 1-D density is fitted on the training rows where
    ``y_train == i`` and evaluated on the test rows; the per-feature
    densities are multiplied together.

    Parameters
    ----------
    df_cell_train_feats, df_cell_test_feats : pandas.DataFrame
        Must provide the columns "hour", "weekday", "accuracy", "x", "y".
    y_train : array-like
        Class labels aligned with the training rows.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_test_rows, n_class)`` holding the products.
    """
    def prepare_feats(df):
        # Derive the five features the density model is built on.
        feats = pd.DataFrame()
        feats["hour"] = df["hour"]
        feats["weekday"] = df["weekday"] + df["hour"] / 24.
        feats["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        feats["x"] = df["x"]
        feats["y"] = df["y"]
        return feats

    logging.info("train kde_opt4 model")
    train_kde = prepare_feats(df_cell_train_feats)
    test_kde = prepare_feats(df_cell_test_feats)
    n_test = len(test_kde)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((n_test, n_class), "d")
    feature_names = train_kde.columns.values

    for label in range(n_class):
        class_rows = train_kde[y_train == label]
        likelihood = np.ones(n_test, "d")
        for feat in feature_names:
            train_values = class_rows[feat].values
            test_values = test_kde[feat].values
            bgk_result = kdeBGK10(train_values)
            if bgk_result is not None:
                # Use the bandwidth reported by kdeBGK10.
                bandwidth, _mesh, _density = bgk_result
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(train_values[:, np.newaxis])
                log_density = kde.score_samples(test_values[:, np.newaxis])
                likelihood *= np.exp(log_density)
            else:
                # Fallback: scipy KDE with a shrunken Scott factor.
                scott_kde = gaussian_kde(train_values, "scott")
                kde = gaussian_kde(train_values, scott_kde.factor * 0.741379)
                likelihood *= kde.evaluate(test_values)
        y_test_pred[:, label] += likelihood
    return y_test_pred
开发者ID:aikinogard,项目名称:5th_place_solution_facebook_check_ins,代码行数:33,代码来源:model.py

示例4: kde_sklearn

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def kde_sklearn(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scikit-learn

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.
    **kwargs
        Forwarded unchanged to the ``KernelDensity`` constructor.

    Returns
    -------
    out : numpy.array
        Density estimate, one value per point of `grid`.
    """
    estimator = KernelDensity(**kwargs)
    estimator.fit(data)
    # score_samples() yields log-densities; exponentiate to get densities.
    return np.exp(estimator.score_samples(grid))
开发者ID:jwhendy,项目名称:plotnine,代码行数:27,代码来源:density.py

示例5: surface_density

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def surface_density(c, bandwidth=0.2, grid_step=0.02):
    """
    Given particle positions as a coordinate object, compute the
    surface density using a kernel density estimate.

    Parameters
    ----------
    c : coordinate object
        Must expose ``c.l.degree`` and ``c.b.degree``.
    bandwidth : float
        Bandwidth of the Epanechnikov kernel.
    grid_step : float
        Spacing of the evaluation grid along both axes.

    Returns
    -------
    grid : numpy.ndarray
        ``(n_points, 2)`` array of the grid positions that were evaluated.
    log_dens : numpy.ndarray
        Base-10 logarithm of the estimated density, shaped like the
        meshgrid. Cells where the density is exactly zero come out as
        ``-inf``.

    Raises
    ------
    ImportError
        If ``HAS_SKLEARN`` is false.
    """

    if not HAS_SKLEARN:
        raise ImportError("scikit-learn is required to use this function.")

    xgrid = np.arange(2., 9.+0.1, grid_step) # deg
    ygrid = np.arange(26.5, 33.5+0.1, grid_step) # deg
    meshies = np.meshgrid(xgrid, ygrid)
    # Pass vstack a real list: on Python 3 ``map`` returns an iterator,
    # which NumPy does not accept as a sequence of arrays.
    grid = np.vstack([np.ravel(mesh) for mesh in meshies]).T

    x = c.l.degree
    y = c.b.degree
    skypos = np.vstack((x,y)).T

    kde = KernelDensity(bandwidth=bandwidth, kernel='epanechnikov')
    kde.fit(skypos)

    # score_samples returns natural-log densities.
    dens = np.exp(kde.score_samples(grid)).reshape(meshies[0].shape)
    log_dens = np.log10(dens)

    return grid, log_dens
开发者ID:adrn,项目名称:ophiuchus,代码行数:28,代码来源:plot.py

示例6: kdescatter

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def kdescatter(xs, ys, log_color=False, atol=1e-4, rtol=1e-4,
               n_jobs=1, n_samp_scaling=100, n_samp_tuning=1000, ax=None,
               **kwargs):
    """Scatter-plot points coloured by their kernel density estimate.

    Parameters
    ----------
    xs, ys : array-like
        Coordinates of the points.
    log_color : bool
        If true, colour by log-density; otherwise by density.
    atol, rtol : float
        Absolute / relative tolerances handed to ``KernelDensity``.
    n_jobs : int
        Forwarded to ``GridSearchCV``.
    n_samp_scaling : int
        Maximum number of points sampled to set the bandwidth scale
        (square root of the median squared pairwise distance).
    n_samp_tuning : int
        Maximum number of points sampled for the bandwidth grid search.
    ax : object with a ``scatter`` method, optional
        Drawing target; ``matplotlib.pyplot`` is used when omitted.
    **kwargs
        Forwarded to ``ax.scatter``. ``linewidths``, ``s`` and ``cmap``
        get defaults of 0, 20 and 'winter' when not supplied.
    """
    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt

    kwargs.setdefault('linewidths', 0)
    kwargs.setdefault('s', 20)
    kwargs.setdefault('cmap', 'winter')

    X = np.asarray([xs, ys]).T
    n = X.shape[0]

    # Candidate bandwidths span four decades around a typical
    # inter-point distance estimated from a random subsample.
    samp_X = X[np.random.choice(n, min(n_samp_scaling, n), replace=False)]
    median_sqdist = np.median(euclidean_distances(samp_X, squared=True))
    bws = np.logspace(-2, 2, num=10) * np.sqrt(median_sqdist)

    # Pick the bandwidth by cross-validation on a second random subsample.
    # atol/rtol were previously accepted but silently ignored; they are
    # now passed on to the estimators.
    est = GridSearchCV(KernelDensity(atol=atol, rtol=rtol),
                       {'bandwidth': bws}, n_jobs=n_jobs)
    est.fit(X[np.random.choice(n, min(n_samp_tuning, n), replace=False)])
    bw = est.best_params_['bandwidth']

    kde = KernelDensity(bandwidth=bw, atol=atol, rtol=rtol)
    kde.fit(X)
    densities = kde.score_samples(X)
    if not log_color:
        # score_samples returns log-densities; convert in place.
        np.exp(densities, out=densities)
    ax.scatter(xs, ys, c=densities, **kwargs)
开发者ID:dougalsutherland,项目名称:hsfuap,代码行数:28,代码来源:kde_scatter.py

示例7: sklearn_kde

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def sklearn_kde(data, points):
    """
    Estimate the density of ``data`` with a Gaussian KDE and evaluate it
    at ``points``.

    Parameters
    ----------
    data : array of shape (n, d)
        Sample points used to fit the estimator.
    points : array of shape (m, d)
        Locations at which the density is evaluated.

    Returns
    -------
    Array with one density value per row of ``points``, expressed in the
    original (unstandardised) coordinates.
    """
    from sklearn.neighbors import KernelDensity

    # Silverman bandwidth estimator
    n, d = data.shape
    bandwidth = (n * (d + 2) / 4.)**(-1. / (d + 4))

    # standardize data so that we can use uniform bandwidth
    mu, sigma = mean(data, axis=0), std(data, axis=0)
    data, points = (data - mu)/sigma, (points - mu)/sigma

    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                        rtol=1e-6, atol=1e-6)
    kde.fit(data)
    log_pdf = kde.score_samples(points)
    # The KDE lives in standardised coordinates; divide by the product of
    # the per-axis scales so the result is a density over the original
    # coordinates (change of variables).
    return exp(log_pdf)/sigma.prod()
开发者ID:KennethWJiang,项目名称:bumps,代码行数:30,代码来源:entropy.py

示例8: find_kernel

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def find_kernel(data, numgrid=1000, bw=0.002):
    """Fit a 2-D Gaussian KDE to ``data`` and show it as a filled contour plot.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; only the first two columns are used as training points.
    numgrid : int
        Number of grid points along each axis of the evaluation grid.
    bw : float
        Kernel bandwidth.

    Returns
    -------
    numpy.ndarray
        ``(numgrid, numgrid)`` array of estimated densities on the grid.
    """
    Xtrain = data[:, 0:2]

    # Set up the data grid for the contour plot. ``num`` is the real
    # keyword of np.linspace, and the ``numgrid`` argument is now honoured.
    xgrid = np.linspace(-74.1, -73.65, num=numgrid)
    ygrid = np.linspace(40.5, 40.8, num=numgrid)
    X, Y = np.meshgrid(xgrid, ygrid)

    # NOTE(review): query points are ordered (Y, X); this assumes the first
    # two columns of ``data`` are in that same order -- confirm with caller.
    xy = np.vstack([Y.ravel(), X.ravel()]).T

    plt.figure()

    # Construct a kernel density estimate of the distribution.
    # The third column used to be passed as ``y``, which KernelDensity.fit
    # ignores, so it is no longer extracted.
    kde = KernelDensity(bandwidth=bw, kernel='gaussian')
    kde.fit(Xtrain)

    # score_samples returns log-densities.
    Z = np.exp(kde.score_samples(xy))
    Z = Z.reshape(X.shape)

    # Plot contours of the density.
    levels = np.linspace(0, Z.max(), 25)
    plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
    plt.title('BK CRIME')
    plt.show()
    return Z
开发者ID:mnlouie,项目名称:routeoptimize,代码行数:29,代码来源:crime_kernel_weighting.py

示例9: sklearn_density

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def sklearn_density(sample_points, evaluation_points):
    """
    Fit a Gaussian kernel density estimate to *sample_points* and return
    the estimated density at each of the *evaluation_points*.

    *sample_points* is an (n, d) array; *evaluation_points* must have the
    same number of columns.
    """
    from sklearn.neighbors import KernelDensity

    n_samples, n_dims = sample_points.shape

    # Silverman's rule for the bandwidth in standardised coordinates.
    exponent = -1. / (n_dims + 4)
    bandwidth = (n_samples * (n_dims + 2) / 4.)**exponent

    # Bring every axis to zero mean and unit spread so that a single
    # bandwidth suits all dimensions.
    center = mean(sample_points, axis=0)
    scale = std(sample_points, axis=0)
    train = (sample_points - center)/scale
    queries = (evaluation_points - center)/scale

    estimator = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                              rtol=1e-6, atol=1e-6)
    estimator.fit(train)
    density = exp(estimator.score_samples(queries))

    # Undo the rescaling of the axes so the density refers to the
    # original coordinates.
    return density/np.prod(scale)
开发者ID:aschankler,项目名称:bumps,代码行数:36,代码来源:entropy.py

示例10: cistrans

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def cistrans(args):
    """Plot and compare score densities of cis and trans interactions.

    Reads ``args.cob``, ``args.cis_distance`` and ``args.out``. When
    ``args.out`` is None it is set to ``'<cob name>_cistrans'``. The
    figure is saved to ``args.out + '.png'``; the interaction counts and
    the Mann-Whitney U p-value are printed.
    """
    cob = co.COB(args.cob)
    if args.out is None:
        args.out = '{}_cistrans'.format(cob.name)
    # np.newaxis adds an empty axis in that position of the slice
    # the sklearn module requires the values to be in the rows:
    # http://scikit-learn.org/stable/auto_examples/neighbors/plot_kde_1d.html
    cis = cob.coex \
            .score[cob.coex.distance <= args.cis_distance]\
            .values[:,np.newaxis]
    # Trans interactions are the ones with an infinite distance.
    trans = cob.coex\
            .score[np.isinf(cob.coex.distance)]\
            .values[:,np.newaxis]
    X_plot = np.linspace(-10,10,1000)[:,np.newaxis]
    print(
            'Found {:,} cis interactions and {:,} trans interactions'.format(
        cis.shape[0],
        trans.shape[0]
    ))
    # Fit the kernel
    kd=KernelDensity(bandwidth=0.2)
    kd.fit(cis)
    cis_kde = np.exp(kd.score_samples(X_plot))
    plt.fill(X_plot,cis_kde,alpha=0.5,label='Cis Interactions')
    # Fit the trans; only the first 50000 rows are used for the KDE
    kd.fit(trans[0:50000])
    trans_kde = np.exp(kd.score_samples(X_plot))
    plt.fill(X_plot,trans_kde,alpha=0.5,label='Trans Interactions')
    plt.legend()
    plt.title('Cis vs Trans Density: {}'.format(cob.name))
    # Calculate the mann whitney U test (on all rows, not the subsample)
    _u,pval = sp.stats.mannwhitneyu(cis[:,0],trans[:,0])
    print('P-val: {}'.format(pval))
    plt.savefig(args.out+'.png')
开发者ID:monprin,项目名称:Camoco,代码行数:36,代码来源:cistrans.py

示例11: kde_sklearn

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
    """Evaluate a scikit-learn kernel density estimate of 1-D data.

    The estimator is fitted on ``x`` and evaluated at ``x_grid``; extra
    keyword arguments go to the ``KernelDensity`` constructor.
    """
    # KernelDensity works on 2-D input, so add a trailing feature axis.
    samples = x[:, None]
    queries = x_grid[:, None]
    estimator = KernelDensity(bandwidth=bandwidth, **kwargs)
    estimator.fit(samples)
    # score_samples() gives log-densities; return plain densities.
    return np.exp(estimator.score_samples(queries))
开发者ID:wrshoemaker,项目名称:MicroMETE,代码行数:9,代码来源:generateFigures.py

示例12: set_plx_kde

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def set_plx_kde(t, bandwidth=0.3, method='sklearn_kde'):
    """ Set the plx_kde

    Builds the module-level parallax density model from the strictly
    positive entries of ``t['plx']``. For the two KDE methods nothing is
    done when the global ``plx_kde`` is already set.

    Parameters
    ----------
    t : ndarray float
        Catalog of parallax measures (units: mas)
    bandwidth : float
        Bandwidth for gaussian_kde (optional, 0.01 recommended)
    method : string
        Method for density determination (options: scipy_kde, sklearn_kde, blocks)

    Returns
    -------
    None. Results are stored in the globals ``plx_kde`` (KDE methods) or
    ``plx_bins_blocks`` / ``plx_hist_blocks`` ('blocks'). An unknown
    ``method`` only prints a message.
    """

    global plx_kde
    global plx_bins_blocks
    global plx_hist_blocks

    # Compare strings by value: ``is`` tests object identity and only
    # matched literals by accident of interning.
    if method == 'scipy_kde':

        if plx_kde is None:
            # We are only going to allow parallaxes above some minimum value
            plx = t['plx']
            positive_plx = plx[plx > 0.0]
            if bandwidth is None:
                plx_kde = gaussian_kde(positive_plx)
            else:
                plx_kde = gaussian_kde(positive_plx, bw_method=bandwidth)

    elif method == 'sklearn_kde':
        if plx_kde is None:
            kwargs = {'kernel':'tophat'}
            if bandwidth is None:
                plx_kde = KernelDensity(**kwargs)
            else:
                plx_kde = KernelDensity(bandwidth=bandwidth, **kwargs)

            plx = t['plx']
            if c.kde_subset:
                # Fit on a random subset of at most 5000 positive parallaxes.
                plx_ran = np.copy(plx[plx > 0.0])
                np.random.shuffle(plx_ran)
                plx_kde.fit( plx_ran[0:5000, np.newaxis] )
            else:
                plx_kde.fit( plx[plx > 0.0][:, np.newaxis] )

    elif method == 'blocks':
        # Set up Bayesian Blocks
        print("Calculating Bayesian Blocks...")
        nbins = np.min([len(t), 40000])
        plx = t['plx']
        positive_plx = plx[plx > 0.0][0:nbins]
        bins = bayesian_blocks(positive_plx)
        # ``density=True`` replaces the removed ``normed`` keyword, which
        # also normalised incorrectly for unequal bin widths.
        hist, bins = np.histogram(positive_plx, bins=bins, density=True)

        # Pad with zeros
        plx_bins_blocks = np.append(-1.0e100, bins)
        hist_pad = np.append(0.0, hist)
        plx_hist_blocks = np.append(hist_pad, 0.0)
        print("Bayesian Blocks set.")

    else:
        print("You must include a valid method")
        print("Options: scipy_kde, sklearn_kde or blocks")
        return
开发者ID:astroJeff,项目名称:gaia_binaries,代码行数:61,代码来源:parallax.py

示例13: estimate_density

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def estimate_density(city):
    """Build a density function over the venues surrounding `city`.

    A KernelDensity estimator (bandwidth 175) is fitted on the first two
    columns of the surrounding venues. The returned callable maps an
    array of points to their estimated density divided by the value
    returned by ``approximate_maximum_density``.
    """
    surround = xp.build_surrounding(DB.venue, city, likes=-1, checkins=1)
    locations = surround.venues[:, :2]

    kde = KernelDensity(bandwidth=175, rtol=1e-4)
    kde.fit(locations)
    max_density = approximate_maximum_density(kde, locations)

    def normalised_density(xy):
        # pylint: disable=E1101
        return np.exp(kde.score_samples(xy))/max_density

    return normalised_density
开发者ID:daureg,项目名称:illalla,代码行数:10,代码来源:VenueFeature.py

示例14: train_patient_flow_estimator

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def train_patient_flow_estimator(df, bandwidth=1.0):
    """Fit and return a Gaussian KernelDensity that uses ``patient_metric``.

    Every column of ``df`` except 'ADMIT_DATE' is used as a feature.
    """
    features = df.drop(['ADMIT_DATE'], axis=1).values
    # The distance between rows is delegated to the custom metric function.
    kde_options = {
        'bandwidth': bandwidth,
        'kernel': 'gaussian',
        'metric': 'pyfunc',
        'metric_params': {'func': patient_metric},
    }
    estimator = KernelDensity(**kde_options)
    estimator.fit(features)
    return estimator
开发者ID:andrewshir,项目名称:CollIntel,代码行数:11,代码来源:Estimator.py

示例15: CrossValidationScore

# 需要导入模块: from sklearn.neighbors import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.KernelDensity import fit [as 别名]
def CrossValidationScore(Xs, h, kernel='gaussian'):
    """Leave-one-out cross-validation score of a kernel density estimate.

    For each sample the estimator is fitted on all *other* samples and
    the log-density of the held-out sample is accumulated.

    Parameters
    ----------
    Xs : numpy.ndarray
        2-D array of samples, one per row.
    h : float
        Kernel bandwidth.
    kernel : str
        Kernel name passed to ``KernelDensity``.

    Returns
    -------
    numpy.ndarray
        One-element array holding the mean held-out log-density.
    """
    # Pass the bandwidth by keyword; newer scikit-learn releases reject
    # positional constructor arguments.
    kde = KernelDensity(bandwidth=h, kernel=kernel)
    ret = 0.
    n_samples = len(Xs)
    for i in range(n_samples):
        # Everything except row i. The previous ``Xs[i+1:-1]`` also
        # dropped the last sample from every fold.
        x = np.concatenate([Xs[:i], Xs[i+1:]])
        kde.fit(x)
        ret += kde.score_samples(Xs[i].reshape(1, -1))
    ret /= (1.*n_samples)
    return ret
开发者ID:SU-AstroML,项目名称:AstroML-course,代码行数:11,代码来源:KernelCrossValidation.py


注:本文中的sklearn.neighbors.KernelDensity.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。