This article collects typical usage examples of the Python method sklearn.neighbors.KDTree.query_radius. If you are unsure what KDTree.query_radius does, how to call it, or simply want to see concrete examples, the curated samples below may help. You can also read more about the enclosing class, sklearn.neighbors.KDTree.
The following shows 14 code examples of KDTree.query_radius, ordered by popularity by default.
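Before the examples, here is a minimal self-contained sketch of the method itself, to orient readers; the data is random and purely illustrative:

import numpy as np
from sklearn.neighbors import KDTree

rng = np.random.default_rng(0)
points = rng.random((100, 2))            # 100 random points in the unit square
tree = KDTree(points, leaf_size=30)

# Indices of all points within radius 0.1 of the first point
ind = tree.query_radius(points[:1], r=0.1)

# Only the neighbour counts (cheaper: no index arrays are built)
counts = tree.query_radius(points[:1], r=0.1, count_only=True)

# Indices plus distances, sorted by distance
ind, dist = tree.query_radius(points[:1], r=0.1,
                              return_distance=True, sort_results=True)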
Example 1: knn_cond_mutual_information
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def knn_cond_mutual_information(x, y, z, k, standardize=True, dualtree=False):
    """
    Computes conditional mutual information between two time series x and y
    conditioned on a third z (which can be multi-dimensional) as
        I(x; y | z) = sum( p(x,y,z) * log( p(z)*p(x,y,z) / (p(x,z)*p(y,z)) ) ),
    where p(z), p(x,z), p(y,z) and p(x,y,z) are probability distributions.
    Performs the k-nearest-neighbour search using a k-dimensional tree
    (sklearn.neighbors.KDTree).
    standardize -- whether to transform the data to zero mean and unit variance
    dualtree -- whether to use the dual-tree formalism in the k-d tree for the
        k-NN search; this can lead to better performance for large N
    According to Frenzel S. and Pompe B., Phys. Rev. Lett., 99, 2007.
    """
    from sklearn.neighbors import KDTree
    # prepare data
    if standardize:
        x = _center_ts(x)
        y = _center_ts(y)
        if isinstance(z, np.ndarray):
            z = _center_ts(z)
        elif isinstance(z, list):
            # rebinding the loop variable would not modify the list,
            # so build a new list of centred series instead
            z = [_center_ts(cond_ts) for cond_ts in z]
    z = np.atleast_2d(z)
    data = np.vstack([x, y, z]).T

    # build k-d tree using the maximum (Chebyshev) norm
    tree = KDTree(data, leaf_size=15, metric="chebyshev")
    # find the distance to the k-th nearest neighbour of each point
    dist, _ = tree.query(data, k=k + 1, return_distance=True, dualtree=dualtree)

    sum_ = 0
    # prepare marginal vectors xz, yz and z
    n_x_z_data = np.delete(data, 1, axis=1)
    n_y_z_data = np.delete(data, 0, axis=1)
    n_z_data = np.delete(data, [0, 1], axis=1)

    # build and query k-d trees in the marginal spaces for the number of
    # points within a given distance from each point
    tree_x_z = KDTree(n_x_z_data, leaf_size=15, metric="chebyshev")
    n_x_z = tree_x_z.query_radius(n_x_z_data, r=dist[:, -1], count_only=True) - 2
    tree_y_z = KDTree(n_y_z_data, leaf_size=15, metric="chebyshev")
    n_y_z = tree_y_z.query_radius(n_y_z_data, r=dist[:, -1], count_only=True) - 2
    tree_z = KDTree(n_z_data, leaf_size=15, metric="chebyshev")
    n_z = tree_z.query_radius(n_z_data, r=dist[:, -1], count_only=True) - 2

    # average the harmonic-number terms over all points
    for n in range(data.shape[0]):
        sum_ += _neg_harmonic(n_x_z[n]) + _neg_harmonic(n_y_z[n]) - _neg_harmonic(n_z[n])
    sum_ /= data.shape[0]

    return sum_ - _neg_harmonic(k - 1)
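A possible way to drive this estimator, assuming the module-level helpers _center_ts and _neg_harmonic from the example's own package are available; the data below is synthetic:

import numpy as np

rng = np.random.default_rng(42)
n = 2000
z = rng.normal(size=n)
x = z + 0.5 * rng.normal(size=n)   # x and y are coupled only through z
y = z + 0.5 * rng.normal(size=n)

# Conditioned on z, x and y are independent, so the estimate should be
# close to zero (up to estimator bias and sampling noise).
cmi = knn_cond_mutual_information(x, y, z, k=8)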
Example 2: study_redmapper_lrg_3d
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def study_redmapper_lrg_3d(hemi='north'):
    # create 3d grid object
    grid = grid3d(hemi=hemi)
    # load SDSS data
    sdss = load_sdss_data_both_catalogs(hemi)
    # load redmapper catalog
    rm = load_redmapper(hemi=hemi)
    # get XYZ positions (Mpc) of both datasets
    x_sdss, y_sdss, z_sdss = grid.xyz_from_radecz(sdss['ra'], sdss['dec'], sdss['z'], applyzcut=False)
    x_rm, y_rm, z_rm = grid.xyz_from_radecz(rm['ra'], rm['dec'], rm['z_spec'], applyzcut=False)
    pos_sdss = np.vstack([x_sdss, y_sdss, z_sdss]).T
    pos_rm = np.vstack([x_rm, y_rm, z_rm]).T
    # build a couple of KDTrees, one for SDSS, one for RM
    from sklearn.neighbors import KDTree
    tree_sdss = KDTree(pos_sdss, leaf_size=30)
    tree_rm = KDTree(pos_rm, leaf_size=30)
    # count SDSS LRGs within 100 Mpc of each RM cluster
    lrg_counts = tree_sdss.query_radius(pos_rm, 100., count_only=True)
    # pl (pylab) and ipdb are module-level imports in the source project
    pl.clf()
    pl.hist(lrg_counts, bins=50)
    ipdb.set_trace()
Example 3: concat_features_by_neighbors
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def concat_features_by_neighbors(df_labels, df_features,
                                 X_names=['Offense Type'],
                                 grid=["Latitude", "Longitude"],
                                 radius=1./500.,
                                 scale=np.array([1., 1.])):
    df_labels = df_labels.dropna(subset=grid)
    df_features = df_features.dropna(subset=grid)
    # df.as_matrix() was removed in pandas 1.0; use .to_numpy() instead
    X = df_features[X_names].to_numpy()
    xy_features = df_features[grid].to_numpy()
    xy_labels = df_labels[grid].to_numpy()
    tree = KDTree(xy_features * scale)
    vocabulary = set()
    features = []
    for nei in tree.query_radius(xy_labels * scale, radius):
        U, I = np.unique(X[nei], return_inverse=True)
        D = dict(zip(U, np.bincount(I)))
        vocabulary.update(D)  # map() is lazy in Python 3, so update explicitly
        features.append(D)
    vocab = list(vocabulary)
    return pd.concat([df_labels,
                      pd.DataFrame([[fi.get(v) for v in vocab] for fi in features],
                                   index=df_labels.index,
                                   columns=vocab).fillna(0.)], axis=1)
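A sketch of how this could be called, with tiny made-up DataFrames whose column names match the defaults:

import numpy as np
import pandas as pd

df_features = pd.DataFrame({
    'Latitude':     [0.10, 0.11, 0.50],
    'Longitude':    [0.20, 0.21, 0.60],
    'Offense Type': ['theft', 'assault', 'theft'],
})
df_labels = pd.DataFrame({'Latitude': [0.10], 'Longitude': [0.20]})

# Each label row gains one count column per offense type seen nearby
out = concat_features_by_neighbors(df_labels, df_features, radius=0.05)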
Example 4: match_bright
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def match_bright(x, y, x2, y2, mags, dist=1./3600.):
    """Routine that matches the truth catalog
    with the input table

    Args:
    ----
    x: `float` RA of the truth objects to match (in degrees)
    y: `float` dec of the truth objects to match (in degrees)
    x2: `float` RA of detected objects to match (in degrees)
    y2: `float` dec of detected objects to match (in degrees)
    mags: `float` array containing the true input magnitudes
    dist: `float` maximum distance in degrees considered to match
        the objects; the default is 1 arcsecond.

    Returns:
    -------
    brightest_indices: `int` array of indices selecting, for each detected
        object, the brightest matching truth object; an entry is -1 if no
        match was found for that object
    """
    X = np.zeros((len(x), 2))
    X[:, 0] = x
    X[:, 1] = y
    Y = np.zeros((len(x2), 2))
    Y[:, 0] = x2
    Y[:, 1] = y2
    tree = KDTree(X, leaf_size=40)
    ind = tree.query_radius(Y, r=dist)
    brightest_indices = np.zeros(len(ind), dtype=np.int64)
    for i, ii in enumerate(ind):
        sorted_indices = np.argsort(mags[ii])
        if len(sorted_indices) > 0:
            brightest_indices[i] = ii[sorted_indices[0]]
        else:
            brightest_indices[i] = -1
    return brightest_indices
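For instance (coordinates in degrees, made up for illustration):

import numpy as np

# Two truth objects close together; the brighter one should win the match
x_true = np.array([150.0000, 150.0001])
y_true = np.array([2.0000, 2.0001])
mags   = np.array([22.5, 19.0])          # the second object is brighter

x_det = np.array([150.00005])            # one detected object nearby
y_det = np.array([2.00005])

idx = match_bright(x_true, y_true, x_det, y_det, mags)  # -> array([1])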
Example 5: count_close
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def count_close(x, y, x2, y2, distances):
    """Routine that counts the number of
    objects that are within a certain radius

    Args:
    ----
    x: `float` position X of objects to count
    y: `float` position Y of objects to count
    x2: `float` position X of the objects that serve as the center
        of the circle where we look for neighbors
    y2: `float` position Y of the objects that serve as the center
        of the circle where we look for neighbors
    distances: `float` array of radii where to count the objects

    Returns:
    -------
    neighbors: `float` the mean number of neighbors in a circle of radius
        corresponding to each entry of distances
    err: `float` standard deviation of the number of neighbors in a circle
        of radius corresponding to each entry of distances
    """
    X = np.zeros((len(x), 2))
    X[:, 0] = x
    X[:, 1] = y
    Y = np.zeros((len(x2), 2))
    Y[:, 0] = x2
    Y[:, 1] = y2
    tree = KDTree(X, leaf_size=40)
    neighbors = np.zeros(len(distances))
    err = np.zeros(len(distances))
    for i, distance in enumerate(distances):
        # query once and reuse the counts instead of querying twice
        counts = tree.query_radius(Y, r=distance, count_only=True)
        neighbors[i], err[i] = np.nanmean(counts), np.nanstd(counts)
    return neighbors, err
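For example, with uniformly scattered points the mean count should grow roughly like the circle area (the data here is made up):

import numpy as np

rng = np.random.default_rng(0)
x, y = rng.random(10000), rng.random(10000)      # objects to count
x2, y2 = rng.random(100), rng.random(100)        # circle centers
radii = np.array([0.01, 0.05, 0.1])

means, stds = count_close(x, y, x2, y2, radii)
# means scales roughly like 10000 * pi * r**2 away from the borders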
Example 6: counts_2d_2pt_from_pos
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def counts_2d_2pt_from_pos(pos_sdss, pos_rm, lam, rmax=220., reso=2.):
    # preliminaries
    nsdss = pos_sdss.shape[0]
    nrm = pos_rm.shape[0]
    # figure out which lambda bin each RM cluster goes into
    lam_bin = get_lambda_bin(lam)
    n_lam_bin = len(set(lam_bin))
    # build a KDTree for the SDSS positions
    from sklearn.neighbors import KDTree
    tree_sdss = KDTree(pos_sdss, leaf_size=30)
    # define grids for r_pi and r_sigma
    rpigrid = np.arange(-rmax, rmax, reso)
    nrpigrid = len(rpigrid)
    rsigmagrid = np.arange(0, rmax, reso)
    nrsigmagrid = len(rsigmagrid)
    # find all BOSS galaxies within "rmax" Mpc of each RM cluster
    print('...querying tree...')
    #ind, dist = tree_sdss.query_radius(pos_rm, rmax, count_only=False, return_distance=True)
    print('...done querying tree...')
    # loop over clusters, calculate (r_pi, r_sigma) for all nearby BOSS
    # galaxies and bin those counts
    counts_rpi_rsigma = [np.zeros((nrpigrid + 1, nrsigmagrid + 1), dtype=float)
                         for i in range(n_lam_bin)]
    for irm in range(nrm):
        print('%i/%i' % (irm, nrm))
        #these_ind = ind[irm]
        # query_radius expects a 2-d array, so pass a single-row slice
        these_ind, these_s = tree_sdss.query_radius(pos_rm[irm:irm + 1, :], rmax,
                                                    count_only=False, return_distance=True)
        these_ind = these_ind[0]
        these_s = these_s[0]
        if len(these_ind) == 0:
            continue
        this_pos_rm = pos_rm[irm, :]
        these_pos_sdss = pos_sdss[these_ind, :]
        #these_s = dist[irm]
        these_mu = dot_los2(this_pos_rm, these_pos_sdss)
        these_rpi = these_s * these_mu
        these_rsigma = these_s * np.sqrt(1. - these_mu**2.)
        ind_rpi = np.digitize(these_rpi, rpigrid)
        ind_rsigma = np.digitize(these_rsigma, rsigmagrid)
        this_lam_bin = lam_bin[irm]
        for this_ind_rpi, this_ind_rsigma in zip(ind_rpi, ind_rsigma):
            counts_rpi_rsigma[this_lam_bin][this_ind_rpi, this_ind_rsigma] += 1.
    # normalize
    # ok, really you'd want to normalize by nrm *per lambda bin*,
    # but i don't think it will make any material difference.
    for i in range(n_lam_bin):
        counts_rpi_rsigma[i] *= (1. / nrm / nsdss)
    return counts_rpi_rsigma
Example 7: current_datapoints_threshold_filter
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def current_datapoints_threshold_filter(self, neighbour_points=5):
    """
    Filter out, from the current datapoints, those that do not have enough
    neighbour points within a radius of 2*max_dist (2 * self.eps in the
    code, in meters).
    Assumption: if there are fewer than neighbour_points around a data point,
    it cannot be part of an event.
    The method doesn't take networks into account.
    This method is computationally cheaper than
    self.current_datapoints_outliers_filter, so it is used as a prefilter.
    The method updates the self.current_datapoints dict.

    Args:
        neighbour_points (int): minimal number of neighbours every point should have.
    """
    nets = self.current_datapoints.keys()
    ids = concatenate([self.current_datapoints[x]['ids'] for x in nets])
    coords = concatenate([self.current_datapoints[x]['array'] for x in nets])
    megatree = KDTree(coords)
    for net in nets:
        neighbours_number = megatree.query_radius(self.current_datapoints[net]['array'],
                                                  r=self.eps * 2, count_only=True)
        self.current_datapoints[net]['array'] = \
            self.current_datapoints[net]['array'][neighbours_number >= neighbour_points]
        self.current_datapoints[net]['ids'] = \
            self.current_datapoints[net]['ids'][neighbours_number >= neighbour_points]
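The same prefilter idea, outside the class context; a minimal sketch with made-up values standing in for self.current_datapoints and self.eps:

import numpy as np
from sklearn.neighbors import KDTree

eps = 0.05                                   # stands in for self.eps
coords = np.random.default_rng(1).random((1000, 2))
tree = KDTree(coords)
counts = tree.query_radius(coords, r=eps * 2, count_only=True)
dense = coords[counts >= 5]                  # keep points with >= 5 neighbours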
Example 8: get_pairwise_velocities_one_hemi
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def get_pairwise_velocities_one_hemi(hemi, kmax=0.1, rmax=50.):
    # create 3d grid object
    grid = grid3d(hemi=hemi)
    # load SDSS data
    sdss = load_sdss_data_both_catalogs(hemi)
    # load redmapper catalog
    rm = load_redmapper(hemi=hemi)
    # get XYZ positions (Mpc) of both datasets
    x_sdss, y_sdss, z_sdss = grid.xyz_from_radecz(sdss['ra'], sdss['dec'], sdss['z'], applyzcut=False)
    x_rm, y_rm, z_rm = grid.xyz_from_radecz(rm['ra'], rm['dec'], rm['z_spec'], applyzcut=False)
    pos_sdss = np.vstack([x_sdss, y_sdss, z_sdss]).T
    pos_rm = np.vstack([x_rm, y_rm, z_rm]).T
    # build a KDTree for the SDSS LRG's
    from sklearn.neighbors import KDTree
    tree_sdss = KDTree(pos_sdss, leaf_size=30)
    # find those RM clusters that have some number of LRG's within X Mpc
    #rmax = 300. # Mpc
    lrg_counts = tree_sdss.query_radius(pos_rm, rmax, count_only=True)
    ind, dist = tree_sdss.query_radius(pos_rm, rmax, count_only=False, return_distance=True)
    min_counts = np.percentile(lrg_counts, 10)
    #min_counts = 500.
    #wh_use = np.where(lrg_counts > min_counts)[0]
    #for k in rm.keys(): rm[k] = rm[k][wh_use]
    #lrg_counts = lrg_counts[wh_use]
    #ind = ind[wh_use]
    #dist = dist[wh_use]
    #pos_rm = pos_rm[wh_use, :]
    # loop over RM clusters, get vlos
    ncl = len(rm['ra'])
    vlos = np.zeros(ncl)
    rmin = 5.  # Mpc, tmpp, worth exploring
    #r_pivot = 10.
    #r_decay = 10.
    redshift_grid = np.arange(0.05, 0.7, 0.01)
    rfine = np.arange(rmin - 1, rmax + 1, 1.)
    # create a dictionary containing interpolator objects, keyed on redshift
    corr_delta_vel_dict = {}
    from scipy import interpolate
    for redshift in redshift_grid:
        corr_delta_vel_dict[redshift] = interpolate.interp1d(rfine, corr_delta_vel(rfine, z=redshift, kmax=kmax))
    #distance_weight =
    print('*********** using kmax=%0.2f, rmax=%i' % (kmax, rmax))
    for i in range(ncl):
        print(i, ncl)
        if lrg_counts[i] < min_counts:
            continue
        wh_not_too_close = np.where(dist[i] > rmin)[0]
        these_dist = dist[i][wh_not_too_close]
        these_ind = ind[i][wh_not_too_close]
        # get 3d positions
        these_pos_sdss = pos_sdss[these_ind, :]
        this_pos_rm = pos_rm[i, :]
        # dot with line of sight
        these_dot_los = dot_los(this_pos_rm, these_pos_sdss)
        this_redshift = rm['z_spec'][i]
        closest_redshift = redshift_grid[np.argmin(np.abs(redshift_grid - this_redshift))]
        this_corr_delta_vel = corr_delta_vel_dict[closest_redshift]
        these_vel = this_corr_delta_vel(these_dist)
        #ipdb.set_trace()
        #these_vel = corr_delta_vel(these_dist, z=this_redshift, kmax=kmax)
        #these_vel = np.exp(-(these_dist - r_pivot) / r_decay)
        #these_vel = np.exp(-0.5 * (these_dist / r_decay)**2.)
        these_vlos = these_vel * these_dot_los
        this_vlos = np.sum(these_vlos)  # tmpp, sum or mean?
        #indsort = np.argsort(these_dist)
        #pl.clf(); pl.plot(these_dist[indsort], np.cumsum(these_vlos[indsort]), '.')
        #ipdb.set_trace()
        vlos[i] = this_vlos
    rm['vlos'] = vlos
    rm['weight'] = np.ones(ncl)
    return rm
Example 9: BigStarBasis
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
# ......... (part of the code is omitted here) .........
        self.gridpoints = {}
        for p in self.stellar_pars:
            self.gridpoints[p] = np.unique(self._libparams[p])
        # Digitize the library parameters
        X = np.array([np.digitize(self._libparams[p], bins=self.gridpoints[p],
                                  right=True) for p in self.stellar_pars])
        self.X = X.T
        # Build the KDTree
        self._kdt = KDTree(self.X)  # , metric='euclidean')

    def params_to_grid(self, **targ):
        """Convert a set of parameters to grid pixel coordinates.

        :param targ:
            The target parameter location, as keyword arguments. The elements
            of ``stellar_pars`` must be present as keywords.

        :returns x:
            The target parameter location in pixel coordinates.
        """
        # bin index
        inds = np.array([np.digitize([targ[p]], bins=self.gridpoints[p], right=False) - 1
                         for p in self.stellar_pars])
        inds = np.squeeze(inds)
        # fractional index. Could use stored denominator to be slightly faster
        try:
            find = [(targ[p] - self.gridpoints[p][i]) /
                    (self.gridpoints[p][i + 1] - self.gridpoints[p][i])
                    for i, p in zip(inds, self.stellar_pars)]
        except IndexError:
            pstring = "{0}: min={2} max={3} targ={1}\n"
            s = [pstring.format(p, targ[p], *self.gridpoints[p][[0, -1]])
                 for p in self.stellar_pars]
            raise ValueError("At least one parameter outside grid.\n{}".format(' '.join(s)))
        return inds + np.squeeze(find)

    def knearest_inds(self, **params):
        """Find all parameter ``vertices`` within a sphere of radius
        sqrt(ndim). The parameter values are converted to pixel coordinates
        before a search of the KDTree.

        :param params:
            Keyword arguments which must include keys corresponding to
            ``stellar_pars``, the parameters of the grid.

        :returns inds:
            The sorted indices of all vertices within sqrt(ndim) of the pixel
            coordinates corresponding to **params.
        """
        # Convert from physical space to grid index space
        xtarg = self.params_to_grid(**params)
        # Query the tree within radius sqrt(ndim)
        try:
            inds = self._kdt.query_radius(xtarg.reshape(1, -1),
                                          r=np.sqrt(self.ndim))
        except AttributeError:
            inds = self._kdt.query_ball_point(xtarg.reshape(1, -1),
                                              np.sqrt(self.ndim))
        return np.sort(inds[0])

    def linear_weights(self, knearest, **params):
        """Use ND-linear interpolation over the knearest neighbors.

        :param knearest:
            The indices of the ``vertices`` for which to calculate weights.

        :param params:
            The target parameter location, as keyword arguments.

        :returns wght:
            The weight for each vertex, computed as the volume of the hypercube
            formed by the target parameter and each vertex. Vertices more than
            1 away from the target in any dimension are given a weight of zero.
        """
        xtarg = self.params_to_grid(**params)
        x = self.X[knearest, :]
        dx = xtarg - x
        # Fractional pixel weights
        wght = ((1 - dx) * (dx >= 0) + (1 + dx) * (dx < 0))
        # set weights to zero if model is more than a pixel away
        wght *= (dx > -1) * (dx < 1)
        # compute hyperarea for each model and return
        return wght.prod(axis=-1)

    def triangle_weights(self, knearest, **params):
        """Triangulate the k-nearest models, then use the barycenter of the
        enclosing simplex to interpolate.
        """
        inparams = np.array([params[p] for p in self.stellar_pars])
        dtri = Delaunay(self.model_points[knearest, :])
        triangle_ind = dtri.find_simplex(inparams)
        inds = dtri.simplices[triangle_ind, :]
        transform = dtri.transform[triangle_ind, :, :]
        Tinv = transform[:self.ndim, :]
        x_r = inparams - transform[self.ndim, :]
        bary = np.dot(Tinv, x_r)
        last = 1.0 - bary.sum()
        wghts = np.append(bary, last)
        oo = inds.argsort()
        return inds[oo], wghts[oo]
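The weight formula in linear_weights is ordinary multilinear (hypercube) interpolation; here is a standalone sketch of the same arithmetic on one 2-D grid cell, with illustrative values:

import numpy as np

xtarg = np.array([0.3, 0.7])                            # target in pixel coords
vertices = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])   # enclosing cell corners

dx = xtarg - vertices
wght = (1 - dx) * (dx >= 0) + (1 + dx) * (dx < 0)  # fractional pixel weights
wght = wght * ((dx > -1) & (dx < 1))               # zero if > 1 pixel away
weights = wght.prod(axis=-1)                       # [0.21, 0.09, 0.49, 0.21]
assert np.isclose(weights.sum(), 1.0)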
Example 10: __init__
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def __init__(self, pts, k=None, r=None, kmax=None, rmax=None):
    """
    Parameters
    ----------
    pts : array, shape (n, d)
        Data points. Should already be normalized if necessary.
    k : int
        Number of neighbors used to estimate the local density rho.
    r : float
        Radius used to estimate the local density rho.
    kmax : int
        If given, only search the nearest kmax neighbors to calculate delta.
        kmax is equivalent to searching a sphere of size about kmax**(1/d)
        times the local average separation between points.
        Default is to search all points.
    rmax : float
        If given, only search the neighbors within rmax to calculate delta.
        Default is to search all points.

    Todos
    -----
    Optimal choice of k and gamma
    Performance optimization with Cython or Numba
    Substructure within density saddle point
    Labeling the noise
    """
    if (k is not None) and (r is not None):
        raise ValueError("Only one of 'k' or 'r' can be specified!")
    if (kmax is not None) and (rmax is not None):
        raise ValueError("Only one of 'kmax' or 'rmax' can be specified!")

    pts = np.asfarray(pts)
    npts, ndim = pts.shape
    Rmax = np.linalg.norm(pts.max(0) - pts.min(0))
    tree = KDTree(pts)

    # density
    if r is not None:
        k = tree.query_radius(pts, r, count_only=True)
    elif k is not None:
        r = tree.query(pts, k)[0][:, -1]
    sphere_coeff = np.pi**(0.5 * ndim) / gamma_func(0.5 * ndim + 1)
    rho = k / (sphere_coeff * r**ndim)
    rho[rho == 0] = rho[rho > 0].min() / 2  # reduce by an arbitrary factor

    # delta
    delta = np.full(npts, Rmax, dtype='float')
    chief = np.full(npts, -1, dtype='int')  # superior neighbor
    if kmax is not None or rmax is not None:
        if kmax is not None:
            dists, index = tree.query(
                pts, kmax, return_distance=True, sort_results=True)
        else:
            index, dists = tree.query_radius(
                pts, rmax, return_distance=True, sort_results=True)
        for i in range(npts):
            rho_i = rho[i]
            for j, dist in zip(index[i], dists[i]):
                if rho[j] > rho_i:
                    # assign only when a denser neighbor is found; otherwise
                    # keep the defaults (delta = Rmax, chief = -1)
                    chief[i], delta[i] = j, dist
                    break
    else:
        dists = squareform(pdist(pts))
        for i in range(npts):
            rho_i, delta_i, chief_i = rho[i], delta[i], chief[i]
            for j, dist in enumerate(dists[i]):
                if (rho[j] > rho_i) and (dist < delta_i):
                    chief_i, delta_i = j, dist
            chief[i], delta[i] = chief_i, delta_i

    # gamma
    gamma = sphere_coeff * rho * delta**ndim  # need sphere_coeff?
    sorted_index = np.argsort(gamma)
    sorted_gamma = gamma[sorted_index]

    # properties
    self.npts = npts
    self.ndim = ndim
    self.pts = pts
    self.rho = rho
    self.delta = delta
    self.gamma = gamma
    self.chief = chief
    self.sorted_index = sorted_index
    self.sorted_gamma = sorted_gamma
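This __init__ computes the rho and delta statistics of Rodriguez & Laio style density-peak clustering. Assuming the surrounding class (not shown in this excerpt) is called, say, DensityPeak, it might be driven like this:

import numpy as np

rng = np.random.default_rng(3)
pts = np.vstack([rng.normal((0, 0), 0.3, size=(200, 2)),
                 rng.normal((3, 3), 0.3, size=(200, 2))])

dp = DensityPeak(pts, k=16)
# Candidate cluster centers have both high density and large delta,
# i.e. the largest gamma values
centers = dp.sorted_index[-2:]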
示例11: print
# 需要导入模块: from sklearn.neighbors import KDTree [as 别名]
# 或者: from sklearn.neighbors.KDTree import query_radius [as 别名]
    # (tail of a loop that collects prospective void centers)
    prospective_centers.append([xvc, yvc, zvc])

print('Number of empty cells: ' + repr(nvtot))
print('Proceeding to construct KD-Tree...')
print('')
tree = KDTree(pos, leaf_size=5)
print('KD-Tree successfully constructed')
print('')
counter = 0
for center in prospective_centers:
    counter = counter + 1
    print('Center ' + repr(counter) + ' of ' + repr(len(prospective_centers)))
    # query_radius expects a 2-d array of query points
    ind, dist = tree.query_radius(numpy.array([center]), r=maxvoid_radius,
                                  return_distance=True, sort_results=True)
    countfail = len(dist[0])  # was len(dist([0])), a typo that would raise TypeError
    doIwrite = False
    for i in reversed(range(len(dist[0]))):
        if countfail / (4. / 3 * numpy.pi * dist[0][i] ** 3) < thresh * rhomed:
            radiofail = dist[0][i]
            iifail = len(dist[0][:i])
            doIwrite = True
            countfail = countfail - 1
            break
    if doIwrite:
        numpy.savetxt(outfile, (center[0], center[1], center[2],
                                radiofail, iifail), newline=' ')
Example 12: avgdigamma
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def avgdigamma(points, dvec, metric='minkowski', p=float('inf')):
    # p=inf is the Chebyshev metric; some sklearn versions reject infinite p
    # in the Minkowski metric, so special-case it
    metric_ = 'chebyshev' if np.isinf(p) else DistanceMetric.get_metric(metric, p=p)
    tree = KDTree(points, metric=metric_)
    # count neighbours strictly within each point's k-NN distance
    num_points = tree.query_radius(points, dvec - 1e-15, count_only=True)
    return np.sum(digamma(num_points) / len(points))
Example 13: GlobalSynthCat
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
class GlobalSynthCat(object):
    """
    A class for synthetic catalogs with a KDTree attribute to allow
    for super fast queries.
    """

    def __init__(self, cat_fn=None, catalog=None, cat_params={'nsynths': 100}):
        from sklearn.neighbors import KDTree
        if cat_fn is not None:
            self.cat = Table.read(cat_fn)
        elif catalog is not None:
            self.cat = catalog
        else:
            self.cat = build_synthetic_galaxy_catalog(**cat_params)
        self.cat['synth_id'] = np.arange(1, len(self.cat) + 1)
        xyz = ra_dec_to_xyz(self.cat['ra'], self.cat['dec'])
        self.kdt = KDTree(np.asarray(xyz).T)

    def query_radius(self, ra, dec, r):
        """
        Search for sources around a coordinate within a circle of
        radius r in arcseconds.
        """
        xyz = np.array(ra_dec_to_xyz(ra, dec)).T.reshape(1, -1)
        idx = self.kdt.query_radius(
            xyz, angular_dist_to_euclidean_dist(r / 3600.0),
            count_only=False, return_distance=False)[0]
        return self.cat[idx]

    def get_exp_synths(self, exp, search_radius=720):
        """
        Get the synths that fall within the given exposure.
        """
        wcs = exp.getWcs()
        xc, yc = exp.getDimensions() // 2 + exp.getXY0()
        coord = wcs.pixelToSky(lsst.afw.geom.Point2D(xc, yc))
        ra_c, dec_c = coord.getRa().asDegrees(), coord.getDec().asDegrees()
        cat = self.query_radius(ra_c, dec_c, search_radius).copy()
        if len(cat) > 0:
            mask = np.zeros(len(cat), dtype=bool)
            cat['x'] = -1
            cat['y'] = -1
            for i, src in enumerate(cat):
                sky_coord = lsst.afw.geom.SpherePoint(
                    src['ra'] * lsst.afw.geom.degrees,
                    src['dec'] * lsst.afw.geom.degrees)
                xy_coord = wcs.skyToPixel(sky_coord)
                if exp.getBBox().contains(lsst.afw.geom.Point2I(xy_coord)):
                    mask[i] = True
                    x0, y0 = xy_coord - exp.getXY0()
                    cat[i]['x'] = x0
                    cat[i]['y'] = y0
            cat = cat[mask]
        return cat

    def write(self, fn):
        self.cat.write(fn, overwrite=True)
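The helpers ra_dec_to_xyz and angular_dist_to_euclidean_dist are not defined in this excerpt (astroML ships equivalents); the idea is to embed sky coordinates on the unit sphere so that an angular radius becomes a Euclidean chord length. A sketch of plausible implementations:

import numpy as np

def ra_dec_to_xyz(ra, dec):
    # (ra, dec) in degrees -> unit vectors on the sphere
    ra, dec = np.radians(ra), np.radians(dec)
    return (np.cos(ra) * np.cos(dec),
            np.sin(ra) * np.cos(dec),
            np.sin(dec))

def angular_dist_to_euclidean_dist(D, r=1):
    # chord length subtending an angle D (degrees) on a sphere of radius r
    return 2 * r * np.sin(0.5 * np.radians(D))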
Example 14: _total_correlation_ksg_sklearn
# Required import: from sklearn.neighbors import KDTree [as alias]
# Or: from sklearn.neighbors.KDTree import query_radius [as alias]
def _total_correlation_ksg_sklearn(data, rvs, crvs=None, k=4, noise=1e-10):
    """
    Compute the total correlation from observations. The total correlation is
    computed between the columns specified in `rvs`, given the columns
    specified in `crvs`. This utilizes the KSG kNN density estimator, and
    works on discrete, continuous, and mixed data.

    Parameters
    ----------
    data : np.array
        Real valued time series data.
    rvs : iterable of iterables
        The columns for which the total correlation is to be computed.
    crvs : iterable
        The columns upon which the total correlation should be conditioned.
    k : int
        The number of nearest neighbors to use in estimating the local kernel density.
    noise : float
        The standard deviation of the normally-distributed noise to add to the data.

    Returns
    -------
    tc : float
        The total correlation of `rvs` given `crvs`.

    Notes
    -----
    The total correlation is computed in bits, not nats as most KSG estimators do.
    This implementation uses scikit-learn.
    """
    # KSG suggest adding noise (to break symmetries?)
    data = _fuzz(data, noise)

    if crvs is None:
        crvs = []

    digamma_N = digamma(len(data))
    log_2 = np.log(2)

    all_rvs = list(flatten(rvs)) + crvs
    rvs = [rv + crvs for rv in rvs]
    d_rvs = [len(data[0, rv]) for rv in rvs]

    tree = KDTree(data[:, all_rvs], metric="chebyshev")
    tree_rvs = [KDTree(data[:, rv], metric="chebyshev") for rv in rvs]

    epsilons = tree.query(data[:, all_rvs], k + 1)[0][:, -1]  # k+1 because of self

    n_rvs = [t.query_radius(data[:, rv], epsilons, count_only=True) for rv, t in zip(rvs, tree_rvs)]

    log_epsilons = np.log(epsilons)

    h_rvs = [-digamma(n_rv).mean() for n_rv, d in zip(n_rvs, d_rvs)]
    h_all = -digamma(k)

    if crvs:
        tree_crvs = KDTree(data[:, crvs], metric="chebyshev")
        n_crvs = tree_crvs.query_radius(data[:, crvs], epsilons, count_only=True)
        h_crvs = -digamma(n_crvs).mean()
    else:
        h_rvs = [h_rv + digamma_N + d * (log_2 - log_epsilons).mean() for h_rv, d in zip(h_rvs, d_rvs)]
        h_all += digamma_N + sum(d_rvs) * (log_2 - log_epsilons).mean()
        h_crvs = 0

    tc = sum([h_rv - h_crvs for h_rv in h_rvs]) - (h_all - h_crvs)

    return tc / log_2
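Assuming the private helpers _fuzz (noise injection) and flatten come from the example's own module, a call could look like this on synthetic correlated data:

import numpy as np

rng = np.random.default_rng(7)
n = 5000
w = rng.normal(size=n)
data = np.column_stack([w + 0.3 * rng.normal(size=n),
                        w + 0.3 * rng.normal(size=n)])

# Total correlation (in bits) between columns 0 and 1; for two variables
# this is just their mutual information
tc = _total_correlation_ksg_sklearn(data, rvs=[[0], [1]], k=4)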