本文整理匯總了Python中scipy.stats.ks_2samp方法的典型用法代碼示例。如果您正苦於以下問題:Python stats.ks_2samp方法的具體用法?Python stats.ks_2samp怎麽用?Python stats.ks_2samp使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊scipy.stats的用法示例。
在下文中一共展示了stats.ks_2samp方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: test_joint
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_joint(knn_xz):
    """Check joint simulation of (x, z) against the observed rows.

    Simulates as many joint samples as there are observed rows (see
    generate_real_nominal_data), then for each distinct integer value found
    in column 1 (the indicator z) scatter-plots observed and simulated x
    values and asserts that a two-sample KS test between them yields a
    p-value above 0.05.

    Parameters
    ----------
    knn_xz
        Fixture object; this test calls ``knn_xz.data.values()`` and
        ``knn_xz.simulate``.
    """
    # Materialise the values first: on Python 3 ``.values()`` of a mapping is
    # a view, which np.asarray wraps as a 0-d object array instead of the 2-D
    # table that the column slicing below requires.
    data = np.asarray(list(knn_xz.data.values()))
    indicators = sorted(set(data[:, 1].astype(int)))
    joint_samples = knn_xz.simulate(-1, [0, 1], N=len(data))
    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:, 1] == t]
        ax.scatter(data_subpop[:, 1], data_subpop[:, 0], color=gu.colors[t])
        # Plot simulated data for indicator t, drawn at t + .25 on the z axis
        # so it sits beside the observed points rather than on top of them.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t] * len(samples_subpop), .25), samples_subpop,
            color=gu.colors[t])
        # KS test: observed x vs. simulated x within this subpopulation.
        pvalue = ks_2samp(data_subpop[:, 0], samples_subpop)[1]
        assert .05 < pvalue
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
示例2: test_conditional_indicator
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_conditional_indicator(knn_xz):
    """Check conditional simulation of x given z against the observed rows.

    For each distinct integer value found in column 1 (the indicator z; see
    generate_real_nominal_data), simulates x conditioned on that value, once
    per observed row of the subpopulation, scatter-plots observed and
    simulated x values, and asserts that a two-sample KS test between them
    yields a p-value above 0.1.

    Parameters
    ----------
    knn_xz
        Fixture object; this test calls ``knn_xz.data.values()`` and
        ``knn_xz.simulate``.
    """
    # Materialise the values first: on Python 3 ``.values()`` of a mapping is
    # a view, which np.asarray wraps as a 0-d object array instead of the 2-D
    # table that the column slicing below requires.
    data = np.asarray(list(knn_xz.data.values()))
    indicators = sorted(set(data[:, 1].astype(int)))
    _, ax = plt.subplots()
    ax.set_title('Conditional Simulation Of X Given Indicator Z')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:, 1] == t]
        ax.scatter(data_subpop[:, 1], data_subpop[:, 0], color=gu.colors[t])
        # Plot simulated data, drawn at t + .25 on the z axis so it sits
        # beside the observed points.
        samples_subpop = [
            s[0] for s in knn_xz.simulate(
                -1, [0], constraints={1: t}, N=len(data_subpop))
        ]
        ax.scatter(
            np.repeat(t, len(data_subpop)) + .25,
            samples_subpop, color=gu.colors[t])
        # KS test: observed x vs. simulated x within this subpopulation.
        pvalue = ks_2samp(data_subpop[:, 0], samples_subpop)[1]
        assert .1 < pvalue
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
示例3: test_joint
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_joint(state):
    """Compare joint samples of (x, z) drawn from `state` with DATA.

    Draws N_SAMPLES joint samples, then for every value in INDICATORS plots
    the observed and simulated x values and asserts that the two-sample KS
    p-value between them exceeds 0.05.
    """
    simulated = state.simulate(-1, [0, 1], N=N_SAMPLES)
    _fig, axes = plt.subplots()
    axes.set_title('Joint Simulation')
    for indicator in INDICATORS:
        # Observed rows whose second column equals this indicator.
        observed = DATA[DATA[:, 1] == indicator]
        axes.scatter(
            observed[:, 1], observed[:, 0], color=gu.colors[indicator])
        # Simulated x values for the same indicator, drawn .25 to the right.
        drawn_x = [row[0] for row in simulated if row[1] == indicator]
        shifted_z = np.add([indicator] * len(drawn_x), .25)
        axes.scatter(shifted_z, drawn_x, color=gu.colors[indicator])
        # Two-sample KS test between observed and simulated x.
        _statistic, pvalue = ks_2samp(observed[:, 0], drawn_x)
        assert pvalue > .05
    axes.set_xlabel('Indicator')
    axes.set_ylabel('x')
    axes.grid()
示例4: test_conditional_indicator
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_conditional_indicator(state):
    """Compare samples of x given z drawn from `state` with DATA.

    For every value in INDICATORS, simulates x under the constraint
    ``{1: indicator}`` once per observed row of that subpopulation, plots
    both sets, and asserts that the two-sample KS p-value exceeds 0.01.
    """
    _fig, axes = plt.subplots()
    axes.set_title('Conditional Simulation Of Data X Given Indicator Z')
    for indicator in INDICATORS:
        # Observed rows whose second column equals this indicator.
        observed = DATA[DATA[:, 1] == indicator]
        axes.scatter(
            observed[:, 1], observed[:, 0], color=gu.colors[indicator])
        # Conditional draws of x, plotted .25 to the right of the observed.
        draws = state.simulate(
            -1, [0], {1: indicator}, None, len(observed))
        drawn_x = [draw[0] for draw in draws]
        shifted_z = np.repeat(indicator, len(observed)) + .25
        axes.scatter(shifted_z, drawn_x, color=gu.colors[indicator])
        # Two-sample KS test between observed and simulated x.
        _statistic, pvalue = ks_2samp(observed[:, 0], drawn_x)
        assert pvalue > .01
    axes.set_xlabel('Indicator')
    axes.set_ylabel('x')
    axes.grid()
示例5: test_joint
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_joint(kde_xz):
    """Check joint simulation of (x, z) against the observed rows.

    Simulates as many joint samples as there are observed rows (see
    generate_real_nominal_data), then for each distinct integer value found
    in column 1 (the indicator z) scatter-plots observed and simulated x
    values and asserts that a two-sample KS test between them yields a
    p-value above 0.05.

    Parameters
    ----------
    kde_xz
        Fixture object; this test calls ``kde_xz.data.values()`` and
        ``kde_xz.simulate``.
    """
    # Materialise the values first: on Python 3 ``.values()`` of a mapping is
    # a view, which np.asarray wraps as a 0-d object array instead of the 2-D
    # table that the column slicing below requires.
    data = np.asarray(list(kde_xz.data.values()))
    indicators = sorted(set(data[:, 1].astype(int)))
    joint_samples = kde_xz.simulate(-1, [0, 1], N=len(data))
    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:, 1] == t]
        ax.scatter(data_subpop[:, 1], data_subpop[:, 0], color=gu.colors[t])
        # Plot simulated data for indicator t, drawn at t + .25 on the z axis
        # so it sits beside the observed points rather than on top of them.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t] * len(samples_subpop), .25), samples_subpop,
            color=gu.colors[t])
        # KS test: observed x vs. simulated x within this subpopulation.
        _, p = ks_2samp(data_subpop[:, 0], samples_subpop)
        assert .05 < p
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
示例6: test_conditional_indicator
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_conditional_indicator(kde_xz):
    """Check conditional simulation of x given z against the observed rows.

    For each distinct integer value found in column 1 (the indicator z; see
    generate_real_nominal_data), simulates x conditioned on that value, once
    per observed row of the subpopulation, scatter-plots observed and
    simulated x values, and asserts that a two-sample KS test between them
    yields a p-value above 0.1.

    Parameters
    ----------
    kde_xz
        Fixture object; this test calls ``kde_xz.data.values()`` and
        ``kde_xz.simulate``.
    """
    # Materialise the values first: on Python 3 ``.values()`` of a mapping is
    # a view, which np.asarray wraps as a 0-d object array instead of the 2-D
    # table that the column slicing below requires.
    data = np.asarray(list(kde_xz.data.values()))
    indicators = sorted(set(data[:, 1].astype(int)))
    _, ax = plt.subplots()
    ax.set_title('Conditional Simulation Of X Given Indicator Z')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:, 1] == t]
        ax.scatter(data_subpop[:, 1], data_subpop[:, 0], color=gu.colors[t])
        # Plot simulated data, drawn at t + .25 on the z axis so it sits
        # beside the observed points.
        samples_subpop = [
            s[0] for s in kde_xz.simulate(
                -1, [0], {1: t}, None, N=len(data_subpop))
        ]
        ax.scatter(
            np.repeat(t, len(data_subpop)) + .25,
            samples_subpop, color=gu.colors[t])
        # KS test: observed x vs. simulated x within this subpopulation.
        _, p = ks_2samp(data_subpop[:, 0], samples_subpop)
        assert .1 < p
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
示例7: test_simulate_y_from_partially_populated_fresh_row
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_simulate_y_from_partially_populated_fresh_row(seed):
    """Check that Loom conditions on partial observation in new rowid."""
    table = 'data'
    sample_size = 50
    mix_ratio = [0.7, 0.3]
    means = ((0, 20), (20, 0))
    with bayeslite.bayesdb_open(seed=seed) as bdb:
        # Build the mixed training data from the two Gaussian components.
        sample_gaussians = axis_aligned_gaussians(
            means, sample_size, bdb._np_prng)
        mixed = mix(sample_gaussians, mix_ratio, bdb._np_prng)
        register_loom(bdb)
        prepare_bdb(bdb, mixed, table)
        # Insert a fresh row with only its first value set (the first
        # component's first mean), then simulate column 1 for that row.
        fresh_rowid = insert_row(bdb, table, means[0][0], None)
        simulated_samples = simulate_from_rowid(
            bdb, table, 1, fresh_rowid, limit=sample_size)
    # The simulated values should match the y values of the first component.
    reference_y = [pair[1] for pair in sample_gaussians[0]]
    p_value = stats.ks_2samp(reference_y, simulated_samples)[1]
    assert p_value > 0.10
示例8: get_pca_ks_stats
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def get_pca_ks_stats(self, maxrange=5):
    """Run pairwise two-sample K-S tests between phenotypes on leading columns.

    For each of the first ``maxrange`` columns (by position) of
    ``self.principal_observations_df``, the rows are split by their
    ``phenotype`` value and every unordered pair of phenotypes is compared
    with ``ks_2samp``.

    Parameters
    ----------
    maxrange : int
        Maximum number of leading columns to test; capped at the number of
        columns in the frame.

    Returns
    -------
    dict
        ``{pc_number: {(p1, p2): ks_2samp result}}`` where ``pc_number``
        starts at 1.
    """
    observations = self.principal_observations_df
    # NOTE(review): shape[1] counts every column, including ``phenotype``
    # itself -- confirm callers never let ``maxrange`` reach that column.
    maxrange = min(maxrange, observations.shape[1])
    phenotypes = observations.phenotype.unique().tolist()
    # Filter the rows of each phenotype once instead of once per pair per PC.
    rows_by_phenotype = {
        p: observations[observations.phenotype == p] for p in phenotypes
    }
    pc_to_phenotype_pairs = {}
    for i in range(maxrange):
        phenotype_pair_to_ks = {}
        for p1, p2 in combinations(phenotypes, 2):
            # ``.values`` replaces ``Series.as_matrix()``, which was removed
            # in pandas 1.0; both return the underlying ndarray.
            p1_pc = rows_by_phenotype[p1].iloc[:, i].values
            p2_pc = rows_by_phenotype[p2].iloc[:, i].values
            phenotype_pair_to_ks[(p1, p2)] = ks_2samp(p1_pc, p2_pc)
        pc_to_phenotype_pairs[i + 1] = phenotype_pair_to_ks
    return pc_to_phenotype_pairs
示例9: test_pairwise_distances
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_pairwise_distances(self):
    """Compare ortho_group.rvs with an SVD-based sampler via a KS test.

    For each dimension from 2 to 5, draws 1000 squared distances between
    pairs of matrices from each sampler and asserts that the two-sample KS
    p-value between the two sets of distances is above 0.05.
    """
    # Test that the distribution of pairwise distances is close to correct.
    # The seed is fixed, so the outcome depends on the order of the random
    # draws below; do not reorder them.
    np.random.seed(514)
    def random_ortho(dim):
        # Reference sampler: product of the two unitary SVD factors of a
        # standard-normal matrix.
        u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim)))
        return np.dot(u, v)
    for dim in range(2, 6):
        def generate_test_statistics(rvs, N=1000, eps=1e-10):
            # Squared difference, summed over entries, of two independent
            # draws from `rvs`; `dim` is taken from the enclosing loop.
            stats = np.array([
                np.sum((rvs(dim=dim) - rvs(dim=dim))**2)
                for _ in range(N)
            ])
            # Add a bit of noise to account for numeric accuracy.
            stats += np.random.uniform(-eps, eps, size=stats.shape)
            return stats
        expected = generate_test_statistics(random_ortho)
        actual = generate_test_statistics(scipy.stats.ortho_group.rvs)
        _D, p = scipy.stats.ks_2samp(expected, actual)
        # Fails when p <= .05.
        assert_array_less(.05, p)
示例10: feature_score
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def feature_score(self, X_ref: np.ndarray, X: np.ndarray) -> np.ndarray:
    """
    Run a two-sample K-S test on each feature column.

    Both inputs are flattened to 2-D (instances x features) before the
    per-feature tests are run.

    Parameters
    ----------
    X_ref
        Reference instances to compare distribution with.
    X
        Batch of instances.

    Returns
    -------
    Feature level drift scores: the K-S p-value of each of the first
    ``self.n_features`` columns, as a float32 array.
    """
    batch_flat = X.reshape(X.shape[0], -1)
    ref_flat = X_ref.reshape(X_ref.shape[0], -1)
    # TODO: update to 'exact' when bug fix is released in scipy 1.5
    p_values = [
        ks_2samp(ref_flat[:, col], batch_flat[:, col],
                 alternative=self.alternative, mode='asymp')[1]
        for col in range(self.n_features)
    ]
    return np.array(p_values, dtype=np.float32)
示例11: kolmogorov_smirnov_two_sample_test
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def kolmogorov_smirnov_two_sample_test(X, y):
    """
    Performs the two sample Kolmogorov-Smirnov test, testing whether feature
    values of each class are drawn from identical distributions.

    Keyword arguments:
    X -- The feature vectors (indexed as ``X[mask, column]``)
    y -- The target vector; rows with ``y == 0`` are compared against rows
         with ``y == 1``

    Returns a list with one ``(ks_statistic, p_value)`` tuple per feature
    column. The first 23 entries (or fewer, if there are fewer features) are
    also printed as debug output.
    """
    kolmogorov_smirnov = []
    for feature_col in range(len(X[0])):
        ks_test_statistic, p_value = stats.ks_2samp(
            X[y == 0, feature_col], X[y == 1, feature_col])
        kolmogorov_smirnov.append((ks_test_statistic, p_value))
    # debug: slice instead of indexing a fixed range(23), which raised
    # IndexError whenever X had fewer than 23 features.
    for entry in kolmogorov_smirnov[:23]:
        print(entry)
    return kolmogorov_smirnov
示例12: calc_weighted_ks2samp
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def calc_weighted_ks2samp(x, y, wx, wy):
    """
    Weighted two-sample Kolmogorov-Smirnov statistic.

    Builds the weighted empirical CDF of each sample and returns the largest
    absolute difference between the two, evaluated at every observed point.

    Parameters
    ----------
    x, y : array-like
        The two 1-D samples.
    wx, wy : array-like
        Weights for ``x`` and ``y`` respectively, one per observation.

    Returns
    -------
    float
        The maximum absolute difference between the weighted ECDFs.

    References:
    [1] https://stackoverflow.com/a/40059727
    """
    # Accept any array-like; weights are cast to float so the normalisation
    # below is never an integer division.
    x, y = np.asarray(x), np.asarray(y)
    wx, wy = np.asarray(wx, dtype=float), np.asarray(wy, dtype=float)
    x_ix = np.argsort(x)
    y_ix = np.argsort(y)
    x, wx = x[x_ix], wx[x_ix]
    y, wy = y[y_ix], wy[y_ix]
    data = np.concatenate((x, y))
    # Leading 0 is the ECDF value to the left of the smallest observation.
    wx_cum = np.hstack([0, wx.cumsum() / wx.sum()])
    wy_cum = np.hstack([0, wy.cumsum() / wy.sum()])
    # Align the "steps" between the two distributions so the differences are
    # well defined. Index with the array directly: wrapping it in a list is
    # deprecated NumPy indexing and adds a spurious leading axis.
    x_align = wx_cum[np.searchsorted(x, data, side="right")]
    y_align = wy_cum[np.searchsorted(y, data, side="right")]
    stat = np.max(np.abs(x_align - y_align))
    return stat
示例13: test_resample
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_resample():
    """
    Check with KS tests that resampling gives consistent distributions:
    two resamplings of the same weighted points should agree, while a
    resampling of different points should not.
    """
    n_weighted = 50  # number of weighted points
    n_resampled = 1000  # number of non-weighted points
    points = np.random.randn(n_weighted)
    weights = np.random.rand(n_weighted)
    weights /= np.sum(weights)
    # Resample twice from the same weighted points.
    first = ws.resample(points, weights, n_resampled)
    second = ws.resample(points, weights, n_resampled)
    # Same source, so the KS test should not reject.
    p_same = ks_2samp(first, second)[1]
    assert p_same > 1e-2
    # Resample from a fresh set of points with the same weights.
    other_points = np.random.randn(n_weighted)
    third = ws.resample(other_points, weights, n_resampled)
    # Different source, so the KS test should reject.
    p_different = ks_2samp(first, third)[1]
    assert p_different < 1e-2
示例14: test_resample_deterministic
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def test_resample_deterministic():
    """
    Check the deterministic resampling routine against plain resampling,
    with its last argument set to False and then to True.
    """
    n_weighted = 50  # number of weighted points
    n_resampled = 1000  # number of non-weighted points
    points = np.random.randn(n_weighted)
    weights = np.random.rand(n_weighted)
    weights /= np.sum(weights)
    deterministic = ws.resample_deterministic(
        points, weights, n_resampled, False)
    reference = ws.resample(points, weights, n_resampled)
    # Both should follow the same distribution.
    p_first = ks_2samp(deterministic, reference)[1]
    assert p_first > 1e-2
    # With the flag set, exactly n points must come back and the
    # distribution should still match.
    deterministic_exact = ws.resample_deterministic(
        points, weights, n_resampled, True)
    assert len(deterministic_exact) == n_resampled
    p_second = ks_2samp(deterministic_exact, reference)[1]
    assert p_second > 1e-2
示例15: compare_by_position
# 需要導入模塊: from scipy import stats [as 別名]
# 或者: from scipy.stats import ks_2samp [as 別名]
def compare_by_position(bed1, bed2, xmfa):
    """Compare per-position probability lists between two BED-like files.

    Each non-blank line must hold eight tab-separated fields: chromosome,
    start, end, motif, percent methylation, strand, read count, and a
    comma-separated list of probabilities. For every
    ``(chromosome, start, end, strand)`` key present in both files, the two
    probability lists are compared with Mann-Whitney U, Wilcoxon rank-sum,
    Student's t and two-sample Kolmogorov-Smirnov tests. A line is printed
    for each position whose KS p-value is below 0.9.

    Parameters
    ----------
    bed1, bed2 : str
        Paths of the two files to compare.
    xmfa
        Unused; kept so existing callers keep working.

    Raises
    ------
    ValueError
        If a non-blank line does not have exactly eight fields or a
        probability cannot be parsed as a float.
    """
    pos_dict = {}
    for i, bed in enumerate([bed1, bed2]):
        pos_dict[i] = {}
        with open(bed, 'r') as fi:
            for line in fi:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                # 2 1892198 1892199 TCMMTMTTMMM 0.5 - 16 <probabilities>
                (csome, start, end, _motif, perc_meth, strand, num_reads,
                 probabilities) = line.split('\t')
                pos_dict[i][(csome, start, end, strand)] = (
                    (perc_meth, num_reads),
                    np.asarray([float(p)
                                for p in probabilities.strip().split(',')]))
    for pos in pos_dict[0]:
        if pos not in pos_dict[1]:
            continue
        summary1, probs1 = pos_dict[0][pos]
        summary2, probs2 = pos_dict[1][pos]
        # Every test compares file 1 against file 2. The original compared
        # file 1 with itself, so the KS p-value was always 1 and no position
        # was ever reported.
        try:
            _u, pval = mannwhitneyu(probs1, probs2, alternative='two-sided')
        except ValueError:
            pval = 'identical'
        _u2, pval2 = ranksums(probs1, probs2)
        try:
            _t, pval3 = ttest_ind(probs1, probs2)
        except Exception:
            # Best-effort: report a placeholder rather than abort the scan.
            pval3 = 'missing df'
        _d, pval4 = ks_2samp(probs1, probs2)
        if pval4 < 0.9:
            # Single-string print gives the same space-separated output on
            # Python 2 and Python 3.
            print(' '.join(str(v) for v in (
                pos, summary1, summary2, pval, pval2, pval3, pval4)))