Python stats.ks_2samp方法代码示例

本文整理汇总了Python中scipy.stats.ks_2samp方法的典型用法代码示例。如果您正苦于以下问题：Python stats.ks_2samp方法的具体用法？Python stats.ks_2samp怎么用？Python stats.ks_2samp使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scipy.stats的用法示例。

在下文中一共展示了stats.ks_2samp方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_joint

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_joint(knn_xz):
    """Check joint simulations of (x, z) from ``knn_xz`` against its data.

    Draws as many joint samples of columns 0 (x) and 1 (z) as there are
    observed rows.  For every distinct integer level of z found in the
    data, the observed and simulated x values are scatter-plotted and
    compared with a two-sample KS test, whose p-value must exceed 0.05.
    """
    # Materialize the values before handing them to NumPy: on Python 3
    # ``.values()`` returns a view object, which ``np.asarray`` would wrap
    # in a 0-d object array and break the 2-D indexing below.
    data = np.asarray(list(knn_xz.data.values()))
    indicators = sorted(set(data[:,1].astype(int)))
    joint_samples = knn_xz.simulate(-1, [0,1], N=len(data))
    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data for indicator t, shifted right by .25 so it
        # does not overlap the observed points.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t]*len(samples_subpop), .25), samples_subpop,
            color=gu.colors[t])
        # KS test between observed and simulated x within this level.
        pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
        assert .05 < pvalue
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()

开发者ID:probcomp，项目名称:cgpm，代码行数:27，代码来源:test_mvknn.py

示例2: test_conditional_indicator

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_conditional_indicator(knn_xz):
    """Check simulations of x given z from ``knn_xz`` against its data.

    For every distinct integer level ``t`` of z (column 1) found in the
    data, simulates x (column 0) under the constraint ``{1: t}``, drawing
    as many samples as there are observed rows at that level.  Observed and
    simulated values are scatter-plotted and compared with a two-sample KS
    test, whose p-value must exceed 0.1.
    """
    # Materialize the values before handing them to NumPy: on Python 3
    # ``.values()`` returns a view object, which ``np.asarray`` would wrap
    # in a 0-d object array and break the 2-D indexing below.
    data = np.asarray(list(knn_xz.data.values()))
    indicators = sorted(set(data[:,1].astype(int)))
    _, ax = plt.subplots()
    ax.set_title('Conditional Simulation Of X Given Indicator Z')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data, shifted right by .25 so it does not overlap
        # the observed points.
        samples_subpop = [s[0] for s in
            knn_xz.simulate(-1, [0], constraints={1:t}, N=len(data_subpop))]
        ax.scatter(
            np.repeat(t, len(data_subpop)) + .25,
            samples_subpop, color=gu.colors[t])
        # KS test between observed and simulated x within this level.
        pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
        assert .1 < pvalue
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()

开发者ID:probcomp，项目名称:cgpm，代码行数:27，代码来源:test_mvknn.py

示例3: test_joint

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_joint(state):
    """Compare joint simulations of (x, z) from ``state`` with ``DATA``.

    ``N_SAMPLES`` joint draws of columns 0 and 1 are taken once; then, for
    each level in ``INDICATORS``, the observed and simulated x values are
    plotted and a two-sample KS test must yield a p-value above 0.05.
    """
    simulated = state.simulate(-1, [0,1], N=N_SAMPLES)
    _fig, axes = plt.subplots()
    axes.set_title('Joint Simulation')
    for level in INDICATORS:
        # Observed rows whose indicator column equals this level.
        observed = DATA[DATA[:,1] == level]
        shade = gu.colors[level]
        axes.scatter(observed[:,1], observed[:,0], color=shade)
        # Simulated x values at this level, drawn .25 to the right.
        sim_x = [row[0] for row in simulated if row[1] == level]
        shifted = np.add([level]*len(sim_x), .25)
        axes.scatter(shifted, sim_x, color=shade)
        # The two samples should be statistically indistinguishable.
        _stat, pvalue = ks_2samp(observed[:,0], sim_x)
        assert pvalue > .05
    axes.set_xlabel('Indicator')
    axes.set_ylabel('x')
    axes.grid()

开发者ID:probcomp，项目名称:cgpm，代码行数:22，代码来源:test_normal_categorical.py

示例4: test_conditional_indicator

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_conditional_indicator(state):
    """Compare simulations of x given the indicator with ``DATA``.

    For each level in ``INDICATORS``, x (column 0) is simulated with
    column 1 fixed to that level, once per observed row at the level.
    Observed and simulated values are plotted, and a two-sample KS test
    must yield a p-value above 0.01.
    """
    _fig, axes = plt.subplots()
    axes.set_title('Conditional Simulation Of Data X Given Indicator Z')
    for level in INDICATORS:
        # Observed rows whose indicator column equals this level.
        observed = DATA[DATA[:,1] == level]
        shade = gu.colors[level]
        axes.scatter(observed[:,1], observed[:,0], color=shade)
        # Simulated x values given the level, drawn .25 to the right.
        draws = state.simulate(-1, [0], {1:level}, None, len(observed))
        sim_x = [draw[0] for draw in draws]
        shifted = np.repeat(level, len(observed)) + .25
        axes.scatter(shifted, sim_x, color=shade)
        # The two samples should be statistically indistinguishable.
        _stat, pvalue = ks_2samp(observed[:,0], sim_x)
        assert pvalue > .01
    axes.set_xlabel('Indicator')
    axes.set_ylabel('x')
    axes.grid()

开发者ID:probcomp，项目名称:cgpm，代码行数:22，代码来源:test_normal_categorical.py

示例5: test_joint

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_joint(kde_xz):
    """Check joint simulations of (x, z) from ``kde_xz`` against its data.

    Draws as many joint samples of columns 0 (x) and 1 (z) as there are
    observed rows.  For every distinct integer level of z found in the
    data, the observed and simulated x values are scatter-plotted and
    compared with a two-sample KS test, whose p-value must exceed 0.05.
    """
    # Materialize the values before handing them to NumPy: on Python 3
    # ``.values()`` returns a view object, which ``np.asarray`` would wrap
    # in a 0-d object array and break the 2-D indexing below.
    data = np.asarray(list(kde_xz.data.values()))
    indicators = sorted(set(data[:,1].astype(int)))
    joint_samples = kde_xz.simulate(-1, [0,1], N=len(data))
    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data for indicator t, shifted right by .25 so it
        # does not overlap the observed points.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t]*len(samples_subpop), .25), samples_subpop,
            color=gu.colors[t])
        # KS test between observed and simulated x within this level.
        _, p = ks_2samp(data_subpop[:,0], samples_subpop)
        assert .05 < p
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()

开发者ID:probcomp，项目名称:cgpm，代码行数:27，代码来源:test_mvkde.py

示例6: test_conditional_indicator

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_conditional_indicator(kde_xz):
    """Check simulations of x given z from ``kde_xz`` against its data.

    For every distinct integer level ``t`` of z (column 1) found in the
    data, simulates x (column 0) with ``{1: t}`` as the conditioning
    argument, drawing as many samples as there are observed rows at that
    level.  Observed and simulated values are scatter-plotted and compared
    with a two-sample KS test, whose p-value must exceed 0.1.
    """
    # Materialize the values before handing them to NumPy: on Python 3
    # ``.values()`` returns a view object, which ``np.asarray`` would wrap
    # in a 0-d object array and break the 2-D indexing below.
    data = np.asarray(list(kde_xz.data.values()))
    indicators = sorted(set(data[:,1].astype(int)))
    _, ax = plt.subplots()
    ax.set_title('Conditional Simulation Of X Given Indicator Z')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data, shifted right by .25 so it does not overlap
        # the observed points.
        samples_subpop = [s[0] for s in
            kde_xz.simulate(-1, [0], {1:t}, None, N=len(data_subpop))]
        ax.scatter(
            np.repeat(t, len(data_subpop)) + .25,
            samples_subpop, color=gu.colors[t])
        # KS test between observed and simulated x within this level.
        _, p = ks_2samp(data_subpop[:,0], samples_subpop)
        assert .1 < p
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()

开发者ID:probcomp，项目名称:cgpm，代码行数:27，代码来源:test_mvkde.py

示例7: test_simulate_y_from_partially_populated_fresh_row

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_simulate_y_from_partially_populated_fresh_row(seed):
    """Check that Loom conditions on partial observation in new rowid.

    A two-component Gaussian mixture is loaded into a table, a new row is
    inserted with only its first value set (to the first component's x
    mean), and column 1 is simulated for that row.  The simulated values
    must pass a two-sample KS test (p > 0.10) against the y values of the
    first Gaussian component.
    """
    table = 'data'
    sample_size = 50
    mix_ratio = [0.7, 0.3]
    means = ((0,20), (20,0))

    with bayeslite.bayesdb_open(seed=seed) as bdb:
        sample_gaussians = axis_aligned_gaussians(
            means, sample_size, bdb._np_prng)
        samples = mix(sample_gaussians, mix_ratio, bdb._np_prng)
        register_loom(bdb)
        prepare_bdb(bdb, samples, table)

        # New row: first value observed, second left as None.
        first_x_mean = means[0][0]
        rowid = insert_row(bdb, table, first_x_mean, None)
        simulated_samples = simulate_from_rowid(
            bdb, table, 1, rowid, limit=sample_size)

    # Reference sample: second coordinate of every point in component 0.
    y_samples = [y_val for _, y_val in sample_gaussians[0]]
    ks_result = stats.ks_2samp(y_samples, simulated_samples)
    _statistic, p_value = ks_result
    assert p_value > 0.10

开发者ID:probcomp，项目名称:bayeslite，代码行数:22，代码来源:test_loom_simulate_bivariate_gaussian.py

示例8: get_pca_ks_stats

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def get_pca_ks_stats(self, maxrange=5):
    """Run pairwise two-sample K-S tests between phenotypes, per column.

    For each of the first ``maxrange`` columns (selected by position) of
    ``self.principal_observations_df``, every unordered pair of distinct
    values in its ``phenotype`` column is compared with ``ks_2samp``.

    Args:
        maxrange: Number of leading columns to test.  Capped at the total
            number of columns in the frame.

    Returns:
        dict: ``{column number (1-based): {(p1, p2): ks_2samp result}}``.
    """
    observations = self.principal_observations_df
    # NOTE(review): shape[1] counts every column of the frame, presumably
    # including ``phenotype`` itself -- confirm that the cap cannot let the
    # loop reach a non-numeric column.
    maxrange = min(maxrange, observations.shape[1])

    phenotypes = observations.phenotype.unique().tolist()
    # Filter the rows of each phenotype once instead of once per
    # (column, pair) combination.
    rows_by_phenotype = {
        p: observations[observations.phenotype == p] for p in phenotypes
    }

    pc_to_phenotype_pairs = {}
    for i in range(maxrange):
        phenotype_pair_to_ks = {}
        for p1, p2 in combinations(phenotypes, 2):
            # ``.values`` replaces ``Series.as_matrix()``, which was removed
            # in pandas 1.0.
            p1_pc = rows_by_phenotype[p1].iloc[:, i].values
            p2_pc = rows_by_phenotype[p2].iloc[:, i].values
            phenotype_pair_to_ks[(p1, p2)] = ks_2samp(p1_pc, p2_pc)
        pc_to_phenotype_pairs[i + 1] = phenotype_pair_to_ks

    return pc_to_phenotype_pairs

开发者ID:SBRG，项目名称:ssbio，代码行数:19，代码来源:atlas3.py

示例9: test_pairwise_distances

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_pairwise_distances(self):
        # Test that the distribution of pairwise distances is close to correct.
        np.random.seed(514)

        def random_ortho(dim):
            u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim)))
            return np.dot(u, v)

        for dim in range(2, 6):
            def generate_test_statistics(rvs, N=1000, eps=1e-10):
                stats = np.array([
                    np.sum((rvs(dim=dim) - rvs(dim=dim))**2)
                    for _ in range(N)
                ])
                # Add a bit of noise to account for numeric accuracy.
                stats += np.random.uniform(-eps, eps, size=stats.shape)
                return stats

            expected = generate_test_statistics(random_ortho)
            actual = generate_test_statistics(scipy.stats.ortho_group.rvs)

            _D, p = scipy.stats.ks_2samp(expected, actual)

            assert_array_less(.05, p)

开发者ID:Relph1119，项目名称:GraphicDesignPatternByPython，代码行数:26，代码来源:test_multivariate.py

示例10: feature_score

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def feature_score(self, X_ref: np.ndarray, X: np.ndarray) -> np.ndarray:
    """
    Compute the two-sample K-S p-value of every feature.

    Both inputs are flattened to 2-D (first axis kept, the rest merged)
    and compared column by column over the first ``self.n_features``
    columns, using ``self.alternative`` as the test alternative.

    Parameters
    ----------
    X_ref
        Reference instances to compare distribution with.
    X
        Batch of instances.

    Returns
    -------
    Float32 array with one K-S p-value per feature.
    """
    batch = X.reshape(X.shape[0], -1)
    reference = X_ref.reshape(X_ref.shape[0], -1)
    # TODO: update to 'exact' when bug fix is released in scipy 1.5
    p_values = [
        ks_2samp(
            reference[:, col],
            batch[:, col],
            alternative=self.alternative,
            mode='asymp',
        )[1]
        for col in range(self.n_features)
    ]
    return np.array(p_values, dtype=np.float32)

开发者ID:SeldonIO，项目名称:alibi-detect，代码行数:24，代码来源:ks.py

示例11: kolmogorov_smirnov_two_sample_test

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def kolmogorov_smirnov_two_sample_test(X,y):
    """
    Performs the two sample Kolmogorov-Smirnov test, testing whether feature values of each class are drawn from identical distributions

    For every column of X, the values of the rows with y == 0 are compared
    against the values of the rows with y == 1.

    Keyword arguments:
    X -- The feature vectors (2-D array, one row per sample)
    y -- The target vector (array of class labels; 0 and 1 are compared)

    Returns a list with one (ks_statistic, p_value) tuple per feature column.
    """
    n_features = len(X[0])
    kolmogorov_smirnov = []
    for feature_col in range(n_features):
        ks_test_statistic, p_value = stats.ks_2samp(
            X[y == 0, feature_col], X[y == 1, feature_col])
        kolmogorov_smirnov.append((ks_test_statistic, p_value))

    # debug: echo at most the first 23 results. Slicing (rather than
    # indexing 0..22) keeps this from raising IndexError when X has fewer
    # than 23 feature columns.
    debug_limit = 23
    for entry in kolmogorov_smirnov[:debug_limit]:
        print(entry)

    return kolmogorov_smirnov

开发者ID:alexpnt，项目名称:default-credit-card-prediction，代码行数:22，代码来源:feature_selection.py

示例12: calc_weighted_ks2samp

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def calc_weighted_ks2samp(x, y, wx, wy):
    """
    Weighted two-sample Kolmogorov-Smirnov statistic.

    Builds the weighted empirical CDF of each sample and returns the
    largest absolute difference between the two, evaluated at every
    observed point of either sample.

    Args:
        x: 1-D array-like with the first sample.
        y: 1-D array-like with the second sample.
        wx: 1-D array-like of weights, aligned with ``x``.
        wy: 1-D array-like of weights, aligned with ``y``.

    Returns:
        The maximum absolute difference between the two weighted CDFs.

    References:
        [1] https://stackoverflow.com/a/40059727
    """
    # Accept any array-like (lists, tuples, ...) and guarantee positional
    # indexing below.
    x, y = np.asarray(x), np.asarray(y)
    wx, wy = np.asarray(wx), np.asarray(wy)

    # Sort each sample and carry its weights along.
    x_ix = np.argsort(x)
    y_ix = np.argsort(y)
    x, wx = x[x_ix], wx[x_ix]
    y, wy = y[y_ix], wy[y_ix]
    data = np.concatenate((x, y))
    # Weighted CDF values after 0, 1, ..., n sorted points (leading 0 for
    # "no points yet").
    wx_cum = np.hstack([0, wx.cumsum() / wx.sum()])
    wy_cum = np.hstack([0, wy.cumsum() / wy.sum()])
    # Align the "steps" between the two distribution so the differences will be well defined:
    # searchsorted(..., side="right") counts the sample points <= each
    # pooled value, which is the index into the cumulative-weight array.
    # Index with the integer array directly; wrapping it in a list is
    # deprecated non-tuple sequence indexing.
    x_align = wx_cum[np.searchsorted(x, data, side="right")]
    y_align = wy_cum[np.searchsorted(y, data, side="right")]
    stat = np.max(np.abs(x_align - y_align))
    return stat

开发者ID:IBM，项目名称:causallib，代码行数:22，代码来源:stat_utils.py

示例13: test_resample

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_resample():
    """
    Check with KS tests that resampling is distributionally consistent.

    Two resamples of the same weighted points must not be distinguishable
    (p > 1e-2), while a resample of different points with the same weights
    must be (p < 1e-2).
    """
    n_weighted = 50  # size of the weighted point set
    n_resampled = 1000  # size of each unweighted resample

    points = np.random.randn(n_weighted)
    raw_weights = np.random.rand(n_weighted)
    weights = raw_weights / np.sum(raw_weights)

    # Two independent resamples of the very same weighted sample ...
    first = ws.resample(points, weights, n_resampled)
    second = ws.resample(points, weights, n_resampled)

    # ... are expected to follow the same distribution.
    _stat, p_same = ks_2samp(first, second)
    assert 1e-2 < p_same

    # A resample built from freshly drawn points ...
    other_points = np.random.randn(n_weighted)
    third = ws.resample(other_points, weights, n_resampled)

    # ... is expected to follow a different distribution.
    _stat, p_diff = ks_2samp(first, third)
    assert 1e-2 > p_diff

开发者ID:ICB-DCM，项目名称:pyABC，代码行数:27，代码来源:test_weighted_statistics.py

示例14: test_resample_deterministic

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def test_resample_deterministic():
    """
    Check the deterministic resampling routine against random resampling.

    The output of ``ws.resample_deterministic`` (called with both values
    of its fourth argument) must pass a KS test (p > 1e-2) against the
    output of ``ws.resample``; with the fourth argument set to True the
    output must additionally have exactly ``n`` entries.
    """
    n_weighted = 50  # size of the weighted point set
    n_resampled = 1000  # size of each unweighted resample

    points = np.random.randn(n_weighted)
    raw_weights = np.random.rand(n_weighted)
    weights = raw_weights / np.sum(raw_weights)

    deterministic = ws.resample_deterministic(
        points, weights, n_resampled, False)
    reference = ws.resample(points, weights, n_resampled)

    # Both routines should reproduce the same distribution.
    _stat, p_first = ks_2samp(deterministic, reference)
    assert 1e-2 < p_first

    deterministic_exact = ws.resample_deterministic(
        points, weights, n_resampled, True)
    assert n_resampled == len(deterministic_exact)

    _stat, p_second = ks_2samp(deterministic_exact, reference)
    assert 1e-2 < p_second

开发者ID:ICB-DCM，项目名称:pyABC，代码行数:25，代码来源:test_weighted_statistics.py

示例15: compare_by_position

# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import ks_2samp [as 别名]
def compare_by_position(bed1,bed2,xmfa):
    """Compare per-position probability samples between two bed-like files.

    Each input line must hold eight tab-separated fields: chromosome,
    start, end, motif, percent methylated, strand, number of reads, and a
    comma-separated list of probabilities.  For every
    (chromosome, start, end, strand) key present in both files, the two
    probability lists are compared with the Mann-Whitney U, Wilcoxon
    rank-sum, t and two-sample Kolmogorov-Smirnov tests.  Positions whose
    K-S p-value is below 0.9 are printed together with the
    (percent methylated, number of reads) pair of each file and the four
    p-values.

    Args:
        bed1: Path of the first file.
        bed2: Path of the second file.
        xmfa: Unused; kept for interface compatibility.
    """
    pos_dict = {}

    for i, bed in enumerate([bed1, bed2]):
        pos_dict[i] = {}
        with open(bed, 'r') as fi:
            for line in fi:
                # Skip blank lines instead of failing on the unpacking below.
                if not line.strip():
                    continue
                # e.g. 2  1892198 1892199 TCMMTMTTMMM 0.5 -   16  <p1,p2,...>
                (csome, start, end, motif, perc_meth, strand, num_reads,
                 probabilities) = line.split('\t')
                pos_dict[i][(csome, start, end, strand)] = (
                    (perc_meth, num_reads),
                    np.asarray(
                        [float(p) for p in probabilities.strip().split(',')]))

    for pos in pos_dict[0]:
        if pos not in pos_dict[1]:
            continue
        # Compare file 1 against file 2. Testing a sample against itself
        # always gives a K-S p-value of 1, so nothing would ever be
        # reported.
        probs1 = pos_dict[0][pos][1]
        probs2 = pos_dict[1][pos][1]
        try:
            u, pval = mannwhitneyu(probs1, probs2, alternative='two-sided')
        except ValueError:
            u, pval = 'none', 'identical'
        u2, pval2 = ranksums(probs1, probs2)
        try:
            t, pval3 = ttest_ind(probs1, probs2)
        except Exception:
            # Best effort: report a placeholder when the t-test fails.
            t, pval3 = 'none', 'missing df'
        d, pval4 = ks_2samp(probs1, probs2)
        if pval4 < 0.9:
            fields = (pos, pos_dict[0][pos][0], pos_dict[1][pos][0],
                      pval, pval2, pval3, pval4)
            # Joining str() of each field gives the same output as a
            # space-separated print on both Python 2 and Python 3.
            print(' '.join(str(field) for field in fields))

开发者ID:al-mcintyre，项目名称:mCaller，代码行数:27，代码来源:compare_genomes.py

注：本文中的scipy.stats.ks_2samp方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。