本文整理汇总了 Python 中 scipy.stats.chi2_contingency 方法的典型用法代码示例。如果您正苦于以下问题:Python stats.chi2_contingency 方法的具体用法?Python stats.chi2_contingency 怎么用?Python stats.chi2_contingency 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 scipy.stats 的用法示例。
在下文中一共展示了stats.chi2_contingency方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _CompareCategoricalFeatures
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def _CompareCategoricalFeatures(self, array1, array2):
    """Compare two categorical samples with a chi-square test.

    The category frequencies of ``array1`` and ``array2`` are counted and
    stacked into a two-row contingency table; a category missing from one
    sample is counted as 0. The table is passed to ``chi2_contingency``
    with ``correction=True``.

    Returns a dict with the keys ``'p-value'``, ``'method'`` (always
    ``'Chi-Square'``) and ``'description'`` (two strings, one per sample,
    listing its ``category: count`` pairs).
    """
    counts = pd.concat(
        (pd.DataFrame(Counter(array1), index=[1]),
         pd.DataFrame(Counter(array2), index=[2])),
        axis=0,
    ).fillna(0)

    def summarize(row):
        # Render one table row as "category: count, category: count, ...".
        return ', '.join(
            '{}: {}'.format(row.index[pos], row.iloc[pos])
            for pos in range(row.size)
        )

    first_summary = summarize(counts.iloc[0, :])
    second_summary = summarize(counts.iloc[1, :])
    p_value = chi2_contingency(counts.values, correction=True)[1]
    return {
        'p-value': p_value,
        'method': 'Chi-Square',
        'description': [first_summary, second_summary],
    }
示例2: doHitProcess
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def doHitProcess(inp):
    """Compare the hit counts of two sets with a chi-square test.

    Parameters
    ----------
    inp : tuple
        ``(idx, hits, n_f1_hits, n_f2_hits)``. ``hits[0]`` and ``hits[1]``
        are the numbers of actives in set 1 and set 2; ``n_f1_hits`` and
        ``n_f2_hits`` are the totals from which the inactive counts are
        derived by subtraction.

    Returns
    -------
    None or tuple
        ``None`` when neither set contains an active. Otherwise
        ``(odds_ratio, idx, hits1, fraction1, hits2, fraction2, risk_ratio,
        pvalue)``. When neither set contains an inactive, the odds ratio
        and risk ratio are reported as 1.0 and the p-value as ``'NA'``.
        Ratios whose denominator is zero are reported as ``np.inf``.
    """
    idx, hits, n_f1_hits, n_f2_hits = inp
    p1_0, p1_1 = n_f1_hits - hits[0], hits[0]
    p2_0, p2_1 = n_f2_hits - hits[1], hits[1]
    # No actives in either set: nothing to compare.
    if p1_1 == 0 and p2_1 == 0:
        return None
    # Fraction of hits in each set. The denominator is the set size, not the
    # inactive count: dividing by the inactives raised ZeroDivisionError for
    # a set made up only of actives, so the branch below was unreachable.
    pcp1_1 = float(p1_1) / float(n_f1_hits) if n_f1_hits else 0.0
    pcp2_1 = float(p2_1) / float(n_f2_hits) if n_f2_hits else 0.0
    # No inactives in either set: the table has an empty column, so skip the
    # test and report neutral ratios with an 'NA' p-value.
    if p1_0 == 0 and p2_0 == 0:
        return 1.0, idx, p1_1, pcp1_1, p2_1, pcp2_1, 1.0, 'NA'
    pvalue = chi2_contingency([[p1_1, p1_0], [p2_1, p2_0]])[1]
    # Odds ratio: (actives1 / inactives1) / (actives2 / inactives2).
    try:
        odr = (float(p1_1) / float(p1_0)) / (float(p2_1) / float(p2_0))
    except ZeroDivisionError:
        odr = np.inf
    # Risk ratio: ratio of the two hit fractions.
    try:
        rr = (float(p1_1) / (float(p1_1) + float(p1_0))) / (float(p2_1) / (float(p2_1) + float(p2_0)))
    except ZeroDivisionError:
        rr = np.inf
    return odr, idx, p1_1, pcp1_1, p2_1, pcp2_1, rr, pvalue
#calculate the chi2 and odds ratio between pathway and disease predictions
示例3: divideData
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def divideData(data, minValue, maxValue, feature="hours_per_week", label="label"):
    """
    Try every integer split point and return the one with the highest
    chi-square statistic.

    For each ``i`` in ``range(minValue + 1, maxValue)`` the ``feature``
    column is cut into the two bins ``[minValue, i]`` and ``(i, maxValue]``,
    cross-tabulated against the ``label`` column, and tested with
    ``chi2_contingency``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the ``feature`` and ``label`` columns.
    minValue, maxValue : int
        Outer bin edges; split points are searched strictly between them.
    feature : str, optional
        Name of the numeric column to split (default ``"hours_per_week"``).
    label : str, optional
        Name of the target column (default ``"label"``).

    Returns
    -------
    tuple
        ``(p_value, chi2, split)`` for the best split. If no split produces
        a statistic greater than 0, the initial values ``(0, 0, -1)`` are
        returned.
    """
    maxChi2 = 0
    index = -1
    maxPValue = 0
    for i in range(minValue + 1, maxValue):
        category = pd.cut(data[feature], [minValue, i, maxValue],
                          include_lowest=True)
        cross = pd.crosstab(data[label], category)
        chi2, pValue = scs.chi2_contingency(cross)[:2]
        # Strict comparison: on ties the earliest split point is kept.
        if chi2 > maxChi2:
            maxPValue = pValue
            maxChi2 = chi2
            index = i
    return maxPValue, maxChi2, index
示例4: g_test
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def g_test(data, correction=False):
    """
    G-test (likelihood ratio test).

    Parameters
    ----------
    data :
        the contingency table; any 2-D array-like (nested list, NumPy
        array or pandas DataFrame)
    correction :
        whether to apply continuity corrections

    Returns
    -------
    g :
        the test statistic
    p :
        the p-value
    df:
        the number of degrees of freedom
    expected:
        the expected frequencies

    Notes
    -----
    Rows and columns that contain only zeros are dropped before testing.
    If nothing is left, ``(0, 1.0, 1, None)`` is returned.

    References
    ----------
    https://en.wikipedia.org/wiki/G-test
    """
    # Coerce first so nested lists and DataFrames work alike; the element-wise
    # comparison below needs a real 2-D array.
    table = np.asarray(data)
    # remove zero rows/columns
    table = table[~np.all(table == 0, axis=1)]
    table = table[:, ~np.all(table == 0, axis=0)]
    if table.sum() == 0:
        return 0, 1.0, 1, None
    return stats.chi2_contingency(table, correction=correction,
                                  lambda_="log-likelihood")
示例5: test_chi2_association
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def test_chi2_association():
    """Check ``ctab.Table.test_nominal_association`` against SciPy.

    A seeded random 4x4 table of counts is tested both ways; the statistic
    and p-value must agree.
    """
    from scipy.stats import chi2_contingency

    np.random.seed(8743)
    counts = np.random.randint(10, 30, size=(4, 4))
    reference = chi2_contingency(counts)
    outcome = ctab.Table(counts).test_nominal_association()
    for actual, desired in ((outcome.statistic, reference[0]),
                            (outcome.pvalue, reference[1])):
        assert_allclose(actual, desired)
示例6: cramers_v_stat
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def cramers_v_stat(confusion_matrix):
    """Calculate Cramér's V statistic for categorical-categorical association.

    ``confusion_matrix`` must be a 2-D table exposing ``.sum()`` and
    ``.shape``. The result is ``sqrt((chi2 / n) / min(rows - 1, cols - 1))``
    with ``chi2`` taken from ``stats.chi2_contingency``.
    """
    statistic = stats.chi2_contingency(confusion_matrix)[0]
    total = confusion_matrix.sum()
    n_rows, n_cols = confusion_matrix.shape
    smaller_dim = min(n_rows - 1, n_cols - 1)
    return math.sqrt(statistic / total / smaller_dim)
示例7: cramers_v_corrected_stat
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def cramers_v_corrected_stat(confusion_matrix):
    """Calculate the bias-corrected Cramér's V for categorical association.

    Uses the correction from Wicher Bergsma, Journal of the Korean
    Statistical Society 42 (2013): 323-328.

    ``confusion_matrix`` must be a 2-D table exposing ``.sum()`` and
    ``.shape``.
    """
    statistic = stats.chi2_contingency(confusion_matrix)[0]
    total = confusion_matrix.sum()
    rows, cols = confusion_matrix.shape
    # Both phi^2 and the table dimensions are shrunk by terms in 1 / (n - 1).
    phi2_corr = max(0, statistic / total - ((cols - 1) * (rows - 1)) / (total - 1))
    corrected_dims = [dim - ((dim - 1) ** 2) / (total - 1) for dim in (rows, cols)]
    return math.sqrt(phi2_corr / min(dim - 1 for dim in corrected_dims))
示例8: chisquare_test
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def chisquare_test(pDataFile1, pDataFile2, pAlpha):
    """Run a chi-square test on each pair of groups from the two inputs.

    Each pair ``(group1, group2)`` is tested with ``chi2_contingency``
    without continuity correction. H0 is rejected when the statistic
    reaches the critical value of the chi-square distribution with one
    degree of freedom at level ``pAlpha``.

    Returns
    -------
    test_result : list
        One p-value per pair, ``np.nan`` where the test raised ValueError.
    accepted : list
        ``[index, p_value]`` entries for pairs where H0 was kept. Pairs
        that could not be tested are listed here with a p-value of 1.0.
    rejected : list
        ``[index, p_value]`` entries for pairs where H0 was rejected.
    """
    threshold = stats.chi2.ppf(q=1 - pAlpha, df=1)
    p_values, accepted, rejected = [], [], []
    untestable = 0
    for position, (first, second) in enumerate(zip(pDataFile1, pDataFile2)):
        try:
            statistic, p_value = stats.chi2_contingency(
                [first, second], correction=False)[:2]
        except ValueError:
            # chi2_contingency refuses the table; count it and keep H0.
            untestable += 1
            p_values.append(np.nan)
            accepted.append([position, 1.0])
            continue
        p_values.append(p_value)
        target = rejected if statistic >= threshold else accepted
        target.append([position, p_value])
    if untestable > 0:
        log.info('{} samples were not tested because at least one condition contained no data in both groups.'.format(
            untestable))
    return p_values, accepted, rejected
示例9: doHitProcess
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def doHitProcess(inp):
    """Compute the enrichment ratio and chi-square test between two hit counts.

    Parameters
    ----------
    inp : tuple
        ``(idx, hits, n_f1_hits, n_f2_hits)``. ``hits[0]`` and ``hits[1]``
        are the hit counts of set 1 and set 2; ``n_f1_hits`` and
        ``n_f2_hits`` are the totals they are divided by.

    Returns
    -------
    None or tuple
        ``None`` when both hit counts are 0. Otherwise
        ``(idx, ratio, hits1, fraction1, hits2, fraction2, chi2, pvalue)``
        where ``ratio`` is ``fraction2 / fraction1`` rounded to 3 decimals.
        When only one set has hits, the ratio is 999.0 (set 1 empty) or 0.0
        (set 2 empty) and ``chi2`` / ``pvalue`` are ``'NA'``. They are also
        ``'NA'`` when every item in both sets is a hit.
    """
    idx, hits, n_f1_hits, n_f2_hits = inp
    if hits[0] == 0 and hits[1] == 0:
        return None
    if hits[0] == 0:
        return idx, 999.0, 0, 0, hits[1], float(hits[1]) / float(n_f2_hits), 'NA', 'NA'
    if hits[1] == 0:
        return idx, 0.0, hits[0], float(hits[0]) / float(n_f1_hits), 0, 0, 'NA', 'NA'
    h1_p = float(hits[0]) / float(n_f1_hits)
    h2_p = float(hits[1]) / float(n_f2_hits)
    ratio = round(h2_p / h1_p, 3)
    # With no misses in either set the table has an all-zero column and
    # chi2_contingency raises ValueError; report 'NA' like the cases above.
    if hits[0] == n_f1_hits and hits[1] == n_f2_hits:
        return idx, ratio, hits[0], h1_p, hits[1], h2_p, 'NA', 'NA'
    chi, pvalue = stats.chi2_contingency(
        [[hits[1], n_f2_hits - hits[1]], [hits[0], n_f1_hits - hits[0]]])[:2]
    return idx, ratio, hits[0], h1_p, hits[1], h2_p, chi, pvalue
#calculate the enrichment ratio between predictions
示例10: test_basic
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def test_basic(self):
    # median_test calls chi2_contingency to compute the test statistic and
    # p-value. Check that the call is forwarded correctly with default
    # arguments, with lambda_=0 and with correction=False.
    x = [1, 2, 3, 4, 5]
    y = [2, 4, 6, 8]
    for options in ({}, {'lambda_': 0}, {'correction': False}):
        stat, p, m, tbl = stats.median_test(x, y, **options)
        assert_equal(m, 4)
        assert_equal(tbl, [[1, 2], [4, 2]])
        # Feeding the same table and options straight to chi2_contingency
        # must reproduce the statistic and p-value.
        exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, **options)
        assert_allclose(stat, exp_stat)
        assert_allclose(p, exp_p)
示例11: test_run_simulator
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def test_run_simulator(self):
    """Test running in a simulator.

    Runs a Bell circuit and a two-qubit Hadamard circuit on
    ``self.sim_backend`` and checks with a chi-square test that the measured
    counts are compatible with the ideal distributions (p-value > 0.01).
    """
    qreg = QuantumRegister(2, 'q')
    creg = ClassicalRegister(2, 'c')
    hadamard = QuantumCircuit(qreg, creg, name='hadamard')
    hadamard.h(qreg)
    hadamard.measure(qreg, creg)
    circuits = transpile([ReferenceCircuits.bell(), hadamard], backend=self.sim_backend)
    qobj = assemble(circuits, backend=self.sim_backend)
    shots = qobj.config.shots
    result = self.sim_backend.run(qobj, validate_qobj=True).result()
    counts_qx1 = result.get_counts(0)
    counts_qx2 = result.get_counts(1)
    # Ideal outcome counts for each circuit.
    counts_ex1 = {'00': shots / 2, '11': shots / 2}
    counts_ex2 = {state: shots / 4 for state in ('00', '11', '10', '01')}
    states1 = counts_qx1.keys() | counts_ex1.keys()
    states2 = counts_qx2.keys() | counts_ex2.keys()

    def contingency_table(measured, ideal, states):
        # Row 0: measured counts, row 1: ideal counts; missing states are 0.
        return numpy.array([[measured.get(key, 0) for key in states],
                            [ideal.get(key, 0) for key in states]])

    ctable1 = contingency_table(counts_qx1, counts_ex1, states1)
    ctable2 = contingency_table(counts_qx2, counts_ex2, states2)
    self.log.info('states1: %s', str(states1))
    self.log.info('states2: %s', str(states2))
    self.log.info('ctable1: %s', str(ctable1))
    self.log.info('ctable2: %s', str(ctable2))
    contingency1 = chi2_contingency(ctable1)
    contingency2 = chi2_contingency(ctable2)
    self.log.info('chi2_contingency1: %s', str(contingency1))
    self.log.info('chi2_contingency2: %s', str(contingency2))
    # Index 1 of the result is the p-value.
    self.assertGreater(contingency1[1], 0.01)
    self.assertGreater(contingency2[1], 0.01)
示例12: chi2
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def chi2(X,y):
    '''Compute the chi-square statistic of each feature against the target.

    Described by the original author as a complement to sklearn's ``chi2``.

    parameter
    ----------
    X: a single feature (1-D) or a group of features (2-D, one per column)
    y: the target variable

    return
    ------
    chi2_value: np.array with one statistic per feature
    chi2_pvalue: np.array with one p-value per feature
    '''
    features = np.asarray(X)
    if features.ndim == 1:
        # Promote a single feature to a one-column matrix.
        features = features.reshape((len(features), 1))
    frame = pd.DataFrame(features)
    outcomes = [stats.chi2_contingency(pd.crosstab(frame[column], y))
                for column in frame.columns]
    chi2_value = np.array([outcome[0] for outcome in outcomes])
    chi2_pvalue = np.array([outcome[1] for outcome in outcomes])
    return (chi2_value, chi2_pvalue)
# 待定
示例13: _chisqure_fo
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def _chisqure_fo(fo):
    """Chi-square test of a table of observed frequencies, with zero smoothing.

    If any cell of ``fo`` is 0, every cell is incremented by 1 before the
    test is run.

    Parameters
    ----------
    fo : 2-D array-like
        Observed frequencies (nested list, NumPy array or DataFrame).

    Returns
    -------
    tuple
        ``(chi2, p_value)`` from ``stats.chi2_contingency``.
    """
    import numpy as np  # local import keeps this block self-contained

    observed = np.asarray(fo)
    # The builtin any() iterates rows of an ndarray (ambiguous truth value)
    # or column labels of a DataFrame, so it never tested the cells.
    if np.any(observed == 0):
        observed = observed + 1
    s = stats.chi2_contingency(observed)
    return s[0], s[1]
示例14: chi2_test
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def chi2_test(fo,alpha=0.05):
    """Chi-square test of independence on a table of observed frequencies.

    ``fo`` is wrapped in a DataFrame and passed to ``chi2_contingency``.

    Returns a tuple ``(significant, p_value)`` where ``significant`` is
    ``p_value <= alpha``.
    """
    from scipy.stats import chi2_contingency

    observed = pd.DataFrame(fo)
    p_value = chi2_contingency(observed=observed)[1]
    # Comparing p against alpha is equivalent to comparing the observed
    # statistic against the critical value at that level.
    return (p_value <= alpha, p_value)
示例15: chi2_test
# 需要导入模块: from scipy import stats [as 别名]
# 或者: from scipy.stats import chi2_contingency [as 别名]
def chi2_test(X, y):
    """
    Chi-square test of independence between each binary feature and y.

    For every column of ``X`` a 2x2 table (feature value x response value)
    is built and tested with continuity correction. A table with an
    all-zero row or column cannot be tested and gets a p-value of 1.

    Args:
        X (np.ndarray): Binary feature matrix
        y (np.ndarray): Binary response vector
    Returns:
        np.array: A vector of p-values, one for every feature.
    """
    X0 = 1 - X
    if hasattr(y, "values"):
        y = y.values
    # Two indicator columns: (y == 0, y == 1).
    Y = y.reshape((-1, 1))
    Y = np.append(1 - Y, Y, axis=1)
    # Column i holds the counts per response class where feature i is 1
    # (Tbl1) or 0 (Tbl0).
    Tbl1 = np.dot(Y.T, X)
    Tbl0 = np.dot(Y.T, X0)
    m = X.shape[1]
    # np.NaN was removed in NumPy 2.0; np.full also avoids multiplying
    # uninitialised memory.
    pvals = np.full(m, np.nan)
    for i in range(m):
        table = np.array([Tbl0[:, i], Tbl1[:, i]])
        empty = table == 0
        # chi2_contingency raises ValueError when a whole row or column is
        # zero (constant feature or constant response).
        if empty.all(axis=1).any() or empty.all(axis=0).any():
            pvals[i] = 1
        else:
            pvals[i] = stats.chi2_contingency(table, correction=True)[1]
    return pvals