本文整理汇总了Python中pysnptools.snpreader.Bed.sid_to_index方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.sid_to_index方法的具体用法?Python Bed.sid_to_index怎么用?Python Bed.sid_to_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysnptools.snpreader.Bed
的用法示例。
在下文中一共展示了Bed.sid_to_index方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_match_cpp
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
def test_match_cpp(self):
'''
match
FaSTLMM.207\Data\DemoData>..\.cd.\bin\windows\cpp_mkl\fastlmmc -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.singlesnp.txt -logDelta 0 -verbose 100
'''
logging.info("TestSingleSnp test_match_cpp")
snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"), count_A1=False)
pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
sim_idx = snps.sid_to_index(sim_sid)
test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
test_idx = snps.sid_to_index(test_sid)
for G0,G1 in [(snps[:,sim_idx],KernelIdentity(snps.iid)),(KernelIdentity(snps.iid),snps[:,sim_idx])]:
frame_h2 = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,h2=.5,leave_out_one_chrom=False,count_A1=False)
frame_log_delta = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,log_delta=0,leave_out_one_chrom=False,count_A1=False)
for frame in [frame_h2, frame_log_delta]:
referenceOutfile = TestFeatureSelection.reference_file("single_snp/topsnps.single.txt")
reference = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file
assert len(frame) == len(reference)
for _, row in reference.iterrows():
sid = row.SNP
pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
reldiff = abs(row.Pvalue - pvalue)/row.Pvalue
assert reldiff < .035, "'{0}' pvalue_list differ too much {4} -- {2} vs {3}".format(sid,None,row.Pvalue,pvalue,reldiff)
示例2: main
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
def main(args):
print('reading seeed snps')
seed_snps = pd.read_csv(args.seed_snps, header=None, names=['SNP'], index_col='SNP')
seed_snps['ibs_length'] = 0
seed_snps['ibd'] = 0
print('reading typed snps')
typed_snps = pd.read_csv(args.typed_snps, header=None, names=['SNP'])
print('reading genotypes')
data = Bed(args.bfile)
X = data.read().val
typed_snps_indices = np.sort(data.sid_to_index(typed_snps.SNP))
typed_snps_bp = data.col_property[typed_snps_indices,2]
print(len(seed_snps), 'snps in list')
print(data.iid_count, data.sid_count, 'are dimensions of X')
def analyze_snp(i):
# find first typed snp after query snp
snp_bp = data.col_property[i,2]
v = np.where(typed_snps_bp > snp_bp)[0]
if len(v) > 0:
typed_i = v[0]
else:
typed_i = len(typed_snps_indices)-1
n1, n2 = np.where(X[:,i] == 1)[0]
if (X[n1,typed_snps_indices[typed_i]] - X[n2, typed_snps_indices[typed_i]])**2 == 4:
return 0, 0
typed_il, typed_ir = fis.find_boundaries(
X[n1,typed_snps_indices],
X[n2,typed_snps_indices],
typed_i)
typed_ir -= 1
il = typed_snps_indices[typed_il]
ir = typed_snps_indices[typed_ir]
cM = data.col_property[ir, 1] - \
data.col_property[il, 1]
ibd = (np.mean(X[n1,il:ir] == X[n2,il:ir]) > 0.99)
return cM, int(ibd)
for (i, snp) in iter.show_progress(
it.izip(data.sid_to_index(seed_snps.index), seed_snps.index),
total=len(seed_snps)):
# total=10):
seed_snps.ix[snp, ['ibs_length', 'ibd']] = analyze_snp(i)
print(seed_snps.iloc[:100])
seed_snps.to_csv(args.outfile, sep='\t')
示例3: __init__
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
def __init__(self,args):
if args.window_type not in ['BP','SNP']:
raise ValueError('Window type not supported')
bed_1 = Bed(args.bfile) #
af1 = self.get_allele_frequency(bed_1,args) #
print(len(af1), "SNPs in file 1")
snps_1 = (af1>args.maf)&(af1<1-args.maf) #
print(np.sum(snps_1), "SNPs in file 1 after MAF filter")
if (args.from_bp is not None) and (args.to_bp is not None):
k = (bed_1.pos[:,2]>args.from_bp)&(bed_1.pos[:,2]<args.to_bp)
snps_1 = snps_1&k
snps_to_use = bed_1.sid[snps_1]
if args.extract is not None:
keep = np.array([l.strip() for l in open(args.extract,'r')])
snps_to_use = np.intersect1d(snps_to_use,keep)
print(len(snps_to_use),"SNPs remaining after extraction")
bed_1_index = np.sort(bed_1.sid_to_index(snps_to_use)) #
pos = bed_1.pos[bed_1_index] #
bim_1=pd.read_table(bed_1.filename+'.bim',header=None,
names=['chm','id','pos_mb','pos_bp','a1','a2'])
af = af1[bed_1_index] #
if args.afile is not None:
a1 = pd.read_table(args.afile,header=None,sep='\s*',
names=['id1','id2','theta'])
else:
a1 = None
self.af = af
self.M = len(bed_1_index) #
self.windows = self.get_windows(pos,args) #
self.chr = pos[:,0]
self.pos = pos[:,2]
self.id = bed_1.sid[bed_1_index]
self.A1 = bim_1['a1'].loc[bed_1_index]
self.A2 = bim_1['a2'].loc[bed_1_index]
self.scores = self.compute(bed_1,bed_1_index,af,a1,args) #
示例4: test_match_cpp
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
def test_match_cpp(self):
'''
match
FaSTLMM.207\Data\DemoData>fastlmmc -snpPairs -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.pairs.txt -logDelta 0 -verbose 100
'''
logging.info("TestEpistasis test_match_cpp")
from pysnptools.snpreader import Bed
snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"))
pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
sim_idx = snps.sid_to_index(sim_sid)
test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
test_idx = snps.sid_to_index(test_sid)
frame = epistasis(snps[:,test_idx], pheno,covar=covar, G0 = snps[:,sim_idx],log_delta=0)
sid0,sid1,pvalue_list =np.array(frame['SNP0']),np.array(frame['SNP1']),np.array(frame['PValue'])
referenceOutfile = TestFeatureSelection.reference_file("epistasis/topsnps.pairs.txt")
import pandas as pd
table = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file
assert len(pvalue_list) == len(table)
for row in table.iterrows():
snp0cpp,snp1cpp,pvaluecpp,i1,i2 = row[1]
for i in xrange(len(pvalue_list)):
found = False
pvaluepy = pvalue_list[i]
snp0py = sid0[i]
snp1py = sid1[i]
if (snp0py == snp0cpp and snp1py == snp1cpp) or (snp0py == snp1cpp and snp1py == snp0cpp):
found = True
diff = abs(pvaluecpp - pvaluepy)/pvaluecpp
assert diff < .035, "'{0}' '{1}' pvalue_list differ too much {4} -- {2} vs {3}".format(snp0cpp,snp1cpp,pvaluecpp,pvaluepy,diff)
break
assert found
示例5: _Epistasis
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
#.........这里部分代码省略.........
logging.info("internal_delta={0}".format(self.internal_delta))
logging.info("external_log_delta={0}".format(self.external_log_delta))
do_pair_count = 0
do_pair_time = time.time()
def do_work(self, lmm, sid0_list, sid1_list):
dataframe = pd.DataFrame(
index=np.arange(len(sid0_list)),
columns=('SNP0', 'Chr0', 'GenDist0', 'ChrPos0', 'SNP1', 'Chr1', 'GenDist1', 'ChrPos1', 'PValue', 'NullLogLike', 'AltLogLike')
)
#!!Is this the only way to set types in a dataframe?
dataframe['Chr0'] = dataframe['Chr0'].astype(np.float)
dataframe['GenDist0'] = dataframe['GenDist0'].astype(np.float)
dataframe['ChrPos0'] = dataframe['ChrPos0'].astype(np.float)
dataframe['Chr1'] = dataframe['Chr1'].astype(np.float)
dataframe['GenDist1'] = dataframe['GenDist1'].astype(np.float)
dataframe['ChrPos1'] = dataframe['ChrPos1'].astype(np.float)
dataframe['PValue'] = dataframe['PValue'].astype(np.float)
dataframe['NullLogLike'] = dataframe['NullLogLike'].astype(np.float)
dataframe['AltLogLike'] = dataframe['AltLogLike'].astype(np.float)
#This is some of the code for a different way that reads and dot-products 50% more, but does less copying. Seems about the same speed
#sid0_index_list = self.test_snps.sid_to_index(sid0_list)
#sid1_index_list = self.test_snps.sid_to_index(sid1_list)
#sid_index_union_dict = {}
#sid0_index_index_list = self.create_index_index(sid_index_union_dict, sid0_index_list)
#sid1_index_index_list = self.create_index_index(sid_index_union_dict, sid1_index_list)
#snps0_read = self.test_snps[:,sid0_index_list].read().standardize()
#snps1_read = self.test_snps[:,sid1_index_list].read().standardize()
sid_union = set(sid0_list).union(sid1_list)
sid_union_index_list = sorted(self.test_snps.sid_to_index(sid_union))
snps_read = self.test_snps[:,sid_union_index_list].read().standardize()
sid0_index_list = snps_read.sid_to_index(sid0_list)
sid1_index_list = snps_read.sid_to_index(sid1_list)
products = snps_read.val[:,sid0_index_list] * snps_read.val[:,sid1_index_list] # in the products matrix, each column i is the elementwise product of sid i in each list
X = np.hstack((self.covar, snps_read.val, products))
UX = lmm.U.T.dot(X)
k = lmm.S.shape[0]
N = X.shape[0]
if (k<N):
UUX = X - lmm.U.dot(UX)
else:
UUX = None
for pair_index, sid0 in enumerate(sid0_list):
sid1 = sid1_list[pair_index]
sid0_index = sid0_index_list[pair_index]
sid1_index = sid1_index_list[pair_index]
index_list = np.array([pair_index]) #index to product
index_list = index_list + len(sid_union_index_list) #Shift by the number of snps in the union
index_list = np.hstack((np.array([sid0_index,sid1_index]),index_list)) # index to sid0 and sid1
index_list = index_list + self.covar.shape[1] #Shift by the number of values in the covar
index_list = np.hstack((np.arange(self.covar.shape[1]),index_list)) #indexes of the covar
index_list_less_product = index_list[:-1] #index to everything but the product
#Null -- the two additive SNPs
lmm.X = X[:,index_list_less_product]
lmm.UX = UX[:,index_list_less_product]
if (k<N):
lmm.UUX = UUX[:,index_list_less_product]
else:
lmm.UUX = None
res_null = lmm.nLLeval(delta=self.internal_delta, REML=False)
ll_null = -res_null["nLL"]
#Alt -- now with the product feature
lmm.X = X[:,index_list]
lmm.UX = UX[:,index_list]
if (k<N):
lmm.UUX = UUX[:,index_list]
else:
lmm.UUX = None
res_alt = lmm.nLLeval(delta=self.internal_delta, REML=False)
ll_alt = -res_alt["nLL"]
test_statistic = ll_alt - ll_null
degrees_of_freedom = 1
pvalue = stats.chi2.sf(2.0 * test_statistic, degrees_of_freedom)
logging.debug("<{0},{1}>, null={2}, alt={3}, pvalue={4}".format(sid0,sid1,ll_null,ll_alt,pvalue))
dataframe.iloc[pair_index] = [
sid0, snps_read.pos[sid0_index,0], snps_read.pos[sid0_index,1], snps_read.pos[sid0_index,2],
sid1, snps_read.pos[sid1_index,0], snps_read.pos[sid1_index,1], snps_read.pos[sid1_index,2],
pvalue, ll_null, ll_alt]
self.do_pair_count += 1
if self.do_pair_count % 100 == 0:
start = self.do_pair_time
self.do_pair_time = time.time()
logging.info("do_pair_count={0}, time={1}".format(self.do_pair_count,self.do_pair_time-start))
return dataframe
示例6: fit
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
class fit(object):
def __init__(self,args):
self.bed = Bed(args.bfile) #
self.N = self.bed.iid_count
if args.covfile is not None:
cov = pd.read_table(args.covfile,header=None)
self.cov = sm.add_constant(ju._reorder(cov,self.bed.iid))
self.ncov = self.cov.shape[1] # + constant
else:
self.cov = np.ones((self.N,1))
self.ncov = 1 # Constant
if args.phenofile is not None:
Y = pd.read_table(args.phenofile,header=None,na_values='-9')
else:
try:
Y = pd.read_table(args.bfile+'.pheno',header=None,na_values='-9')
except IOError:
print("Phenotype file not found.")
exit(1)
self.Y = ju._reorder(Y,self.bed.iid)
af = ju.get_allele_frequency(self.bed,args) #
snps = (af>args.maf)&(af<1-args.maf) #
if (args.from_bp is not None) and (args.to_bp is not None):
k = (bed.pos[:,2]>args.from_bp)&(bed.pos[:,2]<args.to_bp)
snp1 = snps&k
snps_to_use = self.bed.sid[snps]
if args.extract is not None:
keep = np.array([l.strip() for l in open(args.extract,'r')])
snps_to_use = np.intersect1d(snps_to_use,keep)
self.bed_index = np.sort(self.bed.sid_to_index(snps_to_use)) #
pos = self.bed.pos[self.bed_index] #
bim=pd.read_table(self.bed.filename+'.bim',header=None,
names=['chm','id','pos_mb','pos_bp','a1','a2'])
self.af = af[self.bed_index] #
self.M = len(self.bed_index) #
self.windows = ju.get_windows(pos,self.M,args.window_size,args.window_type)
self.pos = pos[:,2]
self.chr = pos[:,0]
self.id = self.bed.sid[self.bed_index]
self.A1 = bim['a1'].loc[self.bed_index]
self.A2 = bim['a2'].loc[self.bed_index]
self.logistic = False
self.chimin = stats.chi2.ppf(1-args.minp,2)
# Fit null
if (not args.linear) and (self.Y.min() >= 0 and self.Y.max() <= 1):
self.null = sm.Logit(self.Y, self.cov, missing='drop').fit(disp=0)
self.logistic = True
else:
self.null = sm.OLS(self.Y, self.cov, missing='drop').fit(disp=0)
if self.ncov > 1:
self.cov = sm.add_constant(self.null.fittedvalues)
self.marg_res, self.joint_res = self.compute(args)
def compute(self,args):
t=time()
marg_res = []
joint_res = []
Z = []
windex = 0
li,ri = self.windows[windex]
nstr = np.max((args.SNPs_to_read,ri-li))
offset = li
G = self.bed[:,self.bed_index[li:(li+nstr)]].read().val
G = ju._impute_missing(G) # replace missing with mean
self.compute_marg(marg_res,Z,G,li,args)
A = ju._norm_data(G)
while ri < offset+nstr:
st = li-offset
fi = ri-offset
# All correlations of SNP j with SNPs in its window
R = np.dot(np.atleast_2d(A[:,st]/self.N),A[:,(st+1):fi]).flatten()
Zl = Z[li]
Zr = np.array(Z[(li+1):ri])
# Use marginal Z-scores and R to compute expected joint chi2s
ChiP = (1/(1-R**2))*(Zl**2+Zr**2-2*R*Zl*Zr)
ChiP[R**2 < args.r2min] = -1
self.compute_joint(joint_res,G,ChiP,offset,li,ri,args)
windex += 1
li,ri = self.windows[windex]
for i in xrange(offset+nstr,self.M,nstr):
sys.stdout.flush()
sys.stdout.write("SNP: %d, %f\r" % (i, time()-t))
Gn = self.bed[:,self.bed_index[i:(i+nstr)]].read().val
Gn = ju._impute_missing(Gn)
An = ju._norm_data(Gn)
self.compute_marg(marg_res,Z,Gn,i,args)
G = np.hstack((G,Gn))
A = np.hstack((A,An))
if G.shape[1] > args.SNPs_to_store:
G = G[:,nstr:]
A = A[:,nstr:]
offset += nstr
while ri < i+nstr:
st = li-offset
fi = ri-offset
# All correlations of SNP j with SNPs in its window
R = np.dot(np.atleast_2d(A[:,st]/self.N),A[:,(st+1):fi]).flatten()
Zl = Z[li]
Zr = np.array(Z[(li+1):ri])
#.........这里部分代码省略.........
示例7: sample
# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
class sample(object):
def __init__(self,args):
self.bed = Bed(args.bfile) #
self.N = self.bed.iid_count
if args.covfile is not None:
cov = pd.read_table(args.covfile,header=None)
self.cov = sm.add_constant(ju._reorder(cov,self.bed.iid))
self.ncov = self.cov.shape[1] # + constant
else:
self.cov = np.ones((self.N,1))
self.ncov = 1 # Constant
af = ju.get_allele_frequency(self.bed,args) #
snps = (af>args.maf)&(af<1-args.maf) #
if (args.from_bp is not None) and (args.to_bp is not None):
k = (bed.pos[:,2]>args.from_bp)&(bed.pos[:,2]<args.to_bp)
snp1 = snps&k
snps_to_use = self.bed.sid[snps]
if args.extract is not None:
keep = np.array([l.strip() for l in open(args.extract,'r')])
snps_to_use = np.intersect1d(snps_to_use,keep)
self.bed_index = np.sort(self.bed.sid_to_index(snps_to_use)) #
pos = self.bed.pos[self.bed_index] #
bim=pd.read_table(self.bed.filename+'.bim',header=None,
names=['chm','id','pos_mb','pos_bp','a1','a2'])
self.af = af[self.bed_index] #
self.M = len(self.bed_index) #
self.windows = ju.get_windows(pos,self.M,args.window_size,args.window_type)
self.sample_windows = ju.get_windows(pos,self.M,args.sample_window_size,
args.sample_window_type)
self.pos = pos[:,2]
self.chr = pos[:,0]
self.id = self.bed.sid[self.bed_index]
self.A1 = bim['a1'].loc[self.bed_index]
self.A2 = bim['a2'].loc[self.bed_index]
self.numSamples = args.numSamples
self.JMaxStats, self.ZMaxStats = self.sample(args)
self.JMinP = stats.chi2.sf(self.JMaxStats,2)
self.ZMinP = stats.chi2.sf(self.ZMaxStats**2,1)
self.minP = np.minimum(self.JMinP,self.ZMinP)
def sample(self,args):
t=time()
nz = 0
ZMaxStats = np.zeros((self.numSamples,1))
JMaxStats = np.zeros((self.numSamples,1))
windex = 0
sli,sri = self.sample_windows[windex]
tli,tri = self.windows[windex]
nstr = np.max((args.SNPs_to_read,sri-sli))
offset = sli
G = self.bed[:,self.bed_index[sli:(sli+nstr)]].read().val
G = ju._impute_missing(G)
A = ju._norm_data(G)
# Sample Z-scores and do joint tests of first window
R = np.dot(A[:,sli:sri].T/self.N,A[:,sli:sri])
Z=np.random.multivariate_normal(np.zeros((R.shape[0])),R,args.numSamples)
nz += R.shape[0]
zli,zri = sli,sri # position of Z relative to full genotype
gli,gri = zli,zri # position of Z relative to genotype in memory
Rp = R[(tli+1):tri,0]
to_test = Rp**2 > args.r2min
Rp = Rp[to_test]
Zl = np.atleast_2d(Z[:,0]).T
Zr = np.array(Z[:,1:(tri-tli)])[:,to_test]
ChiP = (1/(1-Rp**2))*(Zl**2+Zr**2-2*Rp*Zl*Zr)
ZMaxStats = np.atleast_2d(np.hstack((ZMaxStats,abs(Z))).max(1)).T
# ZMaxStats = np.maximum(ZMaxStats,abs(Z.max(1)))
# JMaxStats = np.maximum(JMaxStats,ChiP.max(1))
JMaxStats = np.atleast_2d(np.hstack((JMaxStats,ChiP)).max(1)).T
# Slide through genotype in memory
while True:
windex += 1
sli,sri = self.sample_windows[windex]
tli,tri = self.windows[windex]
if sri >= offset+nstr: break
tst,tfi,sst,sfi = np.array([tli,tri,sli,sri])-offset
#print sli, sri, zli, zri, gli, gri, Z.shape[1]
if zli < sli: # drop zli..sli and update indices
Z = Z[:,(sli-zli):]
zli,gli = sli,sst
if zri < sri: # marginal sample everything from zri..sri
S = A[:,gli:gri] # G that overlaps Z
Sn = A[:,gri:sri] # G about to have Z scores sampled
r12 = S.T.dot(Sn)/self.N
r11 = Sn.T.dot(Sn)/self.N
Zn = self.sample_func(Z,S,Sn,r11,r12,args)
Z = np.hstack((Z,Zn))
nz += (sri-gri)
zri,gri = sri,sfi
ZMaxStats = np.atleast_2d(np.hstack((ZMaxStats,abs(Zn))).max(1)).T
# ZMaxStats = np.maximum(ZMaxStats,abs(Zn.flatten()))
# All correlations of SNP tli with SNPs in its window
# Surely these are already computed and some cleverness
# can be used to re-use them but its a fast calculation anyways
if sri-sli > 1:
R = np.dot(np.atleast_2d(A[:,tst]/self.N),
A[:,(tst+1):tfi]).flatten()
to_test = R**2 > args.r2min
R = R[to_test]
Zl = np.atleast_2d(Z[:,0]).T
#.........这里部分代码省略.........