当前位置: 首页>>代码示例>>Python>>正文


Python Bed.sid_to_index方法代码示例

本文整理汇总了Python中pysnptools.snpreader.Bed.sid_to_index方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.sid_to_index方法的具体用法?Python Bed.sid_to_index怎么用?Python Bed.sid_to_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pysnptools.snpreader.Bed的用法示例。


在下文中一共展示了Bed.sid_to_index方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_match_cpp

# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
    def test_match_cpp(self):
        '''
        match
            FaSTLMM.207\Data\DemoData>..\.cd.\bin\windows\cpp_mkl\fastlmmc -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.singlesnp.txt -logDelta 0 -verbose 100

        '''
        logging.info("TestSingleSnp test_match_cpp")
        snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"), count_A1=False)
        pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
        covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
        sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
        sim_idx = snps.sid_to_index(sim_sid)
        test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
        test_idx = snps.sid_to_index(test_sid)

        for G0,G1 in [(snps[:,sim_idx],KernelIdentity(snps.iid)),(KernelIdentity(snps.iid),snps[:,sim_idx])]:
            frame_h2 = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,h2=.5,leave_out_one_chrom=False,count_A1=False)
            frame_log_delta = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,log_delta=0,leave_out_one_chrom=False,count_A1=False)
            for frame in [frame_h2, frame_log_delta]:
                referenceOutfile = TestFeatureSelection.reference_file("single_snp/topsnps.single.txt")
                reference = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file
                assert len(frame) == len(reference)
                for _, row in reference.iterrows():
                    sid = row.SNP
                    pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
                    reldiff = abs(row.Pvalue - pvalue)/row.Pvalue
                    assert reldiff < .035, "'{0}' pvalue_list differ too much {4} -- {2} vs {3}".format(sid,None,row.Pvalue,pvalue,reldiff)
开发者ID:MicrosoftGenomics,项目名称:FaST-LMM,代码行数:29,代码来源:test_single_snp.py

示例2: main

# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
def main(args):
    print('reading seeed snps')
    seed_snps = pd.read_csv(args.seed_snps, header=None, names=['SNP'], index_col='SNP')
    seed_snps['ibs_length'] = 0
    seed_snps['ibd'] = 0

    print('reading typed snps')
    typed_snps = pd.read_csv(args.typed_snps, header=None, names=['SNP'])

    print('reading genotypes')
    data = Bed(args.bfile)
    X = data.read().val
    typed_snps_indices = np.sort(data.sid_to_index(typed_snps.SNP))
    typed_snps_bp = data.col_property[typed_snps_indices,2]

    print(len(seed_snps), 'snps in list')
    print(data.iid_count, data.sid_count, 'are dimensions of X')

    def analyze_snp(i):
        # find first typed snp after query snp
        snp_bp = data.col_property[i,2]
        v = np.where(typed_snps_bp > snp_bp)[0]
        if len(v) > 0:
            typed_i = v[0]
        else:
            typed_i = len(typed_snps_indices)-1

        n1, n2 = np.where(X[:,i] == 1)[0]
        if (X[n1,typed_snps_indices[typed_i]] - X[n2, typed_snps_indices[typed_i]])**2 == 4:
            return 0, 0

        typed_il, typed_ir = fis.find_boundaries(
                X[n1,typed_snps_indices],
                X[n2,typed_snps_indices],
                typed_i)
        typed_ir -= 1

        il = typed_snps_indices[typed_il]
        ir = typed_snps_indices[typed_ir]
        cM = data.col_property[ir, 1] - \
                data.col_property[il, 1]
        ibd = (np.mean(X[n1,il:ir] == X[n2,il:ir]) > 0.99)
        return cM, int(ibd)

    for (i, snp) in iter.show_progress(
            it.izip(data.sid_to_index(seed_snps.index), seed_snps.index),
            total=len(seed_snps)):
            # total=10):
        seed_snps.ix[snp, ['ibs_length', 'ibd']] = analyze_snp(i)

    print(seed_snps.iloc[:100])
    seed_snps.to_csv(args.outfile, sep='\t')
开发者ID:hilaryfinucane,项目名称:ibd,代码行数:54,代码来源:analyze_snps.py

示例3: __init__

# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
 def __init__(self,args):
     if args.window_type not in ['BP','SNP']:
         raise ValueError('Window type not supported')
     bed_1 = Bed(args.bfile) #
     af1 = self.get_allele_frequency(bed_1,args) #
     print(len(af1), "SNPs in file 1")
     snps_1 = (af1>args.maf)&(af1<1-args.maf) #
     print(np.sum(snps_1), "SNPs in file 1 after MAF filter")
     if (args.from_bp is not None) and (args.to_bp is not None):
         k = (bed_1.pos[:,2]>args.from_bp)&(bed_1.pos[:,2]<args.to_bp)
         snps_1 = snps_1&k
     snps_to_use = bed_1.sid[snps_1]
     if args.extract is not None:
         keep = np.array([l.strip() for l in open(args.extract,'r')])
         snps_to_use = np.intersect1d(snps_to_use,keep)
         print(len(snps_to_use),"SNPs remaining after extraction")
     bed_1_index = np.sort(bed_1.sid_to_index(snps_to_use)) #
     pos = bed_1.pos[bed_1_index] #
     bim_1=pd.read_table(bed_1.filename+'.bim',header=None,
                         names=['chm','id','pos_mb','pos_bp','a1','a2'])
     af = af1[bed_1_index] #
     if args.afile is not None:
         a1 =  pd.read_table(args.afile,header=None,sep='\s*',
                             names=['id1','id2','theta'])
     else:
         a1 = None
     self.af = af
     self.M = len(bed_1_index) #
     self.windows = self.get_windows(pos,args) #
     self.chr = pos[:,0]
     self.pos = pos[:,2]
     self.id = bed_1.sid[bed_1_index]
     self.A1 = bim_1['a1'].loc[bed_1_index]
     self.A2 = bim_1['a2'].loc[bed_1_index]
     self.scores = self.compute(bed_1,bed_1_index,af,a1,args) #
开发者ID:dtnchen,项目名称:Popcorn,代码行数:37,代码来源:compute.py

示例4: test_match_cpp

# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
    def test_match_cpp(self):
        '''
        match
            FaSTLMM.207\Data\DemoData>fastlmmc -snpPairs -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.pairs.txt -logDelta 0 -verbose 100

        '''
        logging.info("TestEpistasis test_match_cpp")
        from pysnptools.snpreader import Bed
        snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"))
        pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
        covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
        sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
        sim_idx = snps.sid_to_index(sim_sid)
        test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
        test_idx = snps.sid_to_index(test_sid)

        frame = epistasis(snps[:,test_idx], pheno,covar=covar, G0 = snps[:,sim_idx],log_delta=0)
        sid0,sid1,pvalue_list =np.array(frame['SNP0']),np.array(frame['SNP1']),np.array(frame['PValue'])

        referenceOutfile = TestFeatureSelection.reference_file("epistasis/topsnps.pairs.txt")

        import pandas as pd
        table = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file
        assert len(pvalue_list) == len(table)
        for row in table.iterrows():
            snp0cpp,snp1cpp,pvaluecpp,i1,i2 = row[1]
            for i in xrange(len(pvalue_list)):
                found = False
                pvaluepy = pvalue_list[i]
                snp0py = sid0[i]
                snp1py = sid1[i]
                if (snp0py == snp0cpp and snp1py == snp1cpp) or (snp0py == snp1cpp and snp1py == snp0cpp):
                    found = True
                    diff = abs(pvaluecpp - pvaluepy)/pvaluecpp
                    assert diff < .035, "'{0}' '{1}' pvalue_list differ too much {4} -- {2} vs {3}".format(snp0cpp,snp1cpp,pvaluecpp,pvaluepy,diff)
                    break
            assert found
开发者ID:42binwang,项目名称:FaST-LMM,代码行数:39,代码来源:testepistasis.py

示例5: _Epistasis

# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]

#.........这里部分代码省略.........
        logging.info("internal_delta={0}".format(self.internal_delta))
        logging.info("external_log_delta={0}".format(self.external_log_delta))


    do_pair_count = 0
    do_pair_time = time.time()

    def do_work(self, lmm, sid0_list, sid1_list):
        dataframe = pd.DataFrame(
            index=np.arange(len(sid0_list)),
            columns=('SNP0', 'Chr0', 'GenDist0', 'ChrPos0', 'SNP1', 'Chr1', 'GenDist1', 'ChrPos1', 'PValue', 'NullLogLike', 'AltLogLike')
            )
        #!!Is this the only way to set types in a dataframe?
        dataframe['Chr0'] = dataframe['Chr0'].astype(np.float)
        dataframe['GenDist0'] = dataframe['GenDist0'].astype(np.float)
        dataframe['ChrPos0'] = dataframe['ChrPos0'].astype(np.float)
        dataframe['Chr1'] = dataframe['Chr1'].astype(np.float)
        dataframe['GenDist1'] = dataframe['GenDist1'].astype(np.float)
        dataframe['ChrPos1'] = dataframe['ChrPos1'].astype(np.float)
        dataframe['PValue'] = dataframe['PValue'].astype(np.float)
        dataframe['NullLogLike'] = dataframe['NullLogLike'].astype(np.float)
        dataframe['AltLogLike'] = dataframe['AltLogLike'].astype(np.float)


        #This is some of the code for a different way that reads and dot-products 50% more, but does less copying. Seems about the same speed
        #sid0_index_list = self.test_snps.sid_to_index(sid0_list)
        #sid1_index_list = self.test_snps.sid_to_index(sid1_list)
        #sid_index_union_dict = {}
        #sid0_index_index_list = self.create_index_index(sid_index_union_dict, sid0_index_list)
        #sid1_index_index_list = self.create_index_index(sid_index_union_dict, sid1_index_list)
        #snps0_read = self.test_snps[:,sid0_index_list].read().standardize()
        #snps1_read = self.test_snps[:,sid1_index_list].read().standardize()

        sid_union = set(sid0_list).union(sid1_list)
        sid_union_index_list = sorted(self.test_snps.sid_to_index(sid_union))
        snps_read = self.test_snps[:,sid_union_index_list].read().standardize()

        sid0_index_list = snps_read.sid_to_index(sid0_list)
        sid1_index_list = snps_read.sid_to_index(sid1_list)

        products = snps_read.val[:,sid0_index_list] * snps_read.val[:,sid1_index_list] # in the products matrix, each column i is the elementwise product of sid i in each list
        X = np.hstack((self.covar, snps_read.val, products))
        UX = lmm.U.T.dot(X)
        k = lmm.S.shape[0]
        N = X.shape[0]
        if (k<N):
            UUX = X - lmm.U.dot(UX)
        else:
            UUX = None

        for pair_index, sid0 in enumerate(sid0_list):
            sid1 = sid1_list[pair_index]
            sid0_index = sid0_index_list[pair_index]
            sid1_index = sid1_index_list[pair_index]

            index_list = np.array([pair_index]) #index to product
            index_list = index_list + len(sid_union_index_list) #Shift by the number of snps in the union
            index_list = np.hstack((np.array([sid0_index,sid1_index]),index_list)) # index to sid0 and sid1
            index_list = index_list + self.covar.shape[1] #Shift by the number of values in the covar
            index_list = np.hstack((np.arange(self.covar.shape[1]),index_list)) #indexes of the covar

            index_list_less_product = index_list[:-1] #index to everything but the product

            #Null -- the two additive SNPs
            lmm.X = X[:,index_list_less_product]
            lmm.UX = UX[:,index_list_less_product]
            if (k<N):
                lmm.UUX = UUX[:,index_list_less_product]
            else:
                lmm.UUX = None
            res_null = lmm.nLLeval(delta=self.internal_delta, REML=False)
            ll_null = -res_null["nLL"]

            #Alt -- now with the product feature
            lmm.X = X[:,index_list]
            lmm.UX = UX[:,index_list]
            if (k<N):
                lmm.UUX = UUX[:,index_list]
            else:
                lmm.UUX = None
            res_alt = lmm.nLLeval(delta=self.internal_delta, REML=False)
            ll_alt = -res_alt["nLL"]

            test_statistic = ll_alt - ll_null
            degrees_of_freedom = 1
            pvalue = stats.chi2.sf(2.0 * test_statistic, degrees_of_freedom)
            logging.debug("<{0},{1}>, null={2}, alt={3}, pvalue={4}".format(sid0,sid1,ll_null,ll_alt,pvalue))

            dataframe.iloc[pair_index] = [
                 sid0, snps_read.pos[sid0_index,0],  snps_read.pos[sid0_index,1], snps_read.pos[sid0_index,2],
                 sid1, snps_read.pos[sid1_index,0],  snps_read.pos[sid1_index,1], snps_read.pos[sid1_index,2],
                 pvalue, ll_null, ll_alt]

            self.do_pair_count += 1
            if self.do_pair_count % 100 == 0:
                start = self.do_pair_time
                self.do_pair_time = time.time()
                logging.info("do_pair_count={0}, time={1}".format(self.do_pair_count,self.do_pair_time-start))

        return dataframe
开发者ID:bdepardo,项目名称:FaST-LMM,代码行数:104,代码来源:epistasis.py

示例6: fit

# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
class fit(object):
    def __init__(self,args):
        self.bed = Bed(args.bfile) #
        self.N = self.bed.iid_count
        if args.covfile is not None:
            cov = pd.read_table(args.covfile,header=None)
            self.cov = sm.add_constant(ju._reorder(cov,self.bed.iid))
            self.ncov = self.cov.shape[1] # + constant
        else:
            self.cov = np.ones((self.N,1))
            self.ncov = 1 # Constant
        if args.phenofile is not None:
            Y = pd.read_table(args.phenofile,header=None,na_values='-9')
        else:
            try:
                Y = pd.read_table(args.bfile+'.pheno',header=None,na_values='-9')
            except IOError:
                print("Phenotype file not found.")
                exit(1)
        self.Y = ju._reorder(Y,self.bed.iid)
        af = ju.get_allele_frequency(self.bed,args) #
        snps = (af>args.maf)&(af<1-args.maf) #
        if (args.from_bp is not None) and (args.to_bp is not None):
            k = (bed.pos[:,2]>args.from_bp)&(bed.pos[:,2]<args.to_bp)
            snp1 = snps&k
        snps_to_use = self.bed.sid[snps]
        if args.extract is not None:
            keep = np.array([l.strip() for l in open(args.extract,'r')])
            snps_to_use = np.intersect1d(snps_to_use,keep)
        self.bed_index = np.sort(self.bed.sid_to_index(snps_to_use)) #
        pos = self.bed.pos[self.bed_index] #
        bim=pd.read_table(self.bed.filename+'.bim',header=None,
                          names=['chm','id','pos_mb','pos_bp','a1','a2'])
        self.af = af[self.bed_index] #
        self.M = len(self.bed_index) #
        self.windows = ju.get_windows(pos,self.M,args.window_size,args.window_type)
        self.pos = pos[:,2]
        self.chr = pos[:,0]
        self.id = self.bed.sid[self.bed_index]
        self.A1 = bim['a1'].loc[self.bed_index]
        self.A2 = bim['a2'].loc[self.bed_index]
        self.logistic = False
        self.chimin = stats.chi2.ppf(1-args.minp,2)

        # Fit null
        if (not args.linear) and (self.Y.min() >= 0 and self.Y.max() <= 1):
            self.null = sm.Logit(self.Y, self.cov, missing='drop').fit(disp=0)
            self.logistic = True
        else:
            self.null = sm.OLS(self.Y, self.cov, missing='drop').fit(disp=0)
        if self.ncov > 1:
            self.cov = sm.add_constant(self.null.fittedvalues)
        self.marg_res, self.joint_res = self.compute(args)

    def compute(self,args):
        t=time()
        marg_res = []
        joint_res = []
        Z = []
        windex = 0
        li,ri = self.windows[windex]
        nstr = np.max((args.SNPs_to_read,ri-li))
        offset = li
        G = self.bed[:,self.bed_index[li:(li+nstr)]].read().val
        G = ju._impute_missing(G) # replace missing with mean
        self.compute_marg(marg_res,Z,G,li,args)
        A = ju._norm_data(G)
        while ri < offset+nstr:
            st = li-offset
            fi = ri-offset
            # All correlations of SNP j with SNPs in its window
            R = np.dot(np.atleast_2d(A[:,st]/self.N),A[:,(st+1):fi]).flatten()
            Zl = Z[li]
            Zr = np.array(Z[(li+1):ri])
            # Use marginal Z-scores and R to compute expected joint chi2s
            ChiP = (1/(1-R**2))*(Zl**2+Zr**2-2*R*Zl*Zr)
            ChiP[R**2 < args.r2min] = -1
            self.compute_joint(joint_res,G,ChiP,offset,li,ri,args)
            windex += 1
            li,ri = self.windows[windex]
        for i in xrange(offset+nstr,self.M,nstr):
            sys.stdout.flush()
            sys.stdout.write("SNP: %d, %f\r" % (i, time()-t))
            Gn = self.bed[:,self.bed_index[i:(i+nstr)]].read().val
            Gn = ju._impute_missing(Gn)
            An = ju._norm_data(Gn)
            self.compute_marg(marg_res,Z,Gn,i,args)
            G = np.hstack((G,Gn))
            A = np.hstack((A,An))
            if G.shape[1] > args.SNPs_to_store:
                G = G[:,nstr:]
                A = A[:,nstr:]
                offset += nstr
            while ri < i+nstr:
                st = li-offset
                fi = ri-offset
                # All correlations of SNP j with SNPs in its window
                R = np.dot(np.atleast_2d(A[:,st]/self.N),A[:,(st+1):fi]).flatten()
                Zl = Z[li]
                Zr = np.array(Z[(li+1):ri])
#.........这里部分代码省略.........
开发者ID:brielin,项目名称:Jester,代码行数:103,代码来源:fit.py

示例7: sample

# 需要导入模块: from pysnptools.snpreader import Bed [as 别名]
# 或者: from pysnptools.snpreader.Bed import sid_to_index [as 别名]
class sample(object):
    def __init__(self,args):
        self.bed = Bed(args.bfile) #
        self.N = self.bed.iid_count
        if args.covfile is not None:
            cov = pd.read_table(args.covfile,header=None)
            self.cov = sm.add_constant(ju._reorder(cov,self.bed.iid))
            self.ncov = self.cov.shape[1] # + constant
        else:
            self.cov = np.ones((self.N,1))
            self.ncov = 1 # Constant
        af = ju.get_allele_frequency(self.bed,args) #
        snps = (af>args.maf)&(af<1-args.maf) #
        if (args.from_bp is not None) and (args.to_bp is not None):
            k = (bed.pos[:,2]>args.from_bp)&(bed.pos[:,2]<args.to_bp)
            snp1 = snps&k
        snps_to_use = self.bed.sid[snps]
        if args.extract is not None:
            keep = np.array([l.strip() for l in open(args.extract,'r')])
            snps_to_use = np.intersect1d(snps_to_use,keep)
        self.bed_index = np.sort(self.bed.sid_to_index(snps_to_use)) #
        pos = self.bed.pos[self.bed_index] #
        bim=pd.read_table(self.bed.filename+'.bim',header=None,
                          names=['chm','id','pos_mb','pos_bp','a1','a2'])
        self.af = af[self.bed_index] #
        self.M = len(self.bed_index) #
        self.windows = ju.get_windows(pos,self.M,args.window_size,args.window_type)
        self.sample_windows = ju.get_windows(pos,self.M,args.sample_window_size,
                                             args.sample_window_type)
        self.pos = pos[:,2]
        self.chr = pos[:,0]
        self.id = self.bed.sid[self.bed_index]
        self.A1 = bim['a1'].loc[self.bed_index]
        self.A2 = bim['a2'].loc[self.bed_index]
        self.numSamples = args.numSamples
        self.JMaxStats, self.ZMaxStats = self.sample(args)
        self.JMinP = stats.chi2.sf(self.JMaxStats,2)
        self.ZMinP = stats.chi2.sf(self.ZMaxStats**2,1)
        self.minP = np.minimum(self.JMinP,self.ZMinP)

    def sample(self,args):
        t=time()
        nz = 0
        ZMaxStats = np.zeros((self.numSamples,1))
        JMaxStats = np.zeros((self.numSamples,1))
        windex = 0
        sli,sri = self.sample_windows[windex]
        tli,tri = self.windows[windex]
        nstr = np.max((args.SNPs_to_read,sri-sli))
        offset = sli
        G = self.bed[:,self.bed_index[sli:(sli+nstr)]].read().val
        G = ju._impute_missing(G)
        A = ju._norm_data(G)
        # Sample Z-scores and do joint tests of first window
        R = np.dot(A[:,sli:sri].T/self.N,A[:,sli:sri])
        Z=np.random.multivariate_normal(np.zeros((R.shape[0])),R,args.numSamples)
        nz += R.shape[0]
        zli,zri = sli,sri # position of Z relative to full genotype
        gli,gri = zli,zri # position of Z relative to genotype in memory
        Rp = R[(tli+1):tri,0]
        to_test = Rp**2 > args.r2min
        Rp = Rp[to_test]
        Zl = np.atleast_2d(Z[:,0]).T
        Zr = np.array(Z[:,1:(tri-tli)])[:,to_test]
        ChiP = (1/(1-Rp**2))*(Zl**2+Zr**2-2*Rp*Zl*Zr)
        ZMaxStats = np.atleast_2d(np.hstack((ZMaxStats,abs(Z))).max(1)).T
        # ZMaxStats = np.maximum(ZMaxStats,abs(Z.max(1)))
        # JMaxStats = np.maximum(JMaxStats,ChiP.max(1))
        JMaxStats = np.atleast_2d(np.hstack((JMaxStats,ChiP)).max(1)).T
        # Slide through genotype in memory
        while True:
            windex += 1
            sli,sri = self.sample_windows[windex]
            tli,tri = self.windows[windex]
            if sri >= offset+nstr: break
            tst,tfi,sst,sfi = np.array([tli,tri,sli,sri])-offset
            #print sli, sri, zli, zri, gli, gri, Z.shape[1]
            if zli < sli: # drop zli..sli and update indices
                Z = Z[:,(sli-zli):]
                zli,gli = sli,sst
            if zri < sri: # marginal sample everything from zri..sri
                S = A[:,gli:gri] # G that overlaps Z
                Sn = A[:,gri:sri] # G about to have Z scores sampled
                r12 = S.T.dot(Sn)/self.N
                r11 = Sn.T.dot(Sn)/self.N
                Zn = self.sample_func(Z,S,Sn,r11,r12,args)
                Z = np.hstack((Z,Zn))
                nz += (sri-gri)
                zri,gri = sri,sfi
            ZMaxStats = np.atleast_2d(np.hstack((ZMaxStats,abs(Zn))).max(1)).T
            # ZMaxStats = np.maximum(ZMaxStats,abs(Zn.flatten()))
            # All correlations of SNP tli with SNPs in its window
            # Surely these are already computed and some cleverness
            #  can be used to re-use them but its a fast calculation anyways
            if sri-sli > 1:
                R = np.dot(np.atleast_2d(A[:,tst]/self.N),
                           A[:,(tst+1):tfi]).flatten()
                to_test = R**2 > args.r2min
                R = R[to_test]
                Zl = np.atleast_2d(Z[:,0]).T
#.........这里部分代码省略.........
开发者ID:brielin,项目名称:Jester,代码行数:103,代码来源:sample.py


注:本文中的pysnptools.snpreader.Bed.sid_to_index方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。