当前位置: 首页>>代码示例>>Python>>正文


Python snpreader.Bed类代码示例

本文整理汇总了Python中pysnptools.snpreader.Bed的典型用法代码示例。如果您正苦于以下问题:Python Bed类的具体用法?Python Bed怎么用?Python Bed使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Bed类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

 def __init__(self,args):
     if args.window_type not in ['BP','SNP']:
         raise ValueError('Window type not supported')
     bed_1 = Bed(args.bfile) #
     af1 = self.get_allele_frequency(bed_1,args) #
     print(len(af1), "SNPs in file 1")
     snps_1 = (af1>args.maf)&(af1<1-args.maf) #
     print(np.sum(snps_1), "SNPs in file 1 after MAF filter")
     if (args.from_bp is not None) and (args.to_bp is not None):
         k = (bed_1.pos[:,2]>args.from_bp)&(bed_1.pos[:,2]<args.to_bp)
         snps_1 = snps_1&k
     snps_to_use = bed_1.sid[snps_1]
     if args.extract is not None:
         keep = np.array([l.strip() for l in open(args.extract,'r')])
         snps_to_use = np.intersect1d(snps_to_use,keep)
         print(len(snps_to_use),"SNPs remaining after extraction")
     bed_1_index = np.sort(bed_1.sid_to_index(snps_to_use)) #
     pos = bed_1.pos[bed_1_index] #
     bim_1=pd.read_table(bed_1.filename+'.bim',header=None,
                         names=['chm','id','pos_mb','pos_bp','a1','a2'])
     af = af1[bed_1_index] #
     if args.afile is not None:
         a1 =  pd.read_table(args.afile,header=None,sep='\s*',
                             names=['id1','id2','theta'])
     else:
         a1 = None
     self.af = af
     self.M = len(bed_1_index) #
     self.windows = self.get_windows(pos,args) #
     self.chr = pos[:,0]
     self.pos = pos[:,2]
     self.id = bed_1.sid[bed_1_index]
     self.A1 = bim_1['a1'].loc[bed_1_index]
     self.A2 = bim_1['a2'].loc[bed_1_index]
     self.scores = self.compute(bed_1,bed_1_index,af,a1,args) #
开发者ID:dtnchen,项目名称:Popcorn,代码行数:35,代码来源:compute.py

示例2: setUpClass

 def setUpClass(self):
     self.currentFolder = os.path.dirname(os.path.realpath(__file__))
     #TODO: get data set with NANs!
     snpreader = Bed(self.currentFolder + "/examples/toydata",count_A1=False)
     self.pheno_fn = self.currentFolder + "/examples/toydata.phe"
     self.snpdata = snpreader.read(order='F',force_python_only=True)
     self.snps = self.snpdata.val
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:7,代码来源:test.py

示例3: setUpClass

    def setUpClass(self):
        currentFolder = os.path.dirname(os.path.realpath(__file__))
        self.snp_fn = currentFolder + "/../../tests/datasets/mouse/alldata"
        self.pheno_fn = currentFolder + "/../../tests/datasets/mouse/pheno_10_causals.txt"
        #self.cov_fn = currentFolder + "/examples/toydata.cov"

        # load data
        ###################################################################
        snp_reader = Bed(self.snp_fn)
        pheno = pstpheno.loadOnePhen(self.pheno_fn)
        #cov = pstpheno.loadPhen(self.cov_fn)
        
        # intersect sample ids
        snp_reader, pheno = pysnptools.util.intersect_apply([snp_reader, pheno])
        
        self.G = snp_reader.read(order='C').val
        self.G = stdizer.Unit().standardize(self.G)
        self.G.flags.writeable = False
        self.y = pheno['vals'][:,0]
        self.y.flags.writeable = False

        # load pcs
        #self.G_cov = cov['vals']
        self.G_cov = np.ones((len(self.y), 1))
        self.G_cov.flags.writeable = False
开发者ID:guokai8,项目名称:FaST-LMM,代码行数:25,代码来源:test.py

示例4: divideData

def divideData(filename,direct,num=5,mph=3,delet=True):
	print "Estimating heritability using "+str(num)+" components"
	[yFil,sFil]=getData(filename,mph=mph);
	n=sFil.iid_count	
	reOrd=perm(n);
	yFil=yFil[reOrd,:];
	sFil=sFil[reOrd,:];

	div=[int(math.ceil( i*n/float(num) )) for i in range(0,num+1)];
		
	varEsts=[];

	for i in range(0,num):
		print "For component "+str(i);
		sFilTemp=sFil[div[i]:div[i+1],:];

		yFilTemp=yFil[div[i]:div[i+1],:];

		fileTemp=direct+"/tempFile_"+str(i);
		Bed.write(fileTemp,sFilTemp.read());
		Pheno.write(fileTemp+".phen",yFilTemp.read())
		
		varEsts.append(varRes(fileTemp,direct));
		
		

		if delet:
			os.system("rm "+direct+"/tempFile_"+str(i)+"*");
	
	return varEsts;
开发者ID:seanken,项目名称:PrivSTRAT,代码行数:30,代码来源:estHerit.py

示例5: test_some_std

    def test_some_std(self):
        k0 = self.snpdata.read_kernel(standardizer=Unit()).val
        from pysnptools.kernelreader import SnpKernel
        k1 = self.snpdata.read_kernel(standardizer=Unit())
        np.testing.assert_array_almost_equal(k0, k1.val, decimal=10)

        from pysnptools.snpreader import SnpData
        snpdata2 = SnpData(iid=self.snpdata.iid,sid=self.snpdata.sid,pos=self.snpdata.pos,val=np.array(self.snpdata.val))
        s = str(snpdata2)
        snpdata2.standardize()
        s = str(snpdata2)

        snpreader = Bed(self.currentFolder + "/examples/toydata",count_A1=False)
        k2 = snpreader.read_kernel(standardizer=Unit(),block_size=500).val
        np.testing.assert_array_almost_equal(k0, k2, decimal=10)

        from pysnptools.standardizer.identity import Identity
        from pysnptools.standardizer.diag_K_to_N import DiagKtoN
        for dtype in [sp.float64,sp.float32]:
            for std in [Unit(),Beta(1,25),Identity(),DiagKtoN()]:
                s = str(std)
                np.random.seed(0)
                x = np.array(np.random.randint(3,size=[60,100]),dtype=dtype)
                x2 = x[:,::2]
                x2b = np.array(x2)
                #LATER what's this about? It doesn't do non-contiguous?
                #assert not x2.flags['C_CONTIGUOUS'] and not x2.flags['F_CONTIGUOUS'] #set up to test non contiguous
                #assert x2b.flags['C_CONTIGUOUS'] or x2b.flags['F_CONTIGUOUS'] #set up to test non contiguous
                #a,b = std.standardize(x2b),std.standardize(x2)
                #np.testing.assert_array_almost_equal(a,b)
        logging.info("done")
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:31,代码来源:test.py

示例6: test_match_cpp

    def test_match_cpp(self):
        '''
        match
            FaSTLMM.207\Data\DemoData>..\.cd.\bin\windows\cpp_mkl\fastlmmc -bfile snps -extract topsnps.txt -bfileSim snps -extractSim ASout.snps.txt -pheno pheno.txt -covar covariate.txt -out topsnps.singlesnp.txt -logDelta 0 -verbose 100

        '''
        logging.info("TestSingleSnp test_match_cpp")
        snps = Bed(os.path.join(self.pythonpath, "tests/datasets/selecttest/snps"), count_A1=False)
        pheno = os.path.join(self.pythonpath, "tests/datasets/selecttest/pheno.txt")
        covar = os.path.join(self.pythonpath, "tests/datasets/selecttest/covariate.txt")
        sim_sid = ["snp26250_m0_.19m1_.19","snp82500_m0_.28m1_.28","snp63751_m0_.23m1_.23","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp11253_m0_.2m1_.2","snp86250_m0_.33m1_.33","snp3753_m0_.23m1_.23","snp75003_m0_.32m1_.32","snp30002_m0_.25m1_.25","snp26252_m0_.19m1_.19","snp67501_m0_.15m1_.15","snp63750_m0_.28m1_.28","snp30001_m0_.28m1_.28","snp52502_m0_.35m1_.35","snp33752_m0_.31m1_.31","snp37503_m0_.37m1_.37","snp15002_m0_.11m1_.11","snp3751_m0_.34m1_.34","snp7502_m0_.18m1_.18","snp52503_m0_.3m1_.3","snp30000_m0_.39m1_.39","isnp4457_m0_.11m1_.11","isnp23145_m0_.2m1_.2","snp60001_m0_.39m1_.39","snp33753_m0_.16m1_.16","isnp60813_m0_.2m1_.2","snp82502_m0_.34m1_.34","snp11252_m0_.13m1_.13"]
        sim_idx = snps.sid_to_index(sim_sid)
        test_sid = ["snp26250_m0_.19m1_.19","snp63751_m0_.23m1_.23","snp82500_m0_.28m1_.28","snp48753_m0_.4m1_.4","snp45001_m0_.26m1_.26","snp52500_m0_.05m1_.05","snp75002_m0_.39m1_.39","snp41253_m0_.07m1_.07","snp86250_m0_.33m1_.33","snp15002_m0_.11m1_.11","snp33752_m0_.31m1_.31","snp26252_m0_.19m1_.19","snp30001_m0_.28m1_.28","snp11253_m0_.2m1_.2","snp67501_m0_.15m1_.15","snp3753_m0_.23m1_.23","snp52502_m0_.35m1_.35","snp30000_m0_.39m1_.39","snp30002_m0_.25m1_.25"]
        test_idx = snps.sid_to_index(test_sid)

        for G0,G1 in [(snps[:,sim_idx],KernelIdentity(snps.iid)),(KernelIdentity(snps.iid),snps[:,sim_idx])]:
            frame_h2 = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,h2=.5,leave_out_one_chrom=False,count_A1=False)
            frame_log_delta = single_snp(test_snps=snps[:,test_idx], pheno=pheno, G0=G0,G1=G1, covar=covar,log_delta=0,leave_out_one_chrom=False,count_A1=False)
            for frame in [frame_h2, frame_log_delta]:
                referenceOutfile = TestFeatureSelection.reference_file("single_snp/topsnps.single.txt")
                reference = pd.read_table(referenceOutfile,sep="\t") # We've manually remove all comments and blank lines from this file
                assert len(frame) == len(reference)
                for _, row in reference.iterrows():
                    sid = row.SNP
                    pvalue = frame[frame['SNP'] == sid].iloc[0].PValue
                    reldiff = abs(row.Pvalue - pvalue)/row.Pvalue
                    assert reldiff < .035, "'{0}' pvalue_list differ too much {4} -- {2} vs {3}".format(sid,None,row.Pvalue,pvalue,reldiff)
开发者ID:MicrosoftGenomics,项目名称:FaST-LMM,代码行数:27,代码来源:test_single_snp.py

示例7: read_plink

 def read_plink(self, fn_plink = None):
     """
     plink reader
     """
     PL = Bed(fn_plink)
     PLOB = PL.read()
     self.GT = PLOB.val
     self.POS = PLOB.pos[:,[0,1]]
     self.SID = PLOB.iid[:,1]
     self.isNormalised = False
开发者ID:kuod,项目名称:pygcta,代码行数:10,代码来源:genotypes.py

示例8: process_data

def process_data(input_path, output_path, name):
    snpreader = Bed(os.path.join(input_path, name))
    data = snpreader.read()
    values = data.val
    preproc_vals = pysnp_genpreproc(values)
    assert(np.any(np.isnan(preproc_vals)) == False)
    saved = os.path.join(output_path, name + ".h5py")
    path, keys = h5_save(path=saved, data_obj={name:preproc_vals}, dt='f')
    return {'n_subjects':data.iid_count, 'subject_ids':data.iid,
            'n_snps':data.sid_count, 'snp_ids':data.sid,
            'data_preprocessed_location': {'path':path, 'key':keys}}
开发者ID:YSanchezAraujo,项目名称:genus,代码行数:11,代码来源:utils.py

示例9: factory

    def factory(snpreader, num_snps_in_memory, standardizer, blocksize):
        if isinstance(snpreader, str):
            snpreader = Bed(snpreader)

        if num_snps_in_memory >= snpreader.sid_count:
            in_memory = InMemory(snpreader.read(order='C').standardize(standardizer), standardizer, blocksize)
            in_memory._snpreader.val.flags.writeable = False
            in_memory._val = in_memory._snpreader.val
            return in_memory
        else:
            return FromDisk(snpreader, num_snps_in_memory, standardizer, blocksize, None)
开发者ID:42binwang,项目名称:FaST-LMM,代码行数:11,代码来源:feature_selection_cv.py

示例10: test_write_x_x_cpp

 def test_write_x_x_cpp(self):
     snpreader = Bed(self.currentFolder + "/examples/toydata")
     for order in ['C','F']:
         for dtype in [np.float32,np.float64]:
             snpdata = snpreader.read(order=order,dtype=dtype)
             snpdata.val[-1,0] = float("NAN")
             output = "tempdir/toydata.{0}{1}.cpp".format(order,"32" if dtype==np.float32 else "64")
             create_directory_if_necessary(output)
             Bed.write(snpdata, output)
             snpdata2 = Bed(output).read()
             assert TestLoader.is_same(snpdata, snpdata2) #!!!define an equality method on snpdata?
开发者ID:amcdavid,项目名称:PySnpTools,代码行数:11,代码来源:test.py

示例11: test_write_x_x_cpp

 def test_write_x_x_cpp(self):
     snpreader = Bed(self.currentFolder + "/examples/toydata")
     for order in ['C','F']:
         for dtype in [np.float32,np.float64]:
             snpdata = snpreader.read(order=order,dtype=dtype)
             snpdata.val[-1,0] = float("NAN")
             output = "tempdir/toydata.{0}{1}.cpp".format(order,"32" if dtype==np.float32 else "64")
             create_directory_if_necessary(output)
             Bed.write(output, snpdata)
             snpdata2 = Bed(output).read()
             np.testing.assert_array_almost_equal(snpdata.val, snpdata2.val, decimal=10)
开发者ID:MMesbahU,项目名称:PySnpTools,代码行数:11,代码来源:test.py

示例12: test_subset_view

 def test_subset_view(self):
     snpreader2 = Bed(self.currentFolder + "/examples/toydata",count_A1=False)[:,:]
     result = snpreader2.read(view_ok=True)
     self.assertFalse(snpreader2 is result)
     result2 = result[:,:].read()
     self.assertFalse(sp.may_share_memory(result2.val,result.val))
     result3 = result[:,:].read(view_ok=True)
     self.assertTrue(sp.may_share_memory(result3.val,result.val))
     result4 = result3.read()
     self.assertFalse(sp.may_share_memory(result4.val,result3.val))
     result5 = result4.read(view_ok=True)
     self.assertTrue(sp.may_share_memory(result4.val,result5.val))
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:12,代码来源:test.py

示例13: test_npz

 def test_npz(self):
     logging.info("in test_npz")
     snpreader = Bed(self.currentFolder + "/../examples/toydata",count_A1=False)
     kerneldata1 = snpreader.read_kernel(standardizer=stdizer.Unit())
     s = str(kerneldata1)
     output = "tempdir/kernelreader/toydata.kernel.npz"
     create_directory_if_necessary(output)
     KernelNpz.write(output,kerneldata1)
     kernelreader2 = KernelNpz(output)
     kerneldata2 = kernelreader2.read()
     np.testing.assert_array_almost_equal(kerneldata1.val, kerneldata2.val, decimal=10)
     logging.info("done with test")
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:12,代码来源:test.py

示例14: main

def main(args):
    print('reading seeed snps')
    seed_snps = pd.read_csv(args.seed_snps, header=None, names=['SNP'], index_col='SNP')
    seed_snps['ibs_length'] = 0
    seed_snps['ibd'] = 0

    print('reading typed snps')
    typed_snps = pd.read_csv(args.typed_snps, header=None, names=['SNP'])

    print('reading genotypes')
    data = Bed(args.bfile)
    X = data.read().val
    typed_snps_indices = np.sort(data.sid_to_index(typed_snps.SNP))
    typed_snps_bp = data.col_property[typed_snps_indices,2]

    print(len(seed_snps), 'snps in list')
    print(data.iid_count, data.sid_count, 'are dimensions of X')

    def analyze_snp(i):
        # find first typed snp after query snp
        snp_bp = data.col_property[i,2]
        v = np.where(typed_snps_bp > snp_bp)[0]
        if len(v) > 0:
            typed_i = v[0]
        else:
            typed_i = len(typed_snps_indices)-1

        n1, n2 = np.where(X[:,i] == 1)[0]
        if (X[n1,typed_snps_indices[typed_i]] - X[n2, typed_snps_indices[typed_i]])**2 == 4:
            return 0, 0

        typed_il, typed_ir = fis.find_boundaries(
                X[n1,typed_snps_indices],
                X[n2,typed_snps_indices],
                typed_i)
        typed_ir -= 1

        il = typed_snps_indices[typed_il]
        ir = typed_snps_indices[typed_ir]
        cM = data.col_property[ir, 1] - \
                data.col_property[il, 1]
        ibd = (np.mean(X[n1,il:ir] == X[n2,il:ir]) > 0.99)
        return cM, int(ibd)

    for (i, snp) in iter.show_progress(
            it.izip(data.sid_to_index(seed_snps.index), seed_snps.index),
            total=len(seed_snps)):
            # total=10):
        seed_snps.ix[snp, ['ibs_length', 'ibd']] = analyze_snp(i)

    print(seed_snps.iloc[:100])
    seed_snps.to_csv(args.outfile, sep='\t')
开发者ID:hilaryfinucane,项目名称:ibd,代码行数:52,代码来源:analyze_snps.py

示例15: test_subset

    def test_subset(self):
        logging.info("in test_subset")
        snpreader = Bed(self.currentFolder + "/../examples/toydata",count_A1=False)
        snpkernel = SnpKernel(snpreader,stdizer.Unit())
        krsub = snpkernel[::2,::2]
        kerneldata1 = krsub.read()
        expected = snpreader.read_kernel(stdizer.Unit())[::2].read()
        np.testing.assert_array_almost_equal(kerneldata1.val, expected.val, decimal=10)

        krsub2 = snpkernel[::2]
        kerneldata2 = krsub2.read()
        np.testing.assert_array_almost_equal(kerneldata2.val, expected.val, decimal=10)
        logging.info("done with test")
开发者ID:MicrosoftGenomics,项目名称:PySnpTools,代码行数:13,代码来源:test.py


注:本文中的pysnptools.snpreader.Bed类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。