本文整理汇总了Python中test_helper.get_reference函数的典型用法代码示例。如果您正苦于以下问题:Python get_reference函数的具体用法?Python get_reference怎么用?Python get_reference使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_reference函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testLeftNormalize
def testLeftNormalize(self):
    """Check left_normalize on a deletion and then on an insertion."""
    # Deletion on chr1.
    del_pos, del_ref, del_alts = left_normalize(
        get_reference(), 'chr1', 2, 'CGCCG', ['CG'])
    self.assertEqual(del_pos, 0)
    self.assertEqual(del_ref, 'AACGC')
    self.assertEqual(del_alts[0], 'AA')

    # Insertion on chr4.
    ins_pos, ins_ref, ins_alts = left_normalize(
        get_reference(), 'chr4', 12, 'G', ['GGG'])
    self.assertEqual(ins_pos, 7)
    self.assertEqual(ins_ref, 'C')
    self.assertEqual(ins_alts[0], 'CGG')
示例2: testRectify2
def testRectify2(self):
    """Rectify stats where two predicted calls line up with one true indel.

    After rectify() the only non-zero counters asserted are one predicted and
    one true-positive INDEL_OTH; SNP and INDEL_DEL counters are all zero.
    """
    predicted_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . GC G 20 PASS . GT 1/1\n
chr2 6 . G A 20 PASS . GT 1/1\n
"""
    truth_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . GCCG GCA 20 PASS . GT 1/1\n
"""
    truth = vcf_to_ChromVariants(truth_vcf, 'chr2')
    predicted = vcf_to_ChromVariants(predicted_vcf, 'chr2')
    concordance = _genotype_concordance_dict()
    stats = ChromVariantStats(truth, predicted, [], [3, 6], [3], concordance)
    stats.rectify(get_reference(), 100)

    # (counter, variant type, expected count), checked in this order.
    expectations = [
        (stats.num_pred, VARIANT_TYPE.INDEL_OTH, 1),
        (stats.num_tp, VARIANT_TYPE.INDEL_OTH, 1),
        (stats.num_fn, VARIANT_TYPE.INDEL_OTH, 0),
        (stats.num_fp, VARIANT_TYPE.INDEL_OTH, 0),
        (stats.num_pred, VARIANT_TYPE.SNP, 0),
        (stats.num_fp, VARIANT_TYPE.SNP, 0),
        (stats.num_fn, VARIANT_TYPE.SNP, 0),
        (stats.num_tp, VARIANT_TYPE.SNP, 0),
        (stats.num_pred, VARIANT_TYPE.INDEL_DEL, 0),
        (stats.num_fp, VARIANT_TYPE.INDEL_DEL, 0),
        (stats.num_fn, VARIANT_TYPE.INDEL_DEL, 0),
        (stats.num_tp, VARIANT_TYPE.INDEL_DEL, 0),
    ]
    for counter, variant_type, expected in expectations:
        self.assertEqual(counter[variant_type], expected)
示例3: testTruePosRectify
def testTruePosRectify(self):
    """Compare SNP / INDEL_DEL counters before and after rectify().

    Before rectify there is one FN and one FP of each type plus one INDEL_DEL
    true positive; afterwards all FN/FP counts are zero and the true-positive
    counts are 1 (SNP) and 2 (INDEL_DEL).
    """
    truth_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr4 3 . TC T 20 PASS . GT 1/1\n
chr4 5 . TC T 20 PASS . GT 1/1\n
chr4 8 . C T 20 PASS . GT 1/1\n
"""
    predicted_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr4 4 . C T 20 PASS . GT 1/1\n
chr4 5 . TC T 20 PASS . GT 1/1\n
chr4 7 . TC T 20 PASS . GT 1/1\n
"""
    truth = vcf_to_ChromVariants(truth_vcf, 'chr4')
    predicted = vcf_to_ChromVariants(predicted_vcf, 'chr4')
    concordance = _genotype_concordance_dict()
    stats = ChromVariantStats(
        truth, predicted, [5], [4, 7], [3, 8], concordance)

    snp = VARIANT_TYPE.SNP
    deletion = VARIANT_TYPE.INDEL_DEL

    # Counters as constructed, before any rectification.
    self.assertEqual(stats.num_tp[snp], 0)
    self.assertEqual(stats.num_fn[snp], 1)
    self.assertEqual(stats.num_fp[snp], 1)
    self.assertEqual(stats.num_fn[deletion], 1)
    self.assertEqual(stats.num_fp[deletion], 1)
    self.assertEqual(stats.num_tp[deletion], 1)

    stats.rectify(get_reference(), 100)

    # Counters after rectification.
    self.assertEqual(stats.num_fn[snp], 0)
    self.assertEqual(stats.num_fp[snp], 0)
    self.assertEqual(stats.num_tp[snp], 1)
    self.assertEqual(stats.num_fn[deletion], 0)
    self.assertEqual(stats.num_tp[deletion], 2)
    self.assertEqual(stats.num_fp[deletion], 0)
示例4: testRescueChromEvalVariants
def testRescueChromEvalVariants(self):
    """Run chrom_evaluate_variants on two predicted calls vs one true indel.

    The asserted outcome is one predicted / one true-positive INDEL_OTH and
    zero for every SNP and INDEL_DEL counter.
    """
    predicted_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . GC G 20 PASS . GT 1/1\n
chr2 6 . G A 20 PASS . GT 1/1\n
"""
    truth_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . GCCG GCA 20 PASS . GT 1/1\n
"""
    truth = vcf_to_ChromVariants(truth_vcf, 'chr2')
    predicted = vcf_to_ChromVariants(predicted_vcf, 'chr2')
    stats = chrom_evaluate_variants(
        truth, predicted, 100, 100, get_reference(), 50)

    # (counter, variant type, expected count), checked in this order.
    expectations = [
        (stats.num_pred, VARIANT_TYPE.INDEL_OTH, 1),
        (stats.num_tp, VARIANT_TYPE.INDEL_OTH, 1),
        (stats.num_fn, VARIANT_TYPE.INDEL_OTH, 0),
        (stats.num_fp, VARIANT_TYPE.INDEL_OTH, 0),
        (stats.num_pred, VARIANT_TYPE.SNP, 0),
        (stats.num_fp, VARIANT_TYPE.SNP, 0),
        (stats.num_fn, VARIANT_TYPE.SNP, 0),
        (stats.num_tp, VARIANT_TYPE.SNP, 0),
        (stats.num_pred, VARIANT_TYPE.INDEL_DEL, 0),
        (stats.num_fp, VARIANT_TYPE.INDEL_DEL, 0),
        (stats.num_fn, VARIANT_TYPE.INDEL_DEL, 0),
        (stats.num_tp, VARIANT_TYPE.INDEL_DEL, 0),
    ]
    for counter, variant_type, expected in expectations:
        self.assertEqual(counter[variant_type], expected)
示例5: testRectify
def testRectify(self):
    """Rectify a ChromVariantStats that holds a rescue-able indel.

    Before rectify() there are no true positives, one false-negative
    INDEL_OTH and two false-positive SNPs.  After rectify() there is one
    true-positive INDEL_OTH and no false positives or false negatives.
    """
    true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 2 . TGC TAT 20 PASS . GT 1/1\n
"""
    pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . G A 20 PASS . GT 1/1\n
chr2 4 . C T 20 PASS . GT 1/1\n
"""
    true_vars = vcf_to_ChromVariants(true_str, 'chr2')
    pred_vars = vcf_to_ChromVariants(pred_str, 'chr2')
    # Genotype concordance is left empty; it is not what this test checks.
    gtdict = _genotype_concordance_dict()
    cvs = ChromVariantStats(true_vars, pred_vars, [], [3, 4], [2], gtdict)

    # Before rectify: no true positives of any type.
    # .values() (rather than .itervalues()) works on Python 2 and 3 alike.
    self.assertTrue(all(count == 0 for count in cvs.num_tp.values()))
    # One false-negative indel.
    self.assertEqual(cvs.num_fn[VARIANT_TYPE.INDEL_OTH], 1)
    # Two false-positive SNPs.
    self.assertEqual(cvs.num_fp[VARIANT_TYPE.SNP], 2)

    cvs.rectify(get_reference(), 100)

    # After rectify: one true-positive indel.
    self.assertEqual(cvs.num_tp[VARIANT_TYPE.INDEL_OTH], 1)
    # No false positives or false negatives remain.
    self.assertTrue(all(count == 0 for count in cvs.num_fp.values()))
    self.assertTrue(all(count == 0 for count in cvs.num_fn.values()))
示例6: testChromEvaluateVariantsKnownFP
def testChromEvaluateVariantsKnownFP(self):
    """Evaluate predictions against truth plus a set of known-FP sites.

    Expects two false-positive SNPs under the usual definition and one call
    recorded at a known false-positive location.
    """
    # A single known true variant.
    truth_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 2 . A T 20 PASS . GT 0/1\n
"""
    # Calls at the true variant, at an unknown site and at a known-FP site.
    predicted_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 2 . A T 20 PASS . GT 0/1\n
chr1 4 . G C 20 PASS . GT 1/1\n
chr1 7 . G A 20 PASS . GT 0/1\n
"""
    # Locations known to carry NO variant.
    known_fp_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 1 . A T 20 PASS . GT ./.\n
chr1 7 . G . 20 PASS . GT 0/0\n
"""
    truth = vcf_to_ChromVariants(truth_vcf, 'chr1')
    predicted = vcf_to_ChromVariants(predicted_vcf, 'chr1')
    known_fp_reader = vcf.Reader(StringIO.StringIO(known_fp_vcf))
    known_fp_variants = Variants(known_fp_reader, MAX_INDEL_LEN, knownFP=True)
    stats = chrom_evaluate_variants(
        truth, predicted, 100, 100, get_reference(), 50,
        known_fp_variants.on_chrom('chr1'))

    # Usual definition: present in predictions but absent from truth.
    self.assertEqual(stats.num_fp[VARIANT_TYPE.SNP], 2)
    # A call made at a location known NOT to have a SNP.
    self.assertEqual(stats.calls_at_known_fp[VARIANT_TYPE.SNP], 1)
示例7: testGetSeq
def testGetSeq(self):
    """Check _get_seq output with its last argument False and then True."""
    predicted_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr3 2 . TCGA T 20 PASS . GT 1/1\n
chr3 9 . A AAAA 20 PASS . GT 0/1\n
"""
    chrom_variants = vcf_to_ChromVariants(predicted_vcf, 'chr3')
    window = (1, 13, 'chr3')

    result = _get_seq(
        window, chrom_variants.getAllVariants(), get_reference(), False)
    self.assertEqual(result[0], 'ATTCGAAAATCG')
    self.assertEqual(result[1], '')

    result = _get_seq(
        window, chrom_variants.getAllVariants(), get_reference(), True)
    self.assertEqual(result[0], 'ATTCGATCG')
    self.assertEqual(result[1], 'ATCGATCGAAAATCG')
示例8: testChromEvaluateVariantsSV
def testChromEvaluateVariantsSV(self):
    """Evaluate SV insertions at varying positions and alleles.

    NB: SVs aren't rescued, just checked for within breakpoint tolerance.
    Four predictions are compared against the same true insertion.
    """
    truth_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    # Case 1: exact position, exact allele match.
    exact_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    truth = vcf_to_ChromVariants(truth_vcf, 'chr1')
    exact = vcf_to_ChromVariants(exact_vcf, 'chr1')
    stats = chrom_evaluate_variants(
        truth, exact, 100, 100, get_reference(), 50)
    self.assertEqual(stats.num_tp[VARIANT_TYPE.SV_INS], 1)

    # Case 2: exact position, different allele.
    other_allele_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAATGC 20 PASS . GT 0/1
"""
    other_allele = vcf_to_ChromVariants(other_allele_vcf, 'chr1')
    stats = chrom_evaluate_variants(
        truth, other_allele, 100, 100, get_reference(), 50)
    self.assertEqual(stats.num_tp[VARIANT_TYPE.SV_INS], 1)

    # Case 3: position within tolerance, exact allele match.
    nearby_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 4 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    nearby = vcf_to_ChromVariants(nearby_vcf, 'chr1')
    stats = chrom_evaluate_variants(
        truth, nearby, 100, 100, get_reference(), 50)
    self.assertEqual(stats.num_tp[VARIANT_TYPE.SV_INS], 1)

    # Case 4: position outside of tolerance.
    distant_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 110 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    distant = vcf_to_ChromVariants(distant_vcf, 'chr1')
    stats = chrom_evaluate_variants(
        truth, distant, 100, 100, get_reference(), 50)
    self.assertEqual(stats.num_tp[VARIANT_TYPE.SV_INS], 0)
示例9: testNBaseNormalization
def testNBaseNormalization(self):
    """Normalize a VCF containing an N-base record; expect one record out."""
    input_vcf = """##fileformat=VCFv4.0
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr4 2 . A ATCTT 20 PASS . GT 0/1\n
chr4 3 . NN N 20 PASS . GT 0/1\n
"""
    normalized = normalize(get_reference(), self.getVcf(input_vcf))
    record_count = self.countRecords(normalized)
    self.assertEqual(record_count, 1)
示例10: normalizeStringToWriter
def normalizeStringToWriter(self, vcf_str):
    """Normalize a VCF string, write it out, and re-read the written text.

    Parses ``vcf_str`` with ``vcf.Reader``, passes the records through
    ``normalize`` and writes each one with ``write`` to an in-memory
    ``VCFWriter``.  Returns a new ``vcf.Reader`` over the written output.
    """
    vcf_io = StringIO.StringIO(vcf_str)
    test_vcf = vcf.Reader(vcf_io)
    output_io = StringIO.StringIO()
    output_writer = VCFWriter('ref.fasta', 'name', output_io)
    # Explicit loop instead of map(): the writes are side effects, and a
    # lazy map() (as on Python 3) would never perform them.
    for record in normalize(get_reference(), test_vcf):
        write(record, output_writer)
    # Double every newline before re-parsing.
    # NOTE(review): presumably this mirrors the "\n" + newline layout of the
    # string fixtures used by the tests -- confirm.
    output_str = output_io.getvalue().replace('\n', '\n\n')
    return vcf.Reader(StringIO.StringIO(output_str))
示例11: testFullRescue
def testFullRescue(self):
    """Build SequenceRescuer for three FN/FP scenarios; each must rescue."""
    # Scenario 1 (chr2, position 2): no true positives.
    missed_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 2 . TGC TAT 20 PASS . GT 1/1\n
"""
    spurious_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . G A 20 PASS . GT 1/1\n
chr2 4 . C T 20 PASS . GT 1/1\n
"""
    missed = vcf_to_ChromVariants(missed_vcf, 'chr2')
    spurious = vcf_to_ChromVariants(spurious_vcf, 'chr2')
    rescuer = SequenceRescuer(
        'chr2', 2, missed, spurious,
        get_empty_ChromVariants('chr2'), get_reference(), 50)
    self.assertTrue(rescuer.rescued)
    self.assertEqual(rescuer.windowsRescued, (0, 0))

    # Scenario 2 (chr2, position 3): no true positives.
    spurious_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . GC G 20 PASS . GT 1/1\n
chr2 6 . G A 20 PASS . GT 1/1\n
"""
    missed_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . GCCG GCA 20 PASS . GT 1/1\n
"""
    spurious = vcf_to_ChromVariants(spurious_vcf, 'chr2')
    missed = vcf_to_ChromVariants(missed_vcf, 'chr2')
    rescuer = SequenceRescuer(
        'chr2', 3, missed, spurious,
        get_empty_ChromVariants('chr2'), get_reference(), 50)
    self.assertTrue(rescuer.rescued)

    # Scenario 3 (chr4, position 3): a true-positive variant is supplied too.
    missed_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr4 3 . TC T 20 PASS . GT 1/1\n
chr4 8 . C T 20 PASS . GT 1/1\n
"""
    spurious_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr4 4 . C T 20 PASS . GT 1/1\n
chr4 7 . TC T 20 PASS . GT 1/1\n
"""
    matched_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr4 5 . TC T 20 PASS . GT 1/1\n
"""
    missed = vcf_to_ChromVariants(missed_vcf, 'chr4')
    spurious = vcf_to_ChromVariants(spurious_vcf, 'chr4')
    matched = vcf_to_ChromVariants(matched_vcf, 'chr4')
    rescuer = SequenceRescuer(
        'chr4', 3, missed, spurious, matched, get_reference(), 50)
    self.assertTrue(rescuer.rescued)
示例12: testCollidingVariants
def testCollidingVariants(self):
    """Normalize two records at the same position; expect one record out."""
    input_vcf = """##fileformat=VCFv4.0
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 5 . A TGC 20 PASS . GT 1/1\n
chr1 5 . A GGG 20 PASS . GT 1/1\n
"""
    normalized = normalize(get_reference(), self.getVcf(input_vcf))
    self.assertEqual(self.countRecords(normalized), 1)
示例13: testAggregate
def testAggregate(self):
    """Aggregate stats from two chromosomes and check the SNP sums.

    Builds one stats object for chr2 and one for chr3 via
    chrom_evaluate_variants, aggregates them with _aggregate, and checks
    that 'num_true' and 'good_predictions' for SNPs equal the sums of the
    per-chromosome counts.
    """
    # First stats object: chr2.
    true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . G A 20 PASS . GT 1/1\n
chr2 5 . C T 20 PASS . GT 1/1\n
"""
    pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr2 3 . G A 20 PASS . GT 1/1\n
chr2 7 . G C 20 PASS . GT 1/1\n
"""
    true_vars = vcf_to_ChromVariants(true_str, 'chr2')
    pred_vars = vcf_to_ChromVariants(pred_str, 'chr2')
    cvs2 = chrom_evaluate_variants(
        true_vars, pred_vars, 100, 100, get_reference(), 50)

    # Second stats object: chr3.
    true_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr3 3 . G A 20 PASS . GT 1/1\n
chr3 5 . C T 20 PASS . GT 1/1\n
"""
    pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr3 3 . G A 20 PASS . GT 1/1\n
chr3 4 . T A 20 PASS . GT 1/1\n
chr3 7 . G C 20 PASS . GT 1/1\n
"""
    true_vars = vcf_to_ChromVariants(true_str, 'chr3')
    pred_vars = vcf_to_ChromVariants(pred_str, 'chr3')
    cvs3 = chrom_evaluate_variants(
        true_vars, pred_vars, 100, 100, get_reference(), 50)

    # Only the aggregator is inspected; the second return value is unused.
    aggregator, _ = _aggregate([cvs2, cvs3])

    # Test some sums.
    self.assertEqual(cvs2.num_true[VARIANT_TYPE.SNP], 2)
    self.assertEqual(cvs3.num_true[VARIANT_TYPE.SNP], 2)
    self.assertEqual(aggregator(VARIANT_TYPE.SNP)['num_true'], 4)
    self.assertEqual(cvs2.num_tp[VARIANT_TYPE.SNP], 1)
    self.assertEqual(cvs3.num_tp[VARIANT_TYPE.SNP], 1)
    self.assertEqual(aggregator(VARIANT_TYPE.SNP)['good_predictions'], 2)
示例14: testGenotypes
def testGenotypes(self):
    """Keep genotype info for a compound heterozygous call.

    Normalizes a single record with two ALT alleles and a 1/2 genotype and
    checks that the first sample's ``gt_nums`` is still "1/2".
    """
    vcf_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 2 . A C,T 20 PASS . GT 1/2\n
"""
    # Named vcf_reader (not vcf) so the local does not shadow the vcf module.
    vcf_reader = self.getVcf(vcf_str)
    # Builtin next() works on both Python 2.6+ and Python 3 iterators.
    record = next(normalize(get_reference(), vcf_reader))
    self.assertEqual(record.samples[0].gt_nums, "1/2")
示例15: testChromEvaluateGenotypeConcordance
def testChromEvaluateGenotypeConcordance(self):
    """Check SNP genotype concordance for unphased and then phased calls."""
    het = GENOTYPE_TYPE.HET
    hom_var = GENOTYPE_TYPE.HOM_VAR

    # Unphased genotypes.
    truth_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 2 . A T 20 PASS . GT 0/1\n
chr1 5 . C T 20 PASS . GT 0/1\n
chr1 9 . A G 20 PASS . GT 1/1\n
"""
    predicted_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 2 . A T 20 PASS . GT 1/1\n
chr1 6 . C G 20 PASS . GT 0/1\n
chr1 9 . A G 20 PASS . GT 1/1\n
"""
    truth = vcf_to_ChromVariants(truth_vcf, 'chr1')
    predicted = vcf_to_ChromVariants(predicted_vcf, 'chr1')
    stats = chrom_evaluate_variants(
        truth, predicted, 100, 100, get_reference(), 50)
    self.assertEqual(
        stats.genotype_concordance[VARIANT_TYPE.SNP][het][hom_var], 1)
    self.assertEqual(
        stats.genotype_concordance[VARIANT_TYPE.SNP][hom_var][hom_var], 1)
    # Only true positives are counted in genotype concordance.
    self.assertEqual(stats._nrd_counts(VARIANT_TYPE.SNP), (1, 2))

    # Phased variants should be counted correctly.
    truth_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 2 . A T 20 PASS . GT 0|1\n
chr1 9 . A G 20 PASS . GT 1|1\n
"""
    predicted_vcf = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 2 . A T 20 PASS . GT 1|0\n
chr1 9 . A G 20 PASS . GT 1|1\n
"""
    truth = vcf_to_ChromVariants(truth_vcf, 'chr1')
    predicted = vcf_to_ChromVariants(predicted_vcf, 'chr1')
    stats = chrom_evaluate_variants(
        truth, predicted, 100, 100, get_reference(), 50)
    self.assertEqual(
        stats.genotype_concordance[VARIANT_TYPE.SNP][het][het], 1)
    self.assertEqual(
        stats.genotype_concordance[VARIANT_TYPE.SNP][hom_var][hom_var], 1)
    self.assertEqual(stats._nrd_counts(VARIANT_TYPE.SNP), (0, 2))