本文整理匯總了Python中svtools.vcf.file.Vcf類的典型用法代碼示例。如果您正苦於以下問題:Python Vcf類的具體用法?Python Vcf怎麼用?Python Vcf使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了Vcf類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_duplicate_sample
def test_duplicate_sample(self):
    """Check that a header naming the same sample twice is rejected.

    The #CHROM column line lists NA00001 twice; the test expects
    ``Vcf.add_header`` to raise ``SystemExit`` when given that header.
    """
    header_lines = [
        '##fileformat=VCFv4.2',
        '##fileDate=20090805',
        '##source=myImputationProgramV3.1',
        '##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta',
        '##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>',
        '##phasing=partial',
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">',
        '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">',
        '##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">',
        '##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">',
        '##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">',
        '##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">',
        '##ALT=<ID=DEL,Description="DELETION">',
        '##FILTER=<ID=q10,Description="Quality below 10">',
        '##FILTER=<ID=s50,Description="Less than 50% of samples have data">',
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
        '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">',
        '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">',
        '##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">',
        # VCF column lines are tab-delimited. Explicit \t escapes keep the
        # separators intact; with spaces the sample names are never split out.
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00001',
    ]
    v = Vcf()
    with self.assertRaises(SystemExit):
        v.add_header(header_lines)
示例2: TestGenotype
class TestGenotype(TestCase):
    """Tests for per-sample ``Genotype`` format handling."""

    def setUp(self):
        """Build a one-sample Vcf header and a BND variant to attach genotypes to."""
        header_lines = [
            '##fileformat=VCFv4.2',
            '##fileDate=20151202',
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            # Column and record lines are tab-delimited; the record is split
            # on '\t' below, so the separators must be real tabs.
            '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878',
        ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = '1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tGT:SU\t0/0:9'
        self.variant = Variant(self.variant_line.split('\t'), self.vcf)

    def test_set_format(self):
        """Setting a declared-but-unused FORMAT key stores it and activates it on the variant."""
        g = Genotype(self.variant, '0/1')
        self.assertNotIn('INACTIVE', self.variant.active_formats)
        g.set_format('INACTIVE', 10)
        self.assertEqual(g.format['INACTIVE'], 10)
        self.assertIn('INACTIVE', self.variant.active_formats)

    def test_get_format(self):
        """A value written with set_format is returned by get_format."""
        g = Genotype(self.variant, '0/1')
        g.set_format('INACTIVE', 10)
        self.assertEqual(g.get_format('INACTIVE'), 10)

    def test_get_gt_string(self):
        """The genotype string is expected to show '.' for the unset SU value."""
        g = Genotype(self.variant, '0/1')
        g.set_format('INACTIVE', 10)
        self.assertEqual(g.get_gt_string(), '0/1:.:10')
示例3: test_add_genotype
def test_add_genotype(self):
    """Check the genotype string of a record whose FORMAT column has no GT key.

    The header declares no GT format and the record carries only ``SU``;
    the test expects ``get_gt_string`` to report ``./.:9``.
    """
    header_lines = [
        '##fileformat=VCFv4.2',
        '##fileDate=20151202',
        '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
        '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
        '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
        '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
        '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
        # Tab-delimited, matching the split('\t') applied to the record below.
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878',
    ]
    vcf = Vcf()
    vcf.add_header(header_lines)
    variant_line = '1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tSU\t9'
    variant = Variant(variant_line.split('\t'), vcf)
    self.assertEqual(variant.get_gt_string(), './.:9')
示例4: VCFReader
class VCFReader(object):
    """Read a VCF stream: consume the header eagerly, then yield variants lazily."""

    def __init__(self, stream):
        """Consume header lines from *stream* up to and including the #CHROM line.

        The collected lines are handed to ``Vcf.add_header``. The stream is
        kept so that iteration resumes at the first record.

        Args:
            stream: iterable of text lines positioned at the start of a VCF.

        Raises:
            RuntimeError: if a line not starting with '#' is met before the
                #CHROM column line.
        """
        self.vcf_obj = Vcf()
        self.stream = stream
        header = list()
        for line in stream:
            # startswith() also handles an empty string, for which line[0]
            # would raise IndexError instead of the intended RuntimeError.
            if not line.startswith('#'):
                raise RuntimeError('Error parsing VCF header. Line is not a header line. {}'.format(line))
            header.append(line)
            if line.startswith('#CHROM\t'):
                # end of header
                break
        self.vcf_obj.add_header(header)

    def __iter__(self):
        """Yield a ``Variant`` for every remaining line of the stream."""
        for line in self.stream:
            yield Variant(line.rstrip().split('\t'), self.vcf_obj)
示例5: test_all
def test_all(self):
    """Round-trip a full header through Vcf and then append a sample.

    The expected header keeps the fileformat, fileDate, reference, INFO,
    ALT, FORMAT and #CHROM lines of the input. The source, contig, phasing
    and FILTER lines are left out of the expectation, and fileDate is
    expected to carry today's date. A newly added fourth sample is expected
    at column index 12.
    """
    header_lines = [
        '##fileformat=VCFv4.2',
        '##fileDate=20090805',
        '##source=myImputationProgramV3.1',
        '##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta',
        '##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>',
        '##phasing=partial',
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">',
        '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">',
        '##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">',
        '##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">',
        '##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">',
        '##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">',
        '##FILTER=<ID=q10,Description="Quality below 10">',
        '##FILTER=<ID=s50,Description="Less than 50% of samples have data">',
        '##ALT=<ID=DEL,Description="DELETION">',
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
        '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">',
        '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">',
        '##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">',
        # VCF column lines are tab-delimited; spaces would hide the samples.
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003',
    ]
    v = Vcf()
    v.add_header(header_lines)
    expected_header_lines = header_lines[:2] + header_lines[3:4] + header_lines[6:12] + header_lines[14:]
    # The date in the regenerated header is expected to be the current day.
    expected_header_lines[1] = '##fileDate=' + time.strftime('%Y%m%d')
    self.assertEqual(v.get_header(), '\n'.join(expected_header_lines))
    v.add_sample('ScottPilgrim')
    self.assertEqual(v.sample_to_col('ScottPilgrim'), 12)
示例6: TestVariant
class TestVariant(TestCase):
    """Tests for INFO, FORMAT and string rendering of a single-sample ``Variant``."""

    def setUp(self):
        """Build a one-sample Vcf header and parse a BND record against it."""
        header_lines = [
            '##fileformat=VCFv4.2',
            '##fileDate=20151202',
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            # Column and record lines are tab-delimited; the record is split
            # on '\t' below, so the separators must be real tabs.
            '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878',
        ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = '1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tGT:SU\t0/0:9'
        self.variant = Variant(self.variant_line.split('\t'), self.vcf)

    def test_set_info(self):
        """Declared INFO keys can be overwritten; an undeclared key must exit."""
        self.variant.set_info('SVTYPE', 'INV')
        self.assertEqual(self.variant.info['SVTYPE'], 'INV')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.info['IMAFLAG'], False)
        with self.assertRaises(SystemExit):
            self.variant.set_info('SUPER', True)

    def test_get_info(self):
        """Flags read back as True, values as strings, unknown keys raise KeyError."""
        self.assertEqual(self.variant.get_info('IMAFLAG'), True)
        self.assertEqual(self.variant.get_info('SVTYPE'), 'BND')
        with self.assertRaises(KeyError):
            self.variant.get_info('CALI')

    def test_get_info_string(self):
        """A flag set to False is expected to disappear from the INFO string."""
        self.assertEqual(self.variant.get_info_string(), 'SVTYPE=BND;STRANDS=-+:9;IMAFLAG')
        self.variant.set_info('IMAFLAG', False)
        self.assertEqual(self.variant.get_info_string(), 'SVTYPE=BND;STRANDS=-+:9')

    def test_get_format_string(self):
        """The FORMAT column is rendered as given in the record."""
        self.assertEqual(self.variant.get_format_string(), 'GT:SU')

    def test_genotype(self):
        """The genotype of the only sample renders back to its input text."""
        self.assertEqual(self.variant.genotype('NA12878').get_gt_string(), '0/0:9')

    def test_var_string(self):
        """An unmodified variant renders back to the exact input line."""
        self.assertEqual(self.variant.get_var_string(), self.variant_line)
示例7: __init__
def __init__(self, stream):
    """Consume header lines from *stream* up to and including the #CHROM line.

    The collected lines are handed to ``Vcf.add_header``. The stream itself
    is stored on the instance, positioned just after the header.

    Args:
        stream: iterable of text lines positioned at the start of a VCF.

    Raises:
        RuntimeError: if a line not starting with '#' is met before the
            #CHROM column line.
    """
    self.vcf_obj = Vcf()
    self.stream = stream
    header = list()
    for line in stream:
        # startswith() also handles an empty string, for which line[0]
        # would raise IndexError instead of the intended RuntimeError.
        if not line.startswith('#'):
            raise RuntimeError('Error parsing VCF header. Line is not a header line. {}'.format(line))
        header.append(line)
        if line.startswith('#CHROM\t'):
            # end of header
            break
    self.vcf_obj.add_header(header)
示例8: test_var_string_format_caching
def test_var_string_format_caching(self):
    """Compare re-rendered and cached genotype text after genotypes are parsed.

    The record lists FORMAT as GT:AS:SU. After parsing, the test expects the
    default rendering to use GT:SU:AS, while ``use_cached_gt_string=True``
    is expected to reproduce the input line unchanged.
    """
    header_lines = [
        "##fileformat=VCFv4.2",
        "##fileDate=20151202",
        '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
        '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
        '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
        '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
        '##FORMAT=<ID=AS,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
        '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
        # Tab-delimited, matching the split("\t") applied to the record below.
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878",
    ]
    vcf = Vcf()
    vcf.add_header(header_lines)
    variant_line = "1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tGT:AS:SU\t0/0:1:9"
    uncached_line = "1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tGT:SU:AS\t0/0:9:1"
    variant = Variant(variant_line.split("\t"), vcf)
    variant.genotypes()  # force parsing; the return value is not needed
    self.assertEqual(variant.get_var_string(), uncached_line)
    self.assertEqual(variant.get_var_string(use_cached_gt_string=True), variant_line)
示例9: bedpeToVcf
def bedpeToVcf(bedpe_file, vcf_out):
    """Convert BEDPE lines to VCF records and write them to *vcf_out*.

    Leading '##' lines are collected as the VCF header, and the single-'#'
    column line is rewritten to the VCF fixed columns. The header is written
    as soon as the first data line is seen. Every data line is converted
    through ``BedpeToVcfConverter`` and each resulting variant is written on
    its own line. *vcf_out* is closed before returning.

    Args:
        bedpe_file: iterable of BEDPE text lines.
        vcf_out: writable, closable text stream receiving the VCF.
    """
    myvcf = Vcf()
    converter = BedpeToVcfConverter(myvcf)
    header = list()
    header_written = False

    def write_header():
        # Register the collected header lines and emit them as VCFv4.2.
        myvcf.add_header(header)
        myvcf.file_format = 'VCFv4.2'
        vcf_out.write(myvcf.get_header() + '\n')

    # parse the bedpe data
    for line in bedpe_file:
        if not header_written:
            if line.startswith('##'):
                header.append(line)
                continue
            # startswith() avoids the IndexError that line[1] raised on a
            # bare '#' line.
            if line.startswith('#'):
                # Keep everything after the first 20 BEDPE columns.
                # Presumably this tail holds FORMAT and the sample names;
                # confirm against the BEDPE layout.
                sample_list_str = line.rstrip().split('\t', 20)[-1]
                header.append('\t'.join([
                    '#CHROM',
                    'POS',
                    'ID',
                    'REF',
                    'ALT',
                    'QUAL',
                    'FILTER',
                    'INFO',
                    sample_list_str,
                ]))
                continue
            write_header()
            header_written = True
        bedpe = Bedpe(line.rstrip().split('\t'))
        for v in converter.convert(bedpe):
            vcf_out.write(v.get_var_string() + '\n')

    # A header-only input never reaches a data line, so the header collected
    # above would otherwise be silently dropped.
    if not header_written and header:
        write_header()

    # close the VCF output file
    vcf_out.close()
    return
示例10: setUp
def setUp(self):
    """Create the converter under test and a one-sample Vcf header for it."""
    self.converter = VcfToBedpeConverter()
    header_lines = [
        '##fileformat=VCFv4.2',
        '##fileDate=20090805',
        '##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta',
        '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">',
        '##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">',
        '##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant">',
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
        # VCF column lines are tab-delimited; explicit \t keeps that visible.
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001',
    ]
    self.vcf = Vcf()
    self.vcf.add_header(header_lines)
示例11: setUp
def setUp(self):
    """Build a two-sample Vcf header and parse a BND record against it."""
    header_lines = [
        '##fileformat=VCFv4.2',
        '##fileDate=20151202',
        '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
        '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
        '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
        '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
        '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
        # Column and record lines are tab-delimited; the record is split on
        # '\t' below, so the separators must be real tabs.
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878\tNA0001',
    ]
    self.vcf = Vcf()
    self.vcf.add_header(header_lines)
    self.variant_line = '1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tGT:SU\t0/0:9\t1/1:15'
    self.variant = Variant(self.variant_line.split('\t'), self.vcf)
示例12: run_gt_refine
def run_gt_refine(vcf_in, vcf_out, diag_outfile, gender_file):
    """Annotate each variant with its average genotype-cluster silhouette.

    Reads a two-column, tab-separated sample/sex table from *gender_file*,
    then streams *vcf_in*. The header is copied to *vcf_out* with an added
    ``SIL_GT_AVG`` INFO definition. Every record gets ``SIL_GT_AVG`` set from
    the first row of the frame returned by ``get_silhouette``, and that
    frame is appended as CSV to *diag_outfile*. The CSV column header is
    written for the first record only.

    Args:
        vcf_in: iterable, closable stream of VCF text lines.
        vcf_out: writable, closable stream receiving the annotated VCF.
        diag_outfile: path of the CSV diagnostics file to create.
        gender_file: iterable, closable stream of ``sample<TAB>int`` lines.

    All three streams and the diagnostics file are closed on exit, including
    when an exception interrupts processing.
    """
    vcf = Vcf()
    header = []
    in_header = True
    sex = {}
    try:
        for line in gender_file:
            v = line.rstrip().split('\t')
            sex[v[0]] = int(v[1])

        # The context manager closes the diagnostics file on errors too.
        with open(diag_outfile, 'w', 4096) as outf:
            write_csv_header = True
            for line in vcf_in:
                if in_header:
                    if line[0] == "#":
                        header.append(line)
                        continue
                    # First record line: the header is complete, emit it once.
                    in_header = False
                    vcf.add_header(header)
                    vcf.add_info('SIL_GT_AVG', '1', 'Float', 'Average silhouette of genotype clusters')
                    vcf_out.write(vcf.get_header() + '\n')

                var = Variant(line.rstrip().split('\t'), vcf)
                df = load_df(var, sex)
                df1 = get_silhouette(df)
                sil_avg = df1.iloc[0, df1.columns.get_loc('sil_gt_avg')]
                var.info['SIL_GT_AVG'] = '%0.2f' % sil_avg
                vcf_out.write(var.get_var_string(use_cached_gt_string=True) + '\n')

                # Only the first frame carries the CSV column header.
                df1.to_csv(outf, header=write_csv_header)
                write_csv_header = False
    finally:
        vcf_out.close()
        vcf_in.close()
        gender_file.close()
    return
示例13: write_copynumber
def write_copynumber(vcf_file, sample, vcf_out, cn_list):
    """Write a single-sample VCF with a CN FORMAT value on non-BND records.

    The header is copied with the column line reduced to the nine fixed
    columns plus *sample*, and a ``CN`` FORMAT definition is added. Each
    record is reduced to the same ten columns. Records in which no column
    contains ``SVTYPE=BND`` take the next value from *cn_list*: it is
    appended as a new ``CN`` key, or written over an existing one. BND
    records are written through without consuming a value. *vcf_out* is
    closed before returning.

    Args:
        vcf_file: iterable of VCF text lines.
        sample: sample name to keep; must appear in the #CHROM line.
        vcf_out: writable, closable stream receiving the result.
        cn_list: copy-number values, one per non-BND record, in file order.

    Exits with status 1 if *sample* is not a column of the #CHROM line, or
    if records are reached without a #CHROM line having been seen.
    """
    # go through the VCF and add the read depth annotations
    in_header = True
    header = []
    vcf = Vcf()
    i = 0
    s_index = -1
    for line in vcf_file:
        if in_header:
            if line[0] == '#' and line[1] == '#':
                header.append(line)
                continue
            if line[0] == '#' and line[1] != '#':
                columns = line.rstrip().split('\t')
                try:
                    s_index = columns.index(sample)
                except ValueError:
                    sys.stderr.write("Please input valid VCF, format field for " + sample + " not found in VCF")
                    sys.exit(1)
                line = '\t'.join(map(str, columns[:9] + [sample]))
                header.append(line)
                continue
            else:
                in_header = False
                vcf.add_header(header)
                vcf.add_format('CN', 1, 'Float', 'Copy number of structural variant segment.')
                vcf_out.write(vcf.get_header() + '\n')

        v = line.rstrip().split('\t')
        # s_index is still -1 only when no #CHROM line was seen; a missing
        # sample in an existing #CHROM line has already exited above.
        if s_index == -1:
            sys.stderr.write("Input a valid sample name: " + sample + " not found in a provided VCF")
            sys.exit(1)
        v = v[:9] + [v[s_index]]
        if not any("SVTYPE=BND" in s for s in v):
            # Compare whole FORMAT keys. A substring test would mistake a key
            # such as "CNL" for "CN" and then fail in index("CN").
            format_keys = v[8].rstrip().split(":")
            if "CN" not in format_keys:
                v[8] = v[8] + ":CN"
                v[9] = v[9] + ":" + str(cn_list[i])
            else:
                cn_index = format_keys.index("CN")
                gts = v[9].rstrip().split(":")
                gts[cn_index] = str(cn_list[i])
                v[9] = ":".join(gts)
            i += 1
        # write the VCF
        vcf_out.write('\t'.join(v) + '\n')
    vcf_out.close()
    return
示例14: TestVariant
class TestVariant(TestCase):
    """Tests for INFO, FORMAT and genotype handling of a two-sample ``Variant``."""

    def setUp(self):
        """Build a two-sample Vcf header and parse a BND record against it."""
        header_lines = [
            "##fileformat=VCFv4.2",
            "##fileDate=20151202",
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            # Column and record lines are tab-delimited; the record is split
            # on "\t" below, so the separators must be real tabs.
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878\tNA0001",
        ]
        self.vcf = Vcf()
        self.vcf.add_header(header_lines)
        self.variant_line = (
            "1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tGT:SU\t0/0:9\t1/1:15"
        )
        self.variant = Variant(self.variant_line.split("\t"), self.vcf)

    def test_parse_genotypes(self):
        """_parse_genotypes is expected to map each sample name to its Genotype."""
        genotype_field_strings = ["0/1:20", "0/0:15"]
        parsed_dict = self.variant._parse_genotypes(genotype_field_strings)
        na12878_gt = Genotype(self.variant, genotype_field_strings[0].split(":"))
        na0001_gt = Genotype(self.variant, genotype_field_strings[1].split(":"))
        expected_genotype_dict = {"NA12878": na12878_gt, "NA0001": na0001_gt}
        self.assertEqual(parsed_dict, expected_genotype_dict)

    def test_set_info(self):
        """Declared INFO keys can be overwritten; an undeclared key must exit."""
        self.variant.set_info("SVTYPE", "INV")
        self.assertEqual(self.variant.info["SVTYPE"], "INV")
        self.variant.set_info("IMAFLAG", False)
        self.assertEqual(self.variant.info["IMAFLAG"], False)
        with self.assertRaises(SystemExit):
            self.variant.set_info("SUPER", True)

    def test_get_info(self):
        """Flags read back as True, values as strings, unknown keys raise KeyError."""
        self.assertEqual(self.variant.get_info("IMAFLAG"), True)
        self.assertEqual(self.variant.get_info("SVTYPE"), "BND")
        with self.assertRaises(KeyError):
            self.variant.get_info("CALI")

    def test_get_info_string(self):
        """A flag set to False is expected to disappear from the INFO string."""
        self.assertEqual(self.variant.get_info_string(), "SVTYPE=BND;STRANDS=-+:9;IMAFLAG")
        self.variant.set_info("IMAFLAG", False)
        self.assertEqual(self.variant.get_info_string(), "SVTYPE=BND;STRANDS=-+:9")

    def test_get_format_string(self):
        """The FORMAT column is rendered as given in the record."""
        self.assertEqual(self.variant.get_format_string(), "GT:SU")

    def test_get_format_string_caching(self):
        """The FORMAT string changes order after parsing unless the cached form is requested."""
        header_lines = [
            "##fileformat=VCFv4.2",
            "##fileDate=20151202",
            '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">',
            '##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">',
            '##INFO=<ID=IMAFLAG,Number=.,Type=Flag,Description="Test Flag code">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=AS,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">',
            '##FORMAT=<ID=INACTIVE,Number=1,Type=Integer,Description="A format not in use">',
            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878",
        ]
        vcf = Vcf()
        vcf.add_header(header_lines)
        variant_line = "1\t820915\t5838_1\tN\t]GL000232.1:20940]N\t0.00\t.\tSVTYPE=BND;STRANDS=-+:9;IMAFLAG\tGT:AS:SU\t0/0:1:9"
        variant = Variant(variant_line.split("\t"), vcf)
        self.assertEqual(variant.get_format_string(), "GT:AS:SU")
        variant.genotypes()  # force parsing; the return value is not needed
        self.assertEqual(variant.get_format_string(), "GT:SU:AS")
        self.assertEqual(variant.get_format_string(True), "GT:AS:SU")

    def test_get_gt_string(self):
        """Both sample columns are rendered, one tab-separated field per sample."""
        # NOTE(review): the tab separator is inferred from test_var_string,
        # which requires the full line to round-trip. Confirm against
        # Variant.get_gt_string.
        self.assertEqual(self.variant.get_gt_string(), "0/0:9\t1/1:15")

    def test_genotype(self):
        """A sample's genotype renders back to its input text."""
        self.assertEqual(self.variant.genotype("NA12878").get_gt_string(), "0/0:9")

    def test_set_genotype(self):
        """A replaced genotype is what genotype() subsequently returns."""
        new_genotype = Genotype(self.variant, ["0/1", "9"])
        self.variant.set_genotype("NA12878", new_genotype)
        self.assertEqual(self.variant.genotype("NA12878").get_gt_string(), "0/1:9")

    def test_genotypes(self):
        """genotypes() yields the samples in header order."""
        self.assertEqual([x.get_gt_string() for x in self.variant.genotypes()], ["0/0:9", "1/1:15"])

    def test_var_string(self):
        """Editing a genotype changes the default rendering but not the cached one."""
        self.assertEqual(self.variant.get_var_string(), self.variant_line)
        self.variant.genotype("NA12878").set_format("GT", "./.")
        self.assertEqual(self.variant.get_var_string(use_cached_gt_string=True), self.variant_line)
        self.assertNotEqual(self.variant.get_var_string(), self.variant_line)
def test_var_string_format_caching(self):
header_lines = [
"##fileformat=VCFv4.2",
"##fileDate=20151202",
#.........這裏部分代碼省略.........
示例15: bedpeToVcf
def bedpeToVcf(bedpe_file, vcf_out):
myvcf = Vcf()
in_header = True
# parse the bedpe data
header = list()
for line in bedpe_file:
if in_header:
if line[0:2] == '##':
header.append(line)
continue
elif line[0] == '#' and line[1] != '#':
sample_list_str = line.rstrip().split('\t', 14)[-1]
header.append('\t'.join([
'#CHROM',
'POS',
'ID',
'REF',
'ALT',
'QUAL',
'FILTER',
'INFO',
sample_list_str
] ))
continue
else:
in_header = False
myvcf.add_header(header)
myvcf.file_format='VCFv4.2'
vcf_out.write(myvcf.get_header() + '\n')
#
bedpe = Bedpe(line.rstrip().split('\t'))
if bedpe.svtype == 'BND':
bedpe1_list = [
bedpe.c1,
bedpe.b1 + 1,
bedpe.name + '_1', #ID
'N',
'<' + str(bedpe.svtype) + '>', #ALT
bedpe.score,
bedpe.filter
]
bedpe1_list.extend(bedpe.misc)
var1 = Variant(bedpe1_list, myvcf)
if bedpe.o1 == '+':
if bedpe.o2 == '-':
var1.alt = '%s[%s:%s[' % (var1.ref, bedpe.c2, bedpe.b2 + 1)
elif bedpe.o2 == '+':
var1.alt = '%s]%s:%s]' % (var1.ref, bedpe.c2, bedpe.b2 + 1)
elif bedpe.o1 == '-':
if bedpe.o2 == '+':
var1.alt = ']%s:%s]%s' % (bedpe.c2, bedpe.b2 + 1, var1.ref)
elif bedpe.o2 == '-':
var1.alt = '[%s:%s[%s' % (bedpe.c2, bedpe.b2 + 1, var1.ref)
misc = copy.deepcopy(bedpe.misc)
strands = re.split('=|:',''.join(filter(lambda x: 'STRANDS=' in x, bedpe.misc[0].split(";"))))
strands_str = str(strands[0]) + '=' + str(strands[1][::-1]) + ':' + str(strands[2])
misc[0]=misc[0].replace(''.join(filter(lambda x: 'STRANDS=' in x, bedpe.misc[0].split(";"))), strands_str)
#add the cipos ciend,cipos95 and ciend95 variables
misc[0]=misc[0].replace(''.join(filter(lambda x: 'CIPOS=' in x, bedpe.misc[0].split(";"))),'CIPOS='+ re.split('=',''.join(filter(lambda x: 'CIEND=' in x, bedpe.misc[0].split(";"))))[1])
misc[0]=misc[0].replace(''.join(filter(lambda x: 'CIEND=' in x, bedpe.misc[0].split(";"))),'CIEND='+ re.split('=',''.join(filter(lambda x: 'CIPOS=' in x, bedpe.misc[0].split(";"))))[1])
misc[0]=misc[0].replace(''.join(filter(lambda x: 'CIPOS95=' in x, bedpe.misc[0].split(";"))),'CIPOS95='+ re.split('=',''.join(filter(lambda x: 'CIEND95=' in x, bedpe.misc[0].split(";"))))[1])
misc[0]=misc[0].replace(''.join(filter(lambda x: 'CIEND95=' in x, bedpe.misc[0].split(";"))),'CIEND95='+ re.split('=',''.join(filter(lambda x: 'CIPOS95=' in x, bedpe.misc[0].split(";"))))[1])
#Change MATEID
misc[0]= misc[0].replace(''.join(filter(lambda x: 'MATEID=' in x, bedpe.misc[0].split(";"))),'MATEID=' + bedpe.name + '_2')
#ADD IDENTIFIER FOR SECONDARY BREAKEND MATE
misc[0]=misc[0].replace(''.join(filter(lambda x: 'EVENT=' in x, bedpe.misc[0].split(";"))),''.join(filter(lambda x: 'EVENT=' in x, bedpe.misc[0].split(";"))) + ';SECONDARY;')
bedpe2_list = [
bedpe.c2, #chrom1
bedpe.b2 + 1,
bedpe.name + '_2', #ID
'N',
'<' + str(bedpe.svtype) + '>', #ALT
bedpe.score,
bedpe.filter
]
bedpe2_list.extend(misc)
var2 = Variant(bedpe2_list, myvcf)
# add the strands field. For variant 2 must switch the order
if bedpe.o2 == '+':
if bedpe.o1 == '-':
var2.alt = '%s[%s:%s[' % (var2.ref, bedpe.c1, bedpe.b1 + 1)
elif bedpe.o1 == '+':
var2.alt = '%s]%s:%s]' % (var2.ref, bedpe.c1, bedpe.b1 + 1)
elif bedpe.o2 == '-':
if bedpe.o1 == '+':
var2.alt = ']%s:%s]%s' % (bedpe.c1, bedpe.b1 + 1, var2.ref)
elif bedpe.o1 == '-':
var2.alt = '[%s:%s[%s' % (bedpe.c1, bedpe.b1 + 1, var2.ref)
if bedpe.malformedFlag == 0:
vcf_out.write(var1.get_var_string() + '\n')
vcf_out.write(var2.get_var_string() + '\n')
elif bedpe.malformedFlag == 1:
vcf_out.write(var2.get_var_string() + '\n')
elif bedpe.malformedFlag == 2:
vcf_out.write(var1.get_var_string() + '\n')
else:
# set VCF info elements for simple events
bedpe_list = [
#.........這裏部分代碼省略.........