本文整理汇总了 Python 中 rgt.GenomicRegionSet.GenomicRegionSet.write_bed 方法的典型用法代码示例。如果您正苦于以下问题：Python GenomicRegionSet.write_bed 方法的具体用法？Python GenomicRegionSet.write_bed 怎么用？Python GenomicRegionSet.write_bed 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 rgt.GenomicRegionSet.GenomicRegionSet 的用法示例。
在下文中一共展示了 GenomicRegionSet.write_bed 方法的 6 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: create_file
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import write_bed [as 别名]
def create_file(self):
    """Label motif-predicted binding sites by overlap with extended summits.

    Regions are read from ``self.tfbs_summit_fname``; for each one the last
    whitespace-separated field of ``region.data`` is added to ``region.initial``
    to obtain the summit, and the region is replaced by a window reaching
    ``self.peak_ext // 2`` bases to either side (clamped at 0 on the left).

    Regions read from ``self.mpbs_fname`` are then split into those returned by
    ``intersect`` (name suffix ``:Y``) and those returned by ``subtract``
    (name suffix ``:N``); any existing ``:...`` suffix on the name is dropped
    first. The labelled regions are sorted and written to
    ``<self.output_location>/<self.mpbs_name>.bed``.
    """
    # Floor division keeps the window bounds integral: with true division
    # (Python 3 "/") the coordinates would turn into floats.
    half_ext = self.peak_ext // 2

    # Expanding summits
    tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
    tfbs_summit_regions.read_bed(self.tfbs_summit_fname)
    for region in tfbs_summit_regions:
        summit = int(region.data.split()[-1]) + region.initial
        region.initial = max(summit - half_ext, 0)
        region.final = summit + half_ext

    # Calculating intersections
    mpbs_regions = GenomicRegionSet("MPBS Regions")
    mpbs_regions.read_bed(self.mpbs_fname)
    tfbs_summit_regions.sort()
    mpbs_regions.sort()
    # NOTE(review): presumably ORIGINAL mode returns the overlapping MPBS
    # regions unclipped and whole_region=True drops any MPBS region that
    # overlaps at all - confirm against the GenomicRegionSet implementation.
    with_overlap_regions = mpbs_regions.intersect(tfbs_summit_regions, mode=OverlapType.ORIGINAL)
    without_overlap_regions = mpbs_regions.subtract(tfbs_summit_regions, whole_region=True)

    # Tag every region with its evidence label and pool them in one set.
    tfbs_regions = GenomicRegionSet("TFBS Regions")
    for labelled, suffix in ((with_overlap_regions, ":Y"), (without_overlap_regions, ":N")):
        for region in labelled:
            region.name = region.name.split(":")[0] + suffix
            tfbs_regions.add(region)
    tfbs_regions.sort()

    tfbs_fname = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
    tfbs_regions.write_bed(tfbs_fname)
示例2: merge_DBD_regions
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import write_bed [as 别名]
def merge_DBD_regions(path):
    """Pool the per-RNA DBD BED files found below each sub-directory of *path*.

    For every directory ``<path>/<t>``, each existing file
    ``<path>/<t>/<rna>/DBD_<rna>.bed`` is read, the names of its regions are
    prefixed with ``<rna>_``, and the pooled regions are written to
    ``<path>/<t>/DBD_<t>.bed``. Entries of *path* that are not directories
    are ignored.
    """
    for target in os.listdir(path):
        target_dir = os.path.join(path, target)
        if not os.path.isdir(target_dir):
            continue

        pooled = GenomicRegionSet(target)
        for rna in os.listdir(target_dir):
            bed_path = os.path.join(target_dir, rna, "DBD_" + rna + ".bed")
            if not os.path.exists(bed_path):
                continue
            regions = GenomicRegionSet(rna)
            regions.read_bed(bed_path)
            # Prefix each region with its RNA so the origin survives the merge.
            for region in regions:
                region.name = rna + "_" + region.name
            pooled.combine(regions)

        pooled.write_bed(os.path.join(target_dir, "DBD_" + target + ".bed"))
示例3: MultipleSeqAlignment
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import write_bed [as 别名]
# NOTE(review): `seqs`, `rg`, `bed` and `args` are defined outside this excerpt and the
# original indentation was lost; presumably everything up to the `rg.data` assignment runs
# once per region `rg` of `bed` - confirm against the full script.
# Write the collected records as a FASTA alignment named after the current region.
new_alignment = MultipleSeqAlignment(records=seqs)
#print(len(new_alignment))
AlignIO.write(new_alignment, "mm9_"+rg.name+".fa", "fasta")
# Run PhyloCSF on that FASTA file and capture its standard output.
# NOTE(review): the executable path is hard-coded to one user's home directory.
process = subprocess.Popen(["/home/joseph/Apps/PhyloCSF/PhyloCSF",
"29mammals",
"mm9_"+rg.name+".fa",
"--removeRefGaps",
"--strategy=omega",
"--orf=StopStop3",
"--minCodons=25",
"--frames=3"],
stdout=subprocess.PIPE)
# stderr is not piped, so `err` is None here.
out, err = process.communicate()
print(out)
#print(out.split("\t")[2])
#print(out.split("\t")[3])
#print(out.split("\t")[4])
# NOTE(review): on Python 3 `out` is bytes unless the pipe is opened in text mode, and
# splitting bytes on a str separator raises TypeError - confirm the target interpreter.
data = rg.data.split("\t")
# The third tab-separated field of the PhyloCSF output replaces the first field of
# the region's data; an output with fewer than three fields raises IndexError.
score = out.split("\t")[2]
rg.data = "\t".join([score] + data[1:])
# Write the updated regions to the output path given on the command line.
bed.write_bed(args.o)
# 29/9/2015
# python /projects/reg-gen/tools/phylocsf_check.py -i /projects/ig440396_dendriticcells/exp/RNASeq/expression/isofroms/deseq/new_bed/all_TCONs.bed -o all_TCONS_phyloCSF.bed -organism mm9 -mafdir /data/genome/mm9/multiz30way/maf/
示例4: dbd_regions
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import write_bed [as 别名]
# ......... (part of the code is omitted here) .........
# NOTE(review): this excerpt starts inside the branch that labels its results "strand:-";
# the original indentation was lost, so the nesting must be inferred from the keywords.
#print("2")
#print("Subtract: "+str(subtract))
# Order the pair so the start is not greater than the end; note that the swap also
# overwrites exon[1] in place.
if dbdstart > exon[1]: dbdstart, exon[1] = exon[1], dbdstart
# First part of a binding site that continues past the current exon.
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=exon[1],
orientation=exons[0][3],
name=str(rbs.initial)+"-"+str(rbs.initial+subtract)+"_split1" ) )
elif rbs.initial < cf and rbs.final <= tail:
#print("3")
# Second part: it starts at exon[2] and covers what remains of the binding site
# after the `subtract` bases already placed on the previous exon.
dbdstart = exon[2]
dbdend = exon[2] - rbs.final + rbs.initial + subtract
if dbdstart > dbdend: dbdstart, dbdend = dbdend, dbdstart
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=dbdend,
orientation=exons[0][3],
name=str(cf)+"-"+str(rbs.final)+"_split2" ) )
# Record both halves for this binding site.
dbdmap[str(rbs)] = dbd[-2].toString() + " & " + dbd[-1].toString() + " strand:-"
loop = False
break
elif rbs.initial > tail:
pass
# Advance the cumulative offset by the length of the exon just visited.
cf += l
loop = False
else:
# Branch whose results are labelled "strand:+".
# NOTE(review): `loop` appears to be cleared after the first pass over the exons, so the
# `while` body presumably runs at most once - confirm with the original indentation.
while loop:
# cf: summed length of the exons visited so far; tail: cf plus the current exon length.
cf = 0
for exon in exons:
#print(exon)
l = exon[2] - exon[1]
tail = cf + l
#print("cf: " + str(cf))
#print("tail: " + str(tail) )
if cf <= rbs.initial <= tail:
# The binding site starts in this exon: shift its offset by exon[1].
dbdstart = exon[1] + rbs.initial - cf
if rbs.final <= tail:
#print("1")
# It also ends in this exon, so a single region is enough.
dbdend = exon[1] + rbs.final -cf
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=dbdend,
orientation=exons[0][3],
name=str(rbs.initial)+"-"+str(rbs.final) ) )
dbdmap[str(rbs)] = dbd[-1].toString() + " strand:+"
loop = False
break
elif rbs.final > tail:
# It runs past this exon: `subtract` is the number of bases that fit here and the
# first part ends at exon[2].
subtract = l + cf - rbs.initial
#print("2")
#print("Subtract: "+str(subtract))
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=exon[2],
orientation=exons[0][3],
name=str(rbs.initial)+"-"+str(rbs.initial+subtract)+"_split1" ) )
elif rbs.initial < cf and rbs.final <= tail:
#print("3")
# Remainder of a split binding site, placed at the start of this exon.
# NOTE(review): a site with rbs.initial < cf and rbs.final > tail (spanning more than
# two exons) matches none of the branches in this chain.
dbdstart = exon[1]
dbdend = exon[1] + rbs.final - rbs.initial - subtract
dbd.add( GenomicRegion(chrom=exons[0][0],
initial=dbdstart, final=dbdend,
orientation=exons[0][3],
name=str(cf)+"-"+str(rbs.final)+"_split2" ) )
dbdmap[str(rbs)] = dbd[-2].toString() + " & " + dbd[-1].toString() + " strand:+"
loop = False
break
elif rbs.initial > tail:
pass
cf += l
loop = False
# Write the collected regions: without out_file, `output` is used as a directory and the
# file is named after the RNA; otherwise `output` itself is the file name.
if not out_file:
dbd.write_bed(filename=os.path.join(output, "DBD_"+rna_name+".bed"))
else:
# print(dbd)
# print(dbd.sequences[0])
dbd.write_bed(filename=output)
# FASTA
if fasta:
#print(dbdmap)
# The RNA sequence is read from rna_temp.fa, located in `output` or in `temp`
# depending on out_file.
# NOTE(review): the Fastafile handle is not closed anywhere in this excerpt.
if not out_file:
seq = pysam.Fastafile(os.path.join(output,"rna_temp.fa"))
fasta_f = os.path.join(output, "DBD_"+rna_name+".fa")
else:
seq = pysam.Fastafile(os.path.join(temp,"rna_temp.fa"))
fasta_f = output+".fa"
# NOTE(review): the `as fasta` target rebinds the name `fasta`, which was tested as a
# flag just above; afterwards it refers to the (closed) file object.
with open(fasta_f, 'w') as fasta:
for rbs in sig_region:
# One FASTA record per region of sig_region, with a header "><rna_name>:<initial>-<final>".
print(">"+ rna_name +":"+str(rbs.initial)+"-"+str(rbs.final), file=fasta)
s = seq.fetch(rbs.chrom, max(0, rbs.initial), rbs.final)
# Wrap the sequence at 80 characters per line.
for ss in [s[i:i + 80] for i in range(0, len(s), 80)]:
print(ss, file=fasta)
示例5: print
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import write_bed [as 别名]
# Mode "bed_extend": read the input BED, lengthen its regions by args.l and write the result.
# NOTE(review): this excerpt is the middle of a longer if/elif chain and its indentation was
# lost; in particular it is ambiguous whether the first `else:` below pairs with
# `if args.oz:` or with `if region.initial == region.final:` - confirm in the original.
elif args.mode == "bed_extend":
print("input:\t" + args.i)
print("output:\t" + args.o)
bed = GenomicRegionSet("bed")
bed.read_bed(args.i)
for region in bed:
if args.oz:
# With args.oz set, zero-length regions get their end moved by args.l.
if region.initial == region.final:
region.final += args.l
else:
# With args.both set, the start is moved back by args.l as well.
# NOTE(review): the start is not clamped at 0, so it may become negative.
if args.both:
region.initial -= args.l
else: pass
region.final += args.l
bed.write_bed(args.o)
############### BED get promoters #########################################
# Mode "bed_get_promoters": the excerpt ends at the header of the loop over the genes.
elif args.mode == "bed_get_promoters":
print("input:\t" + args.i)
print("output:\t" + args.o)
print("organism:\t" + args.organism)
gene = GenomicRegionSet("genes")
### Input BED file
if args.i.endswith(".bed"):
gene.read_bed(args.i)
promoter = GenomicRegionSet("promoter")
# NOTE(review): args.l is cast to int here but is used uncast in the bed_extend mode.
promoterLength = int(args.l)
for s in gene:
示例6: __init__
# 需要导入模块: from rgt.GenomicRegionSet import GenomicRegionSet [as 别名]
# 或者: from rgt.GenomicRegionSet.GenomicRegionSet import write_bed [as 别名]
class RandomTest:
def __init__(self, rna_fasta, rna_name, dna_region, organism, showdbs=False):
    """Load the RNA FASTA file and the target DNA regions for one organism.

    Args:
        rna_fasta: Path to the FASTA file with the RNA sequence(s).
        rna_name: Name used for the RNA; when falsy, the name of the first
            record of the FASTA file is used instead.
        dna_region: Path to the BED file with the target DNA regions.
        organism: Organism identifier handed to ``GenomeData`` and to
            ``gene_association``.
        showdbs: Stored unchanged on ``self.showdbs``.
    """
    self.organism = organism
    genome = GenomeData(organism)
    self.genome_path = genome.get_genome()

    # RNA: Path to the FASTA file
    self.rna_fasta = rna_fasta
    self.showdbs = showdbs
    rnas = SequenceSet(name="rna", seq_type=SequenceType.RNA)
    rnas.read_fasta(self.rna_fasta)
    self.rna_name = rna_name if rna_name else rnas[0].name

    # DNA: GenomicRegionSet
    self.dna_region = GenomicRegionSet(name="target")
    self.dna_region.read_bed(dna_region)
    self.dna_region = self.dna_region.gene_association(organism=self.organism, show_dis=True)

    self.topDBD = []
    # A list of pairs fixes the key order on every interpreter; keyword
    # arguments only keep their order from Python 3.6 on (PEP 468).
    # NOTE(review): "name" records the rna_name argument as passed, not the
    # resolved self.rna_name - confirm that this is intended.
    self.stat = OrderedDict([("name", rna_name), ("genome", organism)])
    self.stat["target_regions"] = str(len(self.dna_region))
def get_rna_region_str(self, rna):
    """Parse the RNA regions from the information header of *rna*.

    The header is expected to follow the pattern
    ``REGION_chr3_51978050_51983935_-_``. The result of the module-level
    ``get_rna_region_str`` helper is stored on ``self.rna_regions``. When it is
    non-empty and its first entry has five fields, the last field is stored as
    a float on ``self.rna_expression``; otherwise that attribute is ``"n.a."``.
    """
    regions = get_rna_region_str(rna)
    self.rna_regions = regions
    if not regions or len(regions[0]) != 5:
        self.rna_expression = "n.a."
        return
    self.rna_expression = float(regions[0][-1])
def connect_rna(self, rna, temp):
    """Call the module-level ``connect_rna`` helper and record its first two results.

    Element 0 of the helper's result is stored as text under
    ``self.stat["exons"]``; element 1 is stored as text under
    ``self.stat["seq_length"]`` and unchanged on ``self.rna_len``.
    """
    summary = connect_rna(rna, temp, self.rna_name)
    self.stat["exons"] = str(summary[0])
    sequence_length = summary[1]
    self.stat["seq_length"] = str(sequence_length)
    self.rna_len = sequence_length
def target_dna(self, temp, remove_temp, cutoff, l, e, c, fr, fm, of, mf, par, obed=False):
"""Calculate the true counts of triplexes on the given dna regions.

find_triplex is run twice on <temp>/rna_temp.fa against self.dna_region: once with
dna_fine_posi=False (kept as self.txp) and once with dna_fine_posi=True (kept as
self.txpf). The parameters l, e, c, fr, fm, of, mf and par are forwarded unchanged to
both calls; cutoff is passed to the first merge_rbs call only. When obed is truthy,
self.txp is also written to <temp>/<obed>_target_region_dbs.bed.

NOTE(review): the method continues beyond this excerpt and the original indentation
was lost.
"""
self.triplexator_p = [ l, e, c, fr, fm, of, mf ]
# First pass: prefix "targeted_region", dna_fine_posi=False.
txp = find_triplex(rna_fasta=os.path.join(temp, "rna_temp.fa"), dna_region=self.dna_region,
temp=temp, organism=self.organism, remove_temp=remove_temp,
l=l, e=e, c=c, fr=fr, fm=fm, of=of, mf=mf, par=par, genome_path=self.genome_path,
prefix="targeted_region", dna_fine_posi=False)
txp.merge_rbs(rm_duplicate=True, region_set=self.dna_region, asgene_organism=self.organism, cutoff=cutoff)
self.txp = txp
self.stat["DBSs_target_all"] = str(len(self.txp))
txp.remove_duplicates()
# The keys of the merged dictionary of the first pass drive the second pass and the counts.
self.rbss = txp.merged_dict.keys()
# if len(self.rbss) == 0:
# print("ERROR: No potential binding event. Please change the parameters.")
# sys.exit(1)
# Second pass: prefix "dbs", dna_fine_posi=True.
txpf = find_triplex(rna_fasta=os.path.join(temp, "rna_temp.fa"), dna_region=self.dna_region,
temp=temp, organism=self.organism, remove_temp=remove_temp,
l=l, e=e, c=c, fr=fr, fm=fm, of=of, mf=mf, par=par, genome_path=self.genome_path,
prefix="dbs", dna_fine_posi=True)
txpf.remove_duplicates()
txpf.merge_rbs(rbss=self.rbss, rm_duplicate=True, asgene_organism=self.organism)
self.txpf = txpf
# NOTE(review): this overwrites the "DBSs_target_all" value stored after the first pass;
# possibly one of the two assignments was meant to use a different key.
self.stat["DBSs_target_all"] = str(len(self.txpf))
self.counts_tr = OrderedDict()
self.counts_dbs = OrderedDict()
for rbs in self.rbss:
# counts_tr: [entries merged under rbs in the first pass, remaining target regions];
# counts_dbs: entries merged under rbs in the second pass.
tr = len(self.txp.merged_dict[rbs])
self.counts_tr[rbs] = [tr, len(self.dna_region) - tr]
self.counts_dbs[rbs] = len(self.txpf.merged_dict[rbs])
self.region_dbd = self.txpf.sort_rbs_by_regions(self.dna_region)
self.region_dbs = self.txpf.sort_rd_by_regions(regionset=self.dna_region)
self.region_dbsm = {}
self.region_coverage = {}
for region in self.dna_region:
# Per target region: its merged DBS set and the covered fraction of the region length.
self.region_dbsm[region.toString()] = self.region_dbs[region.toString()].get_dbs().merge(w_return=True)
self.region_coverage[region.toString()] = float(self.region_dbsm[region.toString()].total_coverage()) / len \
(region)
self.stat["target_regions"] = str(len(self.dna_region))
if obed:
# btr = self.txp.get_dbs()
# btr = btr.gene_association(organism=self.organism, show_dis=True)
# btr.write_bed(os.path.join(temp, obed + "_target_region_dbs.bed"))
# dbss = txpf.get_dbs()
# dbss.write_bed(os.path.join(temp, obed + "_dbss.bed"))
# output = self.dna_region.gene_association(organism=self.organism, show_dis=True)
self.txp.write_bed(filename=os.path.join(temp, obed + "_target_region_dbs.bed"),
dbd_tag=False,
remove_duplicates=False, associated=self.organism)
#.........这里部分代码省略.........