本文整理汇总了Python中jcvi.formats.bed.Bed类的典型用法代码示例。如果您正苦于以下问题:Python Bed类的具体用法?Python Bed怎么用?Python Bed使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Bed类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: prepare_synteny
def prepare_synteny(tourfile, lastfile, odir, p, opts):
"""
Prepare synteny plots for movie().
"""
qbedfile, sbedfile = get_bed_filenames(lastfile, p, opts)
qbedfile = op.abspath(qbedfile)
sbedfile = op.abspath(sbedfile)
qbed = Bed(qbedfile, sorted=False)
contig_to_beds = dict(qbed.sub_beds())
# Create a separate directory for the subplots and movie
mkdir(odir, overwrite=True)
os.chdir(odir)
logging.debug("Change into subdir `{}`".format(odir))
# Make anchorsfile
anchorsfile = ".".join(op.basename(lastfile).split(".", 2)[:2]) \
+ ".anchors"
fw = open(anchorsfile, "w")
for b in Blast(lastfile):
print >> fw, "\t".join((gene_name(b.query), gene_name(b.subject),
str(int(b.score))))
fw.close()
# Symlink sbed
symlink(sbedfile, op.basename(sbedfile))
return anchorsfile, qbedfile, contig_to_beds
示例2: insertion
def insertion(args):
"""
%prog insertion mic.mac.bed
Find IES based on mapping MIC reads to MAC genome. Output a bedfile with
'lesions' (stack of broken reads) in the MAC genome.
"""
p = OptionParser(insertion.__doc__)
p.add_option("--mindepth", default=6, type="int",
help="Minimum depth to call an insertion")
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
bedfile, = args
mindepth = opts.mindepth
bed = Bed(bedfile)
fw = must_open(opts.outfile, "w")
for seqid, feats in bed.sub_beds():
left_ends = Counter([x.start for x in feats])
right_ends = Counter([x.end for x in feats])
selected = []
for le, count in left_ends.items():
if count >= mindepth:
selected.append((seqid, le, "LE-{0}".format(le), count))
for re, count in right_ends.items():
if count >= mindepth:
selected.append((seqid, re, "RE-{0}".format(re), count))
selected.sort()
for seqid, pos, label, count in selected:
label = "{0}-r{1}".format(label, count)
print >> fw, "\t".join((seqid, str(pos - 1), str(pos), label))
示例3: nucmer
def nucmer(args):
"""
%prog nucmer mappings.bed MTR.fasta assembly.fasta chr1 3
Select specific chromosome region based on MTR mapping. The above command
will extract chr1:2,000,001-3,000,000.
"""
p = OptionParser(nucmer.__doc__)
opts, args = p.parse_args(args)
if len(args) != 5:
sys.exit(not p.print_help())
mapbed, mtrfasta, asmfasta, chr, idx = args
idx = int(idx)
m1 = 1000000
bedfile = "sample.bed"
bed = Bed()
bed.add("\t".join(str(x) for x in (chr, (idx - 1) * m1, idx * m1)))
bed.print_to_file(bedfile)
cmd = "intersectBed -a {0} -b {1} -nonamecheck -sorted | cut -f4".format(mapbed, bedfile)
idsfile = "query.ids"
sh(cmd, outfile=idsfile)
sfasta = fastaFromBed(bedfile, mtrfasta)
qfasta = "query.fasta"
cmd = "faSomeRecords {0} {1} {2}".format(asmfasta, idsfile, qfasta)
sh(cmd)
cmd = "nucmer {0} {1}".format(sfasta, qfasta)
sh(cmd)
mummerplot_main(["out.delta", "--refcov=0"])
sh("mv out.pdf {0}.{1}.pdf".format(chr, idx))
示例4: bed
def bed(args):
"""
%prog bed anchorsfile
Convert ANCHORS file to BED format.
"""
from collections import defaultdict
from jcvi.compara.synteny import AnchorFile, check_beds
from jcvi.formats.bed import Bed
from jcvi.formats.base import get_number
p = OptionParser(bed.__doc__)
p.add_option("--switch", default=False, action="store_true",
help="Switch reference and aligned map elements")
p.add_option("--scale", type="float",
help="Scale the aligned map distance by factor")
p.set_beds()
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
anchorsfile, = args
switch = opts.switch
scale = opts.scale
ac = AnchorFile(anchorsfile)
pairs = defaultdict(list)
for a, b, block_id in ac.iter_pairs():
pairs[a].append(b)
qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts)
bd = Bed()
for q in qbed:
qseqid, qstart, qend, qaccn = q.seqid, q.start, q.end, q.accn
if qaccn not in pairs:
continue
for s in pairs[qaccn]:
si, s = sorder[s]
sseqid, sstart, send, saccn = s.seqid, s.start, s.end, s.accn
if switch:
qseqid, sseqid = sseqid, qseqid
qstart, sstart = sstart, qstart
qend, send = send, qend
qaccn, saccn = saccn, qaccn
if scale:
sstart /= scale
try:
newsseqid = get_number(sseqid)
except ValueError:
raise ValueError, "`{0}` is on `{1}` with no number to extract".\
format(saccn, sseqid)
bedline = "\t".join(str(x) for x in (qseqid, qstart - 1, qend,
"{0}:{1}".format(newsseqid, sstart)))
bd.add(bedline)
bd.print_to_file(filename=opts.outfile, sorted=True)
示例5: scaffold
def scaffold(args):
"""
%prog scaffold scaffold.fasta synteny.blast synteny.sizes synteny.bed
physicalmap.blast physicalmap.sizes physicalmap.bed
As evaluation of scaffolding, visualize external line of evidences:
* Plot synteny to an external genome
* Plot alignments to physical map
* Plot alignments to genetic map (TODO)
Each trio defines one panel to be plotted. blastfile defines the matchings
between the evidences vs scaffolds. Then the evidence sizes, and evidence
bed to plot dot plots.
This script will plot a dot in the dot plot in the corresponding location
the plots are one contig/scaffold per plot.
"""
from jcvi.graphics.base import set_image_options
from jcvi.utils.iter import grouper
p = OptionParser(scaffold.__doc__)
p.add_option("--cutoff", type="int", default=1000000,
help="Plot scaffolds with size larger than [default: %default]")
p.add_option("--highlights",
help="A set of regions in BED format to highlight [default: %default]")
opts, args, iopts = set_image_options(p, args, figsize="14x8", dpi=150)
if len(args) < 4 or len(args) % 3 != 1:
sys.exit(not p.print_help())
highlights = opts.highlights
scafsizes = Sizes(args[0])
trios = list(grouper(3, args[1:]))
trios = [(a, Sizes(b), Bed(c)) for a, b, c in trios]
if highlights:
hlbed = Bed(highlights)
for scaffoldID, scafsize in scafsizes.iter_sizes():
if scafsize < opts.cutoff:
continue
logging.debug("Loading {0} (size={1})".format(scaffoldID,
thousands(scafsize)))
tmpname = scaffoldID + ".sizes"
tmp = open(tmpname, "w")
tmp.write("{0}\t{1}".format(scaffoldID, scafsize))
tmp.close()
tmpsizes = Sizes(tmpname)
tmpsizes.close(clean=True)
if highlights:
subhighlights = list(hlbed.sub_bed(scaffoldID))
imagename = ".".join((scaffoldID, opts.format))
plot_one_scaffold(scaffoldID, tmpsizes, None, trios, imagename, iopts,
highlights=subhighlights)
示例6: frombed
def frombed(args):
"""
%prog frombed bedfile contigfasta readfasta
Convert read placement to contig format. This is useful before running BAMBUS.
"""
from jcvi.formats.fasta import Fasta
from jcvi.formats.bed import Bed
from jcvi.utils.cbook import fill
p = OptionParser(frombed.__doc__)
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
bedfile, contigfasta, readfasta = args
prefix = bedfile.rsplit(".", 1)[0]
contigfile = prefix + ".contig"
idsfile = prefix + ".ids"
contigfasta = Fasta(contigfasta)
readfasta = Fasta(readfasta)
bed = Bed(bedfile)
checksum = "00000000 checksum."
fw_ids = open(idsfile, "w")
fw = open(contigfile, "w")
for ctg, reads in bed.sub_beds():
ctgseq = contigfasta[ctg]
ctgline = "##{0} {1} {2} bases, {3}".format(\
ctg, len(reads), len(ctgseq), checksum)
print >> fw_ids, ctg
print >> fw, ctgline
print >> fw, fill(ctgseq.seq)
for b in reads:
read = b.accn
strand = b.strand
readseq = readfasta[read]
rc = " [RC]" if strand == "-" else ""
readlen = len(readseq)
rstart, rend = 1, readlen
if strand == "-":
rstart, rend = rend, rstart
readrange = "{{{0} {1}}}".format(rstart, rend)
conrange = "<{0} {1}>".format(b.start, b.end)
readline = "#{0}(0){1} {2} bases, {3} {4} {5}".format(\
read, rc, readlen, checksum, readrange, conrange)
print >> fw, readline
print >> fw, fill(readseq.seq)
logging.debug("Mapped contigs written to `{0}`.".format(contigfile))
logging.debug("Contig IDs written to `{0}`.".format(idsfile))
示例7: write_lst
def write_lst(bedfile):
pf = op.basename(bedfile).split(".")[0]
mkdir(pf)
bed = Bed(bedfile)
stanza = []
for seqid, bs in bed.sub_beds():
fname = op.join(pf, "{0}.lst".format(seqid))
fw = open(fname, "w")
for b in bs:
print >> fw, "{0}{1}".format(b.accn.replace(" ", ""), b.strand)
stanza.append((seqid, fname))
fw.close()
return pf, stanza
示例8: tips
def tips(args):
"""
%prog tips patchers.bed complements.bed original.fasta backbone.fasta
Append telomeric sequences based on patchers and complements.
"""
p = OptionParser(tips.__doc__)
opts, args = p.parse_args(args)
if len(args) != 4:
sys.exit(not p.print_help())
pbedfile, cbedfile, sizesfile, bbfasta = args
pbed = Bed(pbedfile, sorted=False)
cbed = Bed(cbedfile, sorted=False)
complements = dict()
for object, beds in groupby(cbed, key=lambda x: x.seqid):
beds = list(beds)
complements[object] = beds
sizes = Sizes(sizesfile).mapping
bbsizes = Sizes(bbfasta).mapping
tbeds = []
for object, beds in groupby(pbed, key=lambda x: x.accn):
beds = list(beds)
startbed, endbed = beds[0], beds[-1]
start_id, end_id = startbed.seqid, endbed.seqid
if startbed.start == 1:
start_id = None
if endbed.end == sizes[end_id]:
end_id = None
print >> sys.stderr, object, start_id, end_id
if start_id:
b = complements[start_id][0]
b.accn = object
tbeds.append(b)
tbeds.append(BedLine("\t".join(str(x) for x in \
(object, 0, bbsizes[object], object, 1000, "+"))))
if end_id:
b = complements[end_id][-1]
b.accn = object
tbeds.append(b)
tbed = Bed()
tbed.extend(tbeds)
tbedfile = "tips.bed"
tbed.print_to_file(tbedfile)
示例9: liftover
def liftover(args):
"""
%prog liftover agpfile bedfile
Given coordinates in components, convert to the coordinates in chromosomes.
"""
p = OptionParser(liftover.__doc__)
p.add_option("--prefix", default=False, action="store_true",
help="Prepend prefix to accn names [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(p.print_help())
agpfile, bedfile = args
agp = AGP(agpfile).order
bed = Bed(bedfile)
newbed = Bed()
for b in bed:
component = b.seqid
if component not in agp:
newbed.append(b)
continue
i, a = agp[component]
assert a.component_beg < a.component_end
arange = a.component_beg, a.component_end
assert b.start < b.end
brange = b.start, b.end
st = range_intersect(arange, brange)
if not st:
continue
start, end = st
assert start <= end
if a.orientation == '-':
d = a.object_end + a.component_beg
s, t = d - end, d - start
else:
d = a.object_beg - a.component_beg
s, t = d + start, d + end
name = b.accn.replace(" ", "_")
if opts.prefix:
name = component + "_" + name
bline = "\t".join(str(x) for x in (a.object, s - 1, t, name))
newbed.append(BedLine(bline))
newbed.sort(key=newbed.nullkey)
newbed.print_to_file()
示例10: merge
def merge(args):
"""
%prog merge map1 map2 map3 ...
Convert csv maps to bed format.
Each input map is csv formatted, for example:
ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition
scaffold_2707,11508,1,0
scaffold_2707,11525,1,1.2
scaffold_759,81336,1,9.7
"""
p = OptionParser(merge.__doc__)
p.add_option("-w", "--weightsfile", default="weights.txt",
help="Write weights to file")
p.set_outfile("out.bed")
opts, args = p.parse_args(args)
if len(args) < 1:
sys.exit(not p.print_help())
maps = args
outfile = opts.outfile
fp = must_open(maps)
b = Bed()
mapnames = set()
for row in fp:
mapname = fp.filename().split(".")[0]
mapnames.add(mapname)
try:
m = CSVMapLine(row, mapname=mapname)
if m.cm < 0:
logging.error("Ignore marker with negative genetic distance")
print >> sys.stderr, row.strip()
else:
b.append(BedLine(m.bedline))
except (IndexError, ValueError): # header or mal-formed line
continue
b.print_to_file(filename=outfile, sorted=True)
logging.debug("A total of {0} markers written to `{1}`.".\
format(len(b), outfile))
assert len(maps) == len(mapnames), "You have a collision in map names"
write_weightsfile(mapnames, weightsfile=opts.weightsfile)
示例11: patcher
def patcher(args):
"""
%prog patcher backbone.bed other.bed
Given optical map alignment, prepare the patchers. Use --backbone to suggest
which assembly is the major one, and the patchers will be extracted from
another assembly.
"""
from jcvi.formats.bed import uniq
p = OptionParser(patcher.__doc__)
p.add_option("--backbone", default="OM",
help="Prefix of the backbone assembly [default: %default]")
p.add_option("--object", default="object",
help="New object name [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
backbonebed, otherbed = args
backbonebed = uniq([backbonebed])
otherbed = uniq([otherbed])
pf = backbonebed.split(".")[0]
key = lambda x: (x.seqid, x.start, x.end)
# Make a uniq bed keeping backbone at redundant intervals
cmd = "intersectBed -v -wa"
cmd += " -a {0} -b {1}".format(otherbed, backbonebed)
outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed
sh(cmd, outfile=outfile)
uniqbed = Bed()
uniqbedfile = pf + ".merged.bed"
uniqbed.extend(Bed(backbonebed))
uniqbed.extend(Bed(outfile))
uniqbed.print_to_file(uniqbedfile, sorted=True)
# Condense adjacent intervals, allow some chaining
bed = uniqbed
key = lambda x: range_parse(x.accn).seqid
bed_fn = pf + ".patchers.bed"
bed_fw = open(bed_fn, "w")
for k, sb in groupby(bed, key=key):
sb = list(sb)
chr, start, end, strand = merge_ranges(sb)
print >> bed_fw, "\t".join(str(x) for x in \
(chr, start, end, opts.object, 1000, strand))
bed_fw.close()
示例12: breakpoint
def breakpoint(args):
"""
%prog breakpoint blastfile bedfile
Identify breakpoints where collinearity ends. `blastfile` contains mapping
from markers (query) to scaffolds (subject). `bedfile` contains marker
locations in the related species.
"""
from jcvi.formats.blast import bed
from jcvi.utils.range import range_interleave
p = OptionParser(breakpoint.__doc__)
p.add_option("--xdist", type="int", default=20,
help="xdist (in related genome) cutoff [default: %default]")
p.add_option("--ydist", type="int", default=200000,
help="ydist (in current genome) cutoff [default: %default]")
p.add_option("-n", type="int", default=5,
help="number of markers in a block [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
blastfile, bedfile = args
order = Bed(bedfile).order
blastbedfile = bed([blastfile])
bbed = Bed(blastbedfile)
key = lambda x: x[1]
for scaffold, bs in bbed.sub_beds():
blocks = get_blocks(scaffold, bs, order,
xdist=opts.xdist, ydist=opts.ydist, N=opts.n)
sblocks = []
for block in blocks:
xx, yy = zip(*block)
sblocks.append((scaffold, min(yy), max(yy)))
iblocks = range_interleave(sblocks)
for ib in iblocks:
ch, start, end = ib
print "{0}\t{1}\t{2}".format(ch, start - 1, end)
示例13: eject
def eject(args):
"""
%prog eject candidates.bed chr.fasta
Eject scaffolds from assembly, using the range identified by closest().
"""
p = OptionParser(eject.__doc__)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
candidates, chrfasta = args
sizesfile = Sizes(chrfasta).filename
cbedfile = complementBed(candidates, sizesfile)
cbed = Bed(cbedfile)
for b in cbed:
b.accn = b.seqid
b.score = 1000
b.strand = '+'
cbed.print_to_file()
示例14: mergebed
def mergebed(args):
"""
%prog mergebed map1.bed map2.bed map3.bed ...
Combine bed maps to bed format, adding the map name.
"""
p = OptionParser(mergebed.__doc__)
p.add_option("-w", "--weightsfile", default="weights.txt",
help="Write weights to file")
p.set_outfile("out.bed")
opts, args = p.parse_args(args)
if len(args) < 1:
sys.exit(not p.print_help())
maps = args
outfile = opts.outfile
fp = must_open(maps)
b = Bed()
mapnames = set()
for row in fp:
mapname = fp.filename().split(".")[0]
mapnames.add(mapname)
try:
m = BedLine(row)
m.accn = "{0}-{1}".format(mapname, m.accn)
m.extra = ["{0}:{1}".format(m.seqid, m.start)]
b.append(m)
except (IndexError, ValueError): # header or mal-formed line
continue
b.print_to_file(filename=outfile, sorted=True)
logging.debug("A total of {0} markers written to `{1}`.".\
format(len(b), outfile))
assert len(maps) == len(mapnames), "You have a collision in map names"
write_weightsfile(mapnames, weightsfile=opts.weightsfile)
示例15: bed
def bed(args):
'''
%prog bed gff_file [--options]
Parses the start, stop locations of the selected features out of GFF and
generate a bed file
'''
p = OptionParser(bed.__doc__)
p.add_option("--type", dest="type", default="gene",
help="Feature type to extract, use comma for multiple [default: %default]")
p.add_option("--key", dest="key", default="ID",
help="Key in the attributes to extract [default: %default]")
set_outfile(p)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
gffile, = args
key = opts.key
if key == "None":
key = None
type = set(x.strip() for x in opts.type.split(","))
gff = Gff(gffile, key=key)
b = Bed()
for g in gff:
if g.type not in type:
continue
b.append(g.bedline)
b.sort(key=b.key)
b.print_to_file(opts.outfile)