本文整理汇总了Python中jcvi.formats.bed.Bed.sub_beds方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.sub_beds方法的具体用法?Python Bed.sub_beds怎么用?Python Bed.sub_beds使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类jcvi.formats.bed.Bed
的用法示例。
在下文中一共展示了Bed.sub_beds方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: insertion
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def insertion(args):
"""
%prog insertion mic.mac.bed
Find IES based on mapping MIC reads to MAC genome. Output a bedfile with
'lesions' (stack of broken reads) in the MAC genome.
"""
p = OptionParser(insertion.__doc__)
p.add_option("--mindepth", default=6, type="int",
help="Minimum depth to call an insertion")
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
bedfile, = args
mindepth = opts.mindepth
bed = Bed(bedfile)
fw = must_open(opts.outfile, "w")
for seqid, feats in bed.sub_beds():
left_ends = Counter([x.start for x in feats])
right_ends = Counter([x.end for x in feats])
selected = []
for le, count in left_ends.items():
if count >= mindepth:
selected.append((seqid, le, "LE-{0}".format(le), count))
for re, count in right_ends.items():
if count >= mindepth:
selected.append((seqid, re, "RE-{0}".format(re), count))
selected.sort()
for seqid, pos, label, count in selected:
label = "{0}-r{1}".format(label, count)
print >> fw, "\t".join((seqid, str(pos - 1), str(pos), label))
示例2: prepare_synteny
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def prepare_synteny(tourfile, lastfile, odir, p, opts):
"""
Prepare synteny plots for movie().
"""
qbedfile, sbedfile = get_bed_filenames(lastfile, p, opts)
qbedfile = op.abspath(qbedfile)
sbedfile = op.abspath(sbedfile)
qbed = Bed(qbedfile, sorted=False)
contig_to_beds = dict(qbed.sub_beds())
# Create a separate directory for the subplots and movie
mkdir(odir, overwrite=True)
os.chdir(odir)
logging.debug("Change into subdir `{}`".format(odir))
# Make anchorsfile
anchorsfile = ".".join(op.basename(lastfile).split(".", 2)[:2]) \
+ ".anchors"
fw = open(anchorsfile, "w")
for b in Blast(lastfile):
print >> fw, "\t".join((gene_name(b.query), gene_name(b.subject),
str(int(b.score))))
fw.close()
# Symlink sbed
symlink(sbedfile, op.basename(sbedfile))
return anchorsfile, qbedfile, contig_to_beds
示例3: frombed
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def frombed(args):
"""
%prog frombed bedfile contigfasta readfasta
Convert read placement to contig format. This is useful before running BAMBUS.
"""
from jcvi.formats.fasta import Fasta
from jcvi.formats.bed import Bed
from jcvi.utils.cbook import fill
p = OptionParser(frombed.__doc__)
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
bedfile, contigfasta, readfasta = args
prefix = bedfile.rsplit(".", 1)[0]
contigfile = prefix + ".contig"
idsfile = prefix + ".ids"
contigfasta = Fasta(contigfasta)
readfasta = Fasta(readfasta)
bed = Bed(bedfile)
checksum = "00000000 checksum."
fw_ids = open(idsfile, "w")
fw = open(contigfile, "w")
for ctg, reads in bed.sub_beds():
ctgseq = contigfasta[ctg]
ctgline = "##{0} {1} {2} bases, {3}".format(\
ctg, len(reads), len(ctgseq), checksum)
print >> fw_ids, ctg
print >> fw, ctgline
print >> fw, fill(ctgseq.seq)
for b in reads:
read = b.accn
strand = b.strand
readseq = readfasta[read]
rc = " [RC]" if strand == "-" else ""
readlen = len(readseq)
rstart, rend = 1, readlen
if strand == "-":
rstart, rend = rend, rstart
readrange = "{{{0} {1}}}".format(rstart, rend)
conrange = "<{0} {1}>".format(b.start, b.end)
readline = "#{0}(0){1} {2} bases, {3} {4} {5}".format(\
read, rc, readlen, checksum, readrange, conrange)
print >> fw, readline
print >> fw, fill(readseq.seq)
logging.debug("Mapped contigs written to `{0}`.".format(contigfile))
logging.debug("Contig IDs written to `{0}`.".format(idsfile))
示例4: write_lst
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def write_lst(bedfile):
pf = op.basename(bedfile).split(".")[0]
mkdir(pf)
bed = Bed(bedfile)
stanza = []
for seqid, bs in bed.sub_beds():
fname = op.join(pf, "{0}.lst".format(seqid))
fw = open(fname, "w")
for b in bs:
print >> fw, "{0}{1}".format(b.accn.replace(" ", ""), b.strand)
stanza.append((seqid, fname))
fw.close()
return pf, stanza
示例5: breakpoint
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def breakpoint(args):
"""
%prog breakpoint blastfile bedfile
Identify breakpoints where collinearity ends. `blastfile` contains mapping
from markers (query) to scaffolds (subject). `bedfile` contains marker
locations in the related species.
"""
from jcvi.formats.blast import bed
from jcvi.utils.range import range_interleave
p = OptionParser(breakpoint.__doc__)
p.add_option("--xdist", type="int", default=20,
help="xdist (in related genome) cutoff [default: %default]")
p.add_option("--ydist", type="int", default=200000,
help="ydist (in current genome) cutoff [default: %default]")
p.add_option("-n", type="int", default=5,
help="number of markers in a block [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
blastfile, bedfile = args
order = Bed(bedfile).order
blastbedfile = bed([blastfile])
bbed = Bed(blastbedfile)
key = lambda x: x[1]
for scaffold, bs in bbed.sub_beds():
blocks = get_blocks(scaffold, bs, order,
xdist=opts.xdist, ydist=opts.ydist, N=opts.n)
sblocks = []
for block in blocks:
xx, yy = zip(*block)
sblocks.append((scaffold, min(yy), max(yy)))
iblocks = range_interleave(sblocks)
for ib in iblocks:
ch, start, end = ib
print "{0}\t{1}\t{2}".format(ch, start - 1, end)
示例6: annotate
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
#.........这里部分代码省略.........
valid_resolve_choices = ["alignment", "overlap"]
p = OptionParser(annotate.__doc__)
p.add_option("--resolve", default="alignment", choices=valid_resolve_choices,
help="Resolve ID assignment based on a certain metric" \
+ " [default: %default]")
p.add_option("--atg_name", default=False, action="store_true",
help="Specify is locus IDs in `new.bed` file follow ATG nomenclature" \
+ " [default: %default]")
g1 = OptionGroup(p, "Optional parameters (alignment):\n" \
+ "Use if resolving ambiguities based on sequence `alignment`")
g1.add_option("--pid", dest="pid", default=35., type="float",
help="Percent identity cutoff [default: %default]")
g1.add_option("--score", dest="score", default=250., type="float",
help="Alignment score cutoff [default: %default]")
p.add_option_group(g1)
g2 = OptionGroup(p, "Optional parameters (overlap):\n" \
+ "Use if resolving ambiguities based on `overlap` length\n" \
+ "Parameters equivalent to `intersectBed`")
g2.add_option("-f", dest="f", default=0.5, type="float",
help="Minimum overlap fraction (0.0 - 1.0) [default: %default]")
g2.add_option("-r", dest="r", default=False, action="store_true",
help="Require fraction overlap to be reciprocal [default: %default]")
g2.add_option("-s", dest="s", default=True, action="store_true",
help="Require same strandedness [default: %default]")
p.add_option_group(g2)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
nbedfile, obedfile = args
npf, opf = nbedfile.rsplit(".", 1)[0], obedfile.rsplit(".", 1)[0]
# Make consolidated.bed
cbedfile = "consolidated.bed"
if not os.path.isfile(cbedfile):
consolidate(nbedfile, obedfile, cbedfile)
else:
logging.warning("`{0}` already exists. Skipping step".format(cbedfile))
logging.warning("Resolving ID assignment ambiguity based on `{0}`".\
format(opts.resolve))
if opts.resolve == "alignment":
# Get pairs and prompt to run needle
pairsfile = "nw.pairs"
scoresfile = "nw.scores"
if not os.path.isfile(pairsfile):
get_pairs(cbedfile, pairsfile)
else:
logging.warning("`{0}` already exists. Checking for needle output".\
format(pairsfile))
# If needle scores do not exist, prompt user to run needle
if not os.path.isfile(scoresfile):
logging.error("`{0}` does not exist. Please process {1} using `needle`".\
format(scoresfile, pairsfile))
sys.exit()
else:
scoresfile = "ovl.scores"
# Calculate overlap length using intersectBed
calculate_ovl(nbedfile, obedfile, opts, scoresfile)
logging.warning("`{0}' exists. Storing scores in memory".\
format(scoresfile))
scores = read_scores(scoresfile, opts)
# Iterate through consolidated bed and
# filter piles based on score
abedline = {}
cbed = Bed(cbedfile)
g = Grouper()
for c in cbed:
accn = c.accn
g.join(*accn.split(";"))
nbedline = {}
nbed = Bed(nbedfile)
for line in nbed: nbedline[line.accn] = line
splits = set()
for chr, chrbed in nbed.sub_beds():
abedline, splits = annotate_chr(chr, chrbed, g, scores, nbedline, abedline, opts, splits)
if splits is not None:
abedline = process_splits(splits, scores, nbedline, abedline)
abedfile = npf + ".annotated.bed"
afh = open(abedfile, "w")
for accn in abedline:
print >> afh, abedline[accn]
afh.close()
sort([abedfile, "-i"])
示例7: renumber
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def renumber(args):
"""
%prog renumber Mt35.consolidated.bed > tagged.bed
Renumber genes for annotation updates.
"""
from jcvi.algorithms.lis import longest_increasing_subsequence
from jcvi.utils.grouper import Grouper
p = OptionParser(renumber.__doc__)
p.set_annot_reformat_opts()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
bedfile, = args
pf = bedfile.rsplit(".", 1)[0]
abedfile = pf + ".a.bed"
bbedfile = pf + ".b.bed"
if need_update(bedfile, (abedfile, bbedfile)):
prepare(bedfile)
mbed = Bed(bbedfile)
g = Grouper()
for s in mbed:
accn = s.accn
g.join(*accn.split(";"))
bed = Bed(abedfile)
for chr, sbed in bed.sub_beds():
current_chr = chr_number(chr)
if not current_chr:
continue
ranks = []
gg = set()
for s in sbed:
accn = s.accn
achr, arank = atg_name(accn)
if achr != current_chr:
continue
ranks.append(arank)
gg.add(accn)
lranks = longest_increasing_subsequence(ranks)
print >> sys.stderr, current_chr, len(sbed), "==>", len(ranks), \
"==>", len(lranks)
granks = set(gene_name(current_chr, x, prefix=opts.prefix, \
pad0=opts.pad0, uc=opts.uc) for x in lranks) | \
set(gene_name(current_chr, x, prefix=opts.prefix, \
pad0=opts.pad0, sep="te", uc=opts.uc) for x in lranks)
tagstore = {}
for s in sbed:
achr, arank = atg_name(s.accn)
accn = s.accn
if accn in granks:
tag = (accn, FRAME)
elif accn in gg:
tag = (accn, RETAIN)
else:
tag = (".", NEW)
tagstore[accn] = tag
# Find cases where genes overlap
for s in sbed:
accn = s.accn
gaccn = g[accn]
tags = [((tagstore[x][-1] if x in tagstore else NEW), x) for x in gaccn]
group = [(PRIORITY.index(tag), x) for tag, x in tags]
best = min(group)[-1]
if accn != best:
tag = (best, OVERLAP)
else:
tag = tagstore[accn]
print "\t".join((str(s), "|".join(tag)))
示例8: instantiate
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def instantiate(args):
"""
%prog instantiate tagged.bed blacklist.ids big_gaps.bed
instantiate NEW genes tagged by renumber.
"""
p = OptionParser(instantiate.__doc__)
p.set_annot_reformat_opts()
p.add_option("--extended_stride", default=False, action="store_true",
help="Toggle extended strides for gene numbering")
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
taggedbed, blacklist, gapsbed = args
r = NameRegister(prefix=opts.prefix, pad0=opts.pad0, uc=opts.uc)
r.get_blacklist(blacklist)
r.get_gaps(gapsbed)
# Run through the bed, identify stretch of NEW ids to instantiate,
# identify the flanking FRAMEs, interpolate!
bed = Bed(taggedbed)
outputbed = taggedbed.rsplit(".", 1)[0] + ".new.bed"
fw = open(outputbed, "w")
tagkey = lambda x: x.rsplit("|", 1)[-1]
for chr, sbed in bed.sub_beds():
current_chr = chr_number(chr)
if not current_chr:
continue
sbed = list(sbed)
ranks = []
for i, s in enumerate(sbed):
nametag = s.extra[0]
tag = tagkey(nametag)
if tag in (NEW, FRAME):
ranks.append((i, nametag))
blocks = []
for tag, names in groupby(ranks, key=lambda x: tagkey(x[-1])):
names = list(names)
if tag == NEW:
blocks.append((tag, [sbed[x[0]] for x in names]))
else:
start, end = names[0][-1], names[-1][-1]
start, end = atg_name(start, retval="rank"), atg_name(end, retval="rank")
blocks.append((tag, [start, end]))
id_table = {} # old to new name conversion
for i, (tag, info) in enumerate(blocks):
if tag != NEW:
continue
start_id = 0 if i == 0 else blocks[i - 1][1][-1]
end_id = start_id + 10000 if i == len(blocks) -1 \
else blocks[i + 1][1][0]
r.allocate(info, chr, start_id, end_id, id_table, extended_stride=opts.extended_stride)
# Output new names
for i, s in enumerate(sbed):
nametag = s.extra[0]
name, tag = nametag.split("|")
if tag == NEW:
assert name == '.'
name = id_table[s.accn]
elif tag == OVERLAP:
if name in id_table:
name = id_table[name]
s.extra[0] = "|".join((name, tag))
print >> fw, s
fw.close()
示例9: mask
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def mask(args):
"""
%prog mask agpfile bedfile
Mask given ranges in componets to gaps.
"""
p = OptionParser(mask.__doc__)
p.add_option("--split", default=False, action="store_true",
help="Split object and create new names [default: %default]")
p.add_option("--log", default=False, action="store_true",
help="Write verbose logs to .masklog file [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(p.print_help())
agpfile, bedfile = args
agp = AGP(agpfile)
bed = Bed(bedfile)
simple_agp = agp.order
# agp lines to replace original ones, keyed by the component
agp_fixes = defaultdict(list)
newagpfile = agpfile.replace(".agp", ".masked.agp")
logfile = bedfile.replace(".bed", ".masklog")
fw = open(newagpfile, "w")
if opts.log:
fwlog = open(logfile, "w")
for component, intervals in bed.sub_beds():
if opts.log:
print >> fwlog, "\n".join(str(x) for x in intervals)
i, a = simple_agp[component]
object = a.object
component_span = a.component_span
orientation = a.orientation
if opts.log:
print >> fwlog, a
assert a.component_beg, a.component_end
arange = a.component_beg, a.component_end
# Make sure `ivs` contain DISJOINT ranges, and located within `arange`
ivs = []
for i in intervals:
iv = range_intersect(arange, (i.start, i.end))
if iv is not None:
ivs.append(iv)
# Sort the ends of `ivs` as well as the arange
arange = a.component_beg - 1, a.component_end + 1
endpoints = sorted(flatten(ivs + [arange]))
# reverse if component on negative strand
if orientation == '-':
endpoints.reverse()
sum_of_spans = 0
# assign complements as sequence components
for i, (a, b) in enumerate(pairwise(endpoints)):
if orientation == '-':
a, b = b, a
if orientation not in ('+', '-'):
orientation = '+'
oid = object + "_{0}".format(i / 2) if opts.split else object
aline = [oid, 0, 0, 0]
if i % 2 == 0:
cspan = b - a - 1
aline += ['D', component, a + 1, b - 1, orientation]
is_gap = False
else:
cspan = b - a + 1
aline += ["N", cspan, "fragment", "yes"]
is_gap = True
if cspan <= 0:
continue
sum_of_spans += cspan
aline = "\t".join(str(x) for x in aline)
if not (opts.split and is_gap):
agp_fixes[component].append(aline)
if opts.log:
print >> fwlog, aline
assert component_span == sum_of_spans
if opts.log:
print >> fwlog
# Finally write the masked agp
for a in agp:
if not a.is_gap and a.component_id in agp_fixes:
print >> fw, "\n".join(agp_fixes[a.component_id])
else:
print >> fw, a
fw.close()
# Reindex
idxagpfile = reindex([newagpfile])
shutil.move(idxagpfile, newagpfile)
#.........这里部分代码省略.........
示例10: cut
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def cut(args):
"""
%prog cut agpfile bedfile
Cut at the boundaries of the ranges in the bedfile. Use --shrink to control
the exact boundaries where you cut.
"""
p = OptionParser(cut.__doc__)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
agpfile, bedfile = args
agp = AGP(agpfile)
bed = Bed(bedfile)
simple_agp = agp.order
newagpfile = agpfile.replace(".agp", ".cut.agp")
fw = open(newagpfile, "w")
agp_fixes = defaultdict(list)
for component, intervals in bed.sub_beds():
i, a = simple_agp[component]
object = a.object
component_span = a.component_span
orientation = a.orientation
assert a.component_beg, a.component_end
arange = a.component_beg, a.component_end
cuts = set()
for i in intervals:
start, end = i.start, i.end
end -= 1
assert start <= end
cuts.add(start)
cuts.add(end)
cuts.add(0)
cuts.add(component_span)
cuts = list(sorted(cuts))
sum_of_spans = 0
for i, (a, b) in enumerate(pairwise(cuts)):
oid = object + "_{0}".format(i)
aline = [oid, 0, 0, 0]
cspan = b - a
aline += ['D', component, a + 1, b, orientation]
sum_of_spans += cspan
aline = "\t".join(str(x) for x in aline)
agp_fixes[component].append(aline)
assert component_span == sum_of_spans
# Finally write the masked agp
for a in agp:
if not a.is_gap and a.component_id in agp_fixes:
print >> fw, "\n".join(agp_fixes[a.component_id])
else:
print >> fw, a
fw.close()
# Reindex
idxagpfile = reindex([newagpfile])
shutil.move(idxagpfile, newagpfile)
return newagpfile
示例11: scaffold
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import sub_beds [as 别名]
def scaffold(args):
"""
%prog scaffold ctgfasta agpfile
Build scaffolds based on ordering in the AGP file.
"""
from jcvi.formats.agp import bed, order_to_agp, build
from jcvi.formats.bed import Bed
p = OptionParser(scaffold.__doc__)
p.add_option("--prefix", default=False, action="store_true",
help="Keep IDs with same prefix together [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
ctgfasta, agpfile = args
sizes = Sizes(ctgfasta).mapping
pf = ctgfasta.rsplit(".", 1)[0]
phasefile = pf + ".phases"
fwphase = open(phasefile, "w")
newagpfile = pf + ".new.agp"
fwagp = open(newagpfile, "w")
scaffoldbuckets = defaultdict(list)
bedfile = bed([agpfile, "--nogaps", "--outfile=tmp"])
bb = Bed(bedfile)
for s, partialorder in bb.sub_beds():
name = partialorder[0].accn
bname = name.rsplit("_", 1)[0] if opts.prefix else s
scaffoldbuckets[bname].append([(b.accn, b.strand) for b in partialorder])
# Now the buckets contain a mixture of singletons and partially resolved
# scaffolds. Print the scaffolds first then remaining singletons.
for bname, scaffolds in sorted(scaffoldbuckets.items()):
ctgorder = []
singletons = set()
for scaf in sorted(scaffolds):
for node, orientation in scaf:
ctgorder.append((node, orientation))
if len(scaf) == 1:
singletons.add(node)
nscaffolds = len(scaffolds)
nsingletons = len(singletons)
if nsingletons == 1 and nscaffolds == 0:
phase = 3
elif nsingletons == 0 and nscaffolds == 1:
phase = 2
else:
phase = 1
msg = "{0}: Scaffolds={1} Singletons={2} Phase={3}".\
format(bname, nscaffolds, nsingletons, phase)
print >> sys.stderr, msg
print >> fwphase, "\t".join((bname, str(phase)))
order_to_agp(bname, ctgorder, sizes, fwagp)
fwagp.close()
os.remove(bedfile)
fastafile = "final.fasta"
build([newagpfile, ctgfasta, fastafile])
tidy([fastafile])