本文整理汇总了Python中jcvi.formats.bed.Bed.extend方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.extend方法的具体用法?Python Bed.extend怎么用?Python Bed.extend使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类jcvi.formats.bed.Bed
的用法示例。
在下文中一共展示了Bed.extend方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: patcher
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import extend [as 别名]
def patcher(args):
"""
%prog patcher backbone.bed other.bed
Given optical map alignment, prepare the patchers. Use --backbone to suggest
which assembly is the major one, and the patchers will be extracted from
another assembly.
"""
from jcvi.formats.bed import uniq
p = OptionParser(patcher.__doc__)
p.add_option("--backbone", default="OM",
help="Prefix of the backbone assembly [default: %default]")
p.add_option("--object", default="object",
help="New object name [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
backbonebed, otherbed = args
backbonebed = uniq([backbonebed])
otherbed = uniq([otherbed])
pf = backbonebed.split(".")[0]
key = lambda x: (x.seqid, x.start, x.end)
# Make a uniq bed keeping backbone at redundant intervals
cmd = "intersectBed -v -wa"
cmd += " -a {0} -b {1}".format(otherbed, backbonebed)
outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed
sh(cmd, outfile=outfile)
uniqbed = Bed()
uniqbedfile = pf + ".merged.bed"
uniqbed.extend(Bed(backbonebed))
uniqbed.extend(Bed(outfile))
uniqbed.print_to_file(uniqbedfile, sorted=True)
# Condense adjacent intervals, allow some chaining
bed = uniqbed
key = lambda x: range_parse(x.accn).seqid
bed_fn = pf + ".patchers.bed"
bed_fw = open(bed_fn, "w")
for k, sb in groupby(bed, key=key):
sb = list(sb)
chr, start, end, strand = merge_ranges(sb)
print >> bed_fw, "\t".join(str(x) for x in \
(chr, start, end, opts.object, 1000, strand))
bed_fw.close()
示例2: tips
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import extend [as 别名]
def tips(args):
"""
%prog tips patchers.bed complements.bed original.fasta backbone.fasta
Append telomeric sequences based on patchers and complements.
"""
p = OptionParser(tips.__doc__)
opts, args = p.parse_args(args)
if len(args) != 4:
sys.exit(not p.print_help())
pbedfile, cbedfile, sizesfile, bbfasta = args
pbed = Bed(pbedfile, sorted=False)
cbed = Bed(cbedfile, sorted=False)
complements = dict()
for object, beds in groupby(cbed, key=lambda x: x.seqid):
beds = list(beds)
complements[object] = beds
sizes = Sizes(sizesfile).mapping
bbsizes = Sizes(bbfasta).mapping
tbeds = []
for object, beds in groupby(pbed, key=lambda x: x.accn):
beds = list(beds)
startbed, endbed = beds[0], beds[-1]
start_id, end_id = startbed.seqid, endbed.seqid
if startbed.start == 1:
start_id = None
if endbed.end == sizes[end_id]:
end_id = None
print >> sys.stderr, object, start_id, end_id
if start_id:
b = complements[start_id][0]
b.accn = object
tbeds.append(b)
tbeds.append(BedLine("\t".join(str(x) for x in \
(object, 0, bbsizes[object], object, 1000, "+"))))
if end_id:
b = complements[end_id][-1]
b.accn = object
tbeds.append(b)
tbed = Bed()
tbed.extend(tbeds)
tbedfile = "tips.bed"
tbed.print_to_file(tbedfile)
示例3: install
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import extend [as 别名]
def install(args):
"""
%prog install patchers.bed patchers.fasta backbone.fasta alt.fasta
Install patches into backbone, using sequences from alternative assembly.
The patches sequences are generated via jcvi.assembly.patch.fill().
The output is a bedfile that can be converted to AGP using
jcvi.formats.agp.frombed().
"""
from jcvi.apps.align import blast
from jcvi.formats.fasta import SeqIO
p = OptionParser(install.__doc__)
p.set_rclip(rclip=1)
p.add_option("--maxsize", default=300000, type="int",
help="Maximum size of patchers to be replaced [default: %default]")
p.add_option("--prefix", help="Prefix of the new object [default: %default]")
p.add_option("--strict", default=False, action="store_true",
help="Only update if replacement has no gaps [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 4:
sys.exit(not p.print_help())
pbed, pfasta, bbfasta, altfasta = args
maxsize = opts.maxsize # Max DNA size to replace gap
rclip = opts.rclip
blastfile = blast([altfasta, pfasta,"--wordsize=100", "--pctid=99"])
order = Bed(pbed).order
beforebed, afterbed = blast_to_twobeds(blastfile, order, rclip=rclip,
maxsize=maxsize)
beforefasta = fastaFromBed(beforebed, bbfasta, name=True, stranded=True)
afterfasta = fastaFromBed(afterbed, altfasta, name=True, stranded=True)
# Exclude the replacements that contain more Ns than before
ah = SeqIO.parse(beforefasta, "fasta")
bh = SeqIO.parse(afterfasta, "fasta")
count_Ns = lambda x: x.seq.count('n') + x.seq.count('N')
exclude = set()
for arec, brec in zip(ah, bh):
an = count_Ns(arec)
bn = count_Ns(brec)
if opts.strict:
if bn == 0:
continue
elif bn < an:
continue
id = arec.id
exclude.add(id)
logging.debug("Ignore {0} updates because of decreasing quality."\
.format(len(exclude)))
abed = Bed(beforebed, sorted=False)
bbed = Bed(afterbed, sorted=False)
abed = [x for x in abed if x.accn not in exclude]
bbed = [x for x in bbed if x.accn not in exclude]
abedfile = "before.filtered.bed"
bbedfile = "after.filtered.bed"
afbed = Bed()
afbed.extend(abed)
bfbed = Bed()
bfbed.extend(bbed)
afbed.print_to_file(abedfile)
bfbed.print_to_file(bbedfile)
shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix)
示例4: shuffle_twobeds
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import extend [as 别名]
def shuffle_twobeds(afbed, bfbed, bbfasta, prefix=None):
# Shuffle the two bedfiles together
sz = Sizes(bbfasta)
sizes = sz.mapping
shuffled = "shuffled.bed"
border = bfbed.order
all = []
afbed.sort(key=afbed.nullkey)
totalids = len(sizes)
pad = int(math.log10(totalids)) + 1
cj = 0
seen = set()
accn = lambda x: "{0}{1:0{2}d}".format(prefix, x, pad)
for seqid, aa in afbed.sub_beds():
cj += 1
abeds, bbeds, beds = [], [], []
size = sizes[seqid]
ranges = [(x.seqid, x.start, x.end) for x in aa]
cranges = range_interleave(ranges, sizes={seqid: size}, empty=True)
for crange in cranges:
if crange:
seqid, start, end = crange
bedline = "\t".join(str(x) for x in (seqid, start - 1, end))
abeds.append(BedLine(bedline))
else:
abeds.append(None)
for a in aa:
gapid = a.accn
bi, b = border[gapid]
if a.strand == '-':
b.extra[1] = b.strand = ('-' if b.strand == '+' else '+')
bbeds.append(b)
n_abeds = len(abeds)
n_bbeds = len(bbeds)
assert n_abeds - n_bbeds == 1, \
"abeds: {0}, bbeds: {1}".format(n_abeds, n_bbeds)
beds = [x for x in roundrobin(abeds, bbeds) if x]
if prefix:
for b in beds:
b.accn = accn(cj)
all.extend(beds)
seen.add(seqid)
# Singletons
for seqid, size in sz.iter_sizes():
if seqid in seen:
continue
bedline = "\t".join(str(x) for x in (seqid, 0, size, accn(cj)))
b = BedLine(bedline)
cj += 1
if prefix:
b.accn = accn(cj)
all.append(b)
shuffledbed = Bed()
shuffledbed.extend(all)
shuffledbed.print_to_file(shuffled)
return shuffledbed
示例5: bambus
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import extend [as 别名]
def bambus(args):
"""
%prog bambus bambus.bed bambus.mates total.fasta
Insert unplaced scaffolds based on mates.
"""
from jcvi.utils.iter import pairwise
from jcvi.formats.posmap import MatesFile
p = OptionParser(bambus.__doc__)
p.add_option("--prefix", default="scaffold",
help="Prefix of the unplaced scaffolds [default: %default]")
p.add_option("--minlinks", default=3, type="int",
help="Minimum number of links to place [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
bedfile, matesfile, fastafile = args
pf = matesfile.rsplit(".", 1)[0]
logfile = pf + ".log"
log = open(logfile, "w")
mf = MatesFile(matesfile)
maxdist = max(x.max for x in mf.libraries.values())
logging.debug("Max separation: {0}".format(maxdist))
prefix = opts.prefix
minlinks = opts.minlinks
is_unplaced = lambda x: x.startswith(prefix)
bed = Bed(bedfile, sorted=False)
beds = []
unplaced = defaultdict(list)
for a, b in pairwise(bed):
aname, bname = a.accn, b.accn
aseqid, bseqid = a.seqid, b.seqid
if aname not in mf:
continue
pa, la = mf[aname]
if pa != bname:
continue
ia = is_unplaced(aseqid)
ib = is_unplaced(bseqid)
if ia == ib:
continue
if ia:
a, b = b, a
unplaced[b.seqid].append((a, b))
beds.extend([a, b])
sizes = Sizes(fastafile)
candidatebed = Bed()
cbeds = []
# For each unplaced scaffold, find most likely placement and orientation
for scf, beds in sorted(unplaced.items()):
print >> log
ranges = []
for a, b in beds:
aname, astrand = a.accn, a.strand
bname, bstrand = b.accn, b.strand
aseqid, bseqid = a.seqid, b.seqid
pa, lib = mf[aname]
print >> log, a
print >> log, b
flip_b = (astrand == bstrand)
fbstrand = '-' if flip_b else '+'
if flip_b:
b.reverse_complement(sizes)
lmin, lmax = lib.min, lib.max
L = sizes.get_size(scf)
assert astrand in ('+', '-')
if astrand == '+':
offset = a.start - b.end
sstart, sstop = offset + lmin, offset + lmax
else:
offset = a.end - b.start + L
sstart, sstop = offset - lmax, offset - lmin
# Prevent out of range error
size = sizes.get_size(aseqid)
sstart = max(0, sstart)
sstop = max(0, sstop)
sstart = min(size - 1, sstart)
sstop = min(size - 1, sstop)
start_range = (aseqid, sstart, sstop, scf, 1, fbstrand)
print >> log, "*" + "\t".join(str(x) for x in start_range)
ranges.append(start_range)
#.........这里部分代码省略.........
示例6: insert
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import extend [as 别名]
def insert(args):
"""
%prog insert candidates.bed gaps.bed chrs.fasta unplaced.fasta
Insert scaffolds into assembly.
"""
from jcvi.formats.agp import mask, bed
from jcvi.formats.sizes import agp
p = OptionParser(insert.__doc__)
opts, args = p.parse_args(args)
if len(args) != 4:
sys.exit(not p.print_help())
candidates, gapsbed, chrfasta, unplacedfasta = args
refinedbed = refine([candidates, gapsbed])
sizes = Sizes(unplacedfasta).mapping
cbed = Bed(candidates)
corder = cbed.order
gbed = Bed(gapsbed)
gorder = gbed.order
gpbed = Bed()
gappositions = {} # (chr, start, end) => gapid
fp = open(refinedbed)
gap_to_scf = defaultdict(list)
seen = set()
for row in fp:
atoms = row.split()
unplaced = atoms[3]
strand = atoms[5]
gapid = atoms[9]
if gapid not in seen:
seen.add(gapid)
gi, gb = gorder[gapid]
gpbed.append(gb)
gappositions[(gb.seqid, gb.start, gb.end)] = gapid
gap_to_scf[gapid].append((unplaced, strand))
gpbedfile = "candidate.gaps.bed"
gpbed.print_to_file(gpbedfile, sorted=True)
agpfile = agp([chrfasta])
maskedagpfile = mask([agpfile, gpbedfile])
maskedbedfile = maskedagpfile.rsplit(".", 1)[0] + ".bed"
bed([maskedagpfile, "--outfile={0}".format(maskedbedfile)])
mbed = Bed(maskedbedfile)
beds = []
for b in mbed:
sid = b.seqid
key = (sid, b.start, b.end)
if key not in gappositions:
beds.append(b)
continue
gapid = gappositions[key]
scfs = gap_to_scf[gapid]
# For scaffolds placed in the same gap, sort according to positions
scfs.sort(key=lambda x: corder[x[0]][1].start + corder[x[0]][1].end)
for scf, strand in scfs:
size = sizes[scf]
beds.append(BedLine("\t".join(str(x) for x in \
(scf, 0, size, sid, 1000, strand))))
finalbed = Bed()
finalbed.extend(beds)
finalbedfile = "final.bed"
finalbed.print_to_file(finalbedfile)
# Clean-up
toclean = [gpbedfile, agpfile, maskedagpfile, maskedbedfile]
FileShredder(toclean)