本文整理汇总了Python中jcvi.formats.bed.Bed.append方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.append方法的具体用法?Python Bed.append怎么用?Python Bed.append使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类jcvi.formats.bed.Bed
的用法示例。
在下文中一共展示了Bed.append方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: bed
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def bed(args):
"""
%prog bed anchorsfile
Convert ANCHORS file to BED format.
"""
from collections import defaultdict
from jcvi.compara.synteny import AnchorFile, check_beds
from jcvi.formats.bed import Bed, BedLine
from jcvi.formats.base import get_number
p = OptionParser(bed.__doc__)
p.add_option("--switch", default=False, action="store_true",
help="Switch reference and aligned map elements")
p.add_option("--scale", type="float",
help="Scale the aligned map distance by factor")
p.set_beds()
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
anchorsfile, = args
switch = opts.switch
scale = opts.scale
ac = AnchorFile(anchorsfile)
pairs = defaultdict(list)
for a, b, block_id in ac.iter_pairs():
pairs[a].append(b)
qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts)
bd = Bed()
for q in qbed:
qseqid, qstart, qend, qaccn = q.seqid, q.start, q.end, q.accn
if qaccn not in pairs:
continue
for s in pairs[qaccn]:
si, s = sorder[s]
sseqid, sstart, send, saccn = s.seqid, s.start, s.end, s.accn
if switch:
qseqid, sseqid = sseqid, qseqid
qstart, sstart = sstart, qstart
qend, send = send, qend
qaccn, saccn = saccn, qaccn
if scale:
sstart /= scale
try:
newsseqid = get_number(sseqid)
except ValueError:
raise ValueError, "`{0}` is on `{1}` with no number to extract".\
format(saccn, sseqid)
bedline = "\t".join(str(x) for x in (qseqid, qstart - 1, qend,
"{0}:{1}".format(newsseqid, sstart)))
bd.append(BedLine(bedline))
bd.print_to_file(filename=opts.outfile, sorted=True)
示例2: liftover
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def liftover(args):
"""
%prog liftover agpfile bedfile
Given coordinates in components, convert to the coordinates in chromosomes.
"""
p = OptionParser(liftover.__doc__)
p.add_option("--prefix", default=False, action="store_true",
help="Prepend prefix to accn names [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(p.print_help())
agpfile, bedfile = args
agp = AGP(agpfile).order
bed = Bed(bedfile)
newbed = Bed()
for b in bed:
component = b.seqid
if component not in agp:
newbed.append(b)
continue
i, a = agp[component]
assert a.component_beg < a.component_end
arange = a.component_beg, a.component_end
assert b.start < b.end
brange = b.start, b.end
st = range_intersect(arange, brange)
if not st:
continue
start, end = st
assert start <= end
if a.orientation == '-':
d = a.object_end + a.component_beg
s, t = d - end, d - start
else:
d = a.object_beg - a.component_beg
s, t = d + start, d + end
name = b.accn.replace(" ", "_")
if opts.prefix:
name = component + "_" + name
bline = "\t".join(str(x) for x in (a.object, s - 1, t, name))
newbed.append(BedLine(bline))
newbed.sort(key=newbed.nullkey)
newbed.print_to_file()
示例3: merge
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def merge(args):
"""
%prog merge map1 map2 map3 ...
Convert csv maps to bed format.
Each input map is csv formatted, for example:
ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition
scaffold_2707,11508,1,0
scaffold_2707,11525,1,1.2
scaffold_759,81336,1,9.7
"""
p = OptionParser(merge.__doc__)
p.add_option("-w", "--weightsfile", default="weights.txt",
help="Write weights to file")
p.set_outfile("out.bed")
opts, args = p.parse_args(args)
if len(args) < 1:
sys.exit(not p.print_help())
maps = args
outfile = opts.outfile
fp = must_open(maps)
b = Bed()
mapnames = set()
for row in fp:
mapname = fp.filename().split(".")[0]
mapnames.add(mapname)
try:
m = CSVMapLine(row, mapname=mapname)
if m.cm < 0:
logging.error("Ignore marker with negative genetic distance")
print >> sys.stderr, row.strip()
else:
b.append(BedLine(m.bedline))
except (IndexError, ValueError): # header or mal-formed line
continue
b.print_to_file(filename=outfile, sorted=True)
logging.debug("A total of {0} markers written to `{1}`.".\
format(len(b), outfile))
assert len(maps) == len(mapnames), "You have a collision in map names"
write_weightsfile(mapnames, weightsfile=opts.weightsfile)
示例4: mergebed
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def mergebed(args):
"""
%prog mergebed map1.bed map2.bed map3.bed ...
Combine bed maps to bed format, adding the map name.
"""
p = OptionParser(mergebed.__doc__)
p.add_option("-w", "--weightsfile", default="weights.txt",
help="Write weights to file")
p.set_outfile("out.bed")
opts, args = p.parse_args(args)
if len(args) < 1:
sys.exit(not p.print_help())
maps = args
outfile = opts.outfile
fp = must_open(maps)
b = Bed()
mapnames = set()
for row in fp:
mapname = fp.filename().split(".")[0]
mapnames.add(mapname)
try:
m = BedLine(row)
m.accn = "{0}-{1}".format(mapname, m.accn)
m.extra = ["{0}:{1}".format(m.seqid, m.start)]
b.append(m)
except (IndexError, ValueError): # header or mal-formed line
continue
b.print_to_file(filename=outfile, sorted=True)
logging.debug("A total of {0} markers written to `{1}`.".\
format(len(b), outfile))
assert len(maps) == len(mapnames), "You have a collision in map names"
write_weightsfile(mapnames, weightsfile=opts.weightsfile)
示例5: bed
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def bed(args):
'''
%prog bed gff_file [--options]
Parses the start, stop locations of the selected features out of GFF and
generate a bed file
'''
p = OptionParser(bed.__doc__)
p.add_option("--type", dest="type", default="gene",
help="Feature type to extract, use comma for multiple [default: %default]")
p.add_option("--key", dest="key", default="ID",
help="Key in the attributes to extract [default: %default]")
set_outfile(p)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
gffile, = args
key = opts.key
if key == "None":
key = None
type = set(x.strip() for x in opts.type.split(","))
gff = Gff(gffile, key=key)
b = Bed()
for g in gff:
if g.type not in type:
continue
b.append(g.bedline)
b.sort(key=b.key)
b.print_to_file(opts.outfile)
示例6: insert
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def insert(args):
"""
%prog insert candidates.bed gaps.bed chrs.fasta unplaced.fasta
Insert scaffolds into assembly.
"""
from jcvi.formats.agp import mask, bed
from jcvi.formats.sizes import agp
p = OptionParser(insert.__doc__)
opts, args = p.parse_args(args)
if len(args) != 4:
sys.exit(not p.print_help())
candidates, gapsbed, chrfasta, unplacedfasta = args
refinedbed = refine([candidates, gapsbed])
sizes = Sizes(unplacedfasta).mapping
cbed = Bed(candidates)
corder = cbed.order
gbed = Bed(gapsbed)
gorder = gbed.order
gpbed = Bed()
gappositions = {} # (chr, start, end) => gapid
fp = open(refinedbed)
gap_to_scf = defaultdict(list)
seen = set()
for row in fp:
atoms = row.split()
unplaced = atoms[3]
strand = atoms[5]
gapid = atoms[9]
if gapid not in seen:
seen.add(gapid)
gi, gb = gorder[gapid]
gpbed.append(gb)
gappositions[(gb.seqid, gb.start, gb.end)] = gapid
gap_to_scf[gapid].append((unplaced, strand))
gpbedfile = "candidate.gaps.bed"
gpbed.print_to_file(gpbedfile, sorted=True)
agpfile = agp([chrfasta])
maskedagpfile = mask([agpfile, gpbedfile])
maskedbedfile = maskedagpfile.rsplit(".", 1)[0] + ".bed"
bed([maskedagpfile, "--outfile={0}".format(maskedbedfile)])
mbed = Bed(maskedbedfile)
beds = []
for b in mbed:
sid = b.seqid
key = (sid, b.start, b.end)
if key not in gappositions:
beds.append(b)
continue
gapid = gappositions[key]
scfs = gap_to_scf[gapid]
# For scaffolds placed in the same gap, sort according to positions
scfs.sort(key=lambda x: corder[x[0]][1].start + corder[x[0]][1].end)
for scf, strand in scfs:
size = sizes[scf]
beds.append(BedLine("\t".join(str(x) for x in \
(scf, 0, size, sid, 1000, strand))))
finalbed = Bed()
finalbed.extend(beds)
finalbedfile = "final.bed"
finalbed.print_to_file(finalbedfile)
# Clean-up
toclean = [gpbedfile, agpfile, maskedagpfile, maskedbedfile]
FileShredder(toclean)
示例7: rename
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def rename(args):
"""
%prog rename genes.bed [gaps.bed]
Rename genes for annotation release.
For genes on chromosomes (e.g. the 12th gene on C1):
Bo1g00120
For genes on scaffolds (e.g. the 12th gene on unplaced Scaffold00285):
Bo00285s120
The genes identifiers will increment by 10. So assuming no gap, these are
the consecutive genes:
Bo1g00120, Bo1g00130, Bo1g00140...
Bo00285s120, Bo00285s130, Bo00285s140...
When we encounter gaps, we would like the increment to be larger. For example,
Bo1g00120, <gap>, Bo1g01120...
Gaps bed file is optional.
"""
import string
p = OptionParser(rename.__doc__)
p.add_option("-a", dest="gene_increment", default=10, type="int",
help="Increment for continuous genes [default: %default]")
p.add_option("-b", dest="gap_increment", default=1000, type="int",
help="Increment for gaps [default: %default]")
p.add_option("--pad0", default=6, type="int",
help="Pad gene identifiers with 0 [default: %default]")
p.add_option("--spad0", default=4, type="int",
help="Pad gene identifiers on small scaffolds [default: %default]")
p.add_option("--prefix", default="Bo",
help="Genome prefix [default: %default]")
p.add_option("--jgi", default=False, action="store_true",
help="Create JGI style identifier PREFIX.NN[G|TE]NNNNN.1" + \
" [default: %default]")
opts, args = p.parse_args(args)
if len(args) not in (1, 2):
sys.exit(not p.print_help())
genebed = args[0]
gapbed = args[1] if len(args) == 2 else None
prefix = opts.prefix
gene_increment = opts.gene_increment
gap_increment = opts.gap_increment
genes = Bed(genebed)
if gapbed:
fp = open(gapbed)
for row in fp:
genes.append(BedLine(row))
genes.sort(key=genes.key)
idsfile = prefix + ".ids"
newbedfile = prefix + ".bed"
gap_increment -= gene_increment
assert gap_increment >= 0
if opts.jgi:
prefix += "."
fw = open(idsfile, "w")
for chr, lines in groupby(genes, key=lambda x: x.seqid):
lines = list(lines)
pad0 = opts.pad0 if len(lines) > 1000 else opts.spad0
isChr = chr[0].upper() == 'C'
digits = "".join(x for x in chr if x in string.digits)
gs = "g" if isChr else "s"
pp = prefix + digits + gs
idx = 0
if isChr:
idx += gap_increment
for r in lines:
isGap = r.strand not in ("+", "-")
if isGap:
idx += gap_increment
continue
else:
idx += gene_increment
accn = pp + "{0:0{1}d}".format(idx, pad0)
oldaccn = r.accn
print >> fw, "\t".join((oldaccn, accn))
r.accn = accn
genes.print_to_file(newbedfile)
logging.debug("Converted IDs written to `{0}`.".format(idsfile))
logging.debug("Converted bed written to `{0}`.".format(newbedfile))
示例8: variation
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def variation(args):
"""
%prog variation P1.bed P2.bed F1.bed
Associate IES in parents and progeny.
"""
p = OptionParser(variation.__doc__)
p.add_option("--diversity", choices=("breakpoint", "variant"),
default="variant", help="Plot diversity")
opts, args, iopts = p.set_image_options(args, figsize="6x6")
if len(args) != 3:
sys.exit(not p.print_help())
pfs = [op.basename(x).split('-')[0] for x in args]
P1, P2, F1 = pfs
newbedfile = "-".join(pfs) + ".bed"
if need_update(args, newbedfile):
newbed = Bed()
for pf, filename in zip(pfs, args):
bed = Bed(filename)
for b in bed:
b.accn = "-".join((pf, b.accn))
b.score = None
newbed.append(b)
newbed.print_to_file(newbedfile, sorted=True)
neworder = Bed(newbedfile).order
mergedbedfile = mergeBed(newbedfile, nms=True)
bed = Bed(mergedbedfile)
valid = 0
total_counts = Counter()
F1_counts = []
bp_diff = []
novelbedfile = "novel.bed"
fw = open(novelbedfile, "w")
for b in bed:
accns = b.accn.split(',')
pfs_accns = [x.split("-")[0] for x in accns]
pfs_counts = Counter(pfs_accns)
if len(pfs_counts) != 3:
print >> fw, b
continue
valid += 1
total_counts += pfs_counts
F1_counts.append(pfs_counts[F1])
# Collect breakpoint positions between P1 and F1
P1_accns = [x for x in accns if x.split("-")[0] == P1]
F1_accns = [x for x in accns if x.split("-")[0] == F1]
if len(P1_accns) != 1:
continue
ri, ref = neworder[P1_accns[0]]
P1_accns = [neworder[x][-1] for x in F1_accns]
bp_diff.extend(x.start - ref.start for x in P1_accns)
bp_diff.extend(x.end - ref.end for x in P1_accns)
print >> sys.stderr, \
"A total of {0} sites show consistent deletions across samples.".\
format(percentage(valid, len(bed)))
for pf, count in total_counts.items():
print >> sys.stderr, "{0:>9}: {1:.2f} deletions/site".\
format(pf, count * 1. / valid)
F1_counts = Counter(F1_counts)
# Plot the IES variant number diversity
from jcvi.graphics.base import plt, savefig, set_ticklabels_helvetica
fig = plt.figure(1, (iopts.w, iopts.h))
if opts.diversity == "variant":
left, height = zip(*sorted(F1_counts.items()))
for l, h in zip(left, height):
print >> sys.stderr, "{0:>9} variants: {1}".format(l, h)
plt.text(l, h + 5, str(h), color="darkslategray", size=8,
ha="center", va="bottom", rotation=90)
plt.bar(left, height, align="center")
plt.xlabel("Identified number of IES per site")
plt.ylabel("Counts")
plt.title("IES variation in progeny pool")
ax = plt.gca()
set_ticklabels_helvetica(ax)
savefig(F1 + ".counts.pdf")
# Plot the IES breakpoint position diversity
else:
bp_diff = Counter(bp_diff)
bp_diff_abs = Counter()
for k, v in bp_diff.items():
bp_diff_abs[abs(k)] += v
plt.figure(1, (iopts.w, iopts.h))
left, height = zip(*sorted(bp_diff_abs.items()))
for l, h in zip(left, height)[:21]:
plt.text(l, h + 50, str(h), color="darkslategray", size=8,
ha="center", va="bottom", rotation=90)
plt.bar(left, height, align="center")
#.........这里部分代码省略.........
示例9: movie
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def movie(args):
"""
%prog movie test.tour test.clm ref.contigs.last
Plot optimization history.
"""
p = OptionParser(movie.__doc__)
p.add_option("--frames", default=500, type="int",
help="Only plot every N frames")
p.add_option("--engine", default="ffmpeg", choices=("ffmpeg", "gifsicle"),
help="Movie engine, output MP4 or GIF")
p.set_beds()
opts, args, iopts = p.set_image_options(args, figsize="16x8",
style="white", cmap="coolwarm",
format="png", dpi=300)
if len(args) != 3:
sys.exit(not p.print_help())
tourfile, clmfile, lastfile = args
tourfile = op.abspath(tourfile)
clmfile = op.abspath(clmfile)
lastfile = op.abspath(lastfile)
cwd = os.getcwd()
odir = op.basename(tourfile).rsplit(".", 1)[0] + "-movie"
anchorsfile, qbedfile, contig_to_beds = \
prepare_synteny(tourfile, lastfile, odir, p, opts)
args = []
for i, label, tour, tour_o in iter_tours(tourfile, frames=opts.frames):
padi = "{:06d}".format(i)
# Make sure the anchorsfile and bedfile has the serial number in,
# otherwise parallelization may fail
a, b = op.basename(anchorsfile).split(".", 1)
ianchorsfile = a + "_" + padi + "." + b
symlink(anchorsfile, ianchorsfile)
# Make BED file with new order
qb = Bed()
for contig, o in zip(tour, tour_o):
if contig not in contig_to_beds:
continue
bedlines = contig_to_beds[contig][:]
if o == '-':
bedlines.reverse()
for x in bedlines:
qb.append(x)
a, b = op.basename(qbedfile).split(".", 1)
ibedfile = a + "_" + padi + "." + b
qb.print_to_file(ibedfile)
# Plot dot plot, but do not sort contigs by name (otherwise losing
# order)
image_name = padi + "." + iopts.format
tour = ",".join(tour)
args.append([[tour, clmfile, ianchorsfile,
"--outfile", image_name, "--label", label]])
Jobs(movieframe, args).run()
os.chdir(cwd)
make_movie(odir, odir, engine=opts.engine, format=iopts.format)
示例10: refine
# 需要导入模块: from jcvi.formats.bed import Bed [as 别名]
# 或者: from jcvi.formats.bed.Bed import append [as 别名]
def refine(args):
"""
%prog refine breakpoints.bed gaps.bed
Find gaps within or near breakpoint region.
For breakpoint regions with no gaps, there are two options:
- Break in the middle of the region
- Break at the closest gap (--closest)
"""
p = OptionParser(refine.__doc__)
p.add_option("--closest", default=False, action="store_true",
help="In case of no gaps, use closest [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
breakpointsbed, gapsbed = args
ncols = len(open(breakpointsbed).next().split())
logging.debug("File {0} contains {1} columns.".format(breakpointsbed, ncols))
cmd = "intersectBed -wao -a {0} -b {1}".format(breakpointsbed, gapsbed)
pf = "{0}.{1}".format(breakpointsbed.split(".")[0], gapsbed.split(".")[0])
ingapsbed = pf + ".bed"
sh(cmd, outfile=ingapsbed)
fp = open(ingapsbed)
data = [x.split() for x in fp]
nogapsbed = pf + ".nogaps.bed"
largestgapsbed = pf + ".largestgaps.bed"
nogapsfw = open(nogapsbed, "w")
largestgapsfw = open(largestgapsbed, "w")
for b, gaps in groupby(data, key=lambda x: x[:ncols]):
gaps = list(gaps)
gap = gaps[0]
if len(gaps) == 1 and gap[-1] == "0":
assert gap[-3] == "."
print("\t".join(b), file=nogapsfw)
continue
gaps = [(int(x[-1]), x) for x in gaps]
maxgap = max(gaps)[1]
print("\t".join(maxgap), file=largestgapsfw)
nogapsfw.close()
largestgapsfw.close()
beds = [largestgapsbed]
toclean = [nogapsbed, largestgapsbed]
if opts.closest:
closestgapsbed = pf + ".closestgaps.bed"
cmd = "closestBed -a {0} -b {1} -d".format(nogapsbed, gapsbed)
sh(cmd, outfile=closestgapsbed)
beds += [closestgapsbed]
toclean += [closestgapsbed]
else:
pointbed = pf + ".point.bed"
pbed = Bed()
bed = Bed(nogapsbed)
for b in bed:
pos = (b.start + b.end) / 2
b.start, b.end = pos, pos
pbed.append(b)
pbed.print_to_file(pointbed)
beds += [pointbed]
toclean += [pointbed]
refinedbed = pf + ".refined.bed"
FileMerger(beds, outfile=refinedbed).merge()
# Clean-up
FileShredder(toclean)
return refinedbed