本文整理汇总了Python中jcvi.formats.sizes.Sizes类的典型用法代码示例。如果您正苦于以下问题:Python Sizes类的具体用法?Python Sizes怎么用?Python Sizes使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Sizes类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pasteprepare
def pasteprepare(args):
"""
%prog pasteprepare bacs.fasta
Prepare sequences for paste.
"""
p = OptionParser(pasteprepare.__doc__)
p.add_option("--flank", default=5000, type="int",
help="Get the seq of size on two ends [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
goodfasta, = args
flank = opts.flank
pf = goodfasta.rsplit(".", 1)[0]
extbed = pf + ".ext.bed"
sizes = Sizes(goodfasta)
fw = open(extbed, "w")
for bac, size in sizes.iter_sizes():
print >> fw, "\t".join(str(x) for x in \
(bac, 0, min(flank, size), bac + "L"))
print >> fw, "\t".join(str(x) for x in \
(bac, max(size - flank, 0), size, bac + "R"))
fw.close()
fastaFromBed(extbed, goodfasta, name=True)
示例2: main
def main():
p = OptionParser(__doc__)
p.add_option("--order",
help="The order to plot the tracks, comma-separated")
opts, args, iopts = p.set_image_options()
if len(args) != 3:
sys.exit(not p.print_help())
chr, sizes, datadir = args
order = opts.order
hlsuffix = opts.hlsuffix
if order:
order = order.split(",")
sizes = Sizes(sizes)
fig = plt.figure(1, (iopts.w, iopts.h))
root = fig.add_axes([0, 0, 1, 1])
canvas = (.12, .35, .8, .35)
chr_size = sizes.get_size(chr)
c = Coverage(fig, root, canvas, chr, (0, chr_size), datadir,
order=order, hlsuffix=hlsuffix)
root.set_xlim(0, 1)
root.set_ylim(0, 1)
root.set_axis_off()
image_name = chr + "." + iopts.format
savefig(image_name, dpi=iopts.dpi, iopts=iopts)
示例3: agp
def agp(args):
"""
%prog agp main_results/ contigs.fasta
Generate AGP file based on LACHESIS output.
"""
p = OptionParser(agp.__doc__)
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
odir, contigsfasta = args
fwagp = must_open(opts.outfile, 'w')
orderingfiles = natsorted(iglob(odir, "*.ordering"))
sizes = Sizes(contigsfasta).mapping
contigs = set(sizes.keys())
anchored = set()
for ofile in orderingfiles:
co = ContigOrdering(ofile)
anchored |= set([x.contig_name for x in co])
obj = op.basename(ofile).split('.')[0]
co.write_agp(obj, sizes, fwagp)
singletons = contigs - anchored
logging.debug('Anchored: {}, Singletons: {}'.
format(len(anchored), len(singletons)))
for s in natsorted(singletons):
order_to_agp(s, [(s, "?")], sizes, fwagp)
示例4: bincount
def bincount(args):
"""
%prog bincount fastafile binfile
Count K-mers in the bin.
"""
from bitarray import bitarray
from jcvi.formats.sizes import Sizes
p = OptionParser(bincount.__doc__)
p.add_option("-K", default=23, type="int", help="K-mer size [default: %default]")
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
fastafile, binfile = args
K = opts.K
fp = open(binfile)
a = bitarray()
a.fromfile(fp)
f = Sizes(fastafile)
tsize = 0
fw = must_open(opts.outfile, "w")
for name, seqlen in f.iter_sizes():
ksize = seqlen - K + 1
b = a[tsize : tsize + ksize]
bcount = b.count()
print >> fw, "\t".join(str(x) for x in (name, bcount))
tsize += ksize
示例5: longest
def longest(args):
"""
%prog longest bedfile fastafile
Select longest feature within overlapping piles.
"""
from jcvi.formats.sizes import Sizes
p = OptionParser(longest.__doc__)
p.add_option("--maxsize", default=20000, type="int",
help="Limit max size")
p.add_option("--minsize", default=60, type="int",
help="Limit min size")
p.add_option("--precedence", default="Medtr",
help="Accessions with prefix take precedence")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
bedfile, fastafile = args
maxsize = opts.maxsize
minsize = opts.minsize
prec = opts.precedence
mergedbed = mergeBed(bedfile, nms=True)
sizes = Sizes(fastafile).mapping
bed = Bed(mergedbed)
pf = bedfile.rsplit(".", 1)[0]
ids = set()
for b in bed:
accns = b.accn.split(";")
prec_accns = [x for x in accns if x.startswith(prec)]
if prec_accns:
accns = prec_accns
accn_sizes = [(sizes.get(x, 0), x) for x in accns]
accn_sizes = [(size, x) for size, x in accn_sizes if size < maxsize]
if not accn_sizes:
continue
max_size, max_accn = max(accn_sizes)
if max_size < minsize:
continue
ids.add(max_accn)
newids = remove_isoforms(ids)
logging.debug("Remove isoforms: before={0} after={1}".\
format(len(ids), len(newids)))
longestidsfile = pf + ".longest.ids"
fw = open(longestidsfile, "w")
print >> fw, "\n".join(newids)
fw.close()
logging.debug("A total of {0} records written to `{1}`.".\
format(len(newids), longestidsfile))
longestbedfile = pf + ".longest.bed"
some([bedfile, longestidsfile, "--outfile={0}".format(longestbedfile),
"--no_strip_names"])
示例6: scaffold
def scaffold(args):
"""
%prog scaffold scaffold.fasta synteny.blast synteny.sizes synteny.bed
physicalmap.blast physicalmap.sizes physicalmap.bed
As evaluation of scaffolding, visualize external line of evidences:
* Plot synteny to an external genome
* Plot alignments to physical map
* Plot alignments to genetic map (TODO)
Each trio defines one panel to be plotted. blastfile defines the matchings
between the evidences vs scaffolds. Then the evidence sizes, and evidence
bed to plot dot plots.
This script will plot a dot in the dot plot in the corresponding location
the plots are one contig/scaffold per plot.
"""
from jcvi.graphics.base import set_image_options
from jcvi.utils.iter import grouper
p = OptionParser(scaffold.__doc__)
p.add_option("--cutoff", type="int", default=1000000,
help="Plot scaffolds with size larger than [default: %default]")
p.add_option("--highlights",
help="A set of regions in BED format to highlight [default: %default]")
opts, args, iopts = set_image_options(p, args, figsize="14x8", dpi=150)
if len(args) < 4 or len(args) % 3 != 1:
sys.exit(not p.print_help())
highlights = opts.highlights
scafsizes = Sizes(args[0])
trios = list(grouper(3, args[1:]))
trios = [(a, Sizes(b), Bed(c)) for a, b, c in trios]
if highlights:
hlbed = Bed(highlights)
for scaffoldID, scafsize in scafsizes.iter_sizes():
if scafsize < opts.cutoff:
continue
logging.debug("Loading {0} (size={1})".format(scaffoldID,
thousands(scafsize)))
tmpname = scaffoldID + ".sizes"
tmp = open(tmpname, "w")
tmp.write("{0}\t{1}".format(scaffoldID, scafsize))
tmp.close()
tmpsizes = Sizes(tmpname)
tmpsizes.close(clean=True)
if highlights:
subhighlights = list(hlbed.sub_bed(scaffoldID))
imagename = ".".join((scaffoldID, opts.format))
plot_one_scaffold(scaffoldID, tmpsizes, None, trios, imagename, iopts,
highlights=subhighlights)
示例7: __init__
def __init__(self, filename, fastafile):
super(EvidenceFile, self).__init__(filename)
sz = Sizes(fastafile)
sizes = [None] # tig-list starts at 1
for name, size in sz.iter_sizes():
sizes.append((name, size))
self.sizes = sizes
self.sz = sz.mapping
self.scf = {}
示例8: write_unplaced_agp
def write_unplaced_agp(agpfile, scaffolds, unplaced_agp):
agp = AGP(agpfile)
scaffolds_seen = set(x.component_id for x in agp)
sizes = Sizes(scaffolds).mapping
fwagp = must_open(unplaced_agp, "w")
for s in sorted(sizes.keys()):
if s in scaffolds_seen:
continue
order_to_agp(s, [(s, "?")], sizes, fwagp)
logging.debug("Write unplaced AGP to `{0}`.".format(unplaced_agp))
示例9: graph_to_agp
def graph_to_agp(g, blastfile, subjectfasta, exclude=[], verbose=False):
from jcvi.formats.agp import order_to_agp
logging.debug(str(g))
g.write("graph.txt")
#g.draw("graph.pdf")
paths = []
for path in g.iter_paths():
m, oo = g.path(path)
if len(oo) == 1: # Singleton path
continue
paths.append(oo)
if verbose:
print m
print oo
npaths = len(paths)
ntigs = sum(len(x) for x in paths)
logging.debug("Graph decomposed to {0} paths with {1} components.".\
format(npaths, ntigs))
agpfile = blastfile + ".agp"
sizes = Sizes(subjectfasta)
fwagp = open(agpfile, "w")
scaffolded = set()
for i, oo in enumerate(paths):
ctgorder = [(str(ctg), ("+" if strand else "-")) \
for ctg, strand in oo]
scaffolded |= set(ctg for ctg, strand in ctgorder)
object = "pmol_{0:04d}".format(i)
order_to_agp(object, ctgorder, sizes.mapping, fwagp)
# Get the singletons as well
nsingletons = nscaffolded = nexcluded = 0
for ctg, size in sizes.iter_sizes():
if ctg in scaffolded:
nscaffolded += 1
continue
if ctg in exclude:
nexcluded += 1
continue
ctgorder = [(ctg, "+")]
object = ctg
order_to_agp(object, ctgorder, sizes.mapping, fwagp)
nsingletons += 1
logging.debug("scaffolded={} excluded={} singletons={}".\
format(nscaffolded, nexcluded, nsingletons))
fwagp.close()
logging.debug("AGP file written to `{0}`.".format(agpfile))
示例10: covlen
def covlen(args):
"""
%prog covlen covfile fastafile
Plot coverage vs length. `covfile` is two-column listing contig id and
depth of coverage.
"""
import numpy as np
import pandas as pd
import seaborn as sns
from jcvi.formats.base import DictFile
p = OptionParser(covlen.__doc__)
p.add_option("--maxsize", default=1000000, type="int", help="Max contig size")
p.add_option("--maxcov", default=100, type="int", help="Max contig size")
p.add_option("--color", default='m', help="Color of the data points")
p.add_option("--kind", default="scatter",
choices=("scatter", "reg", "resid", "kde", "hex"),
help="Kind of plot to draw")
opts, args, iopts = p.set_image_options(args, figsize="8x8")
if len(args) != 2:
sys.exit(not p.print_help())
covfile, fastafile = args
cov = DictFile(covfile, cast=float)
s = Sizes(fastafile)
data = []
maxsize, maxcov = opts.maxsize, opts.maxcov
for ctg, size in s.iter_sizes():
c = cov.get(ctg, 0)
if size > maxsize:
continue
if c > maxcov:
continue
data.append((size, c))
x, y = zip(*data)
x = np.array(x)
y = np.array(y)
logging.debug("X size {0}, Y size {1}".format(x.size, y.size))
df = pd.DataFrame()
xlab, ylab = "Length", "Coverage of depth (X)"
df[xlab] = x
df[ylab] = y
sns.jointplot(xlab, ylab, kind=opts.kind, data=df,
xlim=(0, maxsize), ylim=(0, maxcov),
stat_func=None, edgecolor="w", color=opts.color)
figname = covfile + ".pdf"
savefig(figname, dpi=iopts.dpi, iopts=iopts)
示例11: main
def main(args):
"""
%prog deltafile
Plot one query. Extract the references that have major matches to this
query. Control "major" by option --refcov.
"""
p = OptionParser(main.__doc__)
p.add_option("--refids", help="Use subset of contigs in the ref")
p.add_option("--refcov", default=.01, type="float",
help="Minimum reference coverage [default: %default]")
p.add_option("--all", default=False, action="store_true",
help="Plot one pdf file per ref in refidsfile [default: %default]")
p.add_option("--color", default="similarity",
choices=("similarity", "direction", "none"),
help="Color the dots based on")
p.add_option("--nolayout", default=False, action="store_true",
help="Do not rearrange contigs")
p.set_align(pctid=0, hitlen=0)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
deltafile, = args
reffasta, queryfasta = open(deltafile).readline().split()
color = opts.color
layout = not opts.nolayout
prefix = op.basename(deltafile).split(".")[0]
qsizes = Sizes(queryfasta).mapping
rsizes = Sizes(reffasta).mapping
refs = SetFile(opts.refids) if opts.refids else set(rsizes.keys())
refcov = opts.refcov
pctid = opts.pctid
hitlen = opts.hitlen
deltafile = filter([deltafile, "--pctid={0}".format(pctid),
"--hitlen={0}".format(hitlen)])
if opts.all:
for r in refs:
pdffile = plot_some_queries([r], qsizes, rsizes, deltafile, refcov,
prefix=prefix, color=color,
layout=layout)
if pdffile:
sh("mv {0} {1}.pdf".format(pdffile, r))
else:
plot_some_queries(refs, qsizes, rsizes,
deltafile, refcov,
prefix=prefix, color=color, layout=layout)
示例12: bed
def bed(args):
"""
%prog bed binfile fastafile
Write bed files where the bases have at least certain depth.
"""
p = OptionParser(bed.__doc__)
p.add_option("-o", dest="output", default="stdout",
help="Output file name [default: %default]")
p.add_option("--cutoff", dest="cutoff", default=10, type="int",
help="Minimum read depth to report intervals [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
binfile, fastafile = args
fw = must_open(opts.output, "w")
cutoff = opts.cutoff
assert cutoff >= 0, "Need non-negative cutoff"
b = BinFile(binfile)
ar = b.array
fastasize, sizes, offsets = get_offsets(fastafile)
s = Sizes(fastafile)
for ctg, ctglen in s.iter_sizes():
offset = offsets[ctg]
subarray = ar[offset:offset + ctglen]
key = lambda x: x[1] >= cutoff
for tf, array_elements in groupby(enumerate(subarray), key=key):
array_elements = list(array_elements)
if not tf:
continue
# 0-based system => 1-based system
start = array_elements[0][0] + 1
end = array_elements[-1][0] + 1
mean_depth = sum([x[1] for x in array_elements]) / \
len(array_elements)
mean_depth = int(mean_depth)
name = "na"
print >> fw, "\t".join(str(x) for x in (ctg, \
start - 1, end, name, mean_depth))
示例13: covlen
def covlen(args):
"""
%prog covlen covfile fastafile
Plot coverage vs lenght. `covfile` is two-column listing contig id and
depth of coverage.
"""
import numpy as np
import seaborn as sns
from jcvi.formats.base import DictFile
p = OptionParser(covlen.__doc__)
p.add_option("--maxsize", default=100000, type="int", help="Max contig size")
p.add_option("--maxcov", default=100, type="int", help="Max contig size")
opts, args, iopts = p.set_image_options(args, figsize="8x8")
if len(args) != 2:
sys.exit(not p.print_help())
covfile, fastafile = args
cov = DictFile(covfile, cast=float)
s = Sizes(fastafile)
data = []
maxsize, maxcov = opts.maxsize, opts.maxcov
for ctg, size in s.iter_sizes():
c = cov[ctg]
if size > maxsize:
continue
if c > maxcov:
continue
data.append((size, c))
x, y = zip(*data)
x = np.array(x)
y = np.array(y)
logging.debug("X size {0}, Y size {1}".format(x.size, y.size))
sns.jointplot(x, y, kind="kde")
figname = covfile + ".pdf"
savefig(figname, dpi=iopts.dpi, iopts=iopts)
示例14: bed2chain
def bed2chain(args):
from maize.formats.sizes import Sizes
tdic = Sizes(args.tsize)
qdic = Sizes(args.qsize)
firstline = True
cid0, tName0, qName0, srd0, locs = '', '', '', '', []
for line in must_open(args.fi):
line = line.rstrip("\n")
if not line:
continue
tName, tStart, tEnd, srd, qName, qStart, qEnd, cid = line.split()[:8]
tStart, tEnd, qStart, qEnd = int(tStart), int(tEnd), int(qStart), int(qEnd)
if firstline:
cid0, tName0, qName0, srd0 = cid, tName, qName, srd
locs.append([tStart, tEnd, qStart, qEnd])
firstline = False
elif cid0 == cid:
assert tName == tName0 and qName == qName0 and srd == srd0, "inconsistent info in chain"
locs.append([tStart, tEnd, qStart, qEnd])
else:
print_chain(cid0, tName0, qName0, srd0, tdic.get_size(tName0), qdic.get_size(qName0), locs)
cid0, tName0, qName0, srd0 = cid, tName, qName, srd
locs = [[tStart, tEnd, qStart, qEnd]]
print_chain(cid0, tName0, qName0, srd0, tdic.get_size(tName0), qdic.get_size(qName0), locs)
示例15: stack
def stack(args):
"""
%prog stack fastafile
Create landscape plots that show the amounts of genic sequences, and repetitive
sequences along the chromosomes.
"""
p = OptionParser(stack.__doc__)
p.add_option("--top", default=10, type="int",
help="Draw the first N chromosomes [default: %default]")
p.add_option("--stacks",
default="Exons,Introns,DNA_transposons,Retrotransposons",
help="Features to plot in stackplot [default: %default]")
p.add_option("--switch",
help="Change chr names based on two-column file [default: %default]")
add_window_options(p)
opts, args, iopts = p.set_image_options(args, figsize="8x8")
if len(args) != 1:
sys.exit(not p.print_help())
fastafile, = args
top = opts.top
window, shift, subtract = check_window_options(opts)
switch = opts.switch
if switch:
switch = DictFile(opts.switch)
stacks = opts.stacks.split(",")
bedfiles = get_beds(stacks)
binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract)
sizes = Sizes(fastafile)
s = list(sizes.iter_sizes())[:top]
maxl = max(x[1] for x in s)
margin = .08
inner = .02 # y distance between tracks
pf = fastafile.rsplit(".", 1)[0]
fig = plt.figure(1, (iopts.w, iopts.h))
root = fig.add_axes([0, 0, 1, 1])
# Gauge
ratio = draw_gauge(root, margin, maxl)
# Per chromosome
yinterval = (1 - 2 * margin) / (top + 1)
xx = margin
yy = 1 - margin
for chr, clen in s:
yy -= yinterval
xlen = clen / ratio
cc = chr
if "_" in chr:
ca, cb = chr.split("_")
cc = ca[0].upper() + cb
if switch and cc in switch:
cc = "\n".join((cc, "({0})".format(switch[cc])))
root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
ax = fig.add_axes([xx, yy, xlen, yinterval - inner])
nbins = clen / shift
if clen % shift:
nbins += 1
stackplot(ax, binfiles, nbins, palette, chr, window, shift)
root.text(xx - .04, yy + .5 * (yinterval - inner), cc, ha="center", va="center")
ax.set_xlim(0, nbins)
ax.set_ylim(0, 1)
ax.set_axis_off()
# Legends
yy -= yinterval
xx = margin
for b, p in zip(bedfiles, palette):
b = b.rsplit(".", 1)[0].replace("_", " ")
b = Registration.get(b, b)
root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0))
xx += 2 * inner
root.text(xx, yy, b, size=13)
xx += len(b) * .012 + inner
root.set_xlim(0, 1)
root.set_ylim(0, 1)
root.set_axis_off()
image_name = pf + "." + iopts.format
savefig(image_name, dpi=iopts.dpi, iopts=iopts)