本文整理汇总了Python中jcvi.formats.sizes.Sizes.keys方法的典型用法代码示例。如果您正苦于以下问题:Python Sizes.keys方法的具体用法?Python Sizes.keys怎么用?Python Sizes.keys使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类jcvi.formats.sizes.Sizes
的用法示例。
在下文中一共展示了Sizes.keys方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: agp
# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def agp(args):
"""
%prog agp main_results/ contigs.fasta
Generate AGP file based on LACHESIS output.
"""
p = OptionParser(agp.__doc__)
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
odir, contigsfasta = args
fwagp = must_open(opts.outfile, 'w')
orderingfiles = natsorted(iglob(odir, "*.ordering"))
sizes = Sizes(contigsfasta).mapping
contigs = set(sizes.keys())
anchored = set()
for ofile in orderingfiles:
co = ContigOrdering(ofile)
anchored |= set([x.contig_name for x in co])
obj = op.basename(ofile).split('.')[0]
co.write_agp(obj, sizes, fwagp)
singletons = contigs - anchored
logging.debug('Anchored: {}, Singletons: {}'.
format(len(anchored), len(singletons)))
for s in natsorted(singletons):
order_to_agp(s, [(s, "?")], sizes, fwagp)
示例2: write_unplaced_agp
# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def write_unplaced_agp(agpfile, scaffolds, unplaced_agp):
agp = AGP(agpfile)
scaffolds_seen = set(x.component_id for x in agp)
sizes = Sizes(scaffolds).mapping
fwagp = must_open(unplaced_agp, "w")
for s in sorted(sizes.keys()):
if s in scaffolds_seen:
continue
order_to_agp(s, [(s, "?")], sizes, fwagp)
logging.debug("Write unplaced AGP to `{0}`.".format(unplaced_agp))
示例3: main
# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def main(args):
"""
%prog deltafile
Plot one query. Extract the references that have major matches to this
query. Control "major" by option --refcov.
"""
p = OptionParser(main.__doc__)
p.add_option("--refids", help="Use subset of contigs in the ref")
p.add_option("--refcov", default=.01, type="float",
help="Minimum reference coverage [default: %default]")
p.add_option("--all", default=False, action="store_true",
help="Plot one pdf file per ref in refidsfile [default: %default]")
p.add_option("--color", default="similarity",
choices=("similarity", "direction", "none"),
help="Color the dots based on")
p.add_option("--nolayout", default=False, action="store_true",
help="Do not rearrange contigs")
p.set_align(pctid=0, hitlen=0)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
deltafile, = args
reffasta, queryfasta = open(deltafile).readline().split()
color = opts.color
layout = not opts.nolayout
prefix = op.basename(deltafile).split(".")[0]
qsizes = Sizes(queryfasta).mapping
rsizes = Sizes(reffasta).mapping
refs = SetFile(opts.refids) if opts.refids else set(rsizes.keys())
refcov = opts.refcov
pctid = opts.pctid
hitlen = opts.hitlen
deltafile = filter([deltafile, "--pctid={0}".format(pctid),
"--hitlen={0}".format(hitlen)])
if opts.all:
for r in refs:
pdffile = plot_some_queries([r], qsizes, rsizes, deltafile, refcov,
prefix=prefix, color=color,
layout=layout)
if pdffile:
sh("mv {0} {1}.pdf".format(pdffile, r))
else:
plot_some_queries(refs, qsizes, rsizes,
deltafile, refcov,
prefix=prefix, color=color, layout=layout)
示例4: covfilter
# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def covfilter(args):
"""
%prog covfilter blastfile fastafile
Fastafile is used to get the sizes of the queries. Two filters can be
applied, the id% and cov%.
"""
p = OptionParser(covfilter.__doc__)
p.add_option("--pctid", dest="pctid", default=90, type="int",
help="Percentage identity cutoff [default: %default]")
p.add_option("--pctcov", dest="pctcov", default=50, type="int",
help="Percentage identity cutoff [default: %default]")
p.add_option("--ids", dest="ids", default=None,
help="Print out the ids that satisfy [default: %default]")
p.add_option("--list", dest="list", default=False, action="store_true",
help="List the id% and cov% per gene [default: %default]")
set_outfile(p, outfile=None)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
from jcvi.algorithms.supermap import supermap
blastfile, fastafile = args
sizes = Sizes(fastafile).mapping
querysupermap = blastfile + ".query.supermap"
if not op.exists(querysupermap):
supermap(blastfile, filter="query")
blastfile = querysupermap
assert op.exists(blastfile)
covered = 0
mismatches = 0
gaps = 0
alignlen = 0
queries = set()
valid = set()
blast = BlastSlow(querysupermap)
for query, blines in blast.iter_hits():
blines = list(blines)
queries.add(query)
# per gene report
this_covered = 0
this_alignlen = 0
this_mismatches = 0
this_gaps = 0
for b in blines:
this_covered += abs(b.qstart - b.qstop + 1)
this_alignlen += b.hitlen
this_mismatches += b.nmismatch
this_gaps += b.ngaps
this_identity = 100. - (this_mismatches + this_gaps) * 100. / this_alignlen
this_coverage = this_covered * 100. / sizes[query]
if opts.list:
print "{0}\t{1:.1f}\t{2:.1f}".format(query, this_identity, this_coverage)
if this_identity >= opts.pctid and this_coverage >= opts.pctcov:
valid.add(query)
covered += this_covered
mismatches += this_mismatches
gaps += this_gaps
alignlen += this_alignlen
mapped_count = len(queries)
valid_count = len(valid)
cutoff_message = "(id={0.pctid}% cov={0.pctcov}%)".format(opts)
print >> sys.stderr, "Identity: {0} mismatches, {1} gaps, {2} alignlen".\
format(mismatches, gaps, alignlen)
total = len(sizes.keys())
print >> sys.stderr, "Total mapped: {0} ({1:.1f}% of {2})".\
format(mapped_count, mapped_count * 100. / total, total)
print >> sys.stderr, "Total valid {0}: {1} ({2:.1f}% of {3})".\
format(cutoff_message, valid_count, valid_count * 100. / total, total)
print >> sys.stderr, "Average id = {0:.2f}%".\
format(100 - (mismatches + gaps) * 100. / alignlen)
queries_combined = sum(sizes[x] for x in queries)
print >> sys.stderr, "Coverage: {0} covered, {1} total".\
format(covered, queries_combined)
print >> sys.stderr, "Average coverage = {0:.2f}%".\
format(covered * 100. / queries_combined)
if opts.ids:
filename = opts.ids
fw = must_open(filename, "w")
for id in valid:
print >> fw, id
logging.debug("Queries beyond cutoffs {0} written to `{1}`.".\
format(cutoff_message, filename))
outfile = opts.outfile
#.........这里部分代码省略.........
示例5: scaffold
# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def scaffold(args):
"""
%prog scaffold ctgfasta linksfile
Use the linksfile to build scaffolds. The linksfile can be
generated by calling assembly.bundle.link() or assembly.bundle.bundle().
Use --prefix to place the sequences with same prefix together. The final
product is an AGP file.
"""
from jcvi.algorithms.graph import nx
from jcvi.formats.agp import order_to_agp
p = OptionParser(scaffold.__doc__)
p.add_option("--prefix", default=False, action="store_true",
help="Keep IDs with same prefix together [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
ctgfasta, linksfile = args
sizes = Sizes(ctgfasta).mapping
logfile = "scaffold.log"
fwlog = open(logfile, "w")
pf = ctgfasta.rsplit(".", 1)[0]
agpfile = pf + ".agp"
fwagp = open(agpfile, "w")
clinks = []
g = nx.MultiGraph() # use this to get connected components
fp = open(linksfile)
for row in fp:
c = LinkLine(row)
distance = max(c.distance, 50)
g.add_edge(c.aseqid, c.bseqid,
orientation=c.orientation, distance=distance)
def get_bname(sname, prefix=False):
return sname.rsplit("_", 1)[0] if prefix else "chr0"
scaffoldbuckets = defaultdict(list)
seqnames = sorted(sizes.keys())
for h in nx.connected_component_subgraphs(g):
partialorder = solve_component(h, sizes, fwlog)
name = partialorder[0][0]
bname = get_bname(name, prefix=opts.prefix)
scaffoldbuckets[bname].append(partialorder)
ctgbuckets = defaultdict(set)
for name in seqnames:
bname = get_bname(name, prefix=opts.prefix)
ctgbuckets[bname].add(name)
# Now the buckets contain a mixture of singletons and partially resolved
# scaffolds. Print the scaffolds first then remaining singletons.
scafname = "{0}.scf_{1:04d}"
for bname, ctgs in sorted(ctgbuckets.items()):
scaffolds = scaffoldbuckets[bname]
scaffolded = set()
ctgorder = []
for scafID, scaf in enumerate(scaffolds):
ctgorder = []
for node, start, end, orientation in scaf:
ctgorder.append((node, orientation))
scaffolded.add(node)
scaf = scafname.format(bname, scafID)
order_to_agp(scaf, ctgorder, sizes, fwagp)
singletons = sorted(ctgbuckets[bname] - scaffolded)
nscaffolds = len(scaffolds)
nsingletons = len(singletons)
msg = "{0}: Scaffolds={1} Singletons={2}".\
format(bname, nscaffolds, nsingletons)
print >> sys.stderr, msg
for singleton in singletons:
ctgorder = [(singleton, "+")]
order_to_agp(singleton, ctgorder, sizes, fwagp)
fwagp.close()
logging.debug("AGP file written to `{0}`.".format(agpfile))
示例6: scaffold
# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def scaffold(args):
"""
%prog scaffold ctgfasta agpfile
Build scaffolds based on ordering in the AGP file.
"""
from jcvi.formats.agp import AGP, bed, order_to_agp, build
from jcvi.formats.bed import Bed
p = OptionParser(scaffold.__doc__)
p.add_option("--prefix", default=False, action="store_true",
help="Keep IDs with same prefix together [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
ctgfasta, agpfile = args
sizes = Sizes(ctgfasta).mapping
pf = ctgfasta.rsplit(".", 1)[0]
phasefile = pf + ".phases"
fwphase = open(phasefile, "w")
newagpfile = pf + ".new.agp"
fwagp = open(newagpfile, "w")
scaffoldbuckets = defaultdict(list)
seqnames = sorted(sizes.keys())
bedfile = bed([agpfile, "--nogaps", "--outfile=tmp"])
bb = Bed(bedfile)
for s, partialorder in bb.sub_beds():
name = partialorder[0].accn
bname = name.rsplit("_", 1)[0] if opts.prefix else s
scaffoldbuckets[bname].append([(b.accn, b.strand) for b in partialorder])
# Now the buckets contain a mixture of singletons and partially resolved
# scaffolds. Print the scaffolds first then remaining singletons.
for bname, scaffolds in sorted(scaffoldbuckets.items()):
ctgorder = []
singletons = set()
for scaf in sorted(scaffolds):
for node, orientation in scaf:
ctgorder.append((node, orientation))
if len(scaf) == 1:
singletons.add(node)
nscaffolds = len(scaffolds)
nsingletons = len(singletons)
if nsingletons == 1 and nscaffolds == 0:
phase = 3
elif nsingletons == 0 and nscaffolds == 1:
phase = 2
else:
phase = 1
msg = "{0}: Scaffolds={1} Singletons={2} Phase={3}".\
format(bname, nscaffolds, nsingletons, phase)
print >> sys.stderr, msg
print >> fwphase, "\t".join((bname, str(phase)))
order_to_agp(bname, ctgorder, sizes, fwagp)
fwagp.close()
os.remove(bedfile)
fastafile = "final.fasta"
build([newagpfile, ctgfasta, fastafile])
tidy([fastafile])