本文整理匯總了Python中jcvi.apps.grid.Jobs類的典型用法代碼示例。如果您正苦於以下問題:Python Jobs類的具體用法?Python Jobs怎麼用?Python Jobs使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了Jobs類的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: bcf
def bcf(args):
    """
    %prog bcf fastafile bamfiles > bcffile
    Run mpileup on bam files.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is part of the program's output.
    from jcvi.apps.grid import Jobs

    parser = OptionParser(bcf.__doc__)
    set_outfile(parser)
    opts, positional = parser.parse_args(args)
    if len(positional) < 2:
        # print_help() returns None, so this exits with a non-zero status.
        sys.exit(not parser.print_help())

    fastafile, bamfiles = positional[0], positional[1:]

    # Every BAM whose name lacks ".sorted." is passed to index() together
    # with "--unique"; each job receives that two-item list as its single
    # positional argument.
    index_jobs = Jobs(
        index,
        args=[[[bam, "--unique"]] for bam in bamfiles if ".sorted." not in bam],
    )
    index_jobs.run()

    # Rewrite every name to end in ".sorted.bam". An existing ".sorted" is
    # removed first so the second replace cannot produce ".sorted.sorted.bam".
    bamfiles = [
        bam.replace(".sorted.bam", ".bam").replace(".bam", ".sorted.bam")
        for bam in bamfiles
    ]

    cmd = "".join((
        "samtools mpileup -P ILLUMINA -E -ugDf",
        " {0} {1}".format(fastafile, " ".join(bamfiles)),
        " | bcftools view -bcvg -",
    ))
    sh(cmd, outfile=opts.outfile)
示例2: split
def split(args):
    """
    %prog split pairs.fastq
    Split shuffled pairs into `.1.fastq` and `.2.fastq`, using `sed`. Can work
    on gzipped file.
    <http://seqanswers.com/forums/showthread.php?t=13776>
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    from jcvi.apps.grid import Jobs

    parser = OptionParser(split.__doc__)
    set_grid(parser)
    opts, positional = parser.parse_args(args)
    if len(positional) != 1:
        sys.exit(not parser.print_help())

    pairsfastq, = positional
    gz = pairsfastq.endswith(".gz")
    prefix = pairsfastq.replace(".gz", "").rsplit(".", 1)[0]

    # Gzipped input is read with zcat and both outputs are re-compressed.
    reader = "zcat" if gz else "cat"
    recompress = " | gzip" if gz else ""
    out_ext = ".gz" if gz else ""

    p1 = prefix + ".1.fastq" + out_ext
    p2 = prefix + ".2.fastq" + out_ext

    # The sed address "1~8" (resp. "5~8") selects the first line of each mate
    # inside every 8-line pair; N;N;N;p then emits that 4-line record.
    p1cmd = reader + " {0} | sed -ne '1~8{{N;N;N;p}}'".format(pairsfastq)
    p2cmd = reader + " {0} | sed -ne '5~8{{N;N;N;p}}'".format(pairsfastq)
    p1cmd += recompress + " > " + p1
    p2cmd += recompress + " > " + p2

    if opts.grid:
        sh(p1cmd, grid=True)
        sh(p2cmd, grid=True)
        return

    # Local mode: run both extraction pipelines as parallel jobs.
    local_jobs = Jobs(target=sh, args=[(p1cmd, ), (p2cmd, )])
    local_jobs.run()

    # NOTE(review): the size check is performed only for the local run here;
    # the flattened listing does not show whether it was also meant to follow
    # the grid branch -- confirm against the upstream source.
    checkShuffleSizes(p1, p2, pairsfastq)
示例3: parallel_musclewrap
def parallel_musclewrap(clustfile, cpus, minsamp=0):
    """
    Run musclewrap() on `clustfile`, split across `cpus` parallel jobs.

    With cpus == 1 the file is processed directly and the result of
    musclewrap() is returned. Otherwise the file is split into a temporary
    directory, each piece is processed by its own job, the per-piece
    `.clustS` outputs are merged into one file next to `clustfile`, the
    temporary directory is removed and nothing is returned.
    """
    worker = partial(musclewrap, minsamp=minsamp)
    if cpus == 1:
        return worker(clustfile)

    from jcvi.apps.grid import Jobs

    # Scratch directory created under the current working directory.
    workdir = mkdtemp(dir=".")
    pieces = split([clustfile, workdir, str(cpus), "--format=clust"])

    Jobs(worker, pieces.names).run()

    # Each piece "<name>.clust" is expected to yield "<name>.clustS".
    piece_outputs = [name.replace(".clust", ".clustS") for name in pieces.names]
    merged = clustfile.replace(".clust", ".clustS")
    FileMerger(piece_outputs, outfile=merged).merge()

    shutil.rmtree(workdir)
示例4: mapped
def mapped(args):
    """
    %prog mapped sam/bamfile
    Given an input sam/bam file, output a sam/bam file containing only the mapped reads.
    Optionally, extract the unmapped reads into a separate file
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    import pysam
    from jcvi.apps.grid import Jobs

    p = OptionParser(mapped.__doc__)
    p.set_sam_options(extra=False)
    opts, args = p.parse_args(args)
    if len(args) != 1:
        # print_help() returns None; `not None` makes the usage error exit
        # with a non-zero status, matching the other commands in this file.
        sys.exit(not p.print_help())

    samfile, = args

    # One option list per `samtools view` invocation.
    view_opts = []

    # Input side: SAM input needs -S; the input extension is what gets
    # replaced when deriving the output file names.
    oext, mopts = (".sam", ["-S"]) \
        if samfile.endswith(".sam") else (".bam", [])

    # Output side: BAM (-b) or SAM with header (-h).
    flag, ext = ("-b", ".bam") if opts.bam else ("-h", ".sam")
    mopts.append(flag)

    if opts.uniq:
        mopts.append("-q1")
        ext = ".uniq{0}".format(ext)

    if opts.unmapped:
        # Copy the shared options before the mapped-only ones are appended.
        uopts = list(mopts)
        uoutfile = samfile.replace(oext, ".unmapped{0}".format(ext))
        uopts.extend(["-f4", samfile, "-o{0}".format(uoutfile)])
        view_opts.append(uopts)

    outfile = samfile.replace(oext, ".mapped{0}".format(ext))
    mopts.extend(["-F4", samfile, "-o{0}".format(outfile)])
    view_opts.append(mopts)

    for vo in view_opts:
        logging.debug('samtools view {0}'.format(" ".join(vo)))

    # NOTE(review): each option list is handed over as a generator, exactly as
    # before; how Jobs unpacks it is not visible here, so this is left alone.
    jobs = Jobs(pysam.view, [(z for z in x) for x in view_opts])
    jobs.run()
示例5: augustus
def augustus(args):
    """
    %prog augustus fastafile
    Run parallel AUGUSTUS. Final results can be reformatted using
    annotation.reformat.augustus().
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(augustus.__doc__)
    parser.add_option("--species", default="maize",
                      help="Use species model for prediction")
    parser.add_option("--hintsfile", help="Hint-guided AUGUSTUS")
    parser.add_option("--nogff3", default=False, action="store_true",
                      help="Turn --gff3=off")
    parser.set_home("augustus")
    parser.set_cpus()
    opts, positional = parser.parse_args(args)
    if len(positional) != 1:
        sys.exit(not parser.print_help())

    fastafile, = positional
    ncpus = opts.cpus
    gff3 = not opts.nogff3
    suffix = ".gff3" if gff3 else ".out"
    cfgfile = op.join(opts.augustus_home,
                      "config/extrinsic/extrinsic.M.RM.E.W.cfg")

    # Split the FASTA into one piece per CPU inside a scratch directory
    # created under the current working directory.
    workdir = mkdtemp(dir=".")
    pieces = split([fastafile, workdir, str(ncpus)])

    worker = partial(
        augustuswrap,
        species=opts.species,
        gff3=gff3,
        cfgfile=cfgfile,
        hintsfile=opts.hintsfile,
    )
    Jobs(worker, pieces.names).run()

    # Per-piece outputs share the piece's base name with the chosen suffix.
    piece_outputs = [name.rsplit(".", 1)[0] + suffix for name in pieces.names]
    merged = fastafile.rsplit(".", 1)[0] + suffix
    FileMerger(piece_outputs, outfile=merged).merge()
    shutil.rmtree(workdir)

    if not gff3:
        return

    # GFF3 output additionally goes through the reformatter.
    from jcvi.annotation.reformat import augustus as reformat_augustus

    reformatted = merged.replace(".gff3", ".reformat.gff3")
    reformat_augustus([merged, "--outfile={0}".format(reformatted)])
示例6: mdownload
def mdownload(args):
    """
    %prog mdownload links.txt
    Multiple download a list of files. Use formats.html.links() to extract the
    links file.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    from jcvi.apps.grid import Jobs

    p = OptionParser(mdownload.__doc__)
    opts, args = p.parse_args(args)
    if len(args) != 1:
        sys.exit(not p.print_help())

    linksfile, = args

    # Read one link per line. The file is closed as soon as it has been read,
    # and blank lines are skipped so they do not become empty-string jobs.
    with open(linksfile) as fp:
        stripped = (row.strip() for row in fp)
        # Each job receives its link as a 1-tuple of positional arguments.
        links = [(link,) for link in stripped if link]

    j = Jobs(download, links)
    j.run()
示例7: vcf
def vcf(args):
    """
    %prog vcf fastafile bamfiles > out.vcf.gz
    Call SNPs on bam files.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    from jcvi.apps.grid import Jobs

    valid_callers = ("mpileup", "freebayes")
    parser = OptionParser(vcf.__doc__)
    parser.set_outfile(outfile="out.vcf.gz")
    parser.add_option("--nosort", default=False, action="store_true",
                      help="Do not sort the BAM files")
    parser.add_option("--caller", default="mpileup", choices=valid_callers,
                      help="Use variant caller [default: %default]")
    opts, positional = parser.parse_args(args)
    if len(positional) < 2:
        sys.exit(not parser.print_help())

    fastafile, bamfiles = positional[0], positional[1:]
    caller = opts.caller

    not_sorted = [bam for bam in bamfiles if ".sorted." not in bam]
    if not opts.nosort:
        # Index (with "--unique") every BAM that is not yet sorted, then
        # refer to all inputs by their ".sorted.bam" names.
        Jobs(index, args=[[[bam, "--unique"]] for bam in not_sorted]).run()
        bamfiles = [
            bam.replace(".sorted.bam", ".bam").replace(".bam", ".sorted.bam")
            for bam in bamfiles
        ]
    else:
        # As written, --nosort keeps only the inputs whose names lack
        # ".sorted."; already-sorted inputs are left out of the call.
        bamfiles = not_sorted

    inputs = " {0} {1}".format(fastafile, " ".join(bamfiles))
    if caller == "mpileup":
        cmd = "samtools mpileup -E -uf" + inputs + " | bcftools call -vmO v"
    elif caller == "freebayes":
        cmd = "freebayes -f" + inputs

    sh(cmd, outfile=opts.outfile)
示例8: dump
def _read_library_names(statsfile):
    # Return the library names listed in a `.lib_stats` file: first column of
    # every non-blank row after the two header rows, excluding "Unpaired".
    libs = []
    with open(statsfile) as fp:
        for lineno, row in enumerate(fp):
            if lineno < 2:  # skip two header rows
                continue
            if row.strip() == "":
                continue
            libname = row.split()[0]
            if libname == "Unpaired":
                continue
            libs.append(libname)
    return libs


def dump(args):
    """
    %prog dump fastbfile
    Export ALLPATHS fastb file to fastq file. Use --dir to indicate a previously
    run allpaths folder.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(dump.__doc__)
    p.add_option("--dir",
                 help="Working directory [default: %default]")
    p.add_option("--nosim", default=False, action="store_true",
                 help="Do not simulate qual to 50 [default: %default]")
    opts, args = p.parse_args(args)
    if len(args) != 1:
        sys.exit(not p.print_help())

    fastbfile, = args

    # With --dir the export is delegated entirely to export_fastq().
    d = opts.dir
    if d:
        from jcvi.assembly.preprocess import export_fastq
        export_fastq(d, fastbfile)
        return

    sim = not opts.nosim
    # Prefix "j" when the file name contains "jump", otherwise "f".
    pf = "j" if "jump" in fastbfile else "f"

    # Remove a stale stats file so the one parsed below comes from this run.
    statsfile = "{0}.lib_stats".format(pf)
    if op.exists(statsfile):
        os.remove(statsfile)

    cmd = "SplitReadsByLibrary READS_IN={0}".format(fastbfile)
    cmd += " READS_OUT={0} QUALS=True".format(pf)
    sh(cmd)

    libs = _read_library_names(statsfile)
    logging.debug("Found libraries: {0}".format(",".join(libs)))

    # One FastbQualbToFastq conversion per library, run as parallel jobs.
    cmds = []
    for libname in libs:
        cmd = "FastbQualbToFastq"
        cmd += " HEAD_IN={0}.{1}.AB HEAD_OUT={1}".format(pf, libname)
        cmd += " PAIRED=True PHRED_OFFSET=33"
        if sim:
            cmd += " SIMULATE_QUALS=True"
        if pf == 'j':
            cmd += " FLIP=True"
        cmds.append((cmd, ))

    m = Jobs(target=sh, args=cmds)
    m.run()

    # Rename the A/B outputs to the .1/.2 naming.
    for libname in libs:
        sh("mv {0}.A.fastq {0}.1.fastq".format(libname))
        sh("mv {0}.B.fastq {0}.2.fastq".format(libname))
示例9: main
def main(args):
    """
    %prog database.fasta query.fasta
    Run LAST by calling LASTDB, LASTAL and LASTEX.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(main.__doc__)
    p.add_option("--path", help="specify LAST path")
    p.add_option("--mask", default=False, action="store_true",
                 help="invoke -c in lastdb [default: %default]")
    p.add_option("--format", default="blast", choices=supported_formats,
                 help="Output format [default: %default]")
    p.add_option("--eval", default=False, action="store_true",
                 help="Use lastex to recalculate E-value [default: %default]")
    p.set_cpus(cpus=32)
    p.set_params()
    p.set_outfile()
    opts, args = p.parse_args(args)
    if len(args) != 2:
        sys.exit(not p.print_help())

    subject, query = args
    if opts.eval and opts.cpus > 1:
        # Call syntax is valid on both Python 2 and Python 3.
        raise Exception("Option --eval cannot work with multiple threads")

    # Resolve the LAST executables, optionally under --path.
    path = opts.path
    getpath = lambda x: op.join(path, x) if path else x
    lastdb_bin = getpath("lastdb")
    lastal_bin = getpath("lastal")
    lastex_bin = getpath("lastex")

    subjectdb = subject.rsplit(".", 1)[0]
    run_lastdb(infile=subject, outfile=subjectdb + ".prj", mask=opts.mask,
               lastdb_bin=lastdb_bin)

    cpus = opts.cpus
    logging.debug("Dispatch job to {0} cpus".format(cpus))

    # MAF output starts with a header line written to the output file first;
    # the file is then re-opened with oappend=True.
    oappend = False
    if opts.format == "maf":
        cmd = 'echo "##maf version=1"'
        sh(cmd, outfile=opts.outfile)
        oappend = True

    u = 2 if opts.mask else 0
    cmd = "{0} -u {1}".format(lastal_bin, u)
    # lastal's -f value is the position of the format in supported_formats.
    f = supported_formats.index(opts.format)
    cmd += " -f {0}".format(f)
    cmd += " {0} -".format(subjectdb)

    extra = opts.extra
    if extra:
        cmd += " " + extra

    if opts.eval:
        querydb = query.rsplit(".", 1)[0]
        run_lastdb(infile=query, outfile=querydb + ".prj")
        cmd += " | {0} {1}.prj {2}.prj -".format(lastex_bin, subjectdb, querydb)

    out_fh = must_open(opts.outfile, "w", checkexists=True, oappend=oappend)
    if out_fh is None:
        return

    # One job per CPU; each gets its 1-based slot number, the total, the
    # shared output handle and a lock.
    lock = Lock()
    args = [(k + 1, cpus, out_fh, cmd, query, lock)
            for k in range(cpus)]
    g = Jobs(target=last, args=args)
    g.run()
示例10: main
def main():
    """
    %prog database.fa query.fa [options]
    Run LASTZ similar to the BLAST interface, and generates -m8 tabular format
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(main.__doc__)

    supported_formats = tuple(x.strip() for x in
        "lav, lav+text, axt, axt+, maf, maf+, maf-, sam, softsam, "
        "sam-, softsam-, cigar, BLASTN, BLASTN-, differences, rdotplot, text".split(','))

    p.add_option("--format", default="BLASTN-", choices=supported_formats,
                 help="Output format [default: %default]")
    p.add_option("--path", dest="lastz_path", default=None,
                 help="specify LASTZ path")
    p.add_option("--mask", dest="mask", default=False, action="store_true",
                 help="treat lower-case letters as mask info [default: %default]")
    p.add_option("--similar", default=False, action="store_true",
                 help="Use options tuned for close comparison [default: %default]")
    p.set_cpus(cpus=32)
    p.set_params()
    p.set_outfile()
    opts, args = p.parse_args()
    if len(args) != 2:
        # print_help() returns None; `not None` makes the usage error exit
        # with a non-zero status instead of 0.
        sys.exit(not p.print_help())

    bfasta_fn, afasta_fn = args
    for fn in (afasta_fn, bfasta_fn):
        assert op.exists(fn)

    afasta_fn = op.abspath(afasta_fn)
    bfasta_fn = op.abspath(bfasta_fn)
    # Opened up front; note the non-"BLASTN-" path below returns without
    # writing to it.
    out_fh = must_open(opts.outfile, "w")

    extra = opts.extra
    if opts.similar:
        extra += similarOptions

    lastz_bin = opts.lastz_path or "lastz"
    assert lastz_bin.endswith("lastz"), "You need to include lastz in your path"

    mask = opts.mask
    cpus = opts.cpus
    logging.debug("Dispatch job to %d cpus" % cpus)
    fmt = opts.format
    blastline = (fmt == "BLASTN-")

    # The axt, maf, etc. format can only be run on splitted database (i.e. one
    # FASTA record per file). The splitted files are then parallelized for the
    # computation, as opposed to splitting queries through "subsample".
    outdir = "outdir"
    if not blastline:
        from jcvi.formats.fasta import Fasta
        from jcvi.formats.chain import faToTwoBit

        mkdir(outdir)

        bfasta_2bit = faToTwoBit(bfasta_fn)
        bids = list(Fasta(bfasta_fn, lazy=True).iterkeys_ordered())

        apf = op.basename(afasta_fn).split(".")[0]
        # Task tuple layout:
        # bfasta_fn, afasta_fn, outfile, lastz_bin, extra, mask, format
        tasks = []
        for seqid in bids:
            bfasta = "/".join((bfasta_2bit, seqid))
            outfile = op.join(outdir, "{0}.{1}.{2}".format(apf, seqid, fmt))
            tasks.append((bfasta, afasta_fn, outfile,
                          lastz_bin, extra, mask, fmt))

        # Use a distinct name so the option parser is not rebound, and
        # release the worker processes once the blocking map() has finished.
        pool = Pool(cpus)
        try:
            pool.map(lastz_2bit, tasks)
        finally:
            pool.close()
            pool.join()

        return

    # "BLASTN-" path: one job per CPU, each given its 1-based slot number,
    # the total, the shared output handle and a lock.
    lock = Lock()
    args = [(k + 1, cpus, bfasta_fn, afasta_fn, out_fh,
             lock, lastz_bin, extra, mask) for k in range(cpus)]
    g = Jobs(target=lastz, args=args)
    g.run()
示例11: main
def main():
    """
    %prog database.fa query.fa [options]
    Wrapper for NCBI BLAST+.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(main.__doc__)
    parser.add_option("--format", default=" \'6 qseqid sseqid pident length " \
                      "mismatch gapopen qstart qend sstart send evalue bitscore\' ",
                      help="0-11, learn more with \"blastp -help\". [default: %default]")
    parser.add_option("--path", dest="blast_path", default=None,
                      help="specify BLAST+ path including the program name")
    parser.add_option("--prog", dest="blast_program", default="blastp",
                      help="specify BLAST+ program to use. See complete list here: " \
                      "http://www.ncbi.nlm.nih.gov/books/NBK52640/#chapter1.Installation"
                      " [default: %default]")
    parser.set_align(evalue=.01)
    parser.add_option("--best", default=1, type="int",
                      help="Only look for best N hits [default: %default]")
    parser.set_cpus()
    parser.add_option("--nprocs", default=1, type="int",
                      help="number of BLAST processes to run in parallel. " + \
                      "split query.fa into `nprocs` chunks, " + \
                      "each chunk uses -num_threads=`cpus`")
    parser.set_params()
    parser.set_outfile()
    opts, positional = parser.parse_args()
    if len(positional) != 2 or opts.blast_program is None:
        sys.exit(not parser.print_help())

    bfasta_fn, afasta_fn = positional
    for fn in (afasta_fn, bfasta_fn):
        assert op.exists(fn)
    afasta_fn = op.abspath(afasta_fn)
    bfasta_fn = op.abspath(bfasta_fn)
    out_fh = must_open(opts.outfile, "w")

    # --path may name either the executable itself or its directory; in the
    # latter case the program name is appended.
    blast_program = opts.blast_program
    blast_bin = opts.blast_path or blast_program
    if op.basename(blast_bin) != blast_program:
        blast_bin = op.join(blast_bin, blast_program)

    # With --nprocs > 1 the query file is split into that many chunks.
    nprocs, cpus = opts.nprocs, opts.cpus
    queries = [afasta_fn]
    if nprocs > 1:
        logging.debug("Dispatch job to %d processes" % nprocs)
        queries = split([afasta_fn, "outdir", str(nprocs)]).names

    # blastp/blastx use a "prot" database; everything else uses "nucl".
    is_prot = op.basename(blast_bin) in ("blastp", "blastx")
    dbtype = "prot" if is_prot else "nucl"
    if is_prot:
        db_index = bfasta_fn + ".pin"
    else:
        # Prefer "<db>.00.nin" when that file exists on disk.
        numbered_nin = bfasta_fn + ".00.nin"
        db_index = numbered_nin if op.exists(numbered_nin) \
            else (bfasta_fn + ".nin")
    run_formatdb(infile=bfasta_fn, outfile=db_index, dbtype=dbtype)

    lock = Lock()

    blast_cmd = "{0} -db {1} -outfmt {2}".format(blast_bin, bfasta_fn, opts.format)
    blast_cmd += " -evalue {0} -max_target_seqs {1}".format(opts.evalue, opts.best)
    blast_cmd += " -num_threads {0}".format(cpus)
    extra = opts.extra
    if extra:
        blast_cmd += " " + extra.strip()

    # One job per query chunk, all sharing the output handle and the lock.
    job_args = [(out_fh, blast_cmd, chunk, lock) for chunk in queries]
    Jobs(target=blastplus, args=job_args).run()
示例12: main
def main(args):
    """
    %prog database.fasta query.fasta
    Run LAST by calling LASTDB, LASTAL and LASTEX.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    supported_formats = ("tab", "maf", "blast")

    p = OptionParser(main.__doc__)
    p.add_option("-a", "-A", dest="cpus", default=1, type="int",
                 help="parallelize job to multiple cpus [default: %default]")
    p.add_option("--path", help="specify LAST path")
    p.add_option("--format", default="blast", choices=supported_formats,
                 help="Output format, one of {0} [default: %default]".\
                     format("|".join(supported_formats)))
    p.add_option("--eval", default=False, action="store_true",
                 help="Use lastex to recalculate E-value [default: %default]")
    set_params(p)
    set_outfile(p)
    opts, args = p.parse_args(args)
    if len(args) != 2:
        sys.exit(not p.print_help())

    subject, query = args
    if opts.eval and opts.cpus > 1:
        # Call syntax is valid on both Python 2 and Python 3.
        raise Exception("Option --eval cannot work with multiple threads")

    # Resolve the LAST executables, optionally under --path.
    path = opts.path
    getpath = lambda x: op.join(path, x) if path else x
    lastdb_bin = getpath("lastdb")
    lastal_bin = getpath("lastal")
    lastex_bin = getpath("lastex")

    subjectdb = subject.rsplit(".", 1)[0]
    run_lastdb(infile=subject, outfile=subjectdb + ".prj", lastdb_bin=lastdb_bin)

    cpus = opts.cpus
    logging.debug("Dispatch job to {0} cpus".format(cpus))

    cmd = "{0} -u 0".format(lastal_bin)
    # lastal's -f value is the position of the format in supported_formats.
    f = supported_formats.index(opts.format)
    cmd += " -f {0}".format(f)
    cmd += " {0} -".format(subjectdb)

    extra = opts.extra
    if extra:
        cmd += " " + extra

    if opts.eval:
        querydb = query.rsplit(".", 1)[0]
        run_lastdb(infile=query, outfile=querydb + ".prj")
        cmd += " | {0} {1}.prj {2}.prj -".format(lastex_bin, subjectdb, querydb)

    out_fh = must_open(opts.outfile, "w")

    if opts.format == "maf":
        # Write the MAF header into the actual output destination; echoing it
        # through the shell sent it to stdout even when --outfile named a
        # file. Flush so the header is on disk before worker jobs start
        # writing through the same handle.
        out_fh.write("##maf version=1\n")
        out_fh.flush()

    # One job per CPU; each gets its 1-based slot number, the total, the
    # shared output handle and a lock.
    lock = Lock()
    args = [(k + 1, cpus, out_fh, cmd, query, lock)
            for k in range(cpus)]
    g = Jobs(target=last, args=args)
    g.run()