本文整理匯總了Python中jcvi.apps.grid.MakeManager類的典型用法代碼示例。如果您正苦於以下問題:Python MakeManager類的具體用法?Python MakeManager怎麽用?Python MakeManager使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了MakeManager類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: batchccn
def batchccn(args):
"""
%prog batchccn test.csv
Run CCN script in batch. Write makefile.
"""
p = OptionParser(batchccn.__doc__)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
csvfile, = args
mm = MakeManager()
pf = op.basename(csvfile).split(".")[0]
mkdir(pf)
header = open(csvfile).next()
header = None if header.strip().endswith(".bam") else "infer"
logging.debug("Header={}".format(header))
df = pd.read_csv(csvfile, header=header)
cmd = "perl /mnt/software/ccn_gcn_hg38_script/ccn_gcn_hg38.pl"
cmd += " -n {} -b {}"
cmd += " -o {} -r hg38".format(pf)
for i, (sample_key, bam) in df.iterrows():
cmdi = cmd.format(sample_key, bam)
outfile = "{}/{}/{}.ccn".format(pf, sample_key, sample_key)
mm.add(csvfile, outfile, cmdi)
mm.write()
示例2: beagle
def beagle(args):
"""
%prog beagle input.vcf 1
Use BEAGLE4.1 to impute vcf on chromosome 1.
"""
p = OptionParser(beagle.__doc__)
p.set_home("beagle")
p.set_ref()
p.set_cpus()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
vcffile, chr = args
pf = vcffile.rsplit(".", 1)[0]
outpf = pf + ".beagle"
outfile = outpf + ".vcf.gz"
mm = MakeManager()
beagle_cmd = opts.beagle_home
kg = op.join(opts.ref, "1000GP_Phase3")
cmd = beagle_cmd + " gt={0}".format(vcffile)
cmd += " ref={0}/chr{1}.1kg.phase3.v5a.bref".format(kg, chr)
cmd += " map={0}/plink.chr{1}.GRCh37.map".format(kg, chr)
cmd += " out={0}".format(outpf)
cmd += " nthreads=16 gprobs=true"
mm.add(vcffile, outfile, cmd)
mm.write()
示例3: impute
def impute(args):
"""
%prog impute input.vcf hs37d5.fa 1
Use IMPUTE2 to impute vcf on chromosome 1.
"""
from pyfaidx import Fasta
p = OptionParser(impute.__doc__)
p.set_home("shapeit")
p.set_home("impute")
p.set_ref()
p.set_cpus()
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
vcffile, fastafile, chr = args
mm = MakeManager()
pf = vcffile.rsplit(".", 1)[0]
hapsfile = pf + ".haps"
kg = op.join(opts.ref, "1000GP_Phase3")
shapeit_phasing(mm, chr, vcffile, opts)
fasta = Fasta(fastafile)
size = len(fasta[chr])
binsize = 5000000
bins = size / binsize # 5Mb bins
if size % binsize:
bins += 1
impute_cmd = op.join(opts.impute_home, "impute2")
chunks = []
for x in xrange(bins + 1):
chunk_start = x * binsize + 1
chunk_end = min(chunk_start + binsize - 1, size)
outfile = pf + ".chunk{0:02d}.impute2".format(x)
mapfile = "{0}/genetic_map_chr{1}_combined_b37.txt".format(kg, chr)
rpf = "{0}/1000GP_Phase3_chr{1}".format(kg, chr)
cmd = impute_cmd + " -m {0}".format(mapfile)
cmd += " -known_haps_g {0}".format(hapsfile)
cmd += " -h {0}.hap.gz -l {0}.legend.gz".format(rpf)
cmd += " -Ne 20000 -int {0} {1}".format(chunk_start, chunk_end)
cmd += " -o {0} -allow_large_regions -seed 367946".format(outfile)
cmd += " && touch {0}".format(outfile)
mm.add(hapsfile, outfile, cmd)
chunks.append(outfile)
# Combine all the files
imputefile = pf + ".impute2"
cmd = "cat {0} > {1}".format(" ".join(chunks), imputefile)
mm.add(chunks, imputefile, cmd)
# Convert to vcf
vcffile = pf + ".impute2.vcf"
cmd = "python -m jcvi.formats.vcf fromimpute2 {0} {1} {2} > {3}".\
format(imputefile, fastafile, chr, vcffile)
mm.add(imputefile, vcffile, cmd)
mm.write()
示例4: lobstrindex
def lobstrindex(args):
"""
%prog lobstrindex hg38.trf.bed hg38.upper.fa
Make lobSTR index. Make sure the FASTA contain only upper case (so use
fasta.format --upper to convert from UCSC fasta). The bed file is generated
by str().
"""
p = OptionParser(lobstrindex.__doc__)
p.add_option("--notreds", default=False, action="store_true",
help="Remove TREDs from the bed file")
p.set_home("lobstr")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
trfbed, fastafile = args
pf = fastafile.split(".")[0]
lhome = opts.lobstr_home
mkdir(pf)
if opts.notreds:
newbedfile = trfbed + ".new"
newbed = open(newbedfile, "w")
fp = open(trfbed)
retained = total = 0
seen = set()
for row in fp:
r = STRLine(row)
total += 1
name = r.longname
if name in seen:
continue
seen.add(name)
print >> newbed, r
retained += 1
newbed.close()
logging.debug("Retained: {0}".format(percentage(retained, total)))
else:
newbedfile = trfbed
mm = MakeManager()
cmd = "python {0}/scripts/lobstr_index.py".format(lhome)
cmd += " --str {0} --ref {1} --out {2}".format(newbedfile, fastafile, pf)
mm.add((newbedfile, fastafile), op.join(pf, "lobSTR_ref.fasta.rsa"), cmd)
tabfile = "{0}/index.tab".format(pf)
cmd = "python {0}/scripts/GetSTRInfo.py".format(lhome)
cmd += " {0} {1} > {2}".format(newbedfile, fastafile, tabfile)
mm.add((newbedfile, fastafile), tabfile, cmd)
infofile = "{0}/index.info".format(pf)
cmd = "cp {0} {1}".format(newbedfile, infofile)
mm.add(trfbed, infofile, cmd)
mm.write()
示例5: kmc
def kmc(args):
"""
%prog kmc folder
Run kmc3 on Illumina reads.
"""
p = OptionParser(kmc.__doc__)
p.add_option("-k", default=21, type="int", help="Kmer size")
p.add_option("--ci", default=2, type="int",
help="Exclude kmers with less than ci counts")
p.add_option("--cs", default=2, type="int",
help="Maximal value of a counter")
p.add_option("--cx", default=None, type="int",
help="Exclude kmers with more than cx counts")
p.add_option("--single", default=False, action="store_true",
help="Input is single-end data, only one FASTQ/FASTA")
p.add_option("--fasta", default=False, action="store_true",
help="Input is FASTA instead of FASTQ")
p.set_cpus()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
folder, = args
K = opts.k
n = 1 if opts.single else 2
pattern = "*.fa,*.fa.gz,*.fasta,*.fasta.gz" if opts.fasta else \
"*.fq,*.fq.gz,*.fastq,*.fastq.gz"
mm = MakeManager()
for p, pf in iter_project(folder, pattern=pattern,
n=n, commonprefix=False):
pf = pf.split("_")[0] + ".ms{}".format(K)
infiles = pf + ".infiles"
fw = open(infiles, "w")
print >> fw, "\n".join(p)
fw.close()
cmd = "kmc -k{} -m64 -t{}".format(K, opts.cpus)
cmd += " -ci{} -cs{}".format(opts.ci, opts.cs)
if opts.cx:
cmd += " -cx{}".format(opts.cx)
if opts.fasta:
cmd += " -fm"
cmd += " @{} {} .".format(infiles, pf)
outfile = pf + ".kmc_suf"
mm.add(p, outfile, cmd)
mm.write()
示例6: lobstrindex
def lobstrindex(args):
"""
%prog lobstrindex hg38.trf.bed hg38.upper.fa hg38
Make lobSTR index. Make sure the FASTA contain only upper case (so use
fasta.format --upper to convert from UCSC fasta). The bed file is generated
by str().
"""
p = OptionParser(lobstrindex.__doc__)
p.add_option("--fixseq", action="store_true", default=False,
help="Scan sequences to extract perfect STRs")
p.set_home("lobstr")
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
trfbed, fastafile, pf = args
lhome = opts.lobstr_home
mkdir(pf)
if opts.fixseq:
genome = pyfasta.Fasta(fastafile)
newbedfile = trfbed + ".new"
newbed = open(newbedfile, "w")
fp = open(trfbed)
retained = total = 0
for row in fp:
s = STRLine(row)
total += 1
for ns in s.iter_exact_str(genome):
if not ns.is_valid():
continue
print >> newbed, ns
retained += 1
newbed.close()
logging.debug("Retained: {0}".format(percentage(retained, total)))
else:
newbedfile = trfbed
mm = MakeManager()
cmd = "python {0}/scripts/lobstr_index.py".format(lhome)
cmd += " --str {0} --ref {1} --out {2}".format(newbedfile, fastafile, pf)
mm.add((newbedfile, fastafile), op.join(pf, "lobSTR_ref.fasta.rsa"), cmd)
tabfile = "{0}/index.tab".format(pf)
cmd = "python {0}/scripts/GetSTRInfo.py".format(lhome)
cmd += " {0} {1} > {2}".format(newbedfile, fastafile, tabfile)
mm.add((newbedfile, fastafile), tabfile, cmd)
infofile = "{0}/index.info".format(pf)
cmd = "cp {0} {1}".format(trfbed, infofile)
mm.add(trfbed, infofile, cmd)
mm.write()
示例7: blasr
def blasr(args):
"""
%prog blasr ref.fasta fofn
Run blasr on a set of PacBio reads. This is based on a divide-and-conquer
strategy described below.
"""
from jcvi.apps.grid import MakeManager
from jcvi.utils.iter import grouper
p = OptionParser(blasr.__doc__)
p.set_cpus(cpus=8)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
reffasta, fofn = args
flist = sorted([x.strip() for x in open(fofn)])
h5list = []
mm = MakeManager()
for i, fl in enumerate(grouper(flist, 3)):
chunkname = "chunk{0:03d}".format(i)
fn = chunkname + ".fofn"
h5 = chunkname + ".cmp.h5"
fw = open(fn, "w")
print >> fw, "\n".join(fl)
fw.close()
cmd = "pbalign {0} {1} {2}".format(fn, reffasta, h5)
cmd += " --nproc {0} --forQuiver --tmpDir .".format(opts.cpus)
mm.add((fn, reffasta), h5, cmd)
h5list.append(h5)
# Merge h5, sort and repack
allh5 = "all.cmp.h5"
tmph5 = "tmp.cmp.h5"
cmd_merge = "cmph5tools.py merge --outFile {0}".format(allh5)
cmd_merge += " " + " ".join(h5list)
cmd_sort = "cmph5tools.py sort --deep {0} --tmpDir .".format(allh5)
cmd_repack = "h5repack -f GZIP=1 {0} {1}".format(allh5, tmph5)
cmd_repack += " && mv {0} {1}".format(tmph5, allh5)
mm.add(h5list, allh5, [cmd_merge, cmd_sort, cmd_repack])
# Quiver
pf = reffasta.rsplit(".", 1)[0]
variantsgff = pf + ".variants.gff"
consensusfasta = pf + ".consensus.fasta"
cmd_faidx = "samtools faidx {0}".format(reffasta)
cmd = "quiver -j 32 {0}".format(allh5)
cmd += " -r {0} -o {1} -o {2}".format(reffasta, variantsgff, consensusfasta)
mm.add(allh5, consensusfasta, [cmd_faidx, cmd])
mm.write()
示例8: trf
def trf(args):
"""
%prog trf outdir
Run TRF on FASTA files.
"""
from jcvi.apps.base import iglob
p = OptionParser(trf.__doc__)
p.add_option("--mismatch", default=31, type="int",
help="Mismatch and gap penalty")
p.add_option("--minscore", default=MINSCORE, type="int",
help="Minimum score to report")
p.add_option("--period", default=6, type="int",
help="Maximum period to report")
p.add_option("--minlength", default=MINSCORE / 2, type="int",
help="Minimum length of repeat tract")
p.add_option("--telomeres", default=False, action="store_true",
help="Run telomere search: minscore=140 period=7")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
outdir, = args
minlength = opts.minlength
mm = MakeManager()
if opts.telomeres:
opts.minscore, opts.period = 140, 7
params = "2 {0} {0} 80 10 {1} {2}".\
format(opts.mismatch, opts.minscore, opts.period).split()
bedfiles = []
for fastafile in natsorted(iglob(outdir, "*.fa,*.fasta")):
pf = op.basename(fastafile).split(".")[0]
cmd1 = "trf {0} {1} -d -h".format(fastafile, " ".join(params))
datfile = op.basename(fastafile) + "." + ".".join(params) + ".dat"
bedfile = "{0}.trf.bed".format(pf)
cmd2 = "cat {} | awk '($8 >= {} && $8 <= {})'".\
format(datfile, minlength, READLEN - minlength)
cmd2 += " | sed 's/ /\\t/g'"
cmd2 += " | awk '{{print \"{0}\\t\" $0}}' > {1}".format(pf, bedfile)
mm.add(fastafile, datfile, cmd1)
mm.add(datfile, bedfile, cmd2)
bedfiles.append(bedfile)
bedfile = "trf.bed"
cmd = "cat {0} > {1}".format(" ".join(natsorted(bedfiles)), bedfile)
mm.add(bedfiles, bedfile, cmd)
mm.write()
示例9: cufflinks
def cufflinks(args):
"""
%prog cufflinks folder reference
Run cufflinks on a folder containing tophat results.
"""
p = OptionParser(cufflinks.__doc__)
p.add_option("--gtf", help="Reference annotation [default: %default]")
p.set_cpus()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
folder, reference = args
cpus = opts.cpus
gtf = opts.gtf
transcripts = "transcripts.gtf"
mm = MakeManager()
gtfs = []
for bam in iglob(folder, "*.bam"):
pf = op.basename(bam).split(".")[0]
outdir = pf + "_cufflinks"
cmd = "cufflinks"
cmd += " -o {0}".format(outdir)
cmd += " -p {0}".format(cpus)
if gtf:
cmd += " -g {0}".format(gtf)
cmd += " --frag-bias-correct {0}".format(reference)
cmd += " --multi-read-correct"
cmd += " {0}".format(bam)
cgtf = op.join(outdir, transcripts)
mm.add(bam, cgtf, cmd)
gtfs.append(cgtf)
assemblylist = "assembly_list.txt"
cmd = 'find . -name "{0}" > {1}'.format(transcripts, assemblylist)
mm.add(gtfs, assemblylist, cmd)
mergedgtf = "merged/merged.gtf"
cmd = "cuffmerge"
cmd += " -o merged"
cmd += " -p {0}".format(cpus)
if gtf:
cmd += " -g {0}".format(gtf)
cmd += " -s {0}".format(reference)
cmd += " {0}".format(assemblylist)
mm.add(assemblylist, mergedgtf, cmd)
mm.write()
示例10: batch
def batch(args):
"""
%prog batch all.cds *.anchors
Compute Ks values for a set of anchors file. This will generate a bunch of
work directories for each comparisons. The anchorsfile should be in the form
of specie1.species2.anchors.
"""
from jcvi.apps.grid import MakeManager
p = OptionParser(batch.__doc__)
opts, args = p.parse_args(args)
if len(args) < 2:
sys.exit(not p.print_help())
cdsfile = args[0]
anchors = args[1:]
workdirs = [".".join(op.basename(x).split(".")[:2]) for x in anchors]
for wd in workdirs:
mkdir(wd)
mm = MakeManager()
for wd, ac in zip(workdirs, anchors):
pairscdsfile = wd + ".cds.fasta"
cmd = "python -m jcvi.apps.ks prepare {} {} -o {}".\
format(ac, cdsfile, pairscdsfile)
mm.add((ac, cdsfile), pairscdsfile, cmd)
ksfile = wd + ".ks"
cmd = "python -m jcvi.apps.ks calc {} -o {} --workdir {}".\
format(pairscdsfile, ksfile, wd)
mm.add(pairscdsfile, ksfile, cmd)
mm.write()
示例11: cyntenator
def cyntenator(args):
"""
%prog cyntenator athaliana.athaliana.last athaliana.bed
Prepare input for Cyntenator.
"""
p = OptionParser(cyntenator.__doc__)
opts, args = p.parse_args(args)
if len(args) < 2:
sys.exit(not p.print_help())
lastfile = args[0]
fp = open(lastfile)
filteredlastfile = lastfile + ".blast"
fw = open(filteredlastfile, "w")
for row in fp:
b = BlastLine(row)
if b.query == b.subject:
continue
print >> fw, "\t".join((b.query, b.subject, str(b.score)))
fw.close()
bedfiles = args[1:]
fp = open(lastfile)
b = BlastLine(fp.next())
subject = b.subject
txtfiles = []
for bedfile in bedfiles:
order = Bed(bedfile).order
if subject in order:
db = op.basename(bedfile).split(".")[0][:20]
logging.debug("Found db: {0}".format(db))
txtfile = write_txt(bedfile)
txtfiles.append(txtfile)
db += ".txt"
mm = MakeManager()
for txtfile in txtfiles:
outfile = txtfile + ".alignment"
cmd = 'cyntenator -t "({0} {1})" -h blast {2} > {3}'\
.format(txtfile, db, filteredlastfile, outfile)
mm.add((txtfile, db, filteredlastfile), outfile, cmd)
mm.write()
示例12: lastgenome
def lastgenome(args):
"""
%prog genome_A.fasta genome_B.fasta
Run LAST by calling LASTDB, LASTAL and LAST-SPLIT. The recipe is based on
tutorial here:
<https://github.com/mcfrith/last-genome-alignments>
The script runs the following steps:
$ lastdb -P0 -uNEAR -R01 Chr10A-NEAR Chr10A.fa
$ lastal -E0.05 -C2 Chr10A-NEAR Chr10B.fa | last-split -m1 | maf-swap | last-split -m1 -fMAF > Chr10A.Chr10B.1-1.maf
$ maf-convert -n blasttab Chr10A.Chr10B.1-1.maf > Chr10A.Chr10B.1-1.blast
Works with LAST v959.
"""
from jcvi.apps.grid import MakeManager
p = OptionParser(lastgenome.__doc__)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
gA, gB = args
mm = MakeManager()
bb = lambda x : op.basename(x).rsplit(".", 1)[0]
gA_pf, gB_pf = bb(gA), bb(gB)
# Build LASTDB
dbname = "-".join((gA_pf, "NEAR"))
dbfile = dbname + ".suf"
build_db_cmd = "lastdb -P0 -uNEAR -R01 {} {}".format(dbfile, gA)
mm.add(gA, dbfile, build_db_cmd)
# Run LASTAL
maffile = "{}.{}.1-1.maf".format(gA_pf, gB_pf)
lastal_cmd = "lastal -E0.05 -C2 {} {}".format(dbname, gB)
lastal_cmd += " | last-split -m1"
lastal_cmd += " | maf-swap"
lastal_cmd += " | last-split -m1 -fMAF > {}".format(maffile)
mm.add([dbfile, gB], maffile, lastal_cmd)
# Convert to BLAST format
blastfile = maffile.replace(".maf", ".blast")
convert_cmd = "maf-convert -n blasttab {} > {}".format(maffile, blastfile)
mm.add(maffile, blastfile, convert_cmd)
mm.write()
示例13: nucmer
def nucmer(args):
"""
%prog nucmer ref.fasta query.fasta
Run NUCMER using query against reference. Parallel implementation derived
from: <https://github.com/fritzsedlazeck/sge_mummer>
"""
from itertools import product
from jcvi.apps.grid import MakeManager
from jcvi.formats.base import split
p = OptionParser(nucmer.__doc__)
p.add_option("--chunks", type="int",
help="Split both query and subject into chunks")
p.set_params(prog="nucmer", params="-l 100 -c 500")
p.set_cpus()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
ref, query = args
cpus = opts.cpus
nrefs = nqueries = opts.chunks or int(cpus ** .5)
refdir = ref.split(".")[0] + "-outdir"
querydir = query.split(".")[0] + "-outdir"
reflist = split([ref, refdir, str(nrefs)]).names
querylist = split([query, querydir, str(nqueries)]).names
mm = MakeManager()
for i, (r, q) in enumerate(product(reflist, querylist)):
pf = "{0:04d}".format(i)
cmd = "nucmer -maxmatch"
cmd += " {0}".format(opts.extra)
cmd += " {0} {1} -p {2}".format(r, q, pf)
deltafile = pf + ".delta"
mm.add((r, q), deltafile, cmd)
print cmd
mm.write()
示例14: batch
def batch(args):
"""
%proj batch database.fasta project_dir output_dir
Run bwa in batch mode.
"""
p = OptionParser(batch.__doc__)
set_align_options(p)
p.set_sam_options()
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
ref_fasta, proj_dir, outdir = args
outdir = outdir.rstrip("/")
s3dir = None
if outdir.startswith("s3://"):
s3dir = outdir
outdir = op.basename(outdir)
mkdir(outdir)
mm = MakeManager()
for p, pf in iter_project(proj_dir):
targs = [ref_fasta] + p
cmd1, bamfile = mem(targs, opts)
if cmd1:
cmd1 = output_bam(cmd1, bamfile)
nbamfile = op.join(outdir, bamfile)
cmd2 = "mv {} {}".format(bamfile, nbamfile)
cmds = [cmd1, cmd2]
if s3dir:
cmd = "aws s3 cp {} {} --sse".format(nbamfile,
op.join(s3dir, bamfile))
cmds.append(cmd)
mm.add(p, nbamfile, cmds)
mm.write()
示例15: meryl
def meryl(args):
"""
%prog meryl folder
Run meryl on Illumina reads.
"""
p = OptionParser(meryl.__doc__)
p.add_option("-k", default=19, type="int", help="Kmer size")
p.set_cpus()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
folder, = args
K = opts.k
cpus = opts.cpus
mm = MakeManager()
for p, pf in iter_project(folder):
cmds = []
mss = []
for i, ip in enumerate(p):
ms = "{}{}.ms{}".format(pf, i + 1, K)
mss.append(ms)
cmd = "meryl -B -C -m {} -threads {}".format(K, cpus)
cmd += " -s {} -o {}".format(ip, ms)
cmds.append(cmd)
ams, bms = mss
pms = "{}.ms{}".format(pf, K)
cmd = "meryl -M add -s {} -s {} -o {}".format(ams, bms, pms)
cmds.append(cmd)
cmd = "rm -f {}.mcdat {}.mcidx {}.mcdat {}.mcidx".\
format(ams, ams, bms, bms)
cmds.append(cmd)
mm.add(p, pms + ".mcdat", cmds)
mm.write()