本文整理汇总了Python中jcvi.formats.fasta.Fasta类的典型用法代码示例。如果您正苦于以下问题:Python Fasta类的具体用法?Python Fasta怎么用?Python Fasta使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Fasta类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main(arg):
f = Fasta(arg)
s = [str(x.seq) for k, x in f.iteritems_ordered()]
m = s[0]
for z in s[1:]:
m = m.replace(z, "")
print Seq(m).translate().strip("*")
示例2: digest
def digest(args):
"""
%prog digest fastafile NspI,BfuCI
Digest fasta sequences to map restriction site positions.
"""
p = OptionParser(digest.__doc__)
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
fastafile, enzymes = args
enzymes = enzymes.split(",")
enzymes = [x for x in AllEnzymes if str(x) in enzymes]
f = Fasta(fastafile, lazy=True)
fw = must_open(opts.outfile, "w")
header = ["Contig", "Length"] + [str(x) for x in enzymes]
print("\t".join(header), file=fw)
for name, rec in f.iteritems_ordered():
row = [name, len(rec)]
for e in enzymes:
pos = e.search(rec.seq)
pos = "na" if not pos else "|".join(str(x) for x in pos)
row.append(pos)
print("\t".join(str(x) for x in row), file=fw)
示例3: flip
def flip(args):
"""
%prog flip fastafile
Go through each FASTA record, check against Genbank file and determines
whether or not to flip the sequence. This is useful before updates of the
sequences to make sure the same orientation is used.
"""
p = OptionParser(flip.__doc__)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
fastafile, = args
outfastafile = fastafile.rsplit(".", 1)[0] + ".flipped.fasta"
fo = open(outfastafile, "w")
f = Fasta(fastafile, lazy=True)
for name, rec in f.iteritems_ordered():
tmpfasta = "a.fasta"
fw = open(tmpfasta, "w")
SeqIO.write([rec], fw, "fasta")
fw.close()
o = overlap([tmpfasta, name])
if o.orientation == '-':
rec.seq = rec.seq.reverse_complement()
SeqIO.write([rec], fo, "fasta")
os.remove(tmpfasta)
示例4: score
def score(args):
"""
%prog score blastfile query.fasta A.ids
Add up the scores for each query seq. Go through the lines and for each
query sequence, add up the scores when subject is in each pile by A.ids.
"""
from jcvi.formats.base import SetFile
from jcvi.formats.fasta import Fasta
p = OptionParser(score.__doc__)
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
blastfile, fastafile, idsfile = args
ids = SetFile(idsfile)
blast = Blast(blastfile)
scores = defaultdict(int)
for b in blast:
query = b.query
subject = b.subject
if subject not in ids:
continue
scores[query] += b.score
logging.debug("A total of {0} ids loaded.".format(len(ids)))
f = Fasta(fastafile)
for s in f.iterkeys_ordered():
sc = scores.get(s, 0)
print "\t".join((s, str(sc)))
示例5: count
def count(args):
"""
%prog count cdhit.consensus.fasta
Scan the headers for the consensus clusters and count the number of reads.
"""
from jcvi.formats.fasta import Fasta
from jcvi.graphics.histogram import stem_leaf_plot
from jcvi.utils.cbook import SummaryStats
p = OptionParser(count.__doc__)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
fastafile, = args
f = Fasta(fastafile, lazy=True)
sizes = []
for desc, rec in f.iterdescriptions_ordered():
if desc.startswith("singleton"):
sizes.append(1)
continue
# consensus_for_cluster_0 with 63 sequences
name, w, size, seqs = desc.split()
assert w == "with"
sizes.append(int(size))
s = SummaryStats(sizes)
print >> sys.stderr, s
stem_leaf_plot(s.data, 0, 100, 20, title="Cluster size")
示例6: main
def main(arg):
f = Fasta(arg)
G = {}
iG = set()
for a in f.keys():
for b in f.keys():
if a == b:
continue
ov = get_overlap(a, b, f)
if not ov:
continue
a, b, i = ov
G[a] = (a, b, i)
iG.add(b)
# linearize graph
start = set(f.keys()) - iG
assert len(start) == 1
z = list(start)[0]
seq = str(f[z].seq)
while z in G:
a, b, i = G[z]
seq = seq[:-i] + str(f[b].seq)
z = b
print seq
示例7: fpkm
def fpkm(args):
"""
%prog fpkm fastafile *.bam
Calculate FPKM values from BAM file.
"""
p = OptionParser(fpkm.__doc__)
opts, args = p.parse_args(args)
if len(args) < 2:
sys.exit(not p.print_help())
fastafile = args[0]
bamfiles = args[1:]
# Create a DUMMY gff file for cuffdiff
gffile = fastafile.rsplit(".", 1)[0] + ".gff"
if need_update(fastafile, gffile):
fw = open(gffile, "w")
f = Fasta(fastafile, lazy=True)
for key, size in f.itersizes_ordered():
print >> fw, "\t".join(str(x) for x in (key, "dummy", "transcript",\
1, size, ".", ".", ".", "ID=" + key))
fw.close()
logging.debug("Dummy GFF created: {0}".format(gffile))
cmd = "cuffdiff {0} {1}".format(gffile, " ".join(bamfiles))
sh(cmd)
示例8: filter
def filter(args):
"""
%prog filter consensus.fasta
Filter consensus sequence with min cluster size.
"""
from jcvi.formats.fasta import Fasta, SeqIO
p = OptionParser(filter.__doc__)
p.add_option("--minsize", default=10, type="int",
help="Minimum cluster size")
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
fastafile, = args
minsize = opts.minsize
f = Fasta(fastafile, lazy=True)
fw = must_open(opts.outfile, "w")
for desc, rec in f.iterdescriptions_ordered():
if desc.startswith("singleton"):
continue
# consensus_for_cluster_0 with 63 sequences
name, w, size, seqs = desc.split()
assert w == "with"
size = int(size)
if size < minsize:
continue
SeqIO.write(rec, fw, "fasta")
示例9: main
def main(arg):
f = Fasta(arg)
for a in f.keys():
for b in f.keys():
if a == b:
continue
if check_overlap(a, b, f):
print a, b
示例10: main
def main(filename):
f = Fasta(filename)
gc_store = []
for key, rec in f.iteritems():
gc = sum(rec.seq.count(x) for x in 'GCgc') * 100. / len(rec.seq)
gc_store.append((gc, key))
gc, key = max(gc_store)
print key
print gc
示例11: A50
def A50(args):
"""
%prog A50 contigs_A.fasta contigs_B.fasta ...
Plots A50 graphics, see blog post (http://blog.malde.org/index.php/a50/)
"""
p = OptionParser(A50.__doc__)
p.add_option("--overwrite", default=False, action="store_true",
help="overwrite .rplot file if exists [default: %default]")
p.add_option("--cutoff", default=0, type="int", dest="cutoff",
help="use contigs above certain size [default: %default]")
p.add_option("--stepsize", default=10, type="int", dest="stepsize",
help="stepsize for the distribution [default: %default]")
opts, args = p.parse_args(args)
if not args:
sys.exit(p.print_help())
import numpy as np
from jcvi.utils.table import loadtable
stepsize = opts.stepsize # use stepsize to speed up drawing
rplot = "A50.rplot"
if not op.exists(rplot) or opts.overwrite:
fw = open(rplot, "w")
header = "\t".join(("index", "cumsize", "fasta"))
statsheader = ("Fasta", "L50", "N50", "Min", "Max", "Average", "Sum",
"Counts")
statsrows = []
print >>fw, header
for fastafile in args:
f = Fasta(fastafile, index=False)
ctgsizes = [length for k, length in f.itersizes()]
ctgsizes = np.array(ctgsizes)
a50, l50, n50 = calculate_A50(ctgsizes, cutoff=opts.cutoff)
cmin, cmax, cmean = min(ctgsizes), max(ctgsizes), np.mean(ctgsizes)
csum, counts = np.sum(ctgsizes), len(ctgsizes)
cmean = int(round(cmean))
statsrows.append((fastafile, l50, n50, cmin, cmax, cmean, csum,
counts))
logging.debug("`{0}` ctgsizes: {1}".format(fastafile, ctgsizes))
tag = "{0} (L50={1})".format(\
op.basename(fastafile).rsplit(".", 1)[0], l50)
logging.debug(tag)
for i, s in zip(xrange(0, len(a50), stepsize), a50[::stepsize]):
print >> fw, "\t".join((str(i), str(s / 1000000.), tag))
fw.close()
table = loadtable(statsheader, statsrows)
print >> sys.stderr, table
generate_plot(rplot)
示例12: main
def main(arg):
f = Fasta(arg)
key, rev = f.iteritems().next()
s = rev.seq
for i in xrange(len(s)):
for l in xrange(4, 13):
if i + l > len(s):
continue
ns = s[i:i+l]
if str(ns) == str(ns.reverse_complement()):
print i + 1, l
示例13: get_GC3
def get_GC3(cdsfile):
from jcvi.formats.fasta import Fasta
f = Fasta(cdsfile, lazy=True)
GC3 = {}
for name, rec in f.iteritems_ordered():
positions = rec.seq[2::3].upper()
gc_counts = sum(1 for x in positions if x in "GC")
gc_ratio = gc_counts * 1. / len(positions)
GC3[name] = gc_ratio
return GC3
示例14: main
def main(arg):
f = Fasta(arg)
key, rec = f.iteritems().next()
s = rec.seq
store = defaultdict(int)
for i in xrange(len(s) - 3):
kmer = s[i:i+4]
assert len(kmer) == 4
store[kmer] += 1
counts = [store.get("".join(x), 0) for x in product("ACGT", repeat=4)]
print " ".join(str(x) for x in counts)
示例15: n50
def n50(args):
"""
%prog n50 filename
Given a file with a list of numbers denoting contig lengths, calculate N50.
Input file can be both FASTA or a list of sizes.
"""
from jcvi.graphics.histogram import loghistogram
p = OptionParser(n50.__doc__)
p.add_option(
"--print0", default=False, action="store_true", help="Print size and L50 to stdout [default: %default]"
)
opts, args = p.parse_args(args)
if len(args) < 1:
sys.exit(not p.print_help())
ctgsizes = []
# Guess file format
probe = open(args[0]).readline()[0]
isFasta = probe == ">"
if isFasta:
for filename in args:
f = Fasta(filename)
ctgsizes += list(b for a, b in f.itersizes())
else:
for row in must_open(args):
try:
ctgsize = int(row.split()[-1])
except ValueError:
continue
ctgsizes.append(ctgsize)
a50, l50, nn50 = calculate_A50(ctgsizes)
sumsize = sum(ctgsizes)
minsize = min(ctgsizes)
maxsize = max(ctgsizes)
n = len(ctgsizes)
print >> sys.stderr, ", ".join(args)
summary = (sumsize, l50, nn50, minsize, maxsize, n)
print >> sys.stderr, " ".join("{0}={1}".format(a, b) for a, b in zip(header, summary))
loghistogram(ctgsizes)
if opts.print0:
print "\t".join(str(x) for x in (",".join(args), sumsize, l50))
return zip(header, summary)