本文整理汇总了 Python 中 jcvi.formats.fasta.SeqIO.parse 方法的典型用法代码示例。如果您正苦于以下问题：Python SeqIO.parse 方法的具体用法？Python SeqIO.parse 怎么用？Python SeqIO.parse 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 jcvi.formats.fasta.SeqIO 的用法示例。
在下文中一共展示了SeqIO.parse方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: phase
# 需要导入模块: from jcvi.formats.fasta import SeqIO [as 别名]
# 或者: from jcvi.formats.fasta.SeqIO import parse [as 别名]
def phase(accession):
    """
    Return the phase and record length for a GenBank accession.

    The record is read from ``gb/<accession>.gb``.  When that file does not
    exist yet, entrez() is invoked first with ``--outdir=gb --format=gb``
    (presumably fetching the record; confirm against entrez()).

    Args:
        accession: accession identifier; also the stem of the cached file.

    Returns:
        Tuple ``(ph, length)`` where ``ph`` is the first value returned by
        get_phase() for the record and ``length`` is ``len(rec)``.

    Raises:
        StopIteration: if the GenBank file yields no record.
    """
    gbdir = "gb"
    gbfile = op.join(gbdir, accession + ".gb")
    if not op.exists(gbfile):
        # Only call out when no local copy exists; later calls reuse the file.
        entrez([accession, "--skipcheck", "--outdir=" + gbdir, "--format=gb"])

    # Builtin next() works on Python 2.6+ and Python 3 alike, whereas the
    # ``.next()`` method only exists on Python 2 iterators.
    rec = next(SeqIO.parse(gbfile, "gb"))
    ph, _ = get_phase(rec)  # second return value (keywords) is unused here
    return ph, len(rec)
示例2: install
# 需要导入模块: from jcvi.formats.fasta import SeqIO [as 别名]
# 或者: from jcvi.formats.fasta.SeqIO import parse [as 别名]
def install(args):
    """
    %prog install patchers.bed patchers.fasta backbone.fasta alt.fasta
    Install patches into backbone, using sequences from alternative assembly.
    The patches sequences are generated via jcvi.assembly.patch.fill().
    The output is a bedfile that can be converted to AGP using
    jcvi.formats.agp.frombed().
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is runtime output and is kept verbatim.
    from jcvi.apps.align import blast
    from jcvi.formats.fasta import SeqIO

    parser = OptionParser(install.__doc__)
    parser.set_rclip(rclip=1)
    parser.add_option("--maxsize", default=300000, type="int",
                      help="Maximum size of patchers to be replaced [default: %default]")
    parser.add_option("--prefix", help="Prefix of the new object [default: %default]")
    parser.add_option("--strict", default=False, action="store_true",
                      help="Only update if replacement has no gaps [default: %default]")
    opts, args = parser.parse_args(args)

    if len(args) != 4:
        sys.exit(not parser.print_help())

    pbed, pfasta, bbfasta, altfasta = args

    # Align the patchers against the alternative assembly, then derive the
    # paired "before" (backbone) and "after" (alternative) intervals.
    blastfile = blast([altfasta, pfasta, "--wordsize=100", "--pctid=99"])
    beforebed, afterbed = blast_to_twobeds(
        blastfile, Bed(pbed).order, rclip=opts.rclip, maxsize=opts.maxsize)

    beforefasta = fastaFromBed(beforebed, bbfasta, name=True, stranded=True)
    afterfasta = fastaFromBed(afterbed, altfasta, name=True, stranded=True)

    def gap_bases(record):
        # Count of N bases in either case.
        return record.seq.count('n') + record.seq.count('N')

    # Walk the two FASTA files in lockstep and collect the ids of the
    # replacements that must be rejected.
    exclude = set()
    before_records = SeqIO.parse(beforefasta, "fasta")
    after_records = SeqIO.parse(afterfasta, "fasta")
    for before_rec, after_rec in zip(before_records, after_records):
        before_ns = gap_bases(before_rec)
        after_ns = gap_bases(after_rec)
        if opts.strict:
            # Strict mode: accept only gap-free replacements.
            accepted = after_ns == 0
        else:
            # Default: accept only replacements with strictly fewer Ns.
            accepted = after_ns < before_ns
        if not accepted:
            exclude.add(before_rec.id)

    logging.debug("Ignore {0} updates because of decreasing quality."
                  .format(len(exclude)))

    def without_excluded(bedfile):
        # Load bedfile in file order and keep the lines not excluded above.
        kept = Bed()
        kept.extend([line for line in Bed(bedfile, sorted=False)
                     if line.accn not in exclude])
        return kept

    afbed = without_excluded(beforebed)
    bfbed = without_excluded(afterbed)

    abedfile = "before.filtered.bed"
    bbedfile = "after.filtered.bed"
    afbed.print_to_file(abedfile)
    bfbed.print_to_file(bbedfile)

    shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix)
示例3: weblogo
# 需要导入模块: from jcvi.formats.fasta import SeqIO [as 别名]
# 或者: from jcvi.formats.fasta.SeqIO import parse [as 别名]
def weblogo(args):
    """
    %prog weblogo [fastafile|fastqfile]
    Extract base composition for reads
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim.  What the function does:
    #   * reads up to --nreads records from a FASTA/FASTQ file,
    #   * tallies A/T/C/G counts at the first and last N positions,
    #   * writes L.fasta / R.fasta (first / last N bases of every read),
    #   * runs the external `weblogo` command on both files,
    #   * saves the count matrices to L.ATCG.csv / R.ATCG.csv,
    #   * writes the most common 4-base prefixes (L4.common) and 3-base
    #     suffixes (R3.common).
    import numpy as np
    from jcvi.utils.progressbar import ProgressBar, Percentage, Bar, ETA

    p = OptionParser(weblogo.__doc__)
    p.add_option("-N", default=10, type="int",
                 help="Count the first and last N bases")
    p.add_option("--nreads", default=1000000, type="int",
                 help="Parse first N reads")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    N = opts.N
    nreads = opts.nreads

    pat = "ATCG"
    # Rows follow the order of `pat`; columns are read positions.
    L = np.zeros((4, N), dtype="int32")
    R = np.zeros((4, N), dtype="int32")
    base_row = dict((base, i) for (i, base) in enumerate(pat))
    L4, R3 = Counter(), Counter()

    widgets = ['Parse reads: ', Percentage(), ' ',
               Bar(marker='>', left='[', right=']'), ' ', ETA()]
    pr = ProgressBar(maxval=nreads, term_width=60, widgets=widgets).start()

    fastq = fastqfile.endswith(".fastq")
    it = iter_fastq(fastqfile) if fastq else \
        SeqIO.parse(must_open(fastqfile), "fasta")

    k = 0
    # Context managers close both outputs even if parsing fails midway.
    with open("L.fasta", "w") as fw_L, open("R.fasta", "w") as fw_R:
        for rec in it:
            k += 1
            # Stop before touching the progress bar so that it is never
            # handed a value larger than its maxval (nreads).
            if k > nreads:
                break
            if k % 1000 == 0:
                pr.update(k)
            if rec is None:
                break

            s = str(rec.seq)
            for i, base in enumerate(s[:N]):
                if base in base_row:
                    L[base_row[base]][i] += 1
            # Walk the tail backwards so that column N-1 always holds the
            # last base, even for reads shorter than N.
            for j, base in enumerate(s[-N:][::-1]):
                if base in base_row:
                    R[base_row[base]][N - 1 - j] += 1

            L4[s[:4]] += 1
            R3[s[-3:]] += 1

            fw_L.write(">{0}\n{1}\n".format(k, s[:N]))
            fw_R.write(">{0}\n{1}\n".format(k, s[-N:]))

    cmd = "weblogo -F png -s large -f {0}.fasta -o {0}.png"
    cmd += " --color-scheme classic --composition none -U probability"
    cmd += " --title {1}"

    # Titles report the actual window size instead of a hard-coded 10.
    sh(cmd.format('L', "First_{0}_bases".format(N)))
    sh(cmd.format('R', "Last_{0}_bases".format(N)))

    np.savetxt("L.{0}.csv".format(pat), L, delimiter=',', fmt="%d")
    np.savetxt("R.{0}.csv".format(pat), R, delimiter=',', fmt="%d")

    # Tab-separated "<kmer>\t<count>" lines, most frequent first.
    for outfile, counter in (("L4.common", L4), ("R3.common", R3)):
        with open(outfile, "w") as fw:
            for kmer, count in counter.most_common(N):
                fw.write("\t".join((kmer, str(count))) + "\n")