当前位置: 首页>>代码示例>>Python>>正文


Python fasta.Fasta类代码示例

本文整理汇总了Python中jcvi.formats.fasta.Fasta的典型用法代码示例。如果您正苦于以下问题:Python Fasta类的具体用法?Python Fasta怎么用?Python Fasta使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Fasta类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main(arg):
    f = Fasta(arg)
    s = [str(x.seq) for k, x in f.iteritems_ordered()]
    m = s[0]
    for z in s[1:]:
        m = m.replace(z, "")
    print Seq(m).translate().strip("*")
开发者ID:tanghaibao,项目名称:online-judge,代码行数:7,代码来源:discont.py

示例2: digest

def digest(args):
    """
    %prog digest fastafile NspI,BfuCI

    Digest fasta sequences to map restriction site positions.
    """
    p = OptionParser(digest.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, enzymes = args
    enzymes = enzymes.split(",")
    enzymes = [x for x in AllEnzymes if str(x) in enzymes]
    f = Fasta(fastafile, lazy=True)
    fw = must_open(opts.outfile, "w")

    header = ["Contig", "Length"] + [str(x) for x in enzymes]
    print("\t".join(header), file=fw)
    for name, rec in f.iteritems_ordered():
        row = [name, len(rec)]
        for e in enzymes:
            pos = e.search(rec.seq)
            pos = "na" if not pos else "|".join(str(x) for x in pos)
            row.append(pos)
        print("\t".join(str(x) for x in row), file=fw)
开发者ID:tanghaibao,项目名称:jcvi,代码行数:28,代码来源:restriction.py

示例3: flip

def flip(args):
    """
    %prog flip fastafile

    Go through each FASTA record, check against Genbank file and determines
    whether or not to flip the sequence. This is useful before updates of the
    sequences to make sure the same orientation is used.
    """
    p = OptionParser(flip.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    outfastafile = fastafile.rsplit(".", 1)[0] + ".flipped.fasta"
    fo = open(outfastafile, "w")
    f = Fasta(fastafile, lazy=True)
    for name, rec in f.iteritems_ordered():
        tmpfasta = "a.fasta"
        fw = open(tmpfasta, "w")
        SeqIO.write([rec], fw, "fasta")
        fw.close()

        o = overlap([tmpfasta, name])
        if o.orientation == '-':
            rec.seq = rec.seq.reverse_complement()

        SeqIO.write([rec], fo, "fasta")
        os.remove(tmpfasta)
开发者ID:Hensonmw,项目名称:jcvi,代码行数:30,代码来源:goldenpath.py

示例4: score

def score(args):
    """
    %prog score blastfile query.fasta A.ids

    Add up the scores for each query seq. Go through the lines and for each
    query sequence, add up the scores when subject is in each pile by A.ids.
    """
    from jcvi.formats.base import SetFile
    from jcvi.formats.fasta import Fasta

    p = OptionParser(score.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    blastfile, fastafile, idsfile = args
    ids = SetFile(idsfile)

    blast = Blast(blastfile)
    scores = defaultdict(int)
    for b in blast:
        query = b.query
        subject = b.subject
        if subject not in ids:
            continue
        scores[query] += b.score

    logging.debug("A total of {0} ids loaded.".format(len(ids)))

    f = Fasta(fastafile)
    for s in f.iterkeys_ordered():
        sc = scores.get(s, 0)
        print "\t".join((s, str(sc)))
开发者ID:ascendo,项目名称:jcvi,代码行数:34,代码来源:blast.py

示例5: count

def count(args):
    """
    %prog count cdhit.consensus.fasta

    Scan the headers for the consensus clusters and count the number of reads.
    """
    from jcvi.formats.fasta import Fasta
    from jcvi.graphics.histogram import stem_leaf_plot
    from jcvi.utils.cbook import SummaryStats

    p = OptionParser(count.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    f = Fasta(fastafile, lazy=True)
    sizes = []
    for desc, rec in f.iterdescriptions_ordered():
        if desc.startswith("singleton"):
            sizes.append(1)
            continue
        # consensus_for_cluster_0 with 63 sequences
        name, w, size, seqs = desc.split()
        assert w == "with"
        sizes.append(int(size))

    s = SummaryStats(sizes)
    print >> sys.stderr, s
    stem_leaf_plot(s.data, 0, 100, 20, title="Cluster size")
开发者ID:zachary-zzc,项目名称:jcvi,代码行数:31,代码来源:tgbs.py

示例6: main

def main(arg):
    f = Fasta(arg)
    G = {}
    iG = set()
    for a in f.keys():
        for b in f.keys():
            if a == b:
                continue

            ov = get_overlap(a, b, f)
            if not ov:
                continue
            a, b, i = ov
            G[a] = (a, b, i)
            iG.add(b)

    # linearize graph
    start = set(f.keys()) - iG
    assert len(start) == 1
    z = list(start)[0]
    seq = str(f[z].seq)

    while z in G:
        a, b, i = G[z]
        seq = seq[:-i] + str(f[b].seq)
        z = b
    print seq
开发者ID:tanghaibao,项目名称:online-judge,代码行数:27,代码来源:superstring.py

示例7: fpkm

def fpkm(args):
    """
    %prog fpkm fastafile *.bam

    Calculate FPKM values from BAM file.
    """
    p = OptionParser(fpkm.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    fastafile = args[0]
    bamfiles = args[1:]
    # Create a DUMMY gff file for cuffdiff
    gffile = fastafile.rsplit(".", 1)[0] + ".gff"
    if need_update(fastafile, gffile):
        fw = open(gffile, "w")
        f = Fasta(fastafile, lazy=True)
        for key, size in f.itersizes_ordered():
            print >> fw, "\t".join(str(x) for x in (key, "dummy", "transcript",\
                1, size, ".", ".", ".", "ID=" + key))
        fw.close()
        logging.debug("Dummy GFF created: {0}".format(gffile))

    cmd = "cuffdiff {0} {1}".format(gffile, " ".join(bamfiles))
    sh(cmd)
开发者ID:arvin580,项目名称:jcvi,代码行数:27,代码来源:sam.py

示例8: filter

def filter(args):
    """
    %prog filter consensus.fasta

    Filter consensus sequence with min cluster size.
    """
    from jcvi.formats.fasta import Fasta, SeqIO

    p = OptionParser(filter.__doc__)
    p.add_option("--minsize", default=10, type="int",
                 help="Minimum cluster size")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    minsize = opts.minsize
    f = Fasta(fastafile, lazy=True)
    fw = must_open(opts.outfile, "w")
    for desc, rec in f.iterdescriptions_ordered():
        if desc.startswith("singleton"):
            continue
        # consensus_for_cluster_0 with 63 sequences
        name, w, size, seqs = desc.split()
        assert w == "with"
        size = int(size)
        if size < minsize:
            continue
        SeqIO.write(rec, fw, "fasta")
开发者ID:kvefimov,项目名称:jcvi_062915,代码行数:31,代码来源:cdhit.py

示例9: main

def main(arg):
    f = Fasta(arg)
    for a in f.keys():
        for b in f.keys():
            if a == b:
                continue
            if check_overlap(a, b, f):
                print a, b
开发者ID:tanghaibao,项目名称:online-judge,代码行数:8,代码来源:overlapgraph.py

示例10: main

def main(filename):
    f = Fasta(filename)
    gc_store = []
    for key, rec in f.iteritems():
        gc = sum(rec.seq.count(x) for x in 'GCgc') * 100. / len(rec.seq)
        gc_store.append((gc, key))
    gc, key = max(gc_store)
    print key
    print gc
开发者ID:tanghaibao,项目名称:online-judge,代码行数:9,代码来源:gc.py

示例11: A50

def A50(args):
    """
    %prog A50 contigs_A.fasta contigs_B.fasta ...

    Plots A50 graphics, see blog post (http://blog.malde.org/index.php/a50/)
    """
    p = OptionParser(A50.__doc__)
    p.add_option("--overwrite", default=False, action="store_true",
            help="overwrite .rplot file if exists [default: %default]")
    p.add_option("--cutoff", default=0, type="int", dest="cutoff",
            help="use contigs above certain size [default: %default]")
    p.add_option("--stepsize", default=10, type="int", dest="stepsize",
            help="stepsize for the distribution [default: %default]")
    opts, args = p.parse_args(args)

    if not args:
        sys.exit(p.print_help())

    import numpy as np
    from jcvi.utils.table import loadtable

    stepsize = opts.stepsize  # use stepsize to speed up drawing
    rplot = "A50.rplot"
    if not op.exists(rplot) or opts.overwrite:
        fw = open(rplot, "w")
        header = "\t".join(("index", "cumsize", "fasta"))
        statsheader = ("Fasta", "L50", "N50", "Min", "Max", "Average", "Sum",
                "Counts")
        statsrows = []
        print >>fw, header
        for fastafile in args:
            f = Fasta(fastafile, index=False)
            ctgsizes = [length for k, length in f.itersizes()]
            ctgsizes = np.array(ctgsizes)

            a50, l50, n50 = calculate_A50(ctgsizes, cutoff=opts.cutoff)
            cmin, cmax, cmean = min(ctgsizes), max(ctgsizes), np.mean(ctgsizes)
            csum, counts = np.sum(ctgsizes), len(ctgsizes)
            cmean = int(round(cmean))
            statsrows.append((fastafile, l50, n50, cmin, cmax, cmean, csum,
                counts))

            logging.debug("`{0}` ctgsizes: {1}".format(fastafile, ctgsizes))

            tag = "{0} (L50={1})".format(\
                    op.basename(fastafile).rsplit(".", 1)[0], l50)
            logging.debug(tag)

            for i, s in zip(xrange(0, len(a50), stepsize), a50[::stepsize]):
                print >> fw, "\t".join((str(i), str(s / 1000000.), tag))
        fw.close()

        table = loadtable(statsheader, statsrows)
        print >> sys.stderr, table

    generate_plot(rplot)
开发者ID:JinfengChen,项目名称:jcvi,代码行数:56,代码来源:assembly.py

示例12: main

def main(arg):
    f = Fasta(arg)
    key, rev = f.iteritems().next()
    s = rev.seq
    for i in xrange(len(s)):
        for l in xrange(4, 13):
            if i + l > len(s):
                continue
            ns = s[i:i+l]
            if str(ns) == str(ns.reverse_complement()):
                print i + 1, l
开发者ID:tanghaibao,项目名称:online-judge,代码行数:11,代码来源:revpalind.py

示例13: get_GC3

def get_GC3(cdsfile):
    from jcvi.formats.fasta import Fasta

    f = Fasta(cdsfile, lazy=True)
    GC3 = {}
    for name, rec in f.iteritems_ordered():
        positions = rec.seq[2::3].upper()
        gc_counts = sum(1 for x in positions if x in "GC")
        gc_ratio = gc_counts * 1. / len(positions)
        GC3[name] = gc_ratio

    return GC3
开发者ID:bennyyu,项目名称:jcvi,代码行数:12,代码来源:ks.py

示例14: main

def main(arg):
    f = Fasta(arg)
    key, rec = f.iteritems().next()
    s = rec.seq
    store = defaultdict(int)
    for i in xrange(len(s) - 3):
        kmer = s[i:i+4]
        assert len(kmer) == 4
        store[kmer] += 1

    counts = [store.get("".join(x), 0) for x in product("ACGT", repeat=4)]
    print " ".join(str(x) for x in counts)
开发者ID:tanghaibao,项目名称:online-judge,代码行数:12,代码来源:kmercount.py

示例15: n50

def n50(args):
    """
    %prog n50 filename

    Given a file with a list of numbers denoting contig lengths, calculate N50.
    Input file can be both FASTA or a list of sizes.
    """
    from jcvi.graphics.histogram import loghistogram

    p = OptionParser(n50.__doc__)
    p.add_option(
        "--print0", default=False, action="store_true", help="Print size and L50 to stdout [default: %default]"
    )

    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    ctgsizes = []

    # Guess file format
    probe = open(args[0]).readline()[0]
    isFasta = probe == ">"
    if isFasta:
        for filename in args:
            f = Fasta(filename)
            ctgsizes += list(b for a, b in f.itersizes())

    else:
        for row in must_open(args):
            try:
                ctgsize = int(row.split()[-1])
            except ValueError:
                continue
            ctgsizes.append(ctgsize)

    a50, l50, nn50 = calculate_A50(ctgsizes)
    sumsize = sum(ctgsizes)
    minsize = min(ctgsizes)
    maxsize = max(ctgsizes)
    n = len(ctgsizes)
    print >> sys.stderr, ", ".join(args)

    summary = (sumsize, l50, nn50, minsize, maxsize, n)
    print >> sys.stderr, " ".join("{0}={1}".format(a, b) for a, b in zip(header, summary))
    loghistogram(ctgsizes)

    if opts.print0:
        print "\t".join(str(x) for x in (",".join(args), sumsize, l50))

    return zip(header, summary)
开发者ID:rrane,项目名称:jcvi,代码行数:52,代码来源:base.py


注:本文中的jcvi.formats.fasta.Fasta类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。