当前位置: 首页>>代码示例>>Python>>正文


Python Sizes.keys方法代码示例

本文整理汇总了Python中jcvi.formats.sizes.Sizes.keys方法的典型用法代码示例。如果您正苦于以下问题:Python Sizes.keys方法的具体用法?Python Sizes.keys怎么用?Python Sizes.keys使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在jcvi.formats.sizes.Sizes的用法示例。


在下文中一共展示了Sizes.keys方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: agp

# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def agp(args):
    """
    %prog agp main_results/ contigs.fasta

    Generate AGP file based on LACHESIS output.
    """
    p = OptionParser(agp.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    odir, contigsfasta = args
    fwagp = must_open(opts.outfile, 'w')
    orderingfiles = natsorted(iglob(odir, "*.ordering"))
    sizes = Sizes(contigsfasta).mapping
    contigs = set(sizes.keys())
    anchored = set()

    for ofile in orderingfiles:
        co = ContigOrdering(ofile)
        anchored |= set([x.contig_name for x in co])
        obj = op.basename(ofile).split('.')[0]
        co.write_agp(obj, sizes, fwagp)

    singletons = contigs - anchored
    logging.debug('Anchored: {}, Singletons: {}'.
                  format(len(anchored), len(singletons)))

    for s in natsorted(singletons):
        order_to_agp(s, [(s, "?")], sizes, fwagp)
开发者ID:xuanblo,项目名称:jcvi,代码行数:34,代码来源:hic.py

示例2: write_unplaced_agp

# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def write_unplaced_agp(agpfile, scaffolds, unplaced_agp):
    agp = AGP(agpfile)
    scaffolds_seen = set(x.component_id for x in agp)
    sizes = Sizes(scaffolds).mapping
    fwagp = must_open(unplaced_agp, "w")
    for s in sorted(sizes.keys()):
        if s in scaffolds_seen:
            continue
        order_to_agp(s, [(s, "?")], sizes, fwagp)
    logging.debug("Write unplaced AGP to `{0}`.".format(unplaced_agp))
开发者ID:yangjl,项目名称:jcvi,代码行数:12,代码来源:allmaps.py

示例3: main

# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def main(args):
    """
    %prog deltafile

    Plot one query. Extract the references that have major matches to this
    query. Control "major" by option --refcov.
    """
    p = OptionParser(main.__doc__)
    p.add_option("--refids", help="Use subset of contigs in the ref")
    p.add_option("--refcov", default=.01, type="float",
                 help="Minimum reference coverage [default: %default]")
    p.add_option("--all", default=False, action="store_true",
                 help="Plot one pdf file per ref in refidsfile [default: %default]")
    p.add_option("--color", default="similarity",
                 choices=("similarity", "direction", "none"),
                 help="Color the dots based on")
    p.add_option("--nolayout", default=False, action="store_true",
                 help="Do not rearrange contigs")
    p.set_align(pctid=0, hitlen=0)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    deltafile, = args
    reffasta, queryfasta = open(deltafile).readline().split()
    color = opts.color
    layout = not opts.nolayout
    prefix = op.basename(deltafile).split(".")[0]
    qsizes = Sizes(queryfasta).mapping
    rsizes = Sizes(reffasta).mapping

    refs = SetFile(opts.refids) if opts.refids else set(rsizes.keys())
    refcov = opts.refcov
    pctid = opts.pctid
    hitlen = opts.hitlen
    deltafile = filter([deltafile, "--pctid={0}".format(pctid),
                        "--hitlen={0}".format(hitlen)])

    if opts.all:
        for r in refs:
            pdffile = plot_some_queries([r], qsizes, rsizes, deltafile, refcov,
                                        prefix=prefix, color=color,
                                        layout=layout)
            if pdffile:
                sh("mv {0} {1}.pdf".format(pdffile, r))
    else:
        plot_some_queries(refs, qsizes, rsizes,
                          deltafile, refcov,
                          prefix=prefix, color=color, layout=layout)
开发者ID:Hensonmw,项目名称:jcvi,代码行数:52,代码来源:mummerplot.py

示例4: covfilter

# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def covfilter(args):
    """
    %prog covfilter blastfile fastafile

    Fastafile is used to get the sizes of the queries. Two filters can be
    applied, the id% and cov%.
    """
    p = OptionParser(covfilter.__doc__)
    p.add_option("--pctid", dest="pctid", default=90, type="int",
            help="Percentage identity cutoff [default: %default]")
    p.add_option("--pctcov", dest="pctcov", default=50, type="int",
            help="Percentage identity cutoff [default: %default]")
    p.add_option("--ids", dest="ids", default=None,
            help="Print out the ids that satisfy [default: %default]")
    p.add_option("--list", dest="list", default=False, action="store_true",
            help="List the id% and cov% per gene [default: %default]")
    set_outfile(p, outfile=None)

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    from jcvi.algorithms.supermap import supermap

    blastfile, fastafile = args
    sizes = Sizes(fastafile).mapping
    querysupermap = blastfile + ".query.supermap"
    if not op.exists(querysupermap):
        supermap(blastfile, filter="query")

    blastfile = querysupermap
    assert op.exists(blastfile)

    covered = 0
    mismatches = 0
    gaps = 0
    alignlen = 0
    queries = set()
    valid = set()
    blast = BlastSlow(querysupermap)
    for query, blines in blast.iter_hits():
        blines = list(blines)
        queries.add(query)

        # per gene report
        this_covered = 0
        this_alignlen = 0
        this_mismatches = 0
        this_gaps = 0

        for b in blines:
            this_covered += abs(b.qstart - b.qstop + 1)
            this_alignlen += b.hitlen
            this_mismatches += b.nmismatch
            this_gaps += b.ngaps

        this_identity = 100. - (this_mismatches + this_gaps) * 100. / this_alignlen
        this_coverage = this_covered * 100. / sizes[query]

        if opts.list:
            print "{0}\t{1:.1f}\t{2:.1f}".format(query, this_identity, this_coverage)

        if this_identity >= opts.pctid and this_coverage >= opts.pctcov:
            valid.add(query)

        covered += this_covered
        mismatches += this_mismatches
        gaps += this_gaps
        alignlen += this_alignlen

    mapped_count = len(queries)
    valid_count = len(valid)
    cutoff_message = "(id={0.pctid}% cov={0.pctcov}%)".format(opts)

    print >> sys.stderr, "Identity: {0} mismatches, {1} gaps, {2} alignlen".\
            format(mismatches, gaps, alignlen)
    total = len(sizes.keys())
    print >> sys.stderr, "Total mapped: {0} ({1:.1f}% of {2})".\
            format(mapped_count, mapped_count * 100. / total, total)
    print >> sys.stderr, "Total valid {0}: {1} ({2:.1f}% of {3})".\
            format(cutoff_message, valid_count, valid_count * 100. / total, total)
    print >> sys.stderr, "Average id = {0:.2f}%".\
            format(100 - (mismatches + gaps) * 100. / alignlen)

    queries_combined = sum(sizes[x] for x in queries)
    print >> sys.stderr, "Coverage: {0} covered, {1} total".\
            format(covered, queries_combined)
    print >> sys.stderr, "Average coverage = {0:.2f}%".\
            format(covered * 100. / queries_combined)

    if opts.ids:
        filename = opts.ids
        fw = must_open(filename, "w")
        for id in valid:
            print >> fw, id
        logging.debug("Queries beyond cutoffs {0} written to `{1}`.".\
                format(cutoff_message, filename))

    outfile = opts.outfile
#.........这里部分代码省略.........
开发者ID:bennyyu,项目名称:jcvi,代码行数:103,代码来源:blast.py

示例5: scaffold

# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def scaffold(args):
    """
    %prog scaffold ctgfasta linksfile

    Use the linksfile to build scaffolds. The linksfile can be
    generated by calling assembly.bundle.link() or assembly.bundle.bundle().
    Use --prefix to place the sequences with same prefix together. The final
    product is an AGP file.
    """
    from jcvi.algorithms.graph import nx
    from jcvi.formats.agp import order_to_agp

    p = OptionParser(scaffold.__doc__)
    p.add_option("--prefix", default=False, action="store_true",
            help="Keep IDs with same prefix together [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    ctgfasta, linksfile = args
    sizes = Sizes(ctgfasta).mapping
    logfile = "scaffold.log"
    fwlog = open(logfile, "w")

    pf = ctgfasta.rsplit(".", 1)[0]
    agpfile = pf + ".agp"
    fwagp = open(agpfile, "w")

    clinks = []
    g = nx.MultiGraph()  # use this to get connected components

    fp = open(linksfile)
    for row in fp:
        c = LinkLine(row)
        distance = max(c.distance, 50)

        g.add_edge(c.aseqid, c.bseqid,
                orientation=c.orientation, distance=distance)

    def get_bname(sname, prefix=False):
        return sname.rsplit("_", 1)[0] if prefix else "chr0"

    scaffoldbuckets = defaultdict(list)
    seqnames = sorted(sizes.keys())

    for h in nx.connected_component_subgraphs(g):
        partialorder = solve_component(h, sizes, fwlog)
        name = partialorder[0][0]
        bname = get_bname(name, prefix=opts.prefix)
        scaffoldbuckets[bname].append(partialorder)

    ctgbuckets = defaultdict(set)
    for name in seqnames:
        bname = get_bname(name, prefix=opts.prefix)
        ctgbuckets[bname].add(name)

    # Now the buckets contain a mixture of singletons and partially resolved
    # scaffolds. Print the scaffolds first then remaining singletons.
    scafname = "{0}.scf_{1:04d}"
    for bname, ctgs in sorted(ctgbuckets.items()):
        scaffolds = scaffoldbuckets[bname]
        scaffolded = set()
        ctgorder = []
        for scafID, scaf in enumerate(scaffolds):
            ctgorder = []
            for node, start, end, orientation in scaf:
                ctgorder.append((node, orientation))
                scaffolded.add(node)
            scaf = scafname.format(bname, scafID)
            order_to_agp(scaf, ctgorder, sizes, fwagp)
        singletons = sorted(ctgbuckets[bname] - scaffolded)
        nscaffolds = len(scaffolds)
        nsingletons = len(singletons)

        msg = "{0}: Scaffolds={1} Singletons={2}".\
            format(bname, nscaffolds, nsingletons)
        print >> sys.stderr, msg

        for singleton in singletons:
            ctgorder = [(singleton, "+")]
            order_to_agp(singleton, ctgorder, sizes, fwagp)

    fwagp.close()
    logging.debug("AGP file written to `{0}`.".format(agpfile))
开发者ID:bennyyu,项目名称:jcvi,代码行数:87,代码来源:sopra.py

示例6: scaffold

# 需要导入模块: from jcvi.formats.sizes import Sizes [as 别名]
# 或者: from jcvi.formats.sizes.Sizes import keys [as 别名]
def scaffold(args):
    """
    %prog scaffold ctgfasta agpfile

    Build scaffolds based on ordering in the AGP file.
    """
    from jcvi.formats.agp import AGP, bed, order_to_agp, build
    from jcvi.formats.bed import Bed

    p = OptionParser(scaffold.__doc__)
    p.add_option("--prefix", default=False, action="store_true",
            help="Keep IDs with same prefix together [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    ctgfasta, agpfile = args
    sizes = Sizes(ctgfasta).mapping

    pf = ctgfasta.rsplit(".", 1)[0]
    phasefile = pf + ".phases"
    fwphase = open(phasefile, "w")
    newagpfile = pf + ".new.agp"
    fwagp = open(newagpfile, "w")

    scaffoldbuckets = defaultdict(list)
    seqnames = sorted(sizes.keys())

    bedfile = bed([agpfile, "--nogaps", "--outfile=tmp"])
    bb = Bed(bedfile)
    for s, partialorder in bb.sub_beds():
        name = partialorder[0].accn
        bname = name.rsplit("_", 1)[0] if opts.prefix else s
        scaffoldbuckets[bname].append([(b.accn, b.strand) for b in partialorder])

    # Now the buckets contain a mixture of singletons and partially resolved
    # scaffolds. Print the scaffolds first then remaining singletons.
    for bname, scaffolds in sorted(scaffoldbuckets.items()):
        ctgorder = []
        singletons = set()
        for scaf in sorted(scaffolds):
            for node, orientation in scaf:
                ctgorder.append((node, orientation))
            if len(scaf) == 1:
                singletons.add(node)
        nscaffolds = len(scaffolds)
        nsingletons = len(singletons)
        if nsingletons == 1 and nscaffolds == 0:
            phase = 3
        elif nsingletons == 0 and nscaffolds == 1:
            phase = 2
        else:
            phase = 1

        msg = "{0}: Scaffolds={1} Singletons={2} Phase={3}".\
            format(bname, nscaffolds, nsingletons, phase)
        print >> sys.stderr, msg
        print >> fwphase, "\t".join((bname, str(phase)))

        order_to_agp(bname, ctgorder, sizes, fwagp)

    fwagp.close()
    os.remove(bedfile)

    fastafile = "final.fasta"
    build([newagpfile, ctgfasta, fastafile])
    tidy([fastafile])
开发者ID:bennyyu,项目名称:jcvi,代码行数:70,代码来源:finish.py


注:本文中的jcvi.formats.sizes.Sizes.keys方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。