当前位置: 首页>>代码示例>>Python>>正文


Python Bed.setName方法代码示例

本文整理汇总了Python中CGAT.Bed.setName方法的典型用法代码示例。如果您正苦于以下问题：Python Bed.setName方法的具体用法？Python Bed.setName怎么用？Python Bed.setName使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在CGAT.Bed的用法示例。


在下文中一共展示了Bed.setName方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: exportSequencesFromBedFile

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import setName [as 别名]
def exportSequencesFromBedFile( infile, outfile, masker = None, mode = "intervals" ):
    '''Export sequences for intervals in :term:`bed`-formatted *infile*
    to :term:`fasta` formatted *outfile*.

    The genome is read from the indexed fasta file located at
    ``PARAMS["genome_dir"]/PARAMS["genome"]``.

    :param infile: name of the bed file; a trailing ``.bed.gz`` is
        snipped off to obtain the track name used as id prefix.
    :param outfile: name of the fasta file to write.
    :param masker: passed through unchanged to ``maskSequences``.
    :param mode: ``"intervals"`` writes one sequence per interval;
        ``"leftright"`` writes two flanking sequences per interval, each
        as long as the interval itself (clipped at position 0 and at the
        contig end). Any other value collects no sequences, so an empty
        file is written.

    Both file handles are closed even if reading, sequence retrieval or
    masking raises.
    '''

    track = P.snip( infile, ".bed.gz" )

    fasta = IndexedFasta.IndexedFasta( os.path.join( PARAMS["genome_dir"], PARAMS["genome"] ) )
    outs = IOTools.openFile( outfile, "w")

    try:
        ids, seqs = [], []
        inf = IOTools.openFile( infile )
        try:
            # Bed.setName wraps the iterator - presumably it supplies a
            # name for entries that lack one; confirm against CGAT.Bed.
            for bed in Bed.setName( Bed.iterator( inf ) ):
                lcontig = fasta.getLength( bed.contig )

                if mode == "intervals":
                    seqs.append( fasta.getSequence( bed.contig, "+", bed.start, bed.end) )
                    ids.append( "%s_%s %s:%i..%i" % (track, bed.name, bed.contig, bed.start, bed.end) )

                elif mode == "leftright":
                    l = bed.end - bed.start

                    # left flank: window of the interval's length that
                    # ends at bed.start, clipped at the contig start
                    start, end = max( 0, bed.start - l ), bed.end - l
                    ids.append( "%s_%s_l %s:%i..%i" % (track, bed.name, bed.contig, start, end) )
                    seqs.append( fasta.getSequence( bed.contig, "+", start, end) )

                    # right flank: window of the interval's length that
                    # starts at bed.end, clipped at the contig end
                    start, end = bed.start + l, min( lcontig, bed.end + l )
                    ids.append( "%s_%s_r %s:%i..%i" % (track, bed.name, bed.contig, start, end) )
                    seqs.append( fasta.getSequence( bed.contig, "+", start, end) )
        finally:
            inf.close()

        masked = maskSequences( seqs, masker )
        outs.write("\n".join( [ ">%s\n%s" % (x,y) for x,y in zip(ids, masked) ] ) )
    finally:
        outs.close()
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:35,代码来源:PipelineMotifs.py

示例2: main

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import setName [as 别名]
def main(argv=None):
    """Script entry point: collect sequences for bed intervals read from stdin.

    Builds the command line parser, opens the indexed genome given by
    ``--genome-file`` and iterates over the bed entries on
    ``options.stdin``, collecting one id and one sequence per entry
    (or per flank, depending on ``--output-mode``).

    NOTE(review): the remainder of this function is elided in this
    listing, so how the collected ids/sequences are filtered, masked
    and written is not visible here.

    :param argv: command line arguments; defaults to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option("-g", "--genome-file", dest="genome_file", type="string",
                      help="filename with genomic sequence to retrieve "
                      "sequences from.")

    parser.add_option("-m", "--masker", dest="masker", type="choice",
                      choices=("dust", "dustmasker", "softmask", "none"),
                      help="apply masker to mask output sequences "
                      "[%default].")

    parser.add_option("--output-mode", dest="output_mode", type="choice",
                      choices=("intervals", "leftright", "segments"),
                      help="what to output. "
                      "'intervals' generates a single sequence for "
                      "each bed interval. 'leftright' generates two "
                      "sequences, one in each direction, for each bed "
                      "interval. 'segments' can be used to output "
                      "sequence from bed12 files so that sequence only covers "
                      "the segements [%default]")

    parser.add_option("--min-sequence-length", dest="min_length", type="int",
                      help="require a minimum sequence length [%default]")

    parser.add_option("--max-sequence-length", dest="max_length", type="int",
                      help="require a maximum sequence length [%default]")

    parser.add_option(
        "--extend-at", dest="extend_at", type="choice",
        choices=("none", "3", "5", "both", "3only", "5only"),
        help="extend at 3', 5' or both or no ends. If 3only or 5only "
        "are set, only the added sequence is returned [default=%default]")

    parser.add_option(
        "--extend-by", dest="extend_by", type="int",
        help="extend by # bases [default=%default]")

    # --use-strand clears ignore_strand (store_false); the default set
    # below is True, i.e. strand is ignored unless the flag is given.
    parser.add_option(
        "--use-strand", dest="ignore_strand",
        action="store_false",
        help="use strand information and return reverse complement "
        "on intervals located on the negative strand. "
        "[default=%default]")

    parser.set_defaults(
        genome_file=None,
        masker=None,
        output_mode="intervals",
        min_length=0,
        max_length=0,
        extend_at=None,
        extend_by=100,
        ignore_strand=True,
    )

    (options, args) = E.Start(parser)

    # NOTE(review): `fasta` is only bound when --genome-file is given,
    # yet the loop below uses it unconditionally - running without a
    # genome file would fail with a NameError on the first bed entry.
    # `contigs` is not used in the visible part of this function.
    if options.genome_file:
        fasta = IndexedFasta.IndexedFasta(options.genome_file)
        contigs = fasta.getContigSizes()
        fasta.setConverter(IndexedFasta.getConverter("zero-both-open"))

    counter = E.Counter()
    # ids[i] is the header text belonging to seqs[i]
    ids, seqs = [], []

    E.info("collecting sequences")
    for bed in Bed.setName(Bed.iterator(options.stdin)):
        counter.input += 1

        lcontig = fasta.getLength(bed.contig)

        # with ignore_strand set every interval is read from the "+" strand
        if options.ignore_strand:
            strand = "+"
        else:
            strand = bed.strand

        # bed12 entries in "segments" mode: concatenate the sequence of
        # every block returned by bed.toIntervals()
        if options.output_mode == "segments" and bed.columns == 12:
            ids.append("%s %s:%i..%i (%s) %s %s" %
                       (bed.name, bed.contig, bed.start, bed.end, strand,
                        bed["blockSizes"], bed["blockStarts"]))
            seg_seqs = [fasta.getSequence(bed.contig, strand, start, end)
                        for start, end in bed.toIntervals()]
            seqs.append("".join(seg_seqs))

        # "segments" on entries without 12 columns falls back to
        # whole-interval output
        elif (options.output_mode == "intervals" or
              options.output_mode == "segments"):
            ids.append("%s %s:%i..%i (%s)" %
                       (bed.name, bed.contig, bed.start, bed.end, strand))
            seqs.append(
                fasta.getSequence(bed.contig, strand, bed.start, bed.end))

        elif options.output_mode == "leftright":
            # length of the interval
            l = bed.end - bed.start

#.........这里部分代码省略.........
开发者ID:CGATOxford,项目名称:cgat,代码行数:103,代码来源:bed2fasta.py

示例3: main

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import setName [as 别名]
def main(argv=None):
    """Script entry point: write fasta sequences for bed intervals on stdin.

    Reads bed entries from ``options.stdin``, retrieves the matching
    sequence(s) from the indexed genome given by ``--genome-file``,
    masks them with ``Masker.maskSequences`` and writes them in fasta
    format to ``options.stdout``.

    With ``--mode=intervals`` one sequence is written per interval.
    With ``--mode=leftright`` two flanking sequences are written per
    interval, each as long as the interval itself (clipped at position
    0 and at the contig end).

    NOTE(review): the length and extension options are parsed but not
    used anywhere in this function, and ``argv`` is not forwarded to
    ``E.Start``.

    :param argv: command line arguments; defaults to ``sys.argv``.
    :raises ValueError: if no genome file was supplied.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: gff2fasta.py 2861 2010-02-23 17:36:32Z andreas $")

    parser.add_option("-g", "--genome-file", dest="genome_file", type="string",
                      help="filename with genome.")

    parser.add_option("-m", "--masker", dest="masker", type="choice",
                      choices=("dust", "dustmasker", "softmask", "none"),
                      help="apply masker [%default].")

    parser.add_option("-o", "--mode", dest="mode", type="choice",
                      choices=("intervals", "leftright"),
                      help="what to output [%default]")

    parser.add_option("--min-length", dest="min_length", type="int",
                      help="require a minimum sequence length [%default]")

    parser.add_option("--max-length", dest="max_length", type="int",
                      help="require a maximum sequence length [%default]")

    parser.add_option("--extend-at", dest="extend_at", type="choice",
                      choices=("none", "3", "5", "both", "3only", "5only"),
                      help="extend at no, 3', 5' or both ends. If 3only or 5only are set, only the added sequence is returned [default=%default]")

    parser.add_option("--extend-by", dest="extend_by", type="int",
                      help="extend by # bases [default=%default]")

    # --use-strand clears ignore_strand (store_false); strand is ignored
    # by default.
    parser.add_option("--use-strand", dest="ignore_strand", action="store_false",
                      help="use strand information and return reverse complement [default=%default]")

    parser.set_defaults(
        genome_file=None,
        masker=None,
        mode="intervals",
        min_length=0,
        max_length=0,
        extend_at=None,
        extend_by=100,
        ignore_strand=True,
    )

    (options, args) = E.Start(parser)

    # Every record needs the genome, so fail early with a clear message
    # instead of hitting an unbound `fasta` inside the loop.
    if not options.genome_file:
        raise ValueError(
            "no genome supplied - please specify --genome-file")

    fasta = IndexedFasta.IndexedFasta(options.genome_file)
    fasta.setConverter(IndexedFasta.getConverter("zero-both-open"))

    counter = E.Counter()
    # ids[i] is the fasta header belonging to seqs[i]
    ids, seqs = [], []

    E.info("collecting sequences")
    for bed in Bed.setName(Bed.iterator(options.stdin)):
        counter.input += 1

        lcontig = fasta.getLength(bed.contig)

        strand = "+" if options.ignore_strand else bed.strand

        if options.mode == "intervals":
            ids.append("%s %s:%i..%i (%s)" %
                       (bed.name, bed.contig, bed.start, bed.end, strand))
            seqs.append(
                fasta.getSequence(bed.contig, strand, bed.start, bed.end))

        elif options.mode == "leftright":
            l = bed.end - bed.start

            # left flank: window of the interval's length that ends at
            # bed.start, clipped at the contig start
            start, end = max(0, bed.start - l), bed.end - l
            ids.append("%s_l %s:%i..%i (%s)" %
                       (bed.name, bed.contig, start, end, strand))
            seqs.append(fasta.getSequence(bed.contig, strand, start, end))

            # right flank: window of the interval's length that starts
            # at bed.end, clipped at the contig end
            start, end = bed.start + l, min(lcontig, bed.end + l)
            ids.append("%s_r %s:%i..%i (%s)" %
                       (bed.name, bed.contig, start, end, strand))
            seqs.append(fasta.getSequence(bed.contig, strand, start, end))

    E.info("collected %i sequences" % len(seqs))

    masked = Masker.maskSequences(seqs, options.masker)
    options.stdout.write(
        "\n".join([">%s\n%s" % (x, y) for x, y in zip(ids, masked)]) + "\n")

    E.info("masked %i sequences" % len(seqs))

    counter.output = len(seqs)

    E.info("%s" % counter)

    E.Stop()
开发者ID:Charlie-George,项目名称:cgat,代码行数:100,代码来源:bed2fasta.py


注:本文中的CGAT.Bed.setName方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。