当前位置: 首页>>代码示例>>Python>>正文


Python Fastq.iterate_convert方法代码示例

本文整理汇总了Python中CGAT.Fastq.iterate_convert方法的典型用法代码示例。如果您正苦于以下问题：Python Fastq.iterate_convert方法的具体用法？Python Fastq.iterate_convert怎么用？Python Fastq.iterate_convert使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在CGAT.Fastq的用法示例。


在下文中一共展示了Fastq.iterate_convert方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Write the sequences and qualities of a FASTQ stream to two files.

    Parses command line options in sys.argv, unless *argv* is given.

    Records are read from ``options.stdin``. For every record the
    identifier and sequence are written to the file named
    ``options.pattern % "csfasta"`` and the identifier and qualities to
    the file named ``options.pattern % "qual"``. When ``--change-format``
    is given, records are read through ``Fastq.iterate_convert`` (with
    ``--guess-format`` passed as the ``guess`` argument); otherwise they
    are read through ``Fastq.iterate``.

    Both output files are closed even if reading or writing fails.
    """
    # Local import so the block stays self-contained.
    from contextlib import closing

    # Any falsy argv (None or an empty list) falls back to sys.argv.
    if not argv:
        argv = sys.argv

    quality_formats = ('sanger', 'solexa', 'phred64', 'integer')

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-f", "--change-format", dest="change_format", type="choice",
                      choices=quality_formats,
                      help="guess quality score format and set quality scores to format [default=%default].")

    parser.add_option("--guess-format", dest="guess_format", type="choice",
                      choices=quality_formats,
                      help="quality score format to assume if ambiguous [default=%default].")

    parser.add_option("--pattern", dest="pattern", type="string",
                      help="filename prefix [default=%default].")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        pattern="%s.gz"
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()

    # closing() guarantees close() is called on both handles on every exit
    # path; it only requires that the returned objects expose close(), which
    # the original code already relied on.
    with closing(IOTools.openFile(options.pattern % "csfasta", "w")) as outfile_seq, \
            closing(IOTools.openFile(options.pattern % "qual", "w")) as outfile_qual:

        if options.change_format:
            records = Fastq.iterate_convert(options.stdin,
                                            format=options.change_format,
                                            guess=options.guess_format)
        else:
            records = Fastq.iterate(options.stdin)

        for record in records:
            c.input += 1
            outfile_seq.write(">%s\n%s\n" % (record.identifier, record.seq))
            outfile_qual.write(">%s\n%s\n" % (record.identifier, record.quals))
            c.output += 1

    # write footer and output benchmark information.
    E.info("%s" % str(c))
    E.Stop()
开发者ID:Charlie-George,项目名称:cgat,代码行数:59,代码来源:fastq2solid.py

示例2: main

# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Script entry point: convert or subsample FASTQ records from stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Only the ``--change-format`` branch and the beginning of the
    ``--sample`` branch are visible in this excerpt; the rest of the
    function is elided in the source listing.
    """

    # Any falsy argv (None or an empty list) falls back to sys.argv.
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-f", "--change-format", dest="change_format", type="choice",
                      choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
                      help="guess quality score format and set quality scores to format [default=%default].")

    parser.add_option("--guess-format", dest="guess_format", type="choice",
                      choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
                      help="quality score format to assume if ambiguous [default=%default].")

    parser.add_option("--sample", dest="sample", type="float",
                      help="sample a proportion of reads [default=%default].")

    parser.add_option("--pair", dest="pair", type="string",
                      help="if data is paired, filename with second pair. "
                      "Implemented for sampling [default=%default].")

    parser.add_option("--outfile-pair", dest="outfile_pair", type="string",
                      help="if data is paired, filename for second pair. "
                      "Implemented for sampling [default=%default].")

    parser.add_option("--uniq", dest="uniq", action="store_true",
                      help="remove duplicate reads (by name) [default=%default].")

    parser.add_option("--apply", dest="apply", type="string",
                      help="apply a filter to fastq file (taking only reads in filename) [default=%default].")

    parser.add_option("--trim3", dest="trim3", type="int",
                      help="trim # bases from 3' end [default=%default].")

    parser.add_option("--sort", dest="sort", action="store_true",
                      help="sort fastq by sequence id [default=%default].")

    parser.add_option("--seed", dest="seed", type="int",
                      help="seed for random number generator [default=%default].")

    parser.add_option("--renumber-ids", dest="renumber_ids", type="string",
                      help="rename reads in file by pattern [default=%default]")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        sample=None,
        trim3=None,
        pair=None,
        apply=None,
        uniq=False,
        outfile_pair=None,
        sort=None,
        seed=None,
        renumber_ids=None)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()

    # The branches below are mutually exclusive: --change-format is tested
    # first, so it wins over --sample when both are given.
    if options.change_format:
        for record in Fastq.iterate_convert(options.stdin,
                                            format=options.change_format,
                                            guess=options.guess_format):
            c.input += 1
            options.stdout.write("%s\n" % record)
            c.output += 1

    elif options.sample:
        # proportions above 1.0 are capped, i.e. every read is kept
        sample_threshold = min(1.0, options.sample)

        # options.seed defaults to None (see set_defaults above)
        random.seed(options.seed)

        if options.pair:
            if not options.outfile_pair:
                raise ValueError(
                    "please specify output filename for second pair (--outfile-pair)")

            outfile1 = options.stdout
            outfile2 = IOTools.openFile(options.outfile_pair, "w")

            # Both mates are kept or dropped together using one random draw.
            # NOTE(review): itertools.izip exists only on Python 2.
            # NOTE(review): neither outfile2 nor the handle opened for
            # options.pair is closed in the visible part of this function.
            for record1, record2 in itertools.izip(Fastq.iterate(options.stdin), Fastq.iterate(IOTools.openFile(options.pair))):
                c.input += 1
                if random.random() <= sample_threshold:
                    c.output += 1
                    outfile1.write("%s\n" % record1)
                    outfile2.write("%s\n" % record2)

        # NOTE(review): this loop is not in an ``else`` branch, so it also
        # runs after the paired loop above and iterates options.stdin a
        # second time - confirm that this is intended.
        for record in Fastq.iterate(options.stdin):
            c.input += 1
            if random.random() <= sample_threshold:
                c.output += 1
#.........这里部分代码省略.........
开发者ID:Charlie-George,项目名称:cgat,代码行数:103,代码来源:fastq2fastq.py

示例3: main

# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Script entry point: apply one ``--method`` to FASTQ records on stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Only the "change-format" and "grep" methods are visible in this
    excerpt; the handling of the remaining methods is elided in the
    source listing.
    """

    # Any falsy argv (None or an empty list) falls back to sys.argv.
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=(
                          "apply",
                          "change-format",
                          "renumber-reads",
                          "sample",
                          "sort",
                          "trim3",
                          "trim5",
                          "unique",
                          "grep"),
                      help="method to apply [%default]")

    parser.add_option(
        "--target-format", dest="target_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
        help="guess quality score format and set quality scores "
        "to format [default=%default].")

    parser.add_option(
        "--guess-format", dest="guess_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
        help="quality score format to assume if ambiguous [default=%default].")

    parser.add_option(
        "--sample-size", dest="sample_size", type="float",
        help="proportion of reads to sample. "
        "Provide a proportion of reads to sample, e.g. 0.1 for 10%, "
        "0.5 for 50%, etc [default=%default].")

    parser.add_option(
        "--pair-fastq-file", dest="pair", type="string",
        help="if data is paired, filename with second pair. "
        "Implemented for sampling [default=%default].")

    parser.add_option(
        "--map-tsv-file", dest="map_tsv_file", type="string",
        help="filename with tab-separated identifiers mapping for "
        "method apply [default=%default].")

    parser.add_option(
        "--num-bases", dest="nbases", type="int",
        help="number of bases to trim [default=%default].")

    parser.add_option(
        "--seed", dest="seed", type="int",
        help="seed for random number generator [default=%default].")

    parser.add_option(
        "--pattern-identifier", dest="renumber_pattern", type="string",
        help="rename reads in file by pattern [default=%default]")

    parser.add_option(
        "--grep-pattern", dest="grep_pattern", type="string",
        help="subset to reads matching pattern [default=%default]")

    # NOTE(review): ``change_format`` and ``apply`` receive defaults here but
    # no option defined above uses those dests, while ``target_format`` and
    # ``map_tsv_file`` get no explicit default - confirm against the elided
    # part of the function.
    parser.set_defaults(
        method=None,
        change_format=None,
        guess_format=None,
        sample_size=0.1,
        nbases=0,
        pair=None,
        apply=None,
        seed=None,
        renumber_pattern="read_%010i",
        grep_pattern=".*")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    c = E.Counter()

    if options.method == "change-format":
        # NOTE(review): options.target_format is passed as-is; nothing
        # visible here checks that --target-format was actually supplied.
        for record in Fastq.iterate_convert(options.stdin,
                                            format=options.target_format,
                                            guess=options.guess_format):
            c.input += 1
            options.stdout.write("%s\n" % record)
            c.output += 1

    elif options.method == "grep":
        # re.match only matches at the start of record.seq; the counter
        # ``c`` is not updated in this branch.
        for record in Fastq.iterate(options.stdin):
            if re.match(options.grep_pattern, record.seq):
                options.stdout.write("%s\n" % record)

    elif options.method == "sample":
#.........这里部分代码省略.........
开发者ID:Q-KIM,项目名称:cgat,代码行数:103,代码来源:fastq2fastq.py

示例4: main

# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Write a one-row summary table for the FASTQ records on stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Records are read through ``Fastq.iterate_convert`` when
    ``--change-format`` is given and through ``Fastq.iterate_guess``
    otherwise. For every record the values returned by
    ``record.toPhred()`` are used to accumulate the number of reads, the
    number of bases, the per-read lengths and mean qualities, and the
    number of bases below ``options.min_quality`` (default 10; no command
    line option for it is defined here). A header line and one
    tab-separated result line are written to ``options.stdout``.
    """

    # Any falsy argv (None or an empty list) falls back to sys.argv.
    if not argv:
        argv = sys.argv

    quality_formats = ('sanger', 'solexa', 'phred64',
                       'illumina-1.8', 'integer')

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    # The help texts use implicit string concatenation rather than backslash
    # continuations inside the literal, which would embed the source
    # indentation in the message. They also name --guess-format, the option
    # actually defined here, instead of a non-existent --format.
    parser.add_option("--guess-format", dest="guess_format", type="choice",
                      choices=quality_formats,
                      help="The default behaviour of the script is to guess "
                      "the quality format of the input fastq file. The user "
                      "can specify the quality format of the input file using "
                      "the --guess-format option. The script will use this "
                      "format if sequence qualities are ambiguous "
                      "[default=%default].")

    parser.add_option("-f", "--change-format", dest="change_format",
                      type="choice", choices=quality_formats,
                      help="The script guesses the quality format of the input "
                      "file and converts quality scores to the destination "
                      "format unless --guess-format is specified "
                      "[default=%default].")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        min_quality=10)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.change_format:
        iterator = Fastq.iterate_convert(options.stdin,
                                         format=options.change_format,
                                         guess=options.guess_format)
    else:
        iterator = Fastq.iterate_guess(options.stdin,
                                       guess=options.guess_format)

    min_quality = options.min_quality
    number_of_reads = 0
    number_of_bases = 0
    bases_below_min = 0
    # Per-read values are kept in full because the medians need them all.
    read_lengths = []
    read_qualities = []

    for record in iterator:
        number_of_reads += 1
        quals = record.toPhred()
        length_read = len(quals)
        number_of_bases += length_read
        # count without building a temporary list
        bases_below_min += sum(1 for x in quals if x < min_quality)
        read_lengths.append(length_read)
        read_qualities.append(np.mean(quals))

    # NOTE(review): with no input records these are statistics of empty
    # lists; presumably numpy then yields nan - confirm this is acceptable.
    mean_length = round(np.mean(read_lengths), 2)
    median_length = round(np.median(read_lengths), 2)
    mean_quality = round(np.mean(read_qualities), 2)
    median_quality = round(np.median(read_qualities), 2)

    options.stdout.write(
        "reads\tbases\tmean_length\tmedian_length\tmean_quality\tmedian_quality\tnfailed\n")

    options.stdout.write(
        "%i\t%i\t%s\t%s\t%s\t%s\t%i\n" % (number_of_reads, number_of_bases,
                                          str(mean_length),
                                          str(median_length),
                                          str(mean_quality),
                                          str(median_quality),
                                          bases_below_min))
    E.Stop()
开发者ID:Charlie-George,项目名称:cgat,代码行数:79,代码来源:fastq2summary.py

示例5: main

# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Write a per-read quality table for the FASTQ records on stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Records are read through ``Fastq.iterate_convert`` when
    ``--change-format`` is given and through ``Fastq.iterate_guess``
    otherwise. After a header line, one tab-separated line per record is
    written to ``options.stdout`` containing the record identifier, the
    number of values from ``record.toPhred()`` below
    ``options.min_quality`` (default 10; no command line option for it is
    defined here), the number of "N" and "." characters in the sequence,
    and ``str(Stats.Summary(quals))``.
    """

    # Any falsy argv (None or an empty list) falls back to sys.argv.
    if not argv:
        argv = sys.argv

    quality_formats = (
        'sanger', 'solexa', 'phred64', 'illumina-1.8', 'integer')

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    # The help texts use implicit string concatenation rather than backslash
    # continuations inside the literal, which would embed the source
    # indentation in the message. They also name --guess-format, the option
    # actually defined here, instead of a non-existent --format.
    parser.add_option("--guess-format", dest="guess_format", type="choice",
                      choices=quality_formats,
                      help="The default behaviour of the script is to guess "
                      "the quality format of the input fastq file. The user "
                      "can specify the quality format of the input file "
                      "using the --guess-format option. The script will use "
                      "this format if the sequence qualities are ambiguous "
                      "[default=%default].")

    parser.add_option("-f", "--change-format", dest="change_format", type="choice",
                      choices=quality_formats,
                      help="The script will guess the quality format of the "
                      "input file and convert quality scores to the "
                      "destination format unless --guess-format is specified "
                      "[default=%default].")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        min_quality=10,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()

    if options.change_format:
        iterator = Fastq.iterate_convert(options.stdin,
                                         format=options.change_format,
                                         guess=options.guess_format)
    else:
        iterator = Fastq.iterate_guess(options.stdin,
                                       guess=options.guess_format)

    options.stdout.write("read\tnfailed\tnN\t%s\n" %
                         ("\t".join(Stats.Summary().getHeaders())))

    min_quality = options.min_quality

    for record in iterator:
        c.input += 1
        quals = record.toPhred()
        # count without building a temporary list
        nfailed = sum(1 for x in quals if x < min_quality)
        # both "N" and "." are counted as uncalled bases
        nns = record.seq.count("N") + record.seq.count(".")
        options.stdout.write("%s\t%i\t%i\t%s\n" % (record.identifier,
                                                   nfailed,
                                                   nns,
                                                   str(Stats.Summary(quals))
                                                   ))
        c.output += 1

    # write footer and output benchmark information.
    E.info("%s" % str(c))
    E.Stop()
开发者ID:lesheng,项目名称:cgat,代码行数:67,代码来源:fastq2table.py


注:本文中的CGAT.Fastq.iterate_convert方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。