當前位置: 首頁>>代碼示例>>Python>>正文


Python Fastq.iterate方法代碼示例

本文整理匯總了Python中CGAT.Fastq.iterate方法的典型用法代碼示例。如果您正苦於以下問題:Python Fastq.iterate方法的具體用法?Python Fastq.iterate怎麽用?Python Fastq.iterate使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在CGAT.Fastq的用法示例。


在下文中一共展示了Fastq.iterate方法的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: main

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv: argv = sys.argv

    # setup command line parser
    parser = E.OptionParser( version = "%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $", 
                             usage = globals()["__doc__"] )

    parser.add_option("-a", "--fastq1", dest="fastq1", type="string",
                      help="supply read1 fastq file"  )
    parser.add_option("-b", "--fastq2", dest="fastq2", type="string",
                      help="supply read2 fastq file"  )

    ## add common options (-h/--help, ...) and parse command line 
    (options, args) = E.Start( parser, argv = argv )

    fastq1 = IOTools.openFile(options.fastq1)
    fastq2 = IOTools.openFile(options.fastq2)

    E.info("iterating over fastq files")
    f1_count = 0
    for f1, f2 in itertools.izip_longest(Fastq.iterate(fastq1), Fastq.iterate(fastq2)):
        if not (f1 and f2) or (not f2 and f1):
            try:
                raise PairedReadError("unpaired reads detected. Are files sorted? are files of equal length?")
            except PairedReadError, e:
                raise PairedReadError(e), None, sys.exc_info()[2]
        else:
            assert f1.identifier.endswith("/1") and f2.identifier.endswith("/2"), "Reads in file 1 must end with /1 and reads in file 2 with /2"
            options.stdout.write(">%s\n%s\n>%s\n%s\n" % (f1.identifier, f1.seq, f2.identifier, f2.seq))
            f1_count += 1
開發者ID:BioinformaticsArchive,項目名稱:cgat,代碼行數:37,代碼來源:fastqs2fasta.py

示例2: main

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option(
        "-a", "--first-fastq-file", dest="fastq1", type="string",
        help="supply read1 fastq file")
    parser.add_option(
        "-b", "--second-fastq-file", dest="fastq2", type="string",
        help="supply read2 fastq file")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if args and len(args) == 2:
        options.fastq1, options.fastq2 = args

    fastq1 = IOTools.openFile(options.fastq1)
    fastq2 = IOTools.openFile(options.fastq2)

    E.info("iterating over fastq files")
    f1_count = 0
    for f1, f2 in zip_longest(Fastq.iterate(fastq1),
                              Fastq.iterate(fastq2)):
        if not (f1 and f2) or (not f2 and f1):
            try:
                raise PairedReadError(
                    "unpaired reads detected. Are files sorted? are "
                    "files of equal length?")
            except PairedReadError as e:
                raise PairedReadError(e).with_traceback(sys.exc_info()[2])
        else:
            assert f1.identifier.endswith("/1") and \
                f2.identifier.endswith("/2"), \
                "Reads in file 1 must end with /1 and reads in file 2 with /2"
            options.stdout.write(
                ">%s\n%s\n>%s\n%s\n" %
                (f1.identifier, f1.seq, f2.identifier, f2.seq))
            f1_count += 1

    E.info("output: %i pairs" % f1_count)

    # write footer and output benchmark information.
    E.Stop()
開發者ID:CGATOxford,項目名稱:cgat,代碼行數:56,代碼來源:fastqs2fasta.py

示例3: buildTrueTaxonomicRelativeAbundances

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def buildTrueTaxonomicRelativeAbundances(infiles, outfile):
    '''
    get species level relative abundances for the simulateds
    data. This involes creating maps between different identifiers
    from the NCBI taxonomy. This is so that the results are comparable
    to species level analysis from metaphlan
    '''
    levels = ["species", "genus", "family", "order", "class", "phylum"]
    taxa = open(infiles[1])
    header = taxa.readline()
    gi2taxa = collections.defaultdict(list)
    for line in taxa.readlines():
        data = line[:-1].split("\t")
        gi, strain, species, genus, family, order, _class, phylum = data[
            0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]
        gi2taxa[gi] = (species, genus, family, order, _class, phylum)

    outf = open(outfile, "w")
    outf.write("level\ttaxa\trelab\n")
    for i in range(len(levels)):
        total = 0
        result = collections.defaultdict(int)
        for fastq in Fastq.iterate(IOTools.openFile(infiles[0])):
            total += 1
            gi = fastq.identifier.split("|")[1]
            result[gi2taxa[gi][i]] += 1
        for taxa, value in result.iteritems():
            outf.write("%s\t%s\t%s\n" %
                       (levels[i], taxa, float(value) / total))
    outf.close()
開發者ID:BioXiao,項目名稱:CGATPipelines,代碼行數:32,代碼來源:PipelineMetagenomeBenchmark.py

示例4: filterReadsByPrimerMatch

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def filterReadsByPrimerMatch(infile, outfiles):
    '''Filter out reads where the start of read 1 does not match primer sequence (14bp)'''
    to_cluster = True
    primer = "a"
    if infile.find("_b.") > 0:
        primer = "b"
    if primer == "a":
        primer_seq = PARAMS["grep_primer_a"]
    else:
        primer_seq = PARAMS["grep_primer_b"]
    grep_filter_length = PARAMS["grep_filter_length"]
    primer_subseq = primer_seq[:grep_filter_length]

    track = P.snip(os.path.basename(infile), ".fastq.1.gz")
    infile2 = track + ".fastq.2.gz"
    outfile1, outfile2 = outfiles
    tempfile = "filtered/" + track + ".filtered.fastq.1.gz"

    # filter by primer match
    fastq_in = open(infile, "r")
    fastq_out = open(tempfile, "wb")
    for read in fq.iterate(fastq_in):
        if read.seq[:grep_filter_length] == primer_subseq:
            fastq_out.writeln("@" + read.id)
            fastq_out.writeln(read.seq)
            fastq_out.writeln("+")
            fastq_out.writeln(read.qual)
    fastq_in.close()
    fastq_out.close()

    # reconcile read pairs
    statement = '''python %(scriptsdir)s/fastqs2fastq.py --method=reconcile %(tempfile)s %(infile2)s --output-pattern=filtered/%(track)s.reconciled.fastq.%%i.gz'''
    P.run()
開發者ID:Charlie-George,項目名稱:cgat,代碼行數:35,代碼來源:pipeline_proj020.py

示例5: main

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-f", "--change-format", dest="change_format", type="choice",
                      choices=('sanger', 'solexa', 'phred64', 'integer'),
                      help="guess quality score format and set quality scores to format [default=%default].")

    parser.add_option("--guess-format", dest="guess_format", type="choice",
                      choices=('sanger', 'solexa', 'phred64', 'integer'),
                      help="quality score format to assume if ambiguous [default=%default].")

    parser.add_option("--pattern", dest="pattern", type="string",
                      help="filename prefix [default=%default].")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        pattern="%s.gz"
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()

    outfile_seq = IOTools.openFile(options.pattern % "csfasta", "w")
    outfile_qual = IOTools.openFile(options.pattern % "qual", "w")

    if options.change_format:
        iter = Fastq.iterate_convert(options.stdin,
                                     format=options.change_format,
                                     guess=options.guess_format)
    else:
        iter = Fastq.iterate(options.stdin)

    for record in iter:
        c.input += 1
        outfile_seq.write(">%s\n%s\n" % (record.identifier, record.seq))
        outfile_qual.write(">%s\n%s\n" % (record.identifier, record.quals))
        c.output += 1

    outfile_seq.close()
    outfile_qual.close()

    # write footer and output benchmark information.
    E.info("%s" % str(c))
    E.Stop()
開發者ID:Charlie-George,項目名稱:cgat,代碼行數:59,代碼來源:fastq2solid.py

示例6: replace

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def replace(fastqfile, baseToReplace):
    '''replaces the specified base with N'''

    # use gzip as default to open the fastq file
    outf = gzip.open("replaced_" + fastqfile, "w")
    fastq = gzip.open(fastqfile)
    iterator = Fastq.iterate(fastq)
    for record in iterator:
        x = list(record.seq)
        x[int(baseToReplace)] = "N"
        record.seq = "".join(x)
        outf.write("@" + record.identifier + "\n" + record.seq + "\n" + "+" + record.identifier + "\n" + record.quals + "\n")
開發者ID:BioinformaticsArchive,項目名稱:cgat,代碼行數:14,代碼來源:fastq2N.py

示例7: buildExpectedCoverageOverGenomes

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def buildExpectedCoverageOverGenomes(infiles, outfile):
    '''
    take sequence files and estimate the theoretical
    coverage we would get over genomes in the 
    sample i.e. at 1X coverage
    '''

    # if paired end then will have to multiply
    # by two
    multiply = False
    if infiles[0].endswith(".fastq.1.gz"):
        multiply = True

    # the theoretical coverage is defined as
    # (read length (L) * no. reads (N)) / genome size (G) (bp)

    # get genome sizes into memory
    genomes = open(infiles[1])
    header = genomes.readline()
    genome_sizes = {}
    for line in genomes.readlines():
        data = line[:-1].split("\t")
        gi = data[0].split("_")[1]
        size = data[1]
        genome_sizes[gi] = size

    # get the expected genome size
    expected_genome_sizes = collections.defaultdict(int)
    E.info("iterating over fastq file")
    for fastq in Fastq.iterate(IOTools.openFile(infiles[0])):
        gi = fastq.identifier.split("|")[1]
        expected_genome_sizes[gi] += 1
    E.info("iterating over fastq file: DONE")

    # get the proportion of each genome covered
    outf = open(outfile, "w")
    outf.write("gi\texpected_coverage\n")
    for gi, size in expected_genome_sizes.iteritems():
        if multiply:
            size = size * 2
        if gi not in genome_sizes:
            E.warn("could not find gi no. %s in dictionary" % gi)
            continue
        proportion_coverage = float(size) / float(genome_sizes[gi])
        if proportion_coverage > 1:
            proportion_coverage = 1
        outf.write("%s\t%f\n" % (gi, proportion_coverage))
    outf.close()
開發者ID:BioXiao,項目名稱:CGATPipelines,代碼行數:50,代碼來源:PipelineMetagenomeBenchmark.py

示例8: preprocessIdba

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def preprocessIdba(infile, outfile):
    '''
    preprocess pooled reads for IDBA
    '''
    # check for second read in the pair
    if infile.endswith(".fastq.gz"):
        E.info("converting fastq file to fasta file")
        outf = open(outfile, "w")
        for fastq in Fastq.iterate(IOTools.openFile(infile)):
            outf.write("%s\n%s\n" % (">" + fastq.identifier, fastq.seq))
        outf.close()
    elif infile.endswith(".1.gz"):
        read2 = P.snip(infile, ".1.gz") + ".2.gz"
        assert os.path.exists(read2), "file does not exist %s" % read2

        statement = '''python %(scriptsdir)s/fastqs2fasta.py
                   -a %(infile)s
                   -b %(read2)s
                   --log=%(infile)s.log
                   > %(outfile)s'''
        P.run()
開發者ID:Charlie-George,項目名稱:cgat,代碼行數:23,代碼來源:pipeline_metagenomeassembly.py

示例9: main

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-f", "--change-format", dest="change_format", type="choice",
                      choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
                      help="guess quality score format and set quality scores to format [default=%default].")

    parser.add_option("--guess-format", dest="guess_format", type="choice",
                      choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
                      help="quality score format to assume if ambiguous [default=%default].")

    parser.add_option("--sample", dest="sample", type="float",
                      help="sample a proportion of reads [default=%default].")

    parser.add_option("--pair", dest="pair", type="string",
                      help="if data is paired, filename with second pair. "
                      "Implemented for sampling [default=%default].")

    parser.add_option("--outfile-pair", dest="outfile_pair", type="string",
                      help="if data is paired, filename for second pair. "
                      "Implemented for sampling [default=%default].")

    parser.add_option("--uniq", dest="uniq", action="store_true",
                      help="remove duplicate reads (by name) [default=%default].")

    parser.add_option("--apply", dest="apply", type="string",
                      help="apply a filter to fastq file (taking only reads in filename) [default=%default].")

    parser.add_option("--trim3", dest="trim3", type="int",
                      help="trim # bases from 3' end [default=%default].")

    parser.add_option("--sort", dest="sort", action="store_true",
                      help="sort fastq by sequence id [default=%default].")

    parser.add_option("--seed", dest="seed", type="int",
                      help="seed for random number generator [default=%default].")

    parser.add_option("--renumber-ids", dest="renumber_ids", type="string",
                      help="rename reads in file by pattern [default=%default]")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        sample=None,
        trim3=None,
        pair=None,
        apply=None,
        uniq=False,
        outfile_pair=None,
        sort=None,
        seed=None,
        renumber_ids=None)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()

    if options.change_format:
        for record in Fastq.iterate_convert(options.stdin,
                                            format=options.change_format,
                                            guess=options.guess_format):
            c.input += 1
            options.stdout.write("%s\n" % record)
            c.output += 1

    elif options.sample:
        sample_threshold = min(1.0, options.sample)

        random.seed(options.seed)

        if options.pair:
            if not options.outfile_pair:
                raise ValueError(
                    "please specify output filename for second pair (--outfile-pair)")

            outfile1 = options.stdout
            outfile2 = IOTools.openFile(options.outfile_pair, "w")

            for record1, record2 in itertools.izip(Fastq.iterate(options.stdin), Fastq.iterate(IOTools.openFile(options.pair))):
                c.input += 1
                if random.random() <= sample_threshold:
                    c.output += 1
                    outfile1.write("%s\n" % record1)
                    outfile2.write("%s\n" % record2)

        for record in Fastq.iterate(options.stdin):
            c.input += 1
            if random.random() <= sample_threshold:
                c.output += 1
#.........這裏部分代碼省略.........
開發者ID:Charlie-George,項目名稱:cgat,代碼行數:103,代碼來源:fastq2fastq.py

示例10: main

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=(
                          "apply",
                          "change-format",
                          "renumber-reads",
                          "sample",
                          "sort",
                          "trim3",
                          "trim5",
                          "unique",
                          "grep"),
                      help="method to apply [%default]")

    parser.add_option(
        "--target-format", dest="target_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
        help="guess quality score format and set quality scores "
        "to format [default=%default].")

    parser.add_option(
        "--guess-format", dest="guess_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
        help="quality score format to assume if ambiguous [default=%default].")

    parser.add_option(
        "--sample-size", dest="sample_size", type="float",
        help="proportion of reads to sample. "
        "Provide a proportion of reads to sample, e.g. 0.1 for 10%, "
        "0.5 for 50%, etc [default=%default].")

    parser.add_option(
        "--pair-fastq-file", dest="pair", type="string",
        help="if data is paired, filename with second pair. "
        "Implemented for sampling [default=%default].")

    parser.add_option(
        "--map-tsv-file", dest="map_tsv_file", type="string",
        help="filename with tab-separated identifiers mapping for "
        "method apply [default=%default].")

    parser.add_option(
        "--num-bases", dest="nbases", type="int",
        help="number of bases to trim [default=%default].")

    parser.add_option(
        "--seed", dest="seed", type="int",
        help="seed for random number generator [default=%default].")

    parser.add_option(
        "--pattern-identifier", dest="renumber_pattern", type="string",
        help="rename reads in file by pattern [default=%default]")

    parser.add_option(
        "--grep-pattern", dest="grep_pattern", type="string",
        help="subset to reads matching pattern [default=%default]")

    parser.set_defaults(
        method=None,
        change_format=None,
        guess_format=None,
        sample_size=0.1,
        nbases=0,
        pair=None,
        apply=None,
        seed=None,
        renumber_pattern="read_%010i",
        grep_pattern=".*")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    c = E.Counter()

    if options.method == "change-format":
        for record in Fastq.iterate_convert(options.stdin,
                                            format=options.target_format,
                                            guess=options.guess_format):
            c.input += 1
            options.stdout.write("%s\n" % record)
            c.output += 1

    elif options.method == "grep":
        for record in Fastq.iterate(options.stdin):
            if re.match(options.grep_pattern, record.seq):
                options.stdout.write("%s\n" % record)

    elif options.method == "sample":
#.........這裏部分代碼省略.........
開發者ID:Q-KIM,項目名稱:cgat,代碼行數:103,代碼來源:fastq2fastq.py

示例11: main

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=('join', ),
                      help="method to apply [default=%default].")

    parser.set_defaults(
        method="join",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError(
            "please supply at least two fastq files on the commandline")

    fn1, fn2 = args
    c = E.Counter()
    outfile = options.stdout

    if options.method == "join":
        # merge based on diagonals in dotplot
        iter1 = Fastq.iterate(IOTools.openFile(fn1))
        iter2 = Fastq.iterate(IOTools.openFile(fn2))
        tuple_size = 2
        for left, right in zip(iter1, iter2):
            c.input += 1

            # build dictionary of tuples
            s1, q1 = left.seq, left.quals
            d = collections.defaultdict(list)
            for x in range(len(s1) - tuple_size):
                d[s1[x:x + tuple_size]].append(x)

            s2, q2 = right.seq, right.quals
            # reverse complement
            s2 = Genomics.complement(s2)
            q2 = q2[::-1]

            # compute list of offsets/diagonals
            offsets = collections.defaultdict(int)
            for x in range(len(s2) - tuple_size):
                c = s2[x:x + tuple_size]
                for y in d[c]:
                    offsets[x - y] += 1

            # find maximum diagonal
            sorted = sorted([(y, x) for x, y in offsets.items()])
            max_count, max_offset = sorted[-1]

            E.debug('%s: maximum offset at %i' % (left.identifier,
                                                  max_offset))

            # simple merge sequence
            take = len(s2) - max_offset
            merged_seq = s1 + s2[take:]

            # simple merge quality scores
            merged_quals = q1 + q2[take:]

            new_entry = copy.copy(left)
            new_entry.seq = merged_seq
            new_entry.quals = merged_quals
            outfile.write(new_entry)
            c.output += 1

    # write footer and output benchmark information.
    E.info("%s" % str(c))
    E.Stop()
開發者ID:SCV,項目名稱:cgat,代碼行數:83,代碼來源:fastqs2fastq.py

示例12: main

# 需要導入模塊: from CGAT import Fastq [as 別名]
# 或者: from CGAT.Fastq import iterate [as 別名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("--split-barcode", dest="split", action="store_true",
                      help="barcode is split across read pair")
    parser.add_option("-p", "--bc-pattern", dest="pattern", type="string",
                      help="Barcode pattern. Ns are random bases X's fixed")
    parser.add_option("--bc-pattern2", dest="pattern2", type="string",
                      help="Barcode pattern. Ns are random bases X's fixed")
    parser.add_option("--read2-in", dest="read2_in", type="string",
                      help="file name for read pairs")
    parser.add_option("--3prime", dest="prime3", action="store_true",
                      help="barcode is on 3' end of read")
    parser.add_option("--read2-out", dest="read2_out", type="string",
                      help="file to output processed paired read to")
    parser.add_option("--supress-stats", dest="stats", action="store_false",
                      help="Suppress the writing of stats to the log")

    parser.set_defaults(split=False,
                        pattern=None,
                        pattern2=None,
                        read2_in=None,
                        read2_out=None,
                        prime3=False,
                        stats=True)

    # add common options (-h/--help, ...) and parse command line

    (options, args) = E.Start(parser, argv=argv)

    # check options
    if not options.pattern:
        raise ValueError("must specify a pattern using ``--bc-pattern``")

    if options.split:
        if not options.read2_in:
            raise ValueError("must specify a paired fastq ``--read2-in``")

        if not options.pattern2:
            options.pattern2 = options.pattern

    if options.read2_in:
        if not options.read2_out:
            raise ValueError("must specify an output for the paired end "
                             "``--read2-out``")


    # Initialise the processor
    processor = Extractor(options.pattern, options.pattern2, options.prime3)
    read1s = Fastq.iterate(options.stdin)

    if options.read2_in is None:

        for read in read1s:
            options.stdout.write(str(processor(read)) + "\n")

    else:

        read2s = Fastq.iterate(IOTools.openFile(options.read2_in))
        read2_out = IOTools.openFile(options.read2_out, "w")

        for read1, read2 in zip(read1s, read2s):
            new_1, new_2 = processor(read1, read2)
            options.stdout.write(str(new_1) + "\n")
            read2_out.write(str(new_2) + "\n")

    # write footer and output benchmark information.

    if options.stats:
     
        options.stdlog.write("\t".join(["Barcode", "UMI", "Sample", "Count"]) + "\n")
        for id in processor.bc_count:
            options.stdlog.write("\t".join(id+(str(processor.bc_count[id]),)) + "\n")
                             
    E.Stop()
開發者ID:jdblischak,項目名稱:UMI-tools,代碼行數:87,代碼來源:extract_umi.py


注:本文中的CGAT.Fastq.iterate方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。