本文整理汇总了 Python 中 CGAT.Fastq.iterate_convert 方法的典型用法代码示例。如果您正苦于以下问题:Python Fastq.iterate_convert 方法的具体用法?Python Fastq.iterate_convert 怎么用?Python Fastq.iterate_convert 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 CGAT.Fastq 的用法示例。
在下文中一共展示了 Fastq.iterate_convert 方法的 5 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: main
# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Split fastq records from stdin into a sequence file and a quality file.

    Parses command line options in sys.argv, unless *argv* is given.

    Every record read from ``options.stdin`` is written twice in a
    fasta-like ``>identifier`` layout: its ``seq`` goes to the file named
    ``options.pattern % "csfasta"`` and its ``quals`` to
    ``options.pattern % "qual"`` (the default pattern is ``"%s.gz"``).

    When ``--change-format`` is given, records are read through
    ``Fastq.iterate_convert`` with the requested target format and the
    ``--guess-format`` hint; otherwise ``Fastq.iterate`` is used.

    Both output files are closed even if reading or writing fails.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-f", "--change-format", dest="change_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'integer'),
        help="guess quality score format and set quality scores to format [default=%default].")

    parser.add_option(
        "--guess-format", dest="guess_format", type="choice",
        choices=('sanger', 'solexa', 'phred64', 'integer'),
        help="quality score format to assume if ambiguous [default=%default].")

    # NOTE(review): the help text says "prefix", but the value is used as a
    # %-pattern that receives "csfasta" / "qual" below.
    parser.add_option(
        "--pattern", dest="pattern", type="string",
        help="filename prefix [default=%default].")

    parser.set_defaults(
        change_format=None,
        guess_format=None,
        pattern="%s.gz"
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()

    # Nested try/finally so that each successfully opened file is closed
    # on every exit path, including an exception raised mid-iteration.
    outfile_seq = IOTools.openFile(options.pattern % "csfasta", "w")
    try:
        outfile_qual = IOTools.openFile(options.pattern % "qual", "w")
        try:
            # "records" rather than "iter": do not shadow the builtin.
            if options.change_format:
                records = Fastq.iterate_convert(options.stdin,
                                                format=options.change_format,
                                                guess=options.guess_format)
            else:
                records = Fastq.iterate(options.stdin)

            for record in records:
                c.input += 1
                outfile_seq.write(
                    ">%s\n%s\n" % (record.identifier, record.seq))
                outfile_qual.write(
                    ">%s\n%s\n" % (record.identifier, record.quals))
                c.output += 1
        finally:
            outfile_qual.close()
    finally:
        outfile_seq.close()

    # write footer and output benchmark information.
    E.info("%s" % str(c))
    E.Stop()
示例2: main
# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
"""Script entry point for fastq conversion / sampling (listing truncated).

Parses command line options in sys.argv, unless *argv* is given.

Visible behaviour:

* ``--change-format``: every record produced by ``Fastq.iterate_convert``
  over ``options.stdin`` is written to ``options.stdout``.
* ``--sample``: records are kept when ``random.random()`` is not greater
  than ``min(1.0, options.sample)``; with ``--pair`` a second input file is
  read in lockstep and ``--outfile-pair`` is required.

Raises ValueError if ``--pair`` is given without ``--outfile-pair`` while
sampling.

NOTE(review): leading indentation was lost in this listing and the tail of
the function is elided, so the handling of the remaining options
(--uniq, --apply, --trim3, --sort, --renumber-ids) is not visible here.
"""
# Treat a missing/empty argv as "use the process arguments".
if not argv:
argv = sys.argv
# setup command line parser
parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
usage=globals()["__doc__"])
# Target quality format; a truthy value selects the conversion branch below.
parser.add_option("-f", "--change-format", dest="change_format", type="choice",
choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
help="guess quality score format and set quality scores to format [default=%default].")
# Passed through as ``guess=`` to Fastq.iterate_convert.
parser.add_option("--guess-format", dest="guess_format", type="choice",
choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
help="quality score format to assume if ambiguous [default=%default].")
# Sampling proportion; values above 1.0 are clamped to 1.0 below.
parser.add_option("--sample", dest="sample", type="float",
help="sample a proportion of reads [default=%default].")
parser.add_option("--pair", dest="pair", type="string",
help="if data is paired, filename with second pair. "
"Implemented for sampling [default=%default].")
parser.add_option("--outfile-pair", dest="outfile_pair", type="string",
help="if data is paired, filename for second pair. "
"Implemented for sampling [default=%default].")
parser.add_option("--uniq", dest="uniq", action="store_true",
help="remove duplicate reads (by name) [default=%default].")
parser.add_option("--apply", dest="apply", type="string",
help="apply a filter to fastq file (taking only reads in filename) [default=%default].")
parser.add_option("--trim3", dest="trim3", type="int",
help="trim # bases from 3' end [default=%default].")
parser.add_option("--sort", dest="sort", action="store_true",
help="sort fastq by sequence id [default=%default].")
parser.add_option("--seed", dest="seed", type="int",
help="seed for random number generator [default=%default].")
parser.add_option("--renumber-ids", dest="renumber_ids", type="string",
help="rename reads in file by pattern [default=%default]")
# Every mode is off by default; the branches below test these for truthiness.
parser.set_defaults(
change_format=None,
guess_format=None,
sample=None,
trim3=None,
pair=None,
apply=None,
uniq=False,
outfile_pair=None,
sort=None,
seed=None,
renumber_ids=None)
# add common options (-h/--help, ...) and parse command line
(options, args) = E.Start(parser, argv=argv)
# Tallies records read (c.input) and records written (c.output).
c = E.Counter()
if options.change_format:
# Conversion mode: stream converted records from stdin to stdout.
for record in Fastq.iterate_convert(options.stdin,
format=options.change_format,
guess=options.guess_format):
c.input += 1
options.stdout.write("%s\n" % record)
c.output += 1
elif options.sample:
# Sampling mode: a proportion above 1.0 is clamped so every read is kept.
sample_threshold = min(1.0, options.sample)
# With the default seed of None this call is random.seed(None).
random.seed(options.seed)
if options.pair:
# Paired sampling needs a destination for the second mate.
if not options.outfile_pair:
raise ValueError(
"please specify output filename for second pair (--outfile-pair)")
outfile1 = options.stdout
outfile2 = IOTools.openFile(options.outfile_pair, "w")
# Walk both inputs in lockstep so that a pair is kept or dropped together.
# NOTE(review): neither outfile2 nor the file opened for options.pair is
# closed in the visible part of this listing - confirm against the full source.
for record1, record2 in itertools.izip(Fastq.iterate(options.stdin), Fastq.iterate(IOTools.openFile(options.pair))):
c.input += 1
if random.random() <= sample_threshold:
c.output += 1
outfile1.write("%s\n" % record1)
outfile2.write("%s\n" % record2)
# NOTE(review): with indentation lost it cannot be told from here whether the
# loop below is the single-end alternative to the paired branch or also runs
# after it - TODO confirm against the full source.
for record in Fastq.iterate(options.stdin):
c.input += 1
if random.random() <= sample_threshold:
c.output += 1
#......... part of the code is omitted here .........
示例3: main
# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
"""Script entry point dispatching on ``--method`` (listing truncated).

Parses command line options in sys.argv, unless *argv* is given.

Visible behaviour:

* ``--method=change-format``: every record produced by
  ``Fastq.iterate_convert`` over ``options.stdin`` (target
  ``options.target_format``, hint ``options.guess_format``) is written to
  ``options.stdout``.
* ``--method=grep``: records whose ``seq`` matches ``options.grep_pattern``
  via ``re.match`` are written to ``options.stdout``.

NOTE(review): leading indentation was lost in this listing and the function
is elided from the ``sample`` branch onwards, so the remaining methods are
not visible here.
"""
# Treat a missing/empty argv as "use the process arguments".
if not argv:
argv = sys.argv
# setup command line parser
parser = E.OptionParser(version="%prog version: $Id$",
usage=globals()["__doc__"])
# Selects which branch of the dispatch below runs.
parser.add_option("-m", "--method", dest="method", type="choice",
choices=(
"apply",
"change-format",
"renumber-reads",
"sample",
"sort",
"trim3",
"trim5",
"unique",
"grep"),
help="method to apply [%default]")
# Passed as ``format=`` to Fastq.iterate_convert in the change-format branch.
parser.add_option(
"--target-format", dest="target_format", type="choice",
choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
help="guess quality score format and set quality scores "
"to format [default=%default].")
# Passed as ``guess=`` to Fastq.iterate_convert in the change-format branch.
parser.add_option(
"--guess-format", dest="guess_format", type="choice",
choices=('sanger', 'solexa', 'phred64', 'integer', 'illumina-1.8'),
help="quality score format to assume if ambiguous [default=%default].")
parser.add_option(
"--sample-size", dest="sample_size", type="float",
help="proportion of reads to sample. "
"Provide a proportion of reads to sample, e.g. 0.1 for 10%, "
"0.5 for 50%, etc [default=%default].")
parser.add_option(
"--pair-fastq-file", dest="pair", type="string",
help="if data is paired, filename with second pair. "
"Implemented for sampling [default=%default].")
parser.add_option(
"--map-tsv-file", dest="map_tsv_file", type="string",
help="filename with tab-separated identifiers mapping for "
"method apply [default=%default].")
parser.add_option(
"--num-bases", dest="nbases", type="int",
help="number of bases to trim [default=%default].")
parser.add_option(
"--seed", dest="seed", type="int",
help="seed for random number generator [default=%default].")
parser.add_option(
"--pattern-identifier", dest="renumber_pattern", type="string",
help="rename reads in file by pattern [default=%default]")
parser.add_option(
"--grep-pattern", dest="grep_pattern", type="string",
help="subset to reads matching pattern [default=%default]")
# NOTE(review): `change_format` and `apply` are not the dest of any option
# defined above, while `target_format` and `map_tsv_file` get no explicit
# default here - confirm whether these names are leftovers.
parser.set_defaults(
method=None,
change_format=None,
guess_format=None,
sample_size=0.1,
nbases=0,
pair=None,
apply=None,
seed=None,
renumber_pattern="read_%010i",
grep_pattern=".*")
# add common options (-h/--help, ...) and parse command line
(options, args) = E.Start(parser, argv=argv, add_output_options=True)
# Tallies records read (c.input) and records written (c.output).
c = E.Counter()
if options.method == "change-format":
# Conversion mode: stream converted records from stdin to stdout.
for record in Fastq.iterate_convert(options.stdin,
format=options.target_format,
guess=options.guess_format):
c.input += 1
options.stdout.write("%s\n" % record)
c.output += 1
elif options.method == "grep":
# re.match anchors at the start of the sequence only; the default pattern
# ".*" therefore keeps every record.
# This branch does not update the counter `c`.
for record in Fastq.iterate(options.stdin):
if re.match(options.grep_pattern, record.seq):
options.stdout.write("%s\n" % record)
elif options.method == "sample":
#......... part of the code is omitted here .........
示例4: main
# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Write one line of summary statistics for the fastq records on stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Records are read through ``Fastq.iterate_convert`` when
    ``--change-format`` is given and through ``Fastq.iterate_guess``
    otherwise. For each record the values returned by ``record.toPhred()``
    are used to accumulate the number of reads, the number of bases, the
    per-read length, the per-read mean quality and the number of bases whose
    quality is below ``options.min_quality``.

    Output on ``options.stdout`` is a tab-separated header line followed by
    one data line: reads, bases, mean/median read length, mean/median of the
    per-read mean qualities (all rounded to two decimals) and the count of
    low-quality bases.
    """
    if not argv:
        argv = sys.argv

    quality_formats = ('sanger', 'solexa', 'phred64',
                       'illumina-1.8', 'integer')

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    # The help texts use implicit string concatenation instead of backslash
    # continuations inside the literal, which would embed the source
    # indentation in the --help output. They also name the option that
    # actually exists (--guess-format) rather than a non-existent --format.
    parser.add_option(
        "--guess-format", dest="guess_format", type="choice",
        choices=quality_formats,
        help="The default behaviour of the script is to guess "
        "the quality format of the input fastq file. The user "
        "can specify the quality format of the input file using "
        "the --guess-format option. The script will use this format if "
        "sequence qualities are ambiguous [default=%default].")

    parser.add_option(
        "-f", "--change-format", dest="change_format",
        type="choice", choices=quality_formats,
        help="The script guesses the quality format of the input "
        "file and converts quality scores to the destination "
        "format unless --guess-format is specified [default=%default].")

    # min_quality is only a parser default; no option defined in this
    # function exposes it on the command line.
    parser.set_defaults(
        change_format=None,
        guess_format=None,
        min_quality=10)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.change_format:
        iterator = Fastq.iterate_convert(options.stdin,
                                         format=options.change_format,
                                         guess=options.guess_format)
    else:
        iterator = Fastq.iterate_guess(options.stdin,
                                       guess=options.guess_format)

    min_quality = options.min_quality
    number_of_reads = 0
    number_of_bases = 0
    read_lengths = []
    read_qualities = []
    bases_below_min = 0

    for record in iterator:
        number_of_reads += 1
        quals = record.toPhred()
        length_read = len(quals)
        number_of_bases += length_read
        # Count without materialising a throwaway list per read.
        bases_below_min += sum(1 for x in quals if x < min_quality)
        read_lengths.append(length_read)
        read_qualities.append(np.mean(quals))

    # With no input records numpy returns nan for these statistics (and
    # emits a RuntimeWarning); the output row then contains "nan".
    mean_length = round(np.mean(read_lengths), 2)
    median_length = round(np.median(read_lengths), 2)
    mean_quality = round(np.mean(read_qualities), 2)
    median_quality = round(np.median(read_qualities), 2)

    options.stdout.write(
        "reads\tbases\tmean_length\tmedian_length\tmean_quality\tmedian_quality\tnfailed\n")
    options.stdout.write(
        "%i\t%i\t%s\t%s\t%s\t%s\t%i\n" % (number_of_reads, number_of_bases,
                                          str(mean_length),
                                          str(median_length),
                                          str(mean_quality),
                                          str(median_quality),
                                          bases_below_min))
    E.Stop()
示例5: main
# 需要导入模块: from CGAT import Fastq [as 别名]
# 或者: from CGAT.Fastq import iterate_convert [as 别名]
def main(argv=None):
    """Write per-read quality statistics for the fastq records on stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Records are read through ``Fastq.iterate_convert`` when
    ``--change-format`` is given and through ``Fastq.iterate_guess``
    otherwise. A tab-separated header (``read``, ``nfailed``, ``nN`` followed
    by ``Stats.Summary().getHeaders()``) is written to ``options.stdout``,
    then one line per record holding:

    * the record identifier,
    * the number of values from ``record.toPhred()`` below
      ``options.min_quality``,
    * the number of ``"N"`` and ``"."`` characters in ``record.seq``,
    * ``str(Stats.Summary(quals))``.
    """
    if not argv:
        argv = sys.argv

    quality_formats = (
        'sanger', 'solexa', 'phred64', 'illumina-1.8', 'integer')

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    # The help texts use implicit string concatenation instead of backslash
    # continuations inside the literal, which would embed the source
    # indentation in the --help output. They also name the option that
    # actually exists (--guess-format) rather than a non-existent --format.
    parser.add_option(
        "--guess-format", dest="guess_format", type="choice",
        choices=quality_formats,
        help="The default behaviour of the script is to guess the quality "
        "format of the input fastq file. The user can specify "
        "the quality format of the input file using the --guess-format "
        "option. The script will use this format if the "
        "sequence qualities are ambiguous [default=%default].")

    parser.add_option(
        "-f", "--change-format", dest="change_format", type="choice",
        choices=quality_formats,
        help="The script will guess the quality format of the input file "
        "and convert quality scores to the destination format unless "
        "--guess-format is specified [default=%default].")

    # min_quality is only a parser default; no option defined in this
    # function exposes it on the command line.
    parser.set_defaults(
        change_format=None,
        guess_format=None,
        min_quality=10,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()

    if options.change_format:
        iterator = Fastq.iterate_convert(options.stdin,
                                         format=options.change_format,
                                         guess=options.guess_format)
    else:
        iterator = Fastq.iterate_guess(options.stdin,
                                       guess=options.guess_format)

    options.stdout.write("read\tnfailed\tnN\t%s\n" %
                         ("\t".join(Stats.Summary().getHeaders())))

    min_quality = options.min_quality

    for record in iterator:
        c.input += 1
        quals = record.toPhred()
        # Count without materialising a throwaway list per read.
        nfailed = sum(1 for x in quals if x < min_quality)
        # Both "N" and "." are counted as uncalled bases.
        nns = record.seq.count("N") + record.seq.count(".")
        options.stdout.write("%s\t%i\t%i\t%s\n" % (record.identifier,
                                                   nfailed,
                                                   nns,
                                                   str(Stats.Summary(quals))))
        c.output += 1

    # write footer and output benchmark information.
    E.info("%s" % str(c))
    E.Stop()