本文整理汇总了Python中qiime.util.qiime_open函数的典型用法代码示例。如果您正苦于以下问题:Python qiime_open函数的具体用法?Python qiime_open怎么用?Python qiime_open使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了qiime_open函数的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Command-line entry point: validate options and run extract_barcodes."""
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    # Read reorientation requires primer columns from a mapping file.
    if opts.attempt_read_reorientation and not opts.mapping_fp:
        option_parser.error("To use --attempt_read_reorientation, one must "
                            "supply a mapping file that contains both LinkerPrimerSequence "
                            "and ReversePrimer columns.")

    # Paired-end barcode mode needs a second fastq file.
    if opts.input_type == "barcode_paired_end" and not opts.fastq2:
        option_parser.error("To use input_type of barcode_paired_end, "
                            "a second fastq file must be specified with --fastq2")

    # With a single fastq there are no paired headers to match.
    disable_header_match = True if not opts.fastq2 else opts.disable_header_match

    # qiime_open transparently handles plain and gzipped input.
    fastq1 = qiime_open(opts.fastq1)
    fastq2 = qiime_open(opts.fastq2) if opts.fastq2 else None

    create_dir(opts.output_dir)

    map_fp = qiime_open(opts.mapping_fp) if opts.mapping_fp else None

    extract_barcodes(fastq1, fastq2, opts.output_dir, opts.input_type,
                     opts.bc1_len, opts.bc2_len, opts.rev_comp_bc1,
                     opts.rev_comp_bc2, opts.char_delineator,
                     opts.switch_bc_order, map_fp,
                     opts.attempt_read_reorientation, disable_header_match)
示例2: write_synced_barcodes_fastq
def write_synced_barcodes_fastq(joined_fp, index_fp):
    """Write a new index/barcode fastq containing only surviving reads.

    -joined_fp : file path to paired-end assembled fastq file
    -index_fp : file path to index / barcode reads fastq file

    Iterates through the joined-reads and index files in lockstep; only
    index reads whose headers match a header in the joined-pairs file are
    written out. Returns the path of the barcode fastq that was written
    (derived from joined_fp, with a '_barcodes.fastq' suffix).

    WARNING: Assumes reads are in the same order in both files, except
    for cases in which the corresponding read in the joined_fp file is
    missing (i.e. pairs failed to assemble).
    """
    # qiime_open handles both plain and gzipped data.
    jh = qiime_open(joined_fp)
    ih = qiime_open(index_fp)

    # Base the new index file name on the joined paired-end file name.
    j_path, ext = os.path.splitext(joined_fp)
    filtered_bc_outfile_path = j_path + '_barcodes.fastq'
    fbc_fh = open(filtered_bc_outfile_path, 'w')

    try:
        index_fastq_iter = MinimalFastqParser(ih, strict=False)
        joined_fastq_iter = MinimalFastqParser(jh, strict=False)

        for joined_label, joined_seq, joined_qual in joined_fastq_iter:
            index_label, index_seq, index_qual = next(index_fastq_iter)
            # Skip index reads whose partner failed to assemble.
            while joined_label != index_label:
                try:
                    index_label, index_seq, index_qual = \
                        next(index_fastq_iter)
                except StopIteration:
                    # Py2/Py3-compatible raise form (the original used
                    # the Py2-only "raise X, msg" syntax).
                    raise StopIteration(
                        "\n\nReached end of index-reads file" +
                        " before iterating through joined paired-end-reads file!" +
                        " Except for missing paired-end reads that did not survive" +
                        " assembly, your index and paired-end reads files must be in" +
                        " the same order! Also, check that the index-reads and" +
                        " paired-end reads have identical headers. The last joined" +
                        " paired-end ID processed was:\n\'%s\'\n" % (joined_label))
            # Headers now match: keep this index read.
            fastq_string = '@%s\n%s\n+\n%s\n' \
                % (index_label, index_seq, index_qual)
            fbc_fh.write(fastq_string)
    finally:
        # Close handles even when the files turn out to be out of sync;
        # the original leaked all three on that error path.
        ih.close()
        jh.close()
        fbc_fh.close()

    return filtered_bc_outfile_path
示例3: main
def main():
    """Compute per-sample novelty data and write it as a tab-separated table."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fhs = [qiime_open(fp) for fp in opts.otu_table_fps]
    novelty_rows = compute_sample_novelty(
        otu_table_fhs, qiime_open(opts.rep_set_fp), opts.verbose)

    # One header row followed by one row per sample.
    with open(opts.output_fp, 'w') as out_f:
        tsv_writer = writer(out_f, delimiter='\t', lineterminator='\n')
        tsv_writer.writerow(['SampleID', 'Number of novel OTUs',
                             'Percent novel sequences'])
        tsv_writer.writerows(novelty_rows)
示例4: extract_reads_from_interleaved
def extract_reads_from_interleaved(
        input_fp, forward_id, reverse_id, output_dir):
    """Split an interleaved fastq file into forward and reverse read files.

    input_fp: file path to the interleaved fastq input
    forward_id: substring identifying forward-read labels
    reverse_id: substring identifying reverse-read labels
    output_dir: directory where forward_reads.fastq and
        reverse_reads.fastq are written

    Raises ValueError when a read label matches neither identifier.
    NOTE(review): a label containing both identifiers is written as a
    forward read (the forward check wins), despite what the error
    message suggests.
    """
    forward_fp = join(output_dir, "forward_reads.fastq")
    reverse_fp = join(output_dir, "reverse_reads.fastq")
    # Context managers guarantee both handles are closed even when a
    # malformed label aborts the loop with ValueError.
    with open(forward_fp, 'w') as ffp, open(reverse_fp, 'w') as rfp:
        for label, seq, qual in parse_fastq(qiime_open(input_fp),
                                            strict=False):
            fastq_string = format_fastq_record(label, seq, qual)
            if forward_id in label:
                ffp.write(fastq_string)
            elif reverse_id in label:
                # forward_id is already known to be absent in this branch,
                # so the original's extra "and forward_id not in label"
                # test was redundant.
                rfp.write(fastq_string)
            else:
                raise ValueError(
                    "One of the input sequences doesn't have either identifier "
                    "or it has both.\nLabel: %s\nForward: %s\n Reverse: %s" %
                    (label, forward_id, reverse_id))
示例5: get_biom_tables
def get_biom_tables(otu_table_dir):
    """Load every biom table found in a directory.

    otu_table_dir: directory containing biom tables, either .biom or
        .biom.gz

    Returns a list of parsed biom table objects.
    """
    # The pattern matches both .biom and .biom.gz file names.
    table_paths = glob("%s/*biom*" % otu_table_dir)
    # qiime_open transparently opens plain and gzipped files.
    return [parse_biom_table(qiime_open(path)) for path in table_paths]
示例6: make_flow_txt
def make_flow_txt(sff_fp, output_fp, use_sfftools=False):
    """Makes flowgram file from sff file.

    sff_fp: path to the input binary sff file
    output_fp: path the flowgram text is written to
    use_sfftools: when True, shell out to the external sffinfo tool
        instead of parsing the sff file in Python

    Raises IOError if the sff file cannot be parsed.
    """
    if use_sfftools:
        # The external tools cannot read gzipped sff files.
        _fail_on_gzipped_sff(sff_fp)
        check_sffinfo()
        _check_call(['sffinfo', sff_fp], stdout=open(output_fp, 'w'))
    else:
        try:
            # "with" guarantees the output handle is closed; the original
            # left it to the garbage collector.
            with open(output_fp, 'w') as out_f:
                format_binary_sff(qiime_open(sff_fp, 'rb'), out_f)
        except Exception:
            # Narrowed from a bare "except:" so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            raise IOError("Could not parse SFF %s" % sff_fp)
示例7: convert_Ti_to_FLX
def convert_Ti_to_FLX(sff_fp, output_fp, use_sfftools=False):
    """Converts Titanium SFF to FLX length reads.

    sff_fp: path to the input Titanium sff file
    output_fp: path the converted sff is written to
    use_sfftools: when True, shell out to the external sfffile tool
        instead of converting in Python
    """
    if use_sfftools:
        # The external tools cannot read gzipped sff files.
        _fail_on_gzipped_sff(sff_fp)
        check_sfffile()
        # Discard sfffile's stdout; "with" closes the devnull handle the
        # original leaked.
        with open(os.devnull, 'w') as devnull:
            _check_call(
                ['sfffile', '-flx', '-o', output_fp, sff_fp],
                stdout=devnull)
    else:
        header, reads = adjust_sff_cycles(
            parse_binary_sff(qiime_open(sff_fp, 'rb'), True), 100)
        # "with" guarantees the output handle is flushed and closed.
        with open(output_fp, 'w') as out_f:
            write_binary_sff(out_f, header, reads)
示例8: make_qual
def make_qual(sff_fp, output_fp, use_sfftools=False, no_trim=False):
    """Makes qual file from sff file.

    sff_fp: path to the input binary sff file
    output_fp: path the qual data is written to
    use_sfftools: when True, shell out to the external sffinfo tool
        instead of parsing the sff file in Python
    no_trim: when True (and use_sfftools), pass -notrim to sffinfo

    Raises IOError if the sff file cannot be parsed.
    """
    if use_sfftools:
        # The external tools cannot read gzipped sff files.
        _fail_on_gzipped_sff(sff_fp)
        check_sffinfo()
        if no_trim:
            _check_call(['sffinfo', '-notrim', '-q', sff_fp],
                        stdout=open(output_fp, 'w'))
        else:
            _check_call(['sffinfo', '-q', sff_fp],
                        stdout=open(output_fp, 'w'))
    else:
        try:
            # "with" guarantees the output handle is closed; the original
            # left it to the garbage collector.
            with open(output_fp, 'w') as out_f:
                format_binary_sff_as_fna(qiime_open(sff_fp, 'rb'), out_f,
                                         qual=True)
        except Exception:
            # Narrowed from a bare "except:" so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            raise IOError("Could not parse SFF %s" % sff_fp)
示例9: main
def main():
    """For each observation ID across a set of biom tables, print the
    tab-separated IDs of the samples in which it has a nonzero count."""
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    samples_by_obs = {}
    for table_fp in glob(opts.input_glob):
        table = parse_biom_table(qiime_open(table_fp))
        for obs_values, obs_id, _ in table.iterObservations():
            present = samples_by_obs.setdefault(obs_id, set())
            for idx, count in enumerate(obs_values):
                if count > 0:
                    present.add(table.SampleIds[idx])

    for obs_id, sample_ids in samples_by_obs.items():
        print('%s\t%s' % (obs_id, '\t'.join(sample_ids)))
示例10: test_adjust_sff_cycles
def test_adjust_sff_cycles(self):
    """adjust_sff_cycles yields the same result for plain and gzipped sff."""
    expected_header = {
        'header_length': 48,
        'version': 1,
        'index_length': 0,
        'magic_number': 779314790,
        'number_of_flows_per_read': 8,
        'flowgram_format_code': 1,
        'flow_chars': 'TACGTACG',
        'index_offset': 0,
        'key_sequence': 'TCAG',
        'number_of_reads': 1,
        'key_length': 4,
    }
    expected_read = {
        'name_length': 14,
        'Name': 'FA6P1OK01CGMHQ',
        'flowgram_values':
        [1.04, 0.0, 1.01, 0.0, 0.0, 0.95999999999999996, 0.0, 1.02],
        'clip_adapter_left': 0,
        'read_header_length': 32,
        'Bases': 'TCAG',
        'number_of_bases': 4,
        'flow_index_per_base': (1, 2, 3, 2),
        'clip_qual_left': 4,
        'clip_adapter_right': 0,
        'clip_qual_right': 4,
        'quality_scores': (32, 32, 32, 32),
    }

    # Exercise both the plain and the gzipped input path.
    sff_sources = (parse_binary_sff(open(self.sff_fp)),
                   parse_binary_sff(qiime_open(self.sff_gz_fp)))
    for sff_data in sff_sources:
        header, reads = adjust_sff_cycles(sff_data, 2)
        self.assertEqual(header, expected_header)
        reads = list(reads)
        self.assertEqual(len(reads), 1)
        self.assertEqual(reads[0], expected_read)
示例11: main
def main():
option_parser, opts, args = parse_command_line_parameters(**script_info)
otu_table_fp = opts.otu_table_fp
otu_table = parse_biom_table(qiime_open(otu_table_fp))
min_counts, max_counts, median_counts, mean_counts, counts_per_sample = compute_seqs_per_library_stats(
otu_table, opts.num_otus
)
num_otus = len(otu_table.ObservationIds)
counts_per_sample_values = counts_per_sample.values()
med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)
num_samples = len(counts_per_sample)
print "Num samples: %s" % str(num_samples)
print "Num otus: %s" % str(num_otus)
if not opts.num_otus:
num_observations = sum(counts_per_sample_values)
print "Num observations (sequences): %s" % str(num_observations)
# port denisty functionality to a tested function. the following is broken (should be
# count of non-zero cells rather than number of observations in the numerator)
# print 'Table density (fraction of non-zero values): %1.4f' % (num_observations/(num_samples * num_otus))
print
if opts.num_otus:
print "OTUs/sample summary:"
else:
print "Seqs/sample summary:"
print " Min: %s" % str(min_counts)
print " Max: %s" % str(max_counts)
print " Median: %s" % str(median_counts)
print " Mean: %s" % str(mean_counts)
print " Std. dev.: %s" % (str(std(counts_per_sample_values)))
print " Median Absolute Deviation: %s" % str(med_abs_dev)
print " Default even sampling depth in\n core_qiime_analyses.py (just a suggestion): %s" % str(even_sampling_depth)
print ""
if opts.num_otus:
print "OTUs/sample detail:"
else:
print "Seqs/sample detail:"
sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
sorted_counts_per_sample.sort()
total_count = 0
for v, k in sorted_counts_per_sample:
total_count += v
print " %s: %s" % (k, str(v))
if opts.mapping_fp:
if not opts.output_mapping_fp:
raise RuntimeError("input mapping file supplied, but no path to" + " output file")
f = open(opts.mapping_fp, "U")
mapping_lines, headers, comments = parse_mapping_file(f)
f.close()
if len(headers) == 1:
endoffset = 0 # if we only have the sample id, this data -> last col
else:
endoffset = 1 # usually make this data the penultimate column.
headers.insert(len(headers) - endoffset, "NumIndividuals")
for map_line in mapping_lines:
sample_id = map_line
try:
depth = str(counts_per_sample[map_line[0]])
except KeyError:
depth = "na"
map_line.insert(len(map_line) - endoffset, depth)
new_map_str = format_mapping_file(headers, mapping_lines, comments)
f = open(opts.output_mapping_fp, "w")
f.write(new_map_str)
f.close()
示例12: main
def main():
option_parser, opts,args = parse_command_line_parameters(**script_info)
otu_table_fp = opts.otu_table_fp
otu_table = parse_biom_table(qiime_open(otu_table_fp))
min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
compute_seqs_per_library_stats(otu_table, opts.num_otus)
num_otus = len(otu_table.ObservationIds)
counts_per_sample_values = counts_per_sample.values()
med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)
try:
sample_md_keys = otu_table.SampleMetadata[0].keys()
except TypeError:
sample_md_keys = ["None provided"]
try:
observation_md_keys = otu_table.ObservationMetadata[0].keys()
except TypeError:
observation_md_keys = ["None provided"]
num_samples = len(counts_per_sample)
print 'Num samples: %s' % str(num_samples)
print 'Num otus: %s' % str(num_otus)
if not opts.num_otus:
num_observations = sum(counts_per_sample_values)
print 'Num observations (sequences): %s' % str(num_observations)
print 'Table density (fraction of non-zero values): %1.4f' % \
otu_table.getTableDensity()
print
if opts.num_otus:
print 'OTUs/sample summary:'
else:
print 'Seqs/sample summary:'
print ' Min: %s' % str(min_counts)
print ' Max: %s' % str(max_counts)
print ' Median: %s' % str(median_counts)
print ' Mean: %s' % str(mean_counts)
print ' Std. dev.: %s' % (str(std(counts_per_sample_values)))
print ' Median Absolute Deviation: %s' % str(med_abs_dev)
print ' Default even sampling depth in\n core_qiime_analyses.py (just a suggestion): %s' %\
str(even_sampling_depth)
print ' Sample Metadata Categories: %s' % '; '.join(sample_md_keys)
print ' Observation Metadata Categories: %s' % '; '.join(observation_md_keys)
print ''
if opts.num_otus:
print 'OTUs/sample detail:'
else:
print 'Seqs/sample detail:'
sorted_counts_per_sample = [(v,k) for k,v in counts_per_sample.items()]
sorted_counts_per_sample.sort()
total_count = 0
for v,k in sorted_counts_per_sample:
total_count += v
print ' %s: %s' % (k,str(v))
if opts.mapping_fp:
if not opts.output_mapping_fp:
raise RuntimeError('input mapping file supplied, but no path to'+\
' output file')
f = open(opts.mapping_fp,'U')
mapping_lines, headers, comments = parse_mapping_file(f)
f.close()
if len(headers)==1:
endoffset = 0 # if we only have the sample id, this data -> last col
else:
endoffset = 1 # usually make this data the penultimate column.
headers.insert(len(headers)-endoffset,'SequenceCount')
for map_line in mapping_lines:
sample_id = map_line
try:
depth = str(counts_per_sample[map_line[0]])
except KeyError:
depth = 'na'
map_line.insert(len(map_line)-endoffset,depth)
new_map_str = format_mapping_file(headers, mapping_lines, comments)
f = open(opts.output_mapping_fp, 'w')
f.write(new_map_str)
f.close()