本文整理汇总了Python中qiime.parse.fields_to_dict函数的典型用法代码示例。如果您正苦于以下问题：Python fields_to_dict函数的具体用法？Python fields_to_dict怎么用？Python fields_to_dict使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了fields_to_dict函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: map_otu_map_files
def map_otu_map_files(otu_files, failures_file=None):
    """Fold a sequence of OTU map files into one OTU map.

    The first entry of ``otu_files`` is parsed as the starting map. Every
    later entry is parsed and combined with the running result through
    ``expand_otu_map_seq_ids``. If ``failures_file`` is truthy, the combined
    map is finally passed through ``expand_failures``.

    Arguments:
        otu_files - indexable sequence of OTU map inputs accepted by
         ``fields_to_dict``; must contain at least one entry
        failures_file - optional input handed to ``expand_failures``
    """
    first_map, later_maps = otu_files[0], otu_files[1:]

    # delim=None makes the parser split on any run of whitespace, so maps
    # that mix tabs and spaces are still read correctly.
    merged = fields_to_dict(first_map, delim=None)
    for later_map in later_maps:
        merged = expand_otu_map_seq_ids(
            fields_to_dict(later_map, delim=None), merged)

    if not failures_file:
        return merged
    return expand_failures(failures_file, merged)
示例2: rewrite_otu_table_with_taxonomy
def rewrite_otu_table_with_taxonomy(taxon_lines, otu_lines, id_map_lines=None,
                                    outfile=stdout):
    """Write the OTU table to ``outfile`` with a taxonomy column appended.

    Arguments:
        taxon_lines - taxonomy assignment lines, parsed with fields_to_dict
        otu_lines - lines of the OTU table to rewrite
        id_map_lines - optional tab-separated two-column lines used to
         translate taxonomy keys; keys that are not in the map are dropped
        outfile - object with a ``write`` method that receives the output

    The '#OTU ID' header line gains a 'Consensus Lineage' column, other
    '#' lines are copied unchanged, and each data line gains the taxonomy
    value looked up by its first tab-separated field ('None' when absent).
    """
    # Keys sometimes carry extra fields after the OTU id; only the first
    # whitespace-delimited token is kept.
    trimmed = {}
    for raw_key, lineage in fields_to_dict(taxon_lines).items():
        trimmed[raw_key.split()[0]] = lineage
    taxonomy = fix_taxonomy_delimiters(trimmed)

    if id_map_lines:
        id_map = dict(map(strip, entry.split('\t')) for entry in id_map_lines)
        remapped = {}
        for otu_id, lineage in taxonomy.items():
            if otu_id in id_map:
                remapped[id_map[otu_id]] = lineage
        assert remapped != taxonomy
        taxonomy = remapped

    for line in otu_lines:
        if not line.endswith('\n'):
            line += '\n'
        if not line.startswith('#'):
            # Data row: the first tab-separated field is the OTU id.
            otu_id, _ = line.split('\t', 1)
            lineage = taxonomy.get(otu_id, 'None')
            outfile.write(line[:-1] + '\t' + lineage + '\n')
        elif line.startswith('#OTU ID'):
            outfile.write(line[:-1] + '\tConsensus Lineage\n')
        else:
            outfile.write(line)
示例3: main
def main():
    """Run a Mantel test on every pair of input distance matrices.

    Reads the command line options, optionally loads a sample id map (first
    value of each row is used), and for each unordered pair of distance
    matrix paths writes one tab-separated result row to ``opts.output_fp``.
    Pairs whose compatible matrices have fewer than two labels are reported
    as 'Too few samples' instead of being tested.

    All files are opened through ``with`` so handles are released even when
    parsing or the test itself raises.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    sample_id_map_fp = opts.sample_id_map_fp
    if sample_id_map_fp:
        with open(sample_id_map_fp, "U") as sample_id_map_f:
            sample_id_map = dict(
                (k, v[0]) for k, v in
                fields_to_dict(sample_id_map_f).items())
    else:
        sample_id_map = None

    input_dm_fps = opts.input_dms.split(',')
    num_iterations = opts.num_iterations

    with open(opts.output_fp, 'w') as output_f:
        output_f.write(comment)
        output_f.write('DM1\tDM2\tNumber of entries\tMantel p-value\n')
        for i, fp1 in enumerate(input_dm_fps):
            # Only pairs (i, j) with j > i: each unordered pair once.
            for fp2 in input_dm_fps[i+1:]:
                with open(fp1, 'U') as dm1_f, open(fp2, 'U') as dm2_f:
                    (dm1_labels, dm1), (dm2_labels, dm2) =\
                        make_compatible_distance_matrices(
                            parse_distmat(dm1_f),
                            parse_distmat(dm2_f),
                            lookup=sample_id_map)
                if len(dm1_labels) < 2:
                    output_f.write('%s\t%s\t%d\tToo few samples\n' %
                                   (fp1, fp2, len(dm1_labels)))
                    continue
                p = mantel(dm1, dm2, n=num_iterations)
                p_str = format_p_value_for_num_iters(p, num_iterations)
                output_f.write('%s\t%s\t%d\t%s\n' %
                               (fp1, fp2, len(dm1_labels), p_str))
示例4: test_parallel_rdp_taxonomy_assigner
def test_parallel_rdp_taxonomy_assigner(self):
    """ parallel_rdp_taxonomy_assigner functions as expected """
    params = {'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
              'rdp_max_memory': 1500,
              'rdp_classifier_fp': getenv('RDP_JAR_PATH'),
              'confidence': 0.80,
              'reference_seqs_fp': self.reference_seqs_file.name
              }

    app = ParallelRdpTaxonomyAssigner()
    r = app(self.tmp_seq_filepath,
            self.test_out,
            params,
            job_prefix='RDPTEST',
            poll_directly=True,
            suppress_submit_jobs=False)

    # Read the first matching assignments file; the handle is closed as soon
    # as parsing finishes instead of being left to the garbage collector.
    results_fp = glob(join(self.test_out, '*_tax_assignments.txt'))[0]
    with open(results_fp, 'U') as results_f:
        results = fields_to_dict(results_f)

    # some basic sanity checks: we should get the same number of sequences
    # as our input with the same seq IDs. We should have a taxonomy string
    # and a confidence value for each seq as well.
    self.assertEqual(len(results), 2)
    self.assertEqual(len(results['X67228 some description']), 2)
    self.assertEqual(len(results['EF503697']), 2)
示例5: _parse_taxonomic_information
def _parse_taxonomic_information(tax_map_lines, taxonomic_levels=8):
"""Parses a taxonomy mapping file to return mapping of seq ID to taxonomy.
Returns a dictionary with sequence ID as the key and a list containing the
taxonomy at each level. Empty taxonomic levels (i.e. ';;' or levels
containing only whitespace) are ignored.
Arguments:
tax_map_lines - list of lines from the taxonomy mapping file (the
result of calling readlines() on the open file handle)
taxonomic_levels - the number of taxonomic levels in the taxonomy
strings found in the taxonomy mapping file. All taxonomy strings
must have this number of levels (excluding empty taxonomic levels)
"""
tax_info = {}
if tax_map_lines[0] != \
"ID Number\tGenBank Number\tNew Taxon String\tSource\n":
raise ValueError("The taxonomy map file appears to be invalid "
"because it is either missing the header or has a "
"corrupt header.")
for seq_id, seq_info in fields_to_dict(tax_map_lines[1:]).items():
if len(seq_info) != 3:
raise ValueError("The taxonomy map file appears to be invalid "
"because it does not have exactly 4 columns.")
# Split at each level and remove any empty levels or levels that
# contain only whitespace.
taxonomy = [level for level in seq_info[1].split(';') \
if level.strip() != '']
if len(taxonomy) != taxonomic_levels:
raise ValueError("Encountered invalid taxonomy '%s'. Valid "
"taxonomy strings must have %d levels separated by "
"semicolons." % (seq_info[1], taxonomic_levels))
tax_info[seq_id] = taxonomy
return tax_info
示例6: get_seqs_to_keep_lookup_from_otu_map
def get_seqs_to_keep_lookup_from_otu_map(seqs_to_keep_f):
    """Build a lookup of every sequence id listed in an OTU map.

    The OTU map is parsed with fields_to_dict; each sequence id found in any
    of its value lists becomes a key of the returned dict, mapped to None.
    """
    lookup = {}
    for member_ids in fields_to_dict(seqs_to_keep_f).values():
        for seq_id in member_ids:
            lookup[seq_id] = None
    return lookup
示例7: test_write_otu_map_prefix
def test_write_otu_map_prefix(self):
    """write_otu_map functions as expected w otu prefix """
    write_otu_map(self.otu_map1, self.tmp_fp1, "my.otu.")
    # Register the file for cleanup before reading it back, so a failure
    # while parsing does not skip the registration.
    # NOTE(review): presumably files_to_remove is consumed by tearDown - confirm.
    self.files_to_remove.append(self.tmp_fp1)

    with open(self.tmp_fp1) as otu_map_f:
        actual = fields_to_dict(otu_map_f)

    exp = {"my.otu.0": ["seq1", "seq2", "seq5"],
           "my.otu.1": ["seq3", "seq4"],
           "my.otu.2": ["seq6", "seq7", "seq8"]}
    self.assertEqual(actual, exp)
示例8: _generate_taxonomic_agreement_summary
def _generate_taxonomic_agreement_summary(otu_map_lines, tax_map_lines,
                                          taxonomic_levels=8):
    """Computes a summary of taxonomic agreement between ref and its seqs.

    Returns a dictionary with OTU ID as the key. The value is a four-element
    list. The first element is the size of the OTU (i.e. the number of seqs in
    the OTU, including the reference). The second element is a list of sequence
    identifiers for each sequence in the OTU. The reference sequence ID will
    always be listed first, followed by the sequence IDs of the other members
    of the OTU as they appear in the OTU map. The third element is a list
    containing percent agreement at each taxonomic level (a list of floats).
    The fourth element is a list containing all taxonomic values that were
    encountered at each level. The reference taxonomic value will always be
    listed first. The third and fourth elements of the top-level list will
    always be the same length (taxonomic_levels) because they each contain
    information for each taxonomic level.

    Arguments:
        otu_map_lines - list of lines in the OTU map (the result of calling
            readlines() on the open file handle)
        tax_map_lines - list of lines from the taxonomy mapping file (the
            result of calling readlines() on the open file handle)
        taxonomic_levels - the number of taxonomic levels in the taxonomy
            strings found in the taxonomy mapping file. All taxonomy strings
            must have this number of levels to prevent inconsistent results in
            the summary
    """
    tax_map = _parse_taxonomic_information(tax_map_lines, taxonomic_levels)
    otu_map = fields_to_dict(otu_map_lines)

    taxonomic_agreement = {}
    for otu_id, seq_ids in otu_map.items():
        otu_size = len(seq_ids)
        percent_agreement = []
        levels_seen = []
        taxonomic_agreement[otu_id] = [otu_size, seq_ids, percent_agreement,
                                       levels_seen]

        # The reference sequence is always the first sequence listed in the OTU
        # map.
        ref_seq_id = seq_ids[0]
        ref_seq_tax = tax_map[ref_seq_id]

        # Calculate percent agreement for each taxonomic level. If the OTU only
        # contains a reference sequence, the percent agreement will be 100%.
        # Also keep track of all unique taxonomic values that are encountered
        # for each level (with the reference's taxonomic value listed first).
        for level_idx, ref_level in enumerate(ref_seq_tax):
            agreement_count = 0
            encountered_levels = []
            for seq_id in seq_ids:
                seq_level = tax_map[seq_id][level_idx]
                if ref_level == seq_level:
                    agreement_count += 1
                if seq_level not in encountered_levels:
                    encountered_levels.append(seq_level)

            # Force float division so the percentage is not truncated to 0 or
            # 100 when the module runs under Python 2 without
            # `from __future__ import division`.
            percent_agreement.append(
                (float(agreement_count) / otu_size) * 100)
            levels_seen.append(encountered_levels)
    return taxonomic_agreement
示例9: test_write_otu_map_prefix
def test_write_otu_map_prefix(self):
    """write_otu_map functions as expected w otu prefix """
    write_otu_map(self.otu_map1, self.tmp_fp1, 'my.otu.')
    # Register the file for cleanup before reading it back, so a failure
    # while parsing does not skip the registration.
    # NOTE(review): presumably files_to_remove is consumed by tearDown - confirm.
    self.files_to_remove.append(self.tmp_fp1)

    with open(self.tmp_fp1) as otu_map_f:
        actual = fields_to_dict(otu_map_f)

    exp = {'my.otu.0': ['seq1', 'seq2', 'seq5'],
           'my.otu.1': ['seq3', 'seq4'],
           'my.otu.2': ['seq6', 'seq7', 'seq8']}
    self.assertEqual(actual, exp)
示例10: main
def main():
    """Run the requested Mantel-family analysis on the input matrices.

    Depending on ``opts.method`` this writes the output of a Mantel test, a
    partial Mantel test, or a Mantel correlogram to a results file inside
    ``opts.output_dir``. For the correlogram, each returned figure is also
    saved there. An unrecognized method is reported through the option
    parser rather than failing later on an unbound output handle.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        if not path.exists(opts.output_dir):
            create_dir(opts.output_dir)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt/SystemExit must
        # propagate instead of being reported as a directory problem.
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, "U") as sample_id_map_f:
            sample_id_map = dict(
                (k, v[0]) for k, v in
                fields_to_dict(sample_id_map_f).items())

    input_dm_fps = opts.input_dms
    distmats = []
    for dm_fp in input_dm_fps:
        with open(dm_fp, "U") as dm_f:
            distmats.append(parse_distmat(dm_f))

    if opts.method == "mantel":
        results_fp = path.join(opts.output_dir, "mantel_results.txt")
        with open(results_fp, "w") as output_f:
            output_f.write(
                run_mantel_test(
                    "mantel",
                    input_dm_fps,
                    distmats,
                    opts.num_permutations,
                    opts.tail_type,
                    comment_mantel_pmantel,
                    sample_id_map=sample_id_map,
                )
            )
    elif opts.method == "partial_mantel":
        with open(opts.control_dm, "U") as control_dm_f:
            control_dm = parse_distmat(control_dm_f)
        results_fp = path.join(opts.output_dir, "partial_mantel_results.txt")
        with open(results_fp, "w") as output_f:
            output_f.write(
                run_mantel_test(
                    "partial_mantel",
                    input_dm_fps,
                    distmats,
                    opts.num_permutations,
                    opts.tail_type,
                    comment_mantel_pmantel,
                    control_dm_fp=opts.control_dm,
                    control_dm=control_dm,
                    sample_id_map=sample_id_map,
                )
            )
    elif opts.method == "mantel_corr":
        results_fp = path.join(opts.output_dir,
                               "mantel_correlogram_results.txt")
        with open(results_fp, "w") as output_f:
            result_str, correlogram_fps, correlograms = run_mantel_correlogram(
                input_dm_fps, distmats, opts.num_permutations, comment_corr,
                opts.alpha, sample_id_map=sample_id_map
            )
            output_f.write(result_str)
        for corr_fp, corr in zip(correlogram_fps, correlograms):
            corr.savefig(path.join(opts.output_dir, corr_fp + opts.image_type),
                         format=opts.image_type)
    else:
        option_parser.error("Unrecognized method '%s'." % opts.method)
示例11: test_fields_to_dict
def test_fields_to_dict(self):
    """fields_to_dict should make first field key, rest val"""
    # Rows mimic output from cd-hit. Fields are joined with an explicit '\t'
    # so the fixture does not depend on invisible literal tabs surviving
    # editors and copy/paste.
    # NOTE(review): assumes fields_to_dict splits on tabs by default - confirm.
    rows = [
        ['0', 'R27DLI_4812', 'R27DLI_600', 'R27DLI_727', 'U1PLI_403',
         'U1PLI_8969', 'U1PLI_9080', 'U1PLI_9526', 'W3Cecum_6642',
         'W3Cecum_8992'],
        ['1', 'U1PLI_7889'],
        ['2', 'W3Cecum_4858'],
        ['3', 'R27DLI_3243', 'R27DLI_4562', 'R27DLI_6828', 'R27DLI_9097',
         'U1PLI_2780', 'U1PLI_67', 'U9PSI_10475', 'U9PSI_4341',
         'W3Cecum_5191'],
    ]
    test_data = ['\t'.join(row) for row in rows]

    obs = fields_to_dict(test_data)
    exp = {'0': ['R27DLI_4812', 'R27DLI_600', 'R27DLI_727', 'U1PLI_403',
                 'U1PLI_8969', 'U1PLI_9080', 'U1PLI_9526', 'W3Cecum_6642',
                 'W3Cecum_8992'],
           '1': ['U1PLI_7889'],
           '2': ['W3Cecum_4858'],
           '3': ['R27DLI_3243', 'R27DLI_4562', 'R27DLI_6828', 'R27DLI_9097',
                 'U1PLI_2780', 'U1PLI_67', 'U9PSI_10475', 'U9PSI_4341',
                 'W3Cecum_5191']}
    self.assertEqual(obs, exp)
示例12: main
def main():
    """opens files as necessary based on prefs

    Loads the input alignment and the OTU map into a ``data`` dict, resolves
    the output directory (creating it when needed) and, when the
    ``filter_samples`` action is available, runs it with the parsed sample
    preferences. Problems that previously surfaced later as unbound-variable
    errors (directory could not be created, no samples specified) are
    reported through the option parser instead.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}

    fasta_file = opts.input_fasta_fp

    # load the input alignment
    with open(fasta_file) as fasta_f:
        data['aln'] = SequenceCollection.from_fasta_records(
            parse_fasta(fasta_f), DNA)

    # Load the otu file
    with open(opts.otu_map_fp, 'U') as otu_f:
        data['otus'] = fields_to_dict(otu_f)

    # Determine which which samples to extract from representative seqs
    # and from otus file
    prefs = None
    if opts.samples_to_extract:
        prefs = process_extract_samples(opts.samples_to_extract)

    # Output file stem: basename of the FASTA path up to its first '.'.
    filepath = opts.input_fasta_fp
    filename = filepath.strip().split('/')[-1]
    filename = filename.split('.')[0]

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError as e:
                # Tolerate a directory created concurrently; anything else
                # is a real failure that must not be silently ignored.
                if not os.path.isdir(dir_path):
                    option_parser.error(
                        "Could not create output directory %s: %s" %
                        (dir_path, e))
    else:
        dir_path = './'

    try:
        action = filter_samples
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        if not opts.samples_to_extract:
            option_parser.error("No samples to extract were specified.")
        action(prefs, data, dir_path, filename)
示例13: main
def main():
    """Build an OTU table from an OTU map and write it out.

    The table is written to ``opts.output_fp`` when given, otherwise to
    stdout. A taxonomy file and a file of OTUs to exclude are both optional.
    Every input is read inside a ``with`` block, and an output file opened
    here is closed even if building the table fails.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    if opts.output_fp:
        outfile = open(opts.output_fp, 'w')
    else:
        outfile = stdout

    try:
        if not opts.taxonomy_fname:
            otu_to_taxonomy = None
        else:
            with open(opts.taxonomy_fname, 'U') as infile:
                otu_to_taxonomy = parse_taxonomy(infile)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            otu_to_seqid = fields_to_dict(otu_map_f)

        if exclude_otus_fp:
            with open(exclude_otus_fp, 'U') as exclude_otus_f:
                otu_to_seqid = remove_otus(otu_to_seqid, exclude_otus_f)

        outfile.write(make_otu_table(otu_to_seqid, otu_to_taxonomy))
    finally:
        # Never close the process-wide stdout, only a file opened above.
        if outfile is not stdout:
            outfile.close()
示例14: test_parallel_blast_taxonomy_assigner
def test_parallel_blast_taxonomy_assigner(self):
    """ parallel_blast_taxonomy_assigner functions as expected """
    params = {'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
              'blastmat_dir': None,
              'e_value': 0.001,
              'blast_db': None,
              'reference_seqs_fp': self.reference_seqs_file.name
              }

    app = ParallelBlastTaxonomyAssigner()
    r = app(self.tmp_seq_filepath,
            self.test_out,
            params,
            job_prefix='BTATEST',
            poll_directly=True,
            suppress_submit_jobs=False)

    # Read the first matching assignments file; the handle is closed as soon
    # as parsing finishes instead of being left to the garbage collector.
    results_fp = glob(join(self.test_out, '*_tax_assignments.txt'))[0]
    with open(results_fp, 'U') as results_f:
        results = fields_to_dict(results_f)

    # some basic sanity checks: we should get the same number of sequences
    # as our input with the same seq IDs. Each assignment is expected to
    # carry three fields after the sequence ID.
    self.assertEqual(len(results), 6)
    self.assertEqual(len(results['s1']), 3)
    self.assertEqual(len(results['s6']), 3)
示例15: test_parallel_uclust_taxonomy_assigner
def test_parallel_uclust_taxonomy_assigner(self):
    """ parallel_uclust_taxonomy_assigner functions as expected """
    params = {'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
              'reference_seqs_fp': self.reference_seqs_file.name,
              'min_consensus_fraction': 0.51,
              'similarity': 0.90,
              'uclust_max_accepts': 3
              }

    app = ParallelUclustConsensusTaxonomyAssigner()
    r = app(self.tmp_seq_filepath,
            self.test_out,
            params,
            job_prefix='UTATEST',
            poll_directly=True,
            suppress_submit_jobs=False)

    # Read the first matching assignments file; the handle is closed as soon
    # as parsing finishes instead of being left to the garbage collector.
    results_fp = glob(join(self.test_out, '*_tax_assignments.txt'))[0]
    with open(results_fp, 'U') as results_f:
        results = fields_to_dict(results_f)

    # some basic sanity checks: we should get the same number of sequences
    # as our input with the same seq IDs. Each assignment is expected to
    # carry three fields after the sequence ID.
    self.assertEqual(len(results), 6)
    self.assertEqual(len(results['s1']), 3)
    self.assertEqual(len(results['s6']), 3)