本文整理汇总了Python中qiime.parse.parse_distmat函数的典型用法代码示例。如果您正苦于以下问题：Python parse_distmat函数的具体用法？Python parse_distmat怎么用？Python parse_distmat使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse_distmat函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setUp
def setUp(self):
    """Create the distance matrices and shared settings the tests rely on."""
    # Each matrix is stored twice: as raw tab-separated lines (dmN_str) and
    # as whatever parse_distmat returns for those lines (dmN).
    self.dm1_str = [
        "\ts1\ts2\ts3",
        "s1\t0\t0.5\t0.2",
        "s2\t0.5\t0\t0.3",
        "s3\t0.2\t0.3\t0",
    ]
    self.dm1 = parse_distmat(self.dm1_str)

    self.dm2_str = [
        "\ts1\ts2\ts3",
        "s1\t0\t0.8\t0.25",
        "s2\t0.8\t0\t0.4",
        "s3\t0.25\t0.4\t0",
    ]
    self.dm2 = parse_distmat(self.dm2_str)

    self.dm3_str = [
        "\ts1\ts2\ts3",
        "s1\t0\t0.1\t0.2",
        "s2\t0.1\t0\t0.9",
        "s3\t0.2\t0.9\t0",
    ]
    self.dm3 = parse_distmat(self.dm3_str)

    # Same distances as dm3, but labelled z1-z3 instead of s1-s3.
    self.dm4_str = [
        "\tz1\tz2\tz3",
        "z1\t0\t0.1\t0.2",
        "z2\t0.1\t0\t0.9",
        "z3\t0.2\t0.9\t0",
    ]
    self.dm4 = parse_distmat(self.dm4_str)

    # dm4 is deliberately left out of the default collection.
    self.distmats = [self.dm1, self.dm2, self.dm3]

    # These paths are never created or modified; they are only strings that
    # get added to the results.
    self.fp1, self.fp2, self.fp3 = "foo.txt", "bar.txt", "baz.txt"
    self.fps = [self.fp1, self.fp2, self.fp3]

    # Parameters reused by many of the tests.
    self.num_perms = 999
    self.comment = "# A sample comment.\n"
    self.alpha = 0.01
    self.tail_type = 'greater'

    # Sends every z* label to the matching s* label and every s* label to
    # itself.
    self.sample_id_map = {
        'z1': 's1',
        'z2': 's2',
        'z3': 's3',
        's1': 's1',
        's2': 's2',
        's3': 's3',
    }
示例2: main
def main():
    """Write the compatible versions of two input distance matrices.

    Reads exactly two comma-separated input filepaths and two output
    filepaths from the command line options, parses both inputs, passes
    them through make_compatible_distance_matrices, and writes each
    resulting matrix to its corresponding output filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_dm_fps = opts.input_dms.split(',')
    output_dm_fps = opts.output_dms.split(',')
    if len(input_dm_fps) != 2 or len(output_dm_fps) != 2:
        option_parser.error("You must provide exactly two input and output "
                            "distance matrix filepaths.")

    # Parse each input exactly once and close its handle straight away.
    with open(input_dm_fps[0], 'U') as dm1_f:
        parsed_dm1 = parse_distmat(dm1_f)
    with open(input_dm_fps[1], 'U') as dm2_f:
        parsed_dm2 = parse_distmat(dm2_f)

    (dm1_labels, dm1), (dm2_labels, dm2) = make_compatible_distance_matrices(
        parsed_dm1, parsed_dm2)

    # Sanity check on the helper's result, not validation of user input.
    assert (dm1_labels == dm2_labels), "The order of sample IDs is not the " +\
        "same for the two matrices."

    # Format both matrices before touching the output files, so a
    # formatting failure cannot leave a truncated output behind.
    formatted_dm1 = format_distance_matrix(dm1_labels, dm1)
    formatted_dm2 = format_distance_matrix(dm2_labels, dm2)

    with open(output_dm_fps[0], 'w') as output1_f:
        output1_f.write(formatted_dm1)
    with open(output_dm_fps[1], 'w') as output2_f:
        output2_f.write(formatted_dm2)
示例3: main
def main():
    """Write a table of Mantel p-values for every pair of input matrices.

    For each unordered pair of the comma-separated input distance matrix
    filepaths, the two matrices are parsed, made compatible (optionally
    through a sample ID map), and compared with mantel. One tab-separated
    row per pair is written to the output filepath; pairs with fewer than
    two shared labels get a 'Too few samples' row instead of a p-value.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Optional mapping file: keep the first field of each entry's value.
    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, "U") as map_f:
            sample_id_map = dict(
                (k, v[0]) for k, v in fields_to_dict(map_f).items())

    input_dm_fps = opts.input_dms.split(',')
    num_iterations = opts.num_iterations

    # The context managers guarantee every handle is closed, including the
    # two inputs that are reopened for each pair.
    with open(opts.output_fp, 'w') as output_f:
        output_f.write(comment)
        output_f.write('DM1\tDM2\tNumber of entries\tMantel p-value\n')

        for i, fp1 in enumerate(input_dm_fps):
            for fp2 in input_dm_fps[i + 1:]:
                with open(fp1, 'U') as dm1_f:
                    parsed_dm1 = parse_distmat(dm1_f)
                with open(fp2, 'U') as dm2_f:
                    parsed_dm2 = parse_distmat(dm2_f)

                (dm1_labels, dm1), (dm2_labels, dm2) =\
                    make_compatible_distance_matrices(parsed_dm1,
                                                      parsed_dm2,
                                                      lookup=sample_id_map)

                if len(dm1_labels) < 2:
                    output_f.write('%s\t%s\t%d\tToo few samples\n' %
                                   (fp1, fp2, len(dm1_labels)))
                    continue

                p = mantel(dm1, dm2, n=num_iterations)
                p_str = format_p_value_for_num_iters(p, num_iterations)
                output_f.write('%s\t%s\t%d\t%s\n' %
                               (fp1, fp2, len(dm1_labels), p_str))
示例4: test_filter_samples_from_distance_matrix
def test_filter_samples_from_distance_matrix(self):
    """filter_samples_from_distance_matrix returns the expected matrices"""
    # Each case pairs the sample ids handed to the filter with the
    # module-level expected result.
    cases = (
        (["GHI blah", "XYZ"], expected_dm1a),
        (["GHI", "DEF"], expected_dm1b),
    )
    for sample_ids, expected in cases:
        # Parse afresh for every case so each call gets its own matrix.
        parsed_dm = parse_distmat(self.input_dm1)
        actual = filter_samples_from_distance_matrix(parsed_dm, sample_ids)
        self.assertEqual(actual, expected)
示例5: test_filter_samples_from_distance_matrix_negate
def test_filter_samples_from_distance_matrix_negate(self):
    """filter_samples_from_distance_matrix honours negate=True"""
    # First id list, checked against expected_dm1a.
    first_dm = parse_distmat(self.input_dm1)
    first_result = filter_samples_from_distance_matrix(
        first_dm, ["ABC blah", "DEF"], negate=True)
    self.assertEqual(first_result, expected_dm1a)

    # Second id list, checked against expected_dm1b.
    second_dm = parse_distmat(self.input_dm1)
    second_result = filter_samples_from_distance_matrix(
        second_dm, ["ABC", "XYZ"], negate=True)
    self.assertEqual(second_result, expected_dm1b)
示例6: main
def main():
    """Run the selected Mantel-style analysis and write its results.

    opts.method selects one of "mantel", "partial_mantel" or "mantel_corr".
    The textual result is written to a method-specific file inside
    opts.output_dir; for "mantel_corr" each returned correlogram is also
    saved there via its savefig method.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist. Catch Exception
    # rather than everything so Ctrl-C and SystemExit are not rewritten
    # into a misleading usage error.
    try:
        if not path.exists(opts.output_dir):
            create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Optional mapping file: keep the first field of each entry's value.
    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, "U") as map_f:
            sample_id_map = dict(
                (k, v[0]) for k, v in fields_to_dict(map_f).items())

    input_dm_fps = opts.input_dms
    distmats = []
    for dm_fp in input_dm_fps:
        with open(dm_fp, "U") as dm_f:
            distmats.append(parse_distmat(dm_f))

    # Only the correlogram method produces figures.
    correlogram_fps, correlograms = [], []

    if opts.method == "mantel":
        results_fn = "mantel_results.txt"
        result_str = run_mantel_test(
            "mantel",
            input_dm_fps,
            distmats,
            opts.num_permutations,
            opts.tail_type,
            comment_mantel_pmantel,
            sample_id_map=sample_id_map,
        )
    elif opts.method == "partial_mantel":
        results_fn = "partial_mantel_results.txt"
        with open(opts.control_dm, "U") as control_f:
            control_dm = parse_distmat(control_f)
        result_str = run_mantel_test(
            "partial_mantel",
            input_dm_fps,
            distmats,
            opts.num_permutations,
            opts.tail_type,
            comment_mantel_pmantel,
            control_dm_fp=opts.control_dm,
            control_dm=control_dm,
            sample_id_map=sample_id_map,
        )
    elif opts.method == "mantel_corr":
        results_fn = "mantel_correlogram_results.txt"
        result_str, correlogram_fps, correlograms = run_mantel_correlogram(
            input_dm_fps,
            distmats,
            opts.num_permutations,
            comment_corr,
            opts.alpha,
            sample_id_map=sample_id_map,
        )
    else:
        # Previously an unknown method fell through to output_f.close()
        # and died with a NameError; report it as a usage error instead.
        option_parser.error("Unrecognized method '%s'; expected one of: "
                            "mantel, partial_mantel, mantel_corr."
                            % opts.method)

    with open(path.join(opts.output_dir, results_fn), "w") as output_f:
        output_f.write(result_str)

    for corr_fp, corr in zip(correlogram_fps, correlograms):
        corr.savefig(path.join(opts.output_dir, corr_fp + opts.image_type),
                     format=opts.image_type)
示例7: main
def _parse_binning_ranges(binning):
    """Turn a '[increment,top_limit][increment,top_limit]...' string into ranges.

    Returns a list with one two-element list of floats per bracketed range.
    Raises ValueError when the brackets and commas are unbalanced, when a
    range does not hold exactly two values, or when a range other than the
    final one has an increment above its limit, a negative value, or a
    limit below that of an earlier range. The final range is only checked
    for having two values.
    """
    # simple ranges format validation
    if binning.count('[') != binning.count(']') or \
            binning.count('[') != binning.count(','):
        raise ValueError(
            "The binning input has an error: '%s'; " % binning +
            "\nthe format should be [increment1,top_limit1][increment2,top_limit2]")

    # split into ranges, then drop the leading '[' and the trailing ']'
    rgn_txt = binning.split('][')
    rgn_txt[0] = rgn_txt[0][1:]
    rgn_txt[-1] = rgn_txt[-1][:-1]

    ranges = []
    prev_top_limit = 0
    last_index = len(rgn_txt) - 1
    for i, r in enumerate(rgn_txt):
        # A list (not map) so len() and indexing work on any Python version.
        values = [float(v) for v in r.split(',')]
        if len(values) != 2:
            raise ValueError("All ranges must have only 2 values: [%s]" % r)
        if i != last_index:
            if values[0] > values[1]:
                raise ValueError(
                    "The bin value can't be greater than the max value: [%s]" % r)
            elif min(values) < 0:
                # The original compared the whole list with 0, which is
                # never true on Python 2, so negatives slipped through.
                raise ValueError("This value can not be negative: [%s]" % r)
            elif prev_top_limit > values[1]:
                raise ValueError(
                    "This value can not smaller than the previous one: [%s]" % r)
            else:
                prev_top_limit = values[1]
        ranges.append(values)
    return ranges


def main():
    """Fit and plot a semivariogram from two distance matrices.

    Parses the optional binning option into ranges, reads the x and y
    distance matrices, fits the model named by opts.model with
    fit_semivariogram, plots both the raw values and the fitted curve, and
    saves the figure to opts.output_path.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.binning is None:
        ranges = []
    else:
        ranges = _parse_binning_ranges(opts.binning)

    # Only the matrices are needed; the sample labels are discarded.
    with open(opts.input_path_x, 'U') as x_f:
        _, x_distmtx = parse_distmat(x_f)
    with open(opts.input_path_y, 'U') as y_f:
        _, y_distmtx = parse_distmat(y_f)

    (x_val, y_val, x_fit, y_fit) = fit_semivariogram(
        x_distmtx, y_distmtx, opts.model, ranges)

    plot(x_val, y_val, 'o', color="white")
    plot(x_fit, y_fit, linewidth=2.0, color="blue")

    x_label = 'Distance (m)'
    y_label = 'Community Dissimilarity'
    fig_title = 'Semivariogram (%s)' % opts.model

    xlabel(x_label)
    ylabel(y_label)
    title(fig_title)

    savefig(opts.output_path)
示例8: main
def main():
    """Filter a distance matrix down to a chosen set of samples.

    The samples to keep come from, in order of precedence, an OTU table
    (its ids), a sample id file, or a mapping file combined with a
    valid-states description. The filtered matrix is written to
    opts.output_distance_matrix.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.otu_table_fp:
        otu_table = load_table(opts.otu_table_fp)
        samples_to_keep = otu_table.ids()
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            samples_to_keep = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_id_f)
    elif opts.mapping_fp and opts.valid_states:
        try:
            with open(opts.mapping_fp, 'U') as mapping_f:
                samples_to_keep = sample_ids_from_metadata_description(
                    mapping_f, opts.valid_states)
        except ValueError as e:
            # str(e) instead of the deprecated e.message attribute.
            option_parser.error(str(e))
    else:
        option_parser.error('must pass either --sample_id_fp, -t, or -m and '
                            '-s')

    with open(opts.input_distance_matrix, 'U') as input_f:
        parsed_dm = parse_distmat(input_f)

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    d = filter_samples_from_distance_matrix(
        parsed_dm,
        samples_to_keep,
        negate=not opts.negate)

    # Open the output only once there is something to write, so a usage
    # error above no longer leaves an empty, truncated output file behind.
    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)
示例9: main
def main():
    """Report the between/within cluster dissimilarity ratio.

    Parses the distance matrix and mapping file, obtains the between- and
    within-cluster distances from clust_qual_ratio for opts.category, and
    writes either just the ratio of their means (opts.short) or a longer
    report with mean, standard deviation and count for each group. Output
    goes to opts.output_path when given, otherwise to standard output.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.input_path, 'U') as dm_f:
        dists = parse_distmat(dm_f)
    with open(opts.map, 'U') as map_f:
        map_data = parse_mapping_file_to_dict(map_f)

    diff_dists, same_dists = clust_qual_ratio(dists, map_data, opts.category)

    # Computed once and reused by both report formats.
    ratio = numpy.mean(diff_dists) / numpy.mean(same_dists)

    if opts.short:
        lines = [str(ratio)]
    else:
        lines = [
            "dissimilarity ratio between/within (large for clustered data):",
            str(ratio),
            "dissimilarities between clusters: mean, std, num:",
            '\t'.join(map(str, [numpy.mean(diff_dists),
                                numpy.std(diff_dists),
                                len(diff_dists)])),
            "dissimilarities within clusters: mean, std, num:",
            '\t'.join(map(str, [numpy.mean(same_dists),
                                numpy.std(same_dists),
                                len(same_dists)])),
        ]
    # One newline after every line, matching what the print statements
    # emitted.
    report = '\n'.join(lines) + '\n'

    if opts.output_path is not None:
        # The context manager closes the file; it was previously left open.
        with open(opts.output_path, 'w') as outf:
            outf.write(report)
    else:
        sys.stdout.write(report)
示例10: test_get_adjacent_distances
def test_get_adjacent_distances(self):
    """get_adjacent_distances yields distances between consecutive ids"""
    dm_header, dm = parse_distmat([
        "\ts1\ts2\ts3",
        "s1\t0\t2\t4",
        "s2\t2\t0\t3.2",
        "s3\t4\t3.2\t0",
    ])

    # error cases: fewer than 2 valid sample ids
    for too_few_ids in ([], ["s1"], ["s0", "s1"], ["s1", "s4"]):
        self.assertRaises(
            ValueError, get_adjacent_distances, dm_header, dm, too_few_ids)

    # (sample ids passed in, expected (distances, id pairs) result)
    expectations = [
        # one pair of valid distances
        (["s1", "s2"], ([2], [("s1", "s2")])),
        (["s1", "s1"], ([0], [("s1", "s1")])),
        (["s1", "s3"], ([4], [("s1", "s3")])),
        (["s2", "s3"], ([3.2], [("s2", "s3")])),
        # multiple valid distances
        (["s1", "s2", "s3"],
         ([2, 3.2], [("s1", "s2"), ("s2", "s3")])),
        (["s1", "s3", "s2", "s1"],
         ([4, 3.2, 2], [("s1", "s3"), ("s3", "s2"), ("s2", "s1")])),
        # mixed valid and invalid ids: the invalid ones are ignored
        (["s1", "s3", "s4", "s5", "s6", "s2", "s1"],
         ([4, 3.2, 2], [("s1", "s3"), ("s3", "s2"), ("s2", "s1")])),
    ]
    for sample_ids, expected in expectations:
        self.assertEqual(
            get_adjacent_distances(dm_header, dm, sample_ids), expected)

    # strict=True results in missing sample ids raising an error
    self.assertRaises(
        ValueError,
        get_adjacent_distances,
        dm_header,
        dm,
        ["s1", "s3", "s4", "s5", "s6", "s2", "s1"],
        strict=True,
    )
示例11: nmds
def nmds(file, dimensions=2):
    """Run NMDS on a distance matrix and return the formatted coordinates.

    file is handed straight to parse_distmat; dimensions is forwarded to
    nmds_module.NMDS as its dimension argument. The fitted points and
    stress are passed, with the sample labels, to format_nmds_coords, whose
    result is returned.
    """
    sample_ids, dist_matrix = parse_distmat(file)
    fit = nmds_module.NMDS(dist_matrix, verbosity=0, dimension=dimensions)
    # Arguments evaluate left to right, so the points are fetched before
    # the stress, as before.
    return format_nmds_coords(sample_ids, fit.getPoints(), fit.getStress())
示例12: test_shuffle_dm
def test_shuffle_dm(self):
    """shuffle_dm keeps the distances and relabels the matrix."""
    exp_labels, exp_dm = parse_distmat(self.dm_f1)

    permutation_seen = False
    for _ in range(20):
        shuffled_lines = shuffle_dm(self.dm_f1).split('\n')
        obs_labels, obs_dm = parse_distmat(shuffled_lines)

        # The distance values must be the same on every attempt.
        self.assertFloatEqual(obs_dm, exp_dm)

        # A single attempt may fail this check; the test only needs it to
        # pass at least once in the 20 tries.
        # NOTE(review): presumably assertIsPermutation also requires the
        # order to differ from exp_labels - confirm against its definition.
        try:
            self.assertIsPermutation(obs_labels, exp_labels)
        except AssertionError:
            continue
        permutation_seen = True

    self.assertTrue(permutation_seen)
示例13: test_parse_distmat
def test_parse_distmat(self):
    """parse_distmat returns the labels and matrix from tab-delimited lines"""
    # Header row of labels followed by one labelled row per sample.
    text = """\ta\tb\tc
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
"""
    expected_labels = ['a', 'b', 'c']
    expected_matrix = array([[0, 1, 2], [1, 0, 3.5], [1, 3.5, 0]])

    observed = parse_distmat(text.splitlines())
    self.assertEqual(observed, (expected_labels, expected_matrix))
示例14: main
def main():
    """Write the input distance matrix back out with its labels shuffled.

    The matrix data is left untouched; only the order of the labels passed
    to format_distance_matrix changes.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Close the input as soon as it has been parsed.
    with open(opts.input_distance_matrix, 'U') as input_f:
        labels, dm_data = parse_distmat(input_f)

    # shuffle is called for its effect on labels; its return value is unused.
    shuffle(labels)

    # Format first, so the output file is not created or truncated when
    # formatting fails.
    formatted_dm = format_distance_matrix(labels, dm_data)
    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(formatted_dm)
示例15: test_subset_dm
def test_subset_dm(self):
    """subset_dm returns a matrix restricted to the requested size."""
    exp = parse_distmat(self.dm_f1)

    # Asking for 3 samples is expected to give back the parsed original.
    full_size = parse_distmat(subset_dm(self.dm_f1, 3).split('\n'))
    self.assertFloatEqual(full_size, exp)

    # Smaller subsets: right number of labels, all taken from the original.
    for size in (1, 2):
        subset_lines = subset_dm(self.dm_f1, size).split('\n')
        obs_labels, obs_dm = parse_distmat(subset_lines)
        self.assertEqual(len(obs_labels), size)
        for label in obs_labels:
            self.assertTrue(label in exp[0])

    # Asking for 4 samples must raise ValueError.
    self.assertRaises(ValueError, subset_dm, self.dm_f1, 4)