本文整理汇总了 Python 中 skbio.parse.sequences.parse_fasta 函数的典型用法代码示例。如果您正苦于以下问题：Python parse_fasta 函数的具体用法？Python parse_fasta 怎么用？Python parse_fasta 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 parse_fasta 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_split_fasta_diff_num_seqs_per_file_alt
def test_split_fasta_diff_num_seqs_per_file_alt(self):
    """split_fasta funcs always catches all seqs

    The same 59-sequence input is split with every seqs-per-file value
    from 1 to 999; the records read back from the split files must
    build a collection equal to the one built from the input.
    """
    # start with 59 seqs (b/c it's prime, so should make more
    # confusing splits)
    in_seqs = SequenceCollection.from_fasta_records(
        [('seq%s' % k, 'AACCTTAA') for k in range(59)], DNA)
    infile = in_seqs.to_fasta().split('\n')

    # test seqs_per_file from 1 to 999 (range() excludes its upper bound)
    for i in range(1, 1000):
        fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                      prefix='split_fasta_tests',
                                      suffix='')
        close(fd)

        actual = split_fasta(infile, i, filename_prefix)
        actual_seqs = []
        for fp in actual:
            # close every split file as soon as its lines are collected
            with open(fp) as split_f:
                actual_seqs.extend(split_f)
        # remove the files now, so if the test fails they still get
        # cleaned up
        remove_files(actual)

        # building seq collections from infile and the split files result in
        # equivalent seq collections
        self.assertEqual(
            SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
            SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
示例2: fast_denoiser
def fast_denoiser(sff_fps, fasta_fp, tmp_outdir, num_cpus, primer, verbose=True, titanium=False):
    """wrapper function calling methods from the Denoiser package.

    Runs ``denoise_seqs`` (with ``cluster=True`` when ``num_cpus > 1``) and
    then reads its output files from ``tmp_outdir``.

    Returns a tuple ``(seqs, mapping)``:
      seqs: iterator chaining the parse_fasta records of
            ``centroids.fasta`` followed by ``singletons.fasta``
      mapping: dict keyed by the line index in ``denoiser_mapping.txt``;
               each value is the list ``[cluster_id, member, ...]``
    """
    if num_cpus > 1:
        denoise_seqs(
            sff_fps,
            fasta_fp,
            tmp_outdir,
            primer=primer,
            cluster=True,
            num_cpus=num_cpus,
            verbose=verbose,
            titanium=titanium,
        )
    else:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer, verbose=verbose, titanium=titanium)

    # read centroids and singletons
    # NOTE(review): these two handles are intentionally left open because the
    # parse_fasta results are returned unconsumed; presumably parse_fasta
    # reads lazily, so closing here would break the caller -- confirm.
    centroids = parse_fasta(open(tmp_outdir + "/centroids.fasta"))
    singletons = parse_fasta(open(tmp_outdir + "/singletons.fasta"))
    seqs = chain(centroids, singletons)

    # read mapping; this file is consumed completely here, so close it
    mapping = {}
    with open(tmp_outdir + "/denoiser_mapping.txt") as cluster_mapping:
        for i, line in enumerate(cluster_mapping):
            cluster, members = line.split(":")
            mapping[i] = [cluster] + members.split()

    return seqs, mapping
示例3: test_split_fasta_diff_num_seqs_per_file
def test_split_fasta_diff_num_seqs_per_file(self):
    """split_fasta funcs as expected when diff num seqs go to each file

    Three records are split two-per-file; both the returned file paths
    and the sequences read back from those files are checked.
    """
    fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                  prefix='split_fasta_tests',
                                  suffix='')
    close(fd)
    infile = ['>seq1', 'AACCTTAA', '>seq2', 'TTAACC', 'AATTAA',
              '>seq3', 'CCTT--AA']

    actual = split_fasta(infile, 2, filename_prefix)
    actual_seqs = []
    for fp in actual:
        # close every split file as soon as its lines are collected
        with open(fp) as split_f:
            actual_seqs.extend(split_f)
    # remove the split files before asserting so a failure does not leave
    # them behind
    remove_files(actual)

    expected = ['%s.%d.fasta' % (filename_prefix, i) for i in range(2)]
    # list of file paths is as expected
    self.assertEqual(actual, expected)
    # building seq collections from infile and the split files result in
    # equivalent seq collections
    self.assertEqual(
        SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
        SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
示例4: setUp
def setUp(self):
    """Prepare FASTA fixtures: raw input lines and parsed expected records."""
    # raw inputs are kept as lists of lines
    self.fasta_lines1 = fasta_lines1.split("\n")
    self.fasta_lines1_mixed_case = fasta_lines1_mixed_case.split("\n")

    # expected outputs are parsed once into lists of records
    expected_fixtures = (
        ("fasta_lines1_exp", fasta_lines1_exp),
        ("fasta_lines1_mixed_case_exp", fasta_lines1_mixed_case_exp),
        ("fasta_lines1_exp_null_desc_mapper",
         fasta_lines1_exp_null_desc_mapper),
    )
    for attr_name, fasta_text in expected_fixtures:
        records = list(parse_fasta(fasta_text.split("\n")))
        setattr(self, attr_name, records)
示例5: __call__
def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None):
    """Align the sequences in seq_path against the configured template.

    seq_path: path to the FASTA file of candidate sequences
    result_path: if given, aligned sequences are written there as FASTA
        and None is returned
    log_path: path handed to NastLogger
    failure_path: if given, sequences that failed to align are written
        there as FASTA

    Returns None when result_path is given; otherwise the result of
    LoadSeqs on the aligned sequences, or {} if LoadSeqs raises
    ValueError.

    Raises KeyError if LoadSeqs rejects a character in the template
    alignment.
    """
    # The candidate file is kept open for the whole call because the
    # parse_fasta result is handed to pynast_seqs unconsumed; the `with`
    # guarantees the handle is closed on every exit path.
    with open(seq_path, 'U') as seq_file:
        # load candidate sequences
        candidate_sequences = parse_fasta(seq_file)

        # load template sequences
        template_alignment = []
        template_alignment_fp = self.Params['template_filepath']
        with open(template_alignment_fp) as template_file:
            for seq_id, seq in parse_fasta(template_file):
                # replace '.' characters with '-' characters
                template_alignment.append(
                    (seq_id, seq.replace('.', '-').upper()))
        try:
            template_alignment = LoadSeqs(data=template_alignment,
                                          moltype=DNA,
                                          aligned=DenseAlignment)
        except KeyError as e:
            raise KeyError('Only ACGT-. characters can be contained in template alignments.' +
                           ' The offending character was: %s' % e)

        # initialize_logger
        logger = NastLogger(log_path)

        # get function for pairwise alignment method
        pairwise_alignment_f = pairwise_alignment_methods[
            self.Params['pairwise_alignment_method']]

        pynast_aligned, pynast_failed = pynast_seqs(
            candidate_sequences,
            template_alignment,
            min_pct=self.Params['min_pct'],
            min_len=self.Params['min_len'],
            align_unaligned_seqs_f=pairwise_alignment_f,
            logger=logger,
            temp_dir=get_qiime_temp_dir())

        logger.record(str(self))

        if failure_path is not None:
            with open(failure_path, 'w') as fail_file:
                for seq in pynast_failed:
                    fail_file.write(seq.toFasta())
                    fail_file.write('\n')

        if result_path is not None:
            with open(result_path, 'w') as result_file:
                for seq in pynast_aligned:
                    result_file.write(seq.toFasta())
                    result_file.write('\n')
            return None

        try:
            return LoadSeqs(data=pynast_aligned, aligned=DenseAlignment)
        except ValueError:
            return {}
示例6: __call__
def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None):
    """Align the sequences in seq_path against the configured template.

    seq_path: path to the FASTA file of candidate sequences
    result_path: if given, the alignment is written there as FASTA and
        None is returned
    log_path: path handed to NastLogger
    failure_path: if given, sequences that failed to align are written
        there as FASTA

    Returns None when result_path is given, otherwise an Alignment of
    the aligned sequences.
    """
    # The candidate file is kept open for the whole call because the
    # parse_fasta result is handed to pynast_seqs unconsumed; the `with`
    # guarantees the handle is closed on every exit path.
    with open(seq_path, 'U') as seq_file:
        # load candidate sequences
        candidate_sequences = parse_fasta(seq_file)

        # load template sequences
        template_alignment = []
        template_alignment_fp = self.Params['template_filepath']
        with open(template_alignment_fp) as template_file:
            for seq_id, seq in parse_fasta(template_file):
                # replace '.' characters with '-' characters
                template_alignment.append(
                    (seq_id, seq.replace('.', '-').upper()))
        template_alignment = Alignment.from_fasta_records(
            template_alignment, DNASequence, validate=True)

        # initialize_logger
        logger = NastLogger(log_path)

        # get function for pairwise alignment method
        pairwise_alignment_f = pairwise_alignment_methods[
            self.Params['pairwise_alignment_method']]

        pynast_aligned, pynast_failed = pynast_seqs(
            candidate_sequences,
            template_alignment,
            min_pct=self.Params['min_pct'],
            min_len=self.Params['min_len'],
            align_unaligned_seqs_f=pairwise_alignment_f,
            logger=logger,
            temp_dir=get_qiime_temp_dir())

        logger.record(str(self))

        # convert each returned sequence into a DNASequence, in place
        for i, seq in enumerate(pynast_failed):
            pynast_failed[i] = DNASequence(str(seq), id=seq.Name)
        pynast_failed = SequenceCollection(pynast_failed)

        for i, seq in enumerate(pynast_aligned):
            pynast_aligned[i] = DNASequence(str(seq), id=seq.Name)
        pynast_aligned = Alignment(pynast_aligned)

        if failure_path is not None:
            with open(failure_path, 'w') as fail_file:
                fail_file.write(pynast_failed.to_fasta())

        if result_path is not None:
            with open(result_path, 'w') as result_file:
                result_file.write(pynast_aligned.to_fasta())
            return None

        return pynast_aligned
示例7: setUp
def setUp(self):
    """Create the temp input/template/output files and expected results."""
    def make_temp_file(suffix, contents):
        # mkstemp returns an open descriptor; close it and write by path
        handle, path = mkstemp(prefix="PyNastAlignerTests_", suffix=suffix)
        close(handle)
        with open(path, "w") as out:
            out.write(contents)
        return path

    template_fasta = pynast_test1_template_fasta

    # candidate input plus the template and its variants
    self.pynast_test1_input_fp = make_temp_file(
        ".fasta", pynast_test1_input_fasta)
    self.pynast_test1_template_fp = make_temp_file(
        "template.fasta", template_fasta)
    self.pynast_test_template_w_dots_fp = make_temp_file(
        "template.fasta", template_fasta.replace("-", "."))
    self.pynast_test_template_w_u_fp = make_temp_file(
        "template.fasta", template_fasta.replace("T", "U"))
    self.pynast_test_template_w_lower_fp = make_temp_file(
        "template.fasta", template_fasta.lower())

    # create temp file names (and touch them so we can reliably
    # clean them up)
    self.result_fp = make_temp_file(".fasta", "")
    self.failure_fp = make_temp_file(".fasta", "")
    self.log_fp = make_temp_file(".log", "")

    self._paths_to_clean_up = [
        self.pynast_test1_input_fp,
        self.result_fp,
        self.failure_fp,
        self.log_fp,
        self.pynast_test1_template_fp,
        self.pynast_test_template_w_dots_fp,
        self.pynast_test_template_w_u_fp,
        self.pynast_test_template_w_lower_fp,
    ]

    aligner_params = {
        "template_filepath": self.pynast_test1_template_fp,
        "min_len": 15,
    }
    self.pynast_test1_aligner = PyNastAligner(aligner_params)

    expected_aln_records = parse_fasta(pynast_test1_expected_alignment)
    self.pynast_test1_expected_aln = Alignment.from_fasta_records(
        expected_aln_records, DNA)
    expected_fail_records = parse_fasta(pynast_test1_expected_failure)
    self.pynast_test1_expected_fail = SequenceCollection.from_fasta_records(
        expected_fail_records, DNA)
示例8: test_deblur_with_non_default_error_profile
def test_deblur_with_non_default_error_profile(self):
    """deblur gives the expected result for a custom error profile.

    The profile is passed once as a plain list and once as a numpy
    array; both runs must produce the same expected sequence.
    """
    error_dist = [
        1,
        0.05,
        0.000005,
        0.000005,
        0.000005,
        0.000005,
        0.0000025,
        0.0000025,
        0.0000025,
        0.0000025,
        0.0000025,
        0.0000005,
        0.0000005,
        0.0000005,
        0.0000005,
    ]
    exp = [
        Sequence(
            "E.Coli-999;size=720;",
            "tacggagggtgcaagcgttaatcggaattactgggcgtaaagcgcacgcaggcggt"
            "ttgttaagtcagatgtgaaatccccgggctcaacctgggaactgcatctgatactg"
            "gcaagcttgagtctcgtagaggggggcagaattccag",
        )
    ]

    # Plain-list profile: previously this result was overwritten by the
    # numpy run without ever being checked.
    seqs_f = StringIO(TEST_SEQS_2)
    obs = deblur(parse_fasta(seqs_f), error_dist=error_dist)
    self.assertEqual(obs, exp)

    # Trying with a numpy array (same values as the list above)
    error_dist = np.array(error_dist)
    seqs_f = StringIO(TEST_SEQS_2)
    obs = deblur(parse_fasta(seqs_f), error_dist=error_dist)
    self.assertEqual(obs, exp)
示例9: combine_mappings
def combine_mappings(fasta_fh, mapping_fh, denoised_seqs_fh,
                     otu_picker_otu_map_fh, out_dir):
    """Combine denoiser and OTU picker mapping file, replace flowgram IDs.

    fasta_fh: a fasta file with labels as produced by Qiime's split_libraries.py
             used to replace flowgram id with the unique se_sample_id
    mapping_fh: The cluster mapping from the denoiser.py
    denoised_seqs_fh: the Fasta output files from denoiser.py
    otu_picker_otu_map_fh: cluster map from otu picker on denoised_seqs_fh
    out_dir: output directory

    Writes ``denoised_otu_map.txt`` and ``denoised_all.fasta`` into out_dir.
    Prints a message and calls exit() if an id from the denoiser mapping has
    no entry in the sample-id mapping.
    """
    # read in mapping from split_library file (only the labels are needed)
    labels = (record[0] for record in parse_fasta(fasta_fh))
    # mapping from seq_id to sample_id
    sample_id_mapping = extract_read_to_sample_mapping(labels)

    denoiser_mapping = read_denoiser_mapping(mapping_fh)
    get_sample_id = sample_id_mapping.get

    # read in cd_hit otu map
    # and write out combined otu_picker+denoiser map
    with open(out_dir + "/denoised_otu_map.txt", "w") as otu_fh:
        for otu_line in otu_picker_otu_map_fh:
            otu_split = otu_line.split()
            if not otu_split:
                # blank line: nothing to map
                continue
            otu = otu_split[0]
            ids = otu_split[1:]

            # concat lists
            # make sure the biggest one is first for pick_repr
            all_ids = sort_ids(ids, denoiser_mapping)
            for seq_id in ids:
                all_ids.extend(denoiser_mapping[seq_id])
            try:
                otu_fh.write("%s\t" % otu +
                             "\t".join(map(get_sample_id, all_ids)) + "\n")
            except TypeError:
                # get returns None if denoiser_mapping id not present in
                # sample_id_mapping, and join() rejects None
                print("Found id in denoiser output, which was not found in split_libraries " +
                      "output FASTA file. Wrong file?")
                exit()

    with open(out_dir + "/denoised_all.fasta", "w") as fasta_out_fh:
        for label, seq in parse_fasta(denoised_seqs_fh):
            seq_id = label.split()[0]
            newlabel = "%s %s" % (sample_id_mapping[seq_id], seq_id)
            fasta_out_fh.write(BiologicalSequence(seq, id=newlabel).to_fasta())
示例10: test_call_write_to_file
def test_call_write_to_file(self):
    """ReferenceRepSetPicker.__call__ otu map correctly written to file"""
    picker_params = {'Algorithm': 'first', 'ChoiceF': first_id}
    picker = ReferenceRepSetPicker(params=picker_params)
    picker(self.tmp_seq_filepath,
           self.tmp_otu_filepath,
           self.ref_seq_filepath,
           result_path=self.result_filepath)

    # read back what the picker wrote
    with open(self.result_filepath) as result_f:
        written_records = parse_fasta(result_f)
        actual = SequenceCollection.from_fasta_records(written_records, DNA)

    expected_lines = rep_seqs_reference_result_file_exp.split('\n')
    expected = SequenceCollection.from_fasta_records(
        parse_fasta(expected_lines), DNA)

    # compared as sets because result order is irrelevant
    self.assertEqual(set(actual), set(expected))
示例11: seqs_from_file
def seqs_from_file(ids, file_lines):
    """Yield (label, seq) for each FASTA record whose extracted id is in ids."""
    for header, sequence in parse_fasta(file_lines):
        # skip records whose id was not requested
        if id_from_fasta_label_line(header) not in ids:
            continue
        yield header, sequence
示例12: check_fasta_seqs_lens
def check_fasta_seqs_lens(input_fasta_fp):
    """ Creates bins of sequence lens

    Useful for checking for valid aligned sequences.

    input_fasta_fp:  input fasta filepath

    Returns a list of (count, sequence_length) tuples sorted in
    descending order.
    """
    seq_lens = defaultdict(int)

    # `with` closes the file even if parsing raises part-way through
    with open(input_fasta_fp, "U") as input_fasta_f:
        for _label, seq in parse_fasta(input_fasta_f):
            seq_lens[len(seq)] += 1

    # count first, so the most frequent length sorts to the front
    return sorted(
        ((count, length) for length, count in seq_lens.items()),
        reverse=True)
示例13: output_test
def output_test(self, aligned_basename):
    """ Test results of test_load_zip() and test_load_gzip()

    Reads ``<aligned_basename>.log``, ``<aligned_basename>_otus.txt`` and
    ``<aligned_basename>_denovo.fasta`` and checks the read, cluster and
    de novo counts against each other and against the expected values.
    """
    # Start as None so a missing log line shows up as a failed assertion
    # below rather than a NameError.
    num_hits = None
    num_failures_log = None
    num_clusters_log = None

    # `with` closes the log even when one of the assertions fails
    with open(aligned_basename + ".log", "U") as f_log:
        f_log_str = f_log.read()
        self.assertTrue("Total reads passing E-value threshold" in f_log_str)
        self.assertTrue("Total reads for de novo clustering" in f_log_str)
        self.assertTrue("Total OTUs" in f_log_str)
        f_log.seek(0)
        for line in f_log:
            if line.startswith("    Total reads passing E-value threshold"):
                # raw string keeps `\(` a regex escape, not a string escape
                num_hits = (re.split(
                    r'Total reads passing E-value threshold = | \(',
                    line)[1]).strip()
            elif line.startswith("    Total reads for de novo clustering"):
                num_failures_log = (re.split(
                    'Total reads for de novo clustering = ',
                    line)[1]).strip()
            elif line.startswith(" Total OTUs"):
                num_clusters_log = (re.split(
                    'Total OTUs = ', line)[1]).strip()

    # Correct number of reads mapped
    self.assertEqual("99999", num_hits)

    # Correct number of clusters recorded
    self.assertEqual("272", num_clusters_log)

    # Correct number of clusters in OTU-map
    with open(aligned_basename + "_otus.txt", 'U') as f_otumap:
        num_clusters_file = sum(1 for line in f_otumap)
    self.assertEqual(272, num_clusters_file)

    with open(aligned_basename + "_denovo.fasta", 'U') as f_denovo:
        num_failures_file = sum(1 for _record in parse_fasta(f_denovo))

    # Correct number of reads for de novo clustering
    self.assertEqual(num_failures_log, str(num_failures_file))
示例14: align_two_alignments
def align_two_alignments(aln1, aln2, moltype, params=None):
    """Returns an Alignment object from two existing Alignments.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
    used to build them.
        - Mafft profile alignment only works with aligned sequences. Alignment
        object used to handle unaligned sequences.

    params: dict of parameters to pass in to the Mafft app controller.
    """
    # Wrap the first input in an Alignment and rebuild it under the
    # abbreviated ids that getIntMap returns.
    first_aln = Alignment(aln1, MolType=moltype)
    first_int_map, id_lookup = first_aln.getIntMap()
    first_int_map = Alignment(first_int_map, MolType=moltype)

    # Same for the second input, with its own id prefix.
    second_aln = Alignment(aln2, MolType=moltype)
    second_int_map, second_lookup = second_aln.getIntMap(prefix='seqn_')
    second_int_map = Alignment(second_int_map, MolType=moltype)

    # One lookup table from abbreviated id to original id for both inputs.
    id_lookup.update(second_lookup)

    # Set up the Mafft app controller and switch it to mafft-profile.
    app = Mafft(InputHandler='_input_as_paths',
                params=params,
                SuppressStderr=False)
    app._command = 'mafft-profile'

    first_path = app._tempfile_as_multiline_string(first_int_map.toFasta())
    second_path = app._tempfile_as_multiline_string(second_int_map.toFasta())

    # Run the app on the two temp files.
    res = app([first_path, second_path])

    # Parse the app's output into an {abbreviated id: sequence} dict.
    alignment = dict(parse_fasta(res['StdOut']))

    # Map back to the original ids, dropping any '_seed_' marker first.
    new_alignment = dict(
        (id_lookup[short_id.replace('_seed_', '')], seq)
        for short_id, seq in alignment.items())
    new_alignment = Alignment(new_alignment, MolType=moltype)

    # Clean up the app's results, the temp inputs, and the files named
    # 'pre' and 'trace' in the working directory.
    res.cleanUp()
    for leftover in (first_path, second_path, 'pre', 'trace'):
        remove(leftover)
    del (first_aln, first_int_map, id_lookup,
         second_aln, second_int_map, second_lookup, app, res, alignment)

    return new_alignment
示例15: test_main
def test_main(self):
    """Denoiser should always give same result on test data

    Runs denoiser.py on the bundled test set through a shell command, then
    compares centroids.fasta and denoiser_mapping.txt with the expected
    text and checks that singletons.fasta holds 6 records.
    """
    expected = ">FS8APND01D3TW3 | cluster size: 94 \nCTCCCGTAGGAGTCTGGGCCGTATCTCAGTCCCAATGTGGCCGGTCACCCTCTCAGGCCGGCTACCCGTCAAAGCCTTGGTAAGCCACTACCCCACCAACAAGCTGATAAGCCGCGAGTCCATCCCCAACCGCCGAAACTTTCCAACCCCCACCATGCAGCAGGAGCTCCTATCCGGTATTAGCCCCAGTTTCCTGAAGTTATCCCAAAGTCAAGGGCAGGTTACTCACGTGTTACTCACCCGTTCGCC\n"

    expected_map = """FS8APND01EWRS4:
FS8APND01DXG45:
FS8APND01D3TW3:\tFS8APND01CSXFN\tFS8APND01DQ8MX\tFS8APND01DY7QW\tFS8APND01B5QNI\tFS8APND01CQ6OG\tFS8APND01C7IGN\tFS8APND01DHSGH\tFS8APND01DJ17E\tFS8APND01CUXOA\tFS8APND01EUTYG\tFS8APND01EKK7T\tFS8APND01D582W\tFS8APND01B5GWU\tFS8APND01D7N2A\tFS8APND01BJGHZ\tFS8APND01D6DYZ\tFS8APND01C6ZIM\tFS8APND01D2X6Y\tFS8APND01BUYCE\tFS8APND01BNUEY\tFS8APND01DKLOE\tFS8APND01C24PP\tFS8APND01EBWQX\tFS8APND01ELDYW\tFS8APND01B0GCS\tFS8APND01D4QXI\tFS8APND01EMYD9\tFS8APND01EA2SK\tFS8APND01DZOSO\tFS8APND01DHYAZ\tFS8APND01C7UD9\tFS8APND01BTZFV\tFS8APND01CR78R\tFS8APND01B39IE\tFS8APND01ECVC0\tFS8APND01DM3PL\tFS8APND01DELWS\tFS8APND01CIEK8\tFS8APND01D7ZOZ\tFS8APND01CZSAI\tFS8APND01DYOVR\tFS8APND01BX9XY\tFS8APND01DEWJA\tFS8APND01BEKIW\tFS8APND01DCKB9\tFS8APND01EEYIS\tFS8APND01DDKEA\tFS8APND01DSZLO\tFS8APND01C6EBC\tFS8APND01EE15M\tFS8APND01ELO9B\tFS8APND01C58QY\tFS8APND01DONCG\tFS8APND01DVXX2\tFS8APND01BL5YT\tFS8APND01BIL2V\tFS8APND01EBSYQ\tFS8APND01CCX8R\tFS8APND01B2YCJ\tFS8APND01B1JG4\tFS8APND01DJ024\tFS8APND01BIJY0\tFS8APND01CIA4G\tFS8APND01DV74M\tFS8APND01ECAX5\tFS8APND01DC3TZ\tFS8APND01EJVO6\tFS8APND01D4VFG\tFS8APND01DYYYO\tFS8APND01D1EDD\tFS8APND01DQUOT\tFS8APND01A2NSJ\tFS8APND01DDC8I\tFS8APND01BP1T2\tFS8APND01DPY6U\tFS8APND01CIQGV\tFS8APND01BPUT8\tFS8APND01BDNH4\tFS8APND01DOZDN\tFS8APND01DS866\tFS8APND01DGS2J\tFS8APND01EDK32\tFS8APND01EPA0T\tFS8APND01CK3JM\tFS8APND01BKLWW\tFS8APND01DV0BO\tFS8APND01DPNXE\tFS8APND01B7LUA\tFS8APND01BTTE2\tFS8APND01CKO4X\tFS8APND01DGGBY\tFS8APND01C4NHX\tFS8APND01DYPQN
FS8APND01BSTVP:
FS8APND01EFK0W:
FS8APND01DCIOO:
FS8APND01CKOMZ:
"""

    command = " ".join(["denoiser.py",
                        "--force", "-o", self.test_dir, "-i",
                        "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME])

    # communicate() drains the pipe and waits for the process to exit, so
    # the output files are complete before they are read below; the
    # captured output itself is not inspected.
    Popen(command, shell=True, universal_newlines=True,
          stdout=PIPE, stderr=STDOUT).communicate()
    self.result_dir = self.test_dir

    # paths are built by plain concatenation, as before, so test_dir must
    # already end with a path separator
    with open(self.result_dir + "centroids.fasta") as centroids_f:
        observed = centroids_f.read()
    self.assertEqual(observed, expected)

    with open(self.result_dir + "singletons.fasta") as singletons_f:
        self.assertEqual(len(list(parse_fasta(singletons_f))), 6)

    with open(self.result_dir + "denoiser_mapping.txt") as mapping_f:
        observed = mapping_f.read()
    self.assertEqual(observed, expected_map)