本文整理汇总了Python中Bio.SeqIO.write方法的典型用法代码示例。如果您正苦于以下问题:Python SeqIO.write方法的具体用法?Python SeqIO.write怎么用?Python SeqIO.write使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.SeqIO的用法示例。
在下文中一共展示了SeqIO.write方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: model_from_bam
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def model_from_bam(args):
    """Entry point of the `iss model` submodule.

    Hands ``args.bam`` and ``args.output`` to ``iss.bam.to_model``. Per the
    original documentation, this writes every variable needed to build an
    ErrorModel to args.output + .npz (the writing itself happens inside
    ``bam.to_model``, which is not visible here).

    Args:
        args (object): the command-line arguments from argparse; the
            attributes ``bam`` and ``output`` are read.

    The process exits with status 1 if the ``iss.bam`` module cannot be
    imported.
    """
    log = logging.getLogger(__name__)
    log.debug('iss version %s', __version__)
    log.debug('Using verbose logger')

    log.info('Starting iss model')
    try:
        # Imported lazily so that a missing bam dependency is reported
        # through the logger instead of as a raw traceback.
        from iss import bam
    except ImportError as import_error:
        log.error('Failed to import bam module: %s', import_error)
        sys.exit(1)
    else:
        log.info('Using KDE ErrorModel')
        bam.to_model(args.bam, args.output)
        log.info('Model generation complete')
示例2: main
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def main(args):
    """Split FASTA records at non-ACGT runs and write the resulting chunks.

    Every record of ``args.infile`` is upper-cased and split wherever one or
    more characters other than A, C, G or T occur. Chunks strictly longer
    than ``args.minlength`` are written to ``args.outfile`` in FASTA format
    with the id ``<record id>_chunk_<n>`` (n starts at 1 for each record).

    Records whose id matches (``re.match``) any pattern in ``args.discard``
    are skipped entirely.

    Args:
        args: parsed command-line arguments; the attributes ``infile``,
            ``outfile``, ``discard``, ``minlength`` and ``debug`` are read.
            NOTE(review): ``outfile`` is presumably an open handle, since it
            is passed to ``SeqIO.write`` once per chunk -- confirm.
    """
    for record in SeqIO.parse(args.infile, 'fasta'):
        if args.discard:
            matching_patterns = [rx for rx in args.discard if re.match(rx, record.id)]
            if matching_patterns:
                continue

        printlog(args.debug, "DEBUG: convert to upper case", record.id)
        upper_seq = str(record.seq).upper()

        printlog(args.debug, "DEBUG: split seq by Ns", record.id)
        chunks = [
            piece
            for piece in re.split('[^ACGT]+', upper_seq)
            if len(piece) > args.minlength
        ]

        printlog(args.debug, "DEBUG: print subseqs", record.id)
        for chunk_number, chunk in enumerate(chunks, start=1):
            chunk_id = '{:s}_chunk_{:d}'.format(record.id, chunk_number)
            chunk_record = SeqRecord(Seq(chunk), chunk_id, '', '')
            SeqIO.write(chunk_record, args.outfile, 'fasta')
示例3: write_fasta_file
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def write_fasta_file(seq_records, outname, outdir=None, outext='.faa', force_rerun=False):
    """Write one SeqRecord, or a list of them, to a FASTA file.

    Args:
        seq_records (SeqRecord, list): SeqRecord or a list of SeqRecord objects
        outname: Name of the output file; ``outext`` is appended to it
        outdir: Directory to write into; a falsy value is replaced by ''
        outext: Extension of the FASTA file, default ".faa"
        force_rerun: Forwarded to ``ssbio.utils.force_rerun``, which decides
            whether the file is (re)written

    Returns:
        str: Path to the output FASTA file, whether or not it was written
        by this call.
    """
    target_dir = outdir or ''
    outfile = ssbio.utils.outfile_maker(inname='', outname=outname, outdir=target_dir, outext=outext)

    needs_writing = ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile)
    if needs_writing:
        SeqIO.write(seq_records, outfile, "fasta")

    return outfile
示例4: write_all_sequences_file
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def write_all_sequences_file(self, outname, outdir=None):
    """Dump every stored sequence (``self.sequences``) into one FASTA file.

    Args:
        outname (str): Name of the output FASTA file without the extension;
            '.faa' is appended
        outdir (str): Output directory; when falsy, ``self.sequence_dir`` is
            used instead

    Returns:
        str: Path of the file that was written.

    Raises:
        ValueError: If neither ``outdir`` nor ``self.sequence_dir`` is set.
    """
    target_dir = outdir or self.sequence_dir
    if not target_dir:
        raise ValueError('Output directory must be specified')

    outfile = op.join(target_dir, outname + '.faa')
    SeqIO.write(self.sequences, outfile, "fasta")

    log.info('{}: wrote all protein sequences to file'.format(outfile))
    return outfile
示例5: test_prepare_with_alignment_with_ref_name
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def test_prepare_with_alignment_with_ref_name(self, test_file, test_seqs, existing_with_ref, existing_aln, ref_seq, out_file):
    """Check that prepare() leaves everything untouched when given a reference name.

    Inputs are a file of test sequences, an existing alignment that already
    contains the reference, and the reference's name. Both the alignment and
    the test sequences must come back unchanged.
    """
    aln_outfile, seqs_outfile, _ = align.prepare([test_file], existing_with_ref, out_file, ref_seq.id, None)

    assert os.path.isfile(aln_outfile), "Didn't write existing alignment where it said"
    assert aln_outfile == existing_with_ref, "Rewrote the alignment file unexpectedly"

    # The alignment on disk must still hold the reference and every
    # sequence of the pre-existing alignment, unmodified.
    written_aln = SeqIO.to_dict(SeqIO.parse(aln_outfile, "fasta"))
    assert written_aln[ref_seq.id].seq == ref_seq.seq, "Reference sequence dropped from alignment"
    for name in existing_aln:
        assert name in written_aln, "Some existing alignment sequences dropped unexpectedly"
        assert written_aln[name].seq == existing_aln[name].seq, "Some existing alignment sequences changed unexpectedly"

    # The test sequences must round-trip unchanged, with no extras.
    assert os.path.isfile(seqs_outfile), "Didn't write test sequences where it said"
    written_seqs = SeqIO.to_dict(SeqIO.parse(seqs_outfile, "fasta"))
    for name in test_seqs:
        assert name in written_seqs, "Some test sequences unexpectedly dropped"
        assert written_seqs[name].seq == test_seqs[name].seq, "Some test sequences changed unexpectedly"
    assert written_seqs.keys() == test_seqs.keys()
示例6: test_prepare_with_alignment_with_ref_seq
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def test_prepare_with_alignment_with_ref_seq(self, test_file, test_seqs, existing_file, existing_aln, ref_seq, ref_file, out_file):
    """Check that prepare() adds a reference sequence file to an existing alignment.

    Given a set of test sequences, an existing alignment and a reference
    sequence file, the reference must be added to the alignment (written to
    a new file rather than over the existing one) and nothing else may
    change.
    """
    aln_outfile, seqs_outfile, ref_name = align.prepare([test_file], existing_file, out_file, None, ref_file)
    assert ref_name == ref_seq.id, "Didn't return strain name from refrence file"
    assert os.path.isfile(aln_outfile), "Didn't write existing alignment where it said"
    # Compare path with path. ``existing_aln`` is the mapping of sequences
    # (it is indexed by name below), so comparing the output path against it
    # could never fail and would not detect an overwrite.
    assert aln_outfile != existing_file, "Unexpectedly overwrote existing alignment"

    # Alignment file should have the reference added
    aln_output = SeqIO.to_dict(SeqIO.parse(aln_outfile, "fasta"))
    assert aln_output[ref_seq.id].seq == ref_seq.seq, "Reference sequence not added to alignment"
    for seq in existing_aln:
        assert seq in aln_output, "Some existing alignment sequences dropped unexpectedly"
        assert aln_output[seq].seq == existing_aln[seq].seq, "Some existing alignment sequences changed unexpectedly"

    # test sequences should be unchanged
    assert os.path.isfile(seqs_outfile), "Didn't write test sequences where it said"
    seq_output = SeqIO.to_dict(SeqIO.parse(seqs_outfile, "fasta"))
    for seq in test_seqs:
        assert seq in seq_output, "Some test sequences unexpectedly dropped"
        assert seq_output[seq].seq == test_seqs[seq].seq, "Some test sequences changed unexpectedly"
    assert seq_output.keys() == test_seqs.keys()
示例7: main
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def main():
    """Copy sequence records whose id or description matches a pattern.

    Reads every file in ``args.file``, tests each record's ``id`` and
    ``description`` against ``args.pattern`` (``re.search``) and writes the
    matching records to ``args.outfile`` (``sys.stdout`` when unset). A
    summary of how many records were checked and taken goes to stderr.

    The input format is ``args.format`` or, when that is unset, whatever
    ``guess_format`` returns for the file name; the output format is
    ``args.out_format`` or, when unset, the input format of the file being
    read.
    """
    args = get_args()
    regex = re.compile(args.pattern)
    out_fh = args.outfile or sys.stdout
    checked, took = 0, 0

    for fh in args.file:
        in_format = args.format or guess_format(fh.name)
        # Resolved once per file; it cannot change between records.
        out_format = args.out_format or in_format

        for rec in SeqIO.parse(fh, in_format):
            checked += 1
            if regex.search(rec.id) or regex.search(rec.description):
                took += 1
                SeqIO.write(rec, out_fh, out_format)

    print(f'Done, checked {checked}, took {took}.', file=sys.stderr)
# --------------------------------------------------
示例8: base_complement
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def base_complement(k):
    """Return the complement of a base.

    The base is looked up in the module-level ``comp`` mapping. Per the
    original documentation that mapping performs the substitutions A<=>T,
    C<=>G, X=>X for both upper and lower case (the mapping itself is defined
    elsewhere). For any key missing from ``comp`` a warning line is written
    to stderr and the argument is returned unchanged.

    :param k: A base.
    :returns: Complement of base.
    :rtype: str
    """
    try:
        return comp[k]
    except KeyError:
        # Terminate the warning with a newline so that consecutive warnings
        # (one per unknown base) do not run together on a single line.
        sys.stderr.write(
            "WARNING: No reverse complement for {} found, returning argument.\n".format(k))
        return k
示例9: _write_doc_template
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def _write_doc_template(schema):
s = """Write to {} format.
Parameters
----------
filename : str
File to write {} string to. If no filename is given, a {} string
will be returned.
sequence_col : str (default='sequence')
Sequence column name in DataFrame.
id_col : str (default='id')
ID column name in DataFrame
id_only : bool (default=False)
If True, use only the ID column to label sequences in fasta.
""".format(schema, schema, schema)
return s
示例10: depth_summary
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def depth_summary(all_genes, prefix):
    """Given the coding.depth file, calculate the average depth across the recovered sequence.

    Reads ``coding.depth`` from the current working directory. For each
    whitespace-separated line, the gene name is the part of the first field
    after the last "-", and the depth is the integer in the third field.
    The mean depth per gene is computed over all of that gene's lines.

    One tab-separated line is written to stdout: ``prefix`` followed by the
    depth of every gene in sorted gene-name order. Genes listed in
    ``all_genes`` that do not occur in the file are reported as 0; genes
    that occur only in the file are reported as well.

    Args:
        all_genes: iterable of gene names that must appear in the output.
        prefix: label written in the first column.
    """
    # gene -> [sum of depths, number of positions]. Accumulating per gene
    # avoids the boundary bookkeeping that previously dropped the first row
    # of each new gene and never recorded the last gene in the file.
    totals = {}
    with open("coding.depth") as depth_file:
        for line in depth_file:
            fields = line.split()
            if len(fields) < 3:
                # Blank or truncated line: nothing to count.
                continue
            gene = fields[0].split("-")[-1]
            running = totals.setdefault(gene, [0, 0])
            running[0] += int(fields[2])
            running[1] += 1

    depths = {g: 0 for g in all_genes}
    for gene, (depth_sum, n_positions) in totals.items():
        depths[gene] = depth_sum / float(n_positions)

    output_list = [str(depths[key]) for key in sorted(depths)]
    sys.stdout.write("{}\t{}\n".format(prefix, "\t".join(output_list)))
示例11: extract_paralogs
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def extract_paralogs(gene, prefix):
    """Write the putative paralogs of one gene/sample to a FASTA file.

    Reads, from the directory ``<gene>/<prefix>``:

    * ``paralog_warning.txt`` -- the second whitespace-separated field of
      each line is taken as a paralog sequence id;
    * ``exonerate_stats.csv`` -- its first line names the chosen paralog;
    * ``exonerate_results.fasta`` -- the sequences, looked up by id.

    The selected records are renamed to ``<prefix>.main`` (the chosen
    paralog) or ``<prefix>.<index>`` (all others, index = position in the
    list) and written to ``<gene>/<prefix>/paralogs/<gene>_paralogs.fasta``.

    Args:
        gene: gene name, also the name of the top-level directory.
        prefix: sample name, also the name of the sub-directory.

    Returns:
        int: number of sequences written, or 0 when ``exonerate_stats.csv``
        cannot be opened.
    """
    sample_dir = os.path.join(gene, prefix)

    # De-duplicate while keeping first-seen order, so the numeric suffixes
    # assigned below are reproducible from run to run (a plain set has no
    # stable order for strings).
    putative_paralog_ids = []
    seen_ids = set()
    with open(os.path.join(sample_dir, "paralog_warning.txt")) as warning_file:
        for line in warning_file:
            paralog_id = line.split()[1].rstrip()
            if paralog_id not in seen_ids:
                seen_ids.add(paralog_id)
                putative_paralog_ids.append(paralog_id)

    try:
        with open(os.path.join(sample_dir, "exonerate_stats.csv")) as stats_file:
            chosen_paralog = stats_file.readline().rstrip()
    except IOError:
        return 0

    exonerate_dict = SeqIO.to_dict(
        SeqIO.parse(os.path.join(sample_dir, "exonerate_results.fasta"), 'fasta'))

    paralog_dir = os.path.join(sample_dir, 'paralogs')
    if not os.path.isdir(paralog_dir):
        os.mkdir(paralog_dir)

    seqs_to_write = [exonerate_dict[x] for x in putative_paralog_ids]
    for index, record in enumerate(seqs_to_write):
        if record.id == chosen_paralog:
            record.id = "{}.{}".format(prefix, "main")
        else:
            record.id = "{}.{}".format(prefix, index)

    SeqIO.write(seqs_to_write, os.path.join(paralog_dir, '{}_paralogs.fasta'.format(gene)), 'fasta')
    return len(seqs_to_write)
示例12: concatenate_sequences
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def concatenate_sequences(gene_dict, fastafiles, unique_names):
    """Concatenate each sample's sequences across genes and print them as FASTA.

    Given a dictionary of dictionaries with complete sampling in each gene
    (``gene_dict[gene][name]``), the sequences of every name in
    ``unique_names`` are joined in the order of ``fastafiles`` and written
    to stdout in FASTA format. The length of the last concatenated sequence
    written is reported on stderr.

    Args:
        gene_dict: mapping gene -> mapping name -> sequence record.
        fastafiles: gene keys, in concatenation order.
        unique_names: sample names to concatenate.

    Returns:
        list: one partition length per entry of ``fastafiles``, taken from
        the first sequence stored for that gene.
    """
    new_seq_dict = {}
    partition_lengths = []
    for gene in fastafiles:
        gene_seqs = gene_dict[gene]
        for name in unique_names:
            if name in new_seq_dict:
                new_seq_dict[name] += gene_seqs[name]
            else:
                new_seq_dict[name] = gene_seqs[name]
        partition_lengths.append(len(next(iter(gene_seqs.values()))))

    # Track the length explicitly instead of relying on the loop variable
    # surviving the loop, which raised NameError when nothing was written.
    final_seq_length = None
    for concatenated in new_seq_dict.values():
        SeqIO.write(concatenated, sys.stdout, 'fasta')
        final_seq_length = len(concatenated)

    if final_seq_length is not None:
        sys.stderr.write("Final conatenated sequence length: {}\n".format(final_seq_length))
    return partition_lengths
示例13: raxml_partition
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def raxml_partition(fastafiles, partition_lengths, partition_type):
    """Generate a raxml partition file for the given fastafiles.

    Writes ``partition.raxml`` in the current working directory. Genes are
    laid out back to back, starting at position 1, using the lengths in
    ``partition_lengths``.

    * ``partition_type == 'CODON'``: two lines per gene, both labelled
      "DNA" -- ``<name>12`` covering codon positions 1 and 2 and
      ``<name>3`` covering codon position 3, each with a ``\\3`` stride.
    * any other value: one line per gene, ``<partition_type>,<name>=<start>-<end>``.

    Args:
        fastafiles: gene/file names, in concatenation order.
        partition_lengths: length of each gene, parallel to ``fastafiles``.
        partition_type: 'CODON' or the label to put on every partition.
    """
    gene_start = 1
    # The context manager guarantees the file is flushed and closed even if
    # a length lookup or write fails part-way through.
    with open("partition.raxml", 'w') as partition_file:
        for g, fasta_name in enumerate(fastafiles):
            gene_end = gene_start + partition_lengths[g] - 1
            if partition_type == 'CODON':
                codon1_end = gene_end - 2
                codon2_start = gene_start + 1
                codon2_end = gene_end - 1
                codon3_start = gene_start + 2
                partition_file.write("{},{}{}={}-{}\\3,{}-{}\\3\n".format(
                    "DNA", fasta_name, "12", gene_start, codon1_end, codon2_start, codon2_end))
                partition_file.write("{},{}{}={}-{}\\3\n".format(
                    "DNA", fasta_name, "3", codon3_start, gene_end))
            else:
                partition_file.write("{},{}={}-{}\n".format(
                    partition_type, fasta_name, gene_start, gene_end))
            gene_start = gene_end + 1
示例14: supercontig_exonerate
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def supercontig_exonerate(supercontig, protseq, prefix, thresh=55):
    """Given a long, joined contig and a protein sequence, return the exonerate hit(s).

    The protein and the supercontig are written to ``<prefix>/temp.prot.fa``
    and ``<prefix>/temp.contig.fa``, and ``exonerate`` is run in
    ``protein2genome`` mode on them. Its output is formatted (``--ryo``) as
    FASTA whose header is a comma-separated list; the fifth field is parsed
    as a number and compared against ``thresh``.

    Args:
        supercontig: sequence record of the joined contig.
        protseq: sequence record of the protein.
        prefix: directory in which the temporary FASTA files are written.
        thresh: only hits whose fifth header field is strictly greater than
            this value are kept (default 55).

    Returns:
        list: the sequence records of the retained hits.
    """
    logger = logging.getLogger("pipeline")
    exonerate_ryo = '>%ti,%qi,%qab,%qae,%pi,(%tS)\\n%tcs\\n'
    temp_prot_filename = "%s/temp.prot.fa" % prefix
    temp_contig_filename = "%s/temp.contig.fa" % prefix
    SeqIO.write(protseq, temp_prot_filename, 'fasta')
    SeqIO.write(supercontig, temp_contig_filename, 'fasta')

    logger.debug("Conducting exonerate search on supercontig")
    command = [
        'exonerate', '-m', 'protein2genome',
        '--showalignment', 'no', '-V', '0', '--showvulgar', 'no',
        '--ryo', exonerate_ryo,
        temp_prot_filename, temp_contig_filename,
    ]
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True)
    try:
        # Drain stdout BEFORE waiting: calling wait() first can deadlock
        # once the child has filled the OS pipe buffer and blocks on write.
        supercontig_cds = [
            hit for hit in SeqIO.parse(proc.stdout, 'fasta')
            if float(hit.id.split(",")[4]) > thresh
        ]
    finally:
        proc.stdout.close()
        proc.wait()

    logger.debug("Supercontig lengths: %s" % " ".join([str(len(x.seq)) for x in supercontig_cds]))
    return supercontig_cds
示例15: make_intron_supercontig
# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import write [as 别名]
def make_intron_supercontig(contig_info, gene, prefix, add_N=False):
    """Join the contigs of a gene into one supercontig and write it as FASTA.

    Contigs are read from ``../<gene>_contigs.fasta`` and appended in the
    order given by ``contig_info``. For each entry, element 0 is the contig
    id and element 5 the strand: "(+)" appends the contig as is, "(-)"
    appends its reverse complement. Any other strand value prints an error
    and exits with status 1.

    The result gets the id ``<prefix>-<gene>``, an empty description, and is
    written to ``sequences/intron/<gene>_supercontig.fasta``.

    Args:
        contig_info: sequence of per-contig records (indexable; elements 0
            and 5 are read).
        gene: gene name used in the input and output file names.
        prefix: sample name used in the record id.
        add_N: when true, ten N characters are inserted between consecutive
            contigs (not after the last one).
    """
    cap3contigs = SeqIO.to_dict(SeqIO.parse("../{}_contigs.fasta".format(gene), 'fasta'))
    intron_supercontig = SeqRecord(Seq(''))

    # Identify the last entry by position, not by value: an earlier entry
    # that compares equal to the last one must still get its spacer.
    last_index = len(contig_info) - 1
    for index, hit in enumerate(contig_info):
        if hit[5] == "(+)":
            intron_supercontig += cap3contigs[hit[0]]
        elif hit[5] == "(-)":
            intron_supercontig += cap3contigs[hit[0]].reverse_complement()
        else:
            # Newline-terminated so the message is not glued to whatever
            # the shell prints after the process exits.
            sys.stderr.write("Strandedness not found!\n")
            sys.exit(1)
        if add_N and index != last_index:
            intron_supercontig += "NNNNNNNNNN"

    intron_supercontig.id = '{}-{}'.format(prefix, gene)
    intron_supercontig.description = ''
    SeqIO.write(intron_supercontig, 'sequences/intron/{}_supercontig.fasta'.format(gene), 'fasta')