本文整理汇总了Python中Bio.SeqRecord.SeqRecord类的典型用法代码示例。如果您正苦于以下问题:Python SeqRecord.SeqRecord的具体用法?Python SeqRecord.SeqRecord怎么用?Python SeqRecord.SeqRecord使用的例子?那么恭喜您, 这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块Bio.SeqRecord的用法示例。
在下文中一共展示了SeqRecord.SeqRecord方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_mut_sequence
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def test_mut_sequence():
    """Pin the first ten bases produced by ``mut_sequence`` under fixed seeds.

    Both the stdlib and NumPy random generators are seeded with 42 before
    the error model is built, so the expected prefix below is reproducible.
    """
    random.seed(42)
    np.random.seed(42)
    err_mod = basic.BasicErrorModel()

    # A 125-base poly-A read with a uniform phred quality of 5.
    template = Seq(str('AAAAA' * 25), IUPAC.unambiguous_dna)
    read = SeqRecord(template, id='read_1', description='test read')
    read.letter_annotations["phred_quality"] = [5] * 125

    read.seq = err_mod.mut_sequence(read, 'forward')

    prefix = str(read.seq[:10])
    assert prefix == 'AAAACAGAAA'
示例2: main
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def main(args):
    """Split FASTA records on non-ACGT runs and write the pieces out.

    Reads ``args.infile`` as FASTA. Records whose id matches any pattern in
    ``args.discard`` are skipped. Each remaining sequence is upper-cased and
    split on runs of characters other than A/C/G/T; pieces strictly longer
    than ``args.minlength`` are written to ``args.outfile`` as
    ``<record id>_chunk_<n>`` with ``n`` counted from 1 per record.

    Args:
        args: Namespace providing ``infile``, ``outfile``, ``discard``,
            ``minlength`` and ``debug``.
    """
    for record in SeqIO.parse(args.infile, 'fasta'):
        if args.discard:
            # Every pattern is tried against the record id (anchored at the
            # start, as re.match does); one hit is enough to drop the record.
            hits = [rx for rx in args.discard if re.match(rx, record.id)]
            if len(hits) > 0:
                continue

        printlog(args.debug, "DEBUG: convert to upper case", record.id)
        sequence = str(record.seq).upper()

        printlog(args.debug, "DEBUG: split seq by Ns", record.id)
        subseqs = []
        for piece in re.split('[^ACGT]+', sequence):
            if len(piece) > args.minlength:
                subseqs.append(piece)

        printlog(args.debug, "DEBUG: print subseqs", record.id)
        for subseqcounter, subseq in enumerate(subseqs, start=1):
            subid = '{:s}_chunk_{:d}'.format(record.id, subseqcounter)
            subrecord = SeqRecord(Seq(subseq), subid, '', '')
            SeqIO.write(subrecord, args.outfile, 'fasta')
示例3: cast_to_str
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def cast_to_str(obj):
    """Convert a plain string, Seq, or SeqRecord to a plain string.

    Args:
        obj (str, Seq, SeqRecord): Sequence in any of the accepted forms.

    Returns:
        str: ``obj`` itself when it is already a string, ``str(obj)`` for a
        Seq, or ``str(obj.seq)`` for a SeqRecord.

    Raises:
        ValueError: If ``obj`` is none of the accepted types.
    """
    # Strings pass straight through, untouched.
    if isinstance(obj, str):
        return obj
    if isinstance(obj, Seq):
        return str(obj)
    if not isinstance(obj, SeqRecord):
        raise ValueError('Must provide a string, Seq, or SeqRecord object.')
    return str(obj.seq)
示例4: cast_to_seq
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def cast_to_seq(obj, alphabet=IUPAC.extended_protein):
    """Convert a string, Seq, or SeqRecord to a Seq.

    Args:
        obj (str, Seq, SeqRecord): Sequence in any of the accepted forms.
        alphabet: Alphabet handed to ``Seq`` when ``obj`` is a string. It is
            not applied when ``obj`` is already a Seq or a SeqRecord.

    Returns:
        Seq: ``obj`` itself for a Seq, ``obj.seq`` for a SeqRecord, or a new
        Seq built from the upper-cased string.

    Raises:
        ValueError: If ``obj`` is none of the accepted types.
    """
    if isinstance(obj, Seq):
        return obj
    if isinstance(obj, SeqRecord):
        return obj.seq
    if not isinstance(obj, str):
        raise ValueError('Must provide a string, Seq, or SeqRecord object.')
    # Only raw strings are normalised to upper case before wrapping.
    return Seq(obj.upper(), alphabet)
示例5: seq
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def seq(self, s):
    """Store a new sequence on this object.

    A falsy ``s`` clears the stored sequence. Otherwise, if a sequence file
    is associated with the object, setting is refused. A ``str`` or ``Seq``
    is converted with ``cast_to_seq``. A ``SeqRecord`` contributes its
    ``seq`` and fills in any metadata that is still unset or at its
    placeholder value on this object. The checks are exact type matches, so
    a value of any other type (including subclasses) is silently ignored.

    Args:
        s: New sequence (str, Seq, SeqRecord) or a falsy value to clear.

    Raises:
        ValueError: If ``self.sequence_file`` is truthy and ``s`` is not.
    """
    if not s:
        self._seq = None
        return

    if self.sequence_file:
        raise ValueError('{}: unable to set sequence, sequence file is associated with this object'.format(self.id))

    kind = type(s)
    if kind == str or kind == Seq:
        self._seq = ssbio.protein.sequence.utils.cast_to_seq(obj=s)
        return

    if kind == SeqRecord:
        self._seq = s.seq
        # Name and description are only replaced while they still hold the
        # placeholder text.
        if self.name == '<unknown name>':
            self.name = s.name
        if self.description == '<unknown description>':
            self.description = s.description
        # The remaining metadata is copied only where ours is empty.
        for attr in ('dbxrefs', 'features', 'annotations', 'letter_annotations'):
            if not getattr(self, attr):
                setattr(self, attr, getattr(s, attr))
示例6: test_prune_seqs_matching_alignment
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def test_prune_seqs_matching_alignment(self):
    """Only the record that is absent from the alignment should survive pruning."""
    sequence = {
        name: SeqRecord(Seq(bases), name=name)
        for name, bases in (("seq1", "GTAC"), ("seq2", "CGTT"), ("seq3", "TAGC"))
    }
    # The alignment holds seq1 and seq3, leaving seq2 as the only outsider.
    aligned_records = [
        SeqRecord(Seq("GTAC"), name="seq1"),
        SeqRecord(Seq("TAGC"), name="seq3"),
    ]
    alignment = MultipleSeqAlignment(aligned_records)

    result = align.prune_seqs_matching_alignment(sequence.values(), alignment)

    surviving_names = [r.name for r in result]
    assert surviving_names == ["seq2"]
    for r in result:
        assert r.seq == sequence[r.name].seq
示例7: fake_alignment
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def fake_alignment(T):
    """
    Build a placeholder alignment with one 'ACGT' record per terminal of T.

    The original note says this exists to appease treetime when it is only
    used for naming nodes, and that it was lifted from refine.py.

    Parameters
    -------
    T : <class 'Bio.Phylo.BaseTree.Tree'>

    Returns
    -------
    <class 'Bio.Align.MultipleSeqAlignment'>
    """
    from Bio import SeqRecord, Seq, Align

    # Each terminal's name is used as both the id and the name of its record.
    placeholder_records = [
        SeqRecord.SeqRecord(seq=Seq.Seq('ACGT'), id=n.name, name=n.name, description='')
        for n in T.get_terminals()
    ]
    return Align.MultipleSeqAlignment(placeholder_records)
示例8: count_records
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def count_records(input_object, format='fasta'):
    """Count SeqRecord objects from a file in the specified format.

    :param input_object: A file object or a file name.
    :param format: Input format (fasta by default).
    :returns: Number of records in input file.
    :rtype: int
    """
    if isinstance(input_object, str):
        # Plain "r" replaces the old "rU" mode: Python 3 text mode already
        # uses universal newlines, and "U" is rejected from Python 3.11 on.
        # The file is opened here, so it is also closed here.
        with open(input_object, "r") as handle:
            return sum(1 for _ in SeqIO.parse(handle, format))
    # A handle supplied by the caller is left open for the caller to manage.
    return sum(1 for _ in SeqIO.parse(input_object, format))
示例9: make_intron_supercontig
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def make_intron_supercontig(contig_info, gene, prefix, add_N=False):
    """Concatenate a gene's contigs into one supercontig and write it as FASTA.

    Contigs are read from ``../<gene>_contigs.fasta``. For each row of
    ``contig_info``, the contig named by ``row[0]`` is appended as-is when
    ``row[5]`` is ``"(+)"`` and reverse-complemented when it is ``"(-)"``.
    Any other value writes a message to stderr and exits with status 1.
    The result gets the id ``<prefix>-<gene>``, an empty description, and is
    written to ``sequences/intron/<gene>_supercontig.fasta``.

    Args:
        contig_info: Sequence of rows; only indices 0 and 5 are read here.
        gene: Gene name used in the input and output file names.
        prefix: Prefix used in the output record id.
        add_N: When true, insert ten ``N`` characters between consecutive
            contigs (not after the last one).
    """
    cap3contigs = SeqIO.to_dict(SeqIO.parse("../{}_contigs.fasta".format(gene), 'fasta'))
    intron_supercontig = SeqRecord(Seq(''))

    # The last row is identified by position. Comparing rows by value would
    # also skip the spacer after an earlier row that equals the final one.
    last_index = len(contig_info) - 1
    for index, hit in enumerate(contig_info):
        strand = hit[5]
        if strand == "(+)":
            intron_supercontig += cap3contigs[hit[0]]
        elif strand == "(-)":
            intron_supercontig += cap3contigs[hit[0]].reverse_complement()
        else:
            sys.stderr.write("Strandedness not found!")
            sys.exit(1)
        if add_N and index != last_index:
            intron_supercontig += "NNNNNNNNNN"

    intron_supercontig.id = '{}-{}'.format(prefix, gene)
    intron_supercontig.description = ''
    SeqIO.write(intron_supercontig, 'sequences/intron/{}_supercontig.fasta'.format(gene), 'fasta')
示例10: remove_exons
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def remove_exons(gff_filename, supercontig_filename, mode="all"):
    '''Given a supercontig and corresponding annotation, remove the exon sequences.

    Every tab-separated GFF line whose third column is "exon" contributes a
    (start, end) pair taken from columns four and five. The stretches of the
    supercontig outside those intervals are concatenated in file order.

    NOTE(review): the original docstring describes an "intron" mode that
    returns only sequences annotated as introns, but ``mode`` is not
    consulted anywhere in this function -- confirm whether that is intended.

    Args:
        gff_filename: Path to the GFF annotation file.
        supercontig_filename: Path to a FASTA file holding exactly one record.
        mode: Accepted for interface compatibility; currently unused.

    Returns:
        SeqRecord carrying the supercontig's id, an empty description, and
        the sequence with the exon intervals removed.
    '''
    exons = []
    # Stream the annotation and close it deterministically.
    with open(gff_filename) as gff:
        for line in gff:
            fields = line.rstrip().split("\t")
            if len(fields) > 2 and fields[2] == "exon":
                exons.append((int(fields[3]), int(fields[4])))

    supercontig = SeqIO.read(supercontig_filename, 'fasta')
    exonless_contig = SeqRecord(Seq(''), id=supercontig.id)

    start = 0
    for exon_start, exon_end in exons:
        # The -1 treats the GFF start as 1-based, so the slice stops just
        # before the first exon base. Assumes exons are listed in ascending
        # order -- TODO confirm.
        exonless_contig += supercontig[start:exon_start - 1]
        start = exon_end
    # Whatever follows the last exon (or the whole contig if there were none).
    exonless_contig += supercontig[start:]

    exonless_contig.description = ''
    return exonless_contig
示例11: transeq
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def transeq(data):
    """Translate one reading frame of a record into a protein SeqRecord.

    Args:
        data: Pair ``(record, frame)``. ``frame`` is converted with ``int``
            and must be 0-5: 0, 1, 2 select the forward strand starting at
            that offset; 3, 4, 5 select the reverse complement after
            dropping 0, 1 or 2 bases from the end of the sequence.

    Returns:
        SeqRecord holding the result of ``translate_frameshifted`` with id
        ``<record id>_strand<offset>plus`` or ``<record id>_strand<offset>minus``.

    Raises:
        ValueError: If ``frame`` is outside 0-5. Previously such a value
            fell through every branch and failed with an unbound local.
    """
    frame = int(data[1])
    record = data[0]
    if not 0 <= frame <= 5:
        raise ValueError("frame must be an integer from 0 to 5, got {}".format(frame))

    if frame < 3:
        offset = frame
        template = record.seq[offset:]
        strand = "plus"
    else:
        offset = frame - 3
        # Trimming the 3' end before reverse-complementing shifts the frame
        # on the minus strand.
        template = reverse_complement(record.seq[:len(record.seq) - offset])
        strand = "minus"

    prot = translate_frameshifted(template)
    new_id = record.id + "_strand{}{}".format(offset, strand)
    return SeqRecord(Seq(prot, IUPAC.protein), id=new_id)
示例12: single_fasta
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def single_fasta(ref, wd_split):
    """
    From a fasta file make single files with each sequence

    Records are renamed ``seq1``, ``seq2``, ... in input order. Each renamed
    record is written to ``<wd_split>/<new name>.fasta`` and also appended
    to ``<ref>.rename.fasta``.

    :param ref: Path of the input FASTA file.
    :param wd_split: Directory that receives the per-sequence files.
    :return: Tuple of (list of per-sequence file paths, dict mapping each
        new name to the original record id, path of the renamed FASTA).
    """
    single_fasta_list = []
    dict_ref_name = {}
    ref_rename = ref + ".rename.fasta"

    # Context managers close the input, the combined output and every
    # per-record file even if parsing or writing fails part-way through.
    with open(ref, 'r') as fasta_file, open(ref_rename, "w") as fh:
        for count, record in enumerate(SeqIO.parse(fasta_file, "fasta"), start=1):
            new_name = "seq" + str(count)
            dict_ref_name[new_name] = record.id
            new_rec = SeqRecord(record.seq, new_name, '', '')
            fasta_name = wd_split + '/' + new_name + '.fasta'
            single_fasta_list.append(fasta_name)
            with open(fasta_name, "w") as output_handle:
                SeqIO.write(new_rec, output_handle, "fasta")
            SeqIO.write(new_rec, fh, "fasta")

    return single_fasta_list, dict_ref_name, ref_rename
示例13: get_seq_record
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def get_seq_record(self):
    """
    Builds a SeqRecord describing this hit.

    The sequence comes from ``get_genome_contig_hsp_seq()``, the id from
    ``get_amr_gene_id()``, and the description is a formatted summary of the
    remaining getters.

    :return: A SeqRecord for this hit.
    """
    hsp_seq = Seq(self.get_genome_contig_hsp_seq())
    gene_id = self.get_amr_gene_id()

    template = (
        'isolate: {}, contig: {}, contig_start: {}, contig_end: {}, database_gene_start: {},'
        ' database_gene_end: {}, hsp/length: {}/{}, pid: {:0.2f}%, plength: {:0.2f}%'
    )
    # Values are gathered in the same order as the placeholders above.
    fields = (
        self.get_genome_id(),
        self.get_genome_contig_id(),
        self.get_genome_contig_start(),
        self.get_genome_contig_end(),
        self.get_amr_gene_start(),
        self.get_amr_gene_end(),
        self.get_hsp_length(),
        self.get_amr_gene_length(),
        self.get_pid(),
        self.get_plength(),
    )
    return SeqRecord(hsp_seq, id=gene_id, description=template.format(*fields))
示例14: generate_pan_genome_reference
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def generate_pan_genome_reference(G, output_dir, split_paralogs=False):
    """Write one representative DNA sequence per graph node to a FASTA file.

    For every node the longest entry of its ``'dna'`` attribute is emitted
    under the node's ``'name'``. Unless ``split_paralogs`` is true, a node
    whose first centroid was already seen on an earlier node is skipped.
    Output goes to ``output_dir + "pan_genome_reference.fa"``, so
    ``output_dir`` is joined by plain concatenation with no separator added.

    Args:
        G: Graph whose nodes carry ``'centroid'``, ``'dna'`` and ``'name'``.
        output_dir: Output directory prefix.
        split_paralogs: When true, nodes sharing a centroid are all kept.
    """
    # need to treat paralogs differently?
    seen_centroids = set()
    records = []
    for node in G.nodes():
        attrs = G.nodes[node]
        if not split_paralogs and attrs['centroid'][0] in seen_centroids:
            continue
        longest_dna = max(attrs['dna'], key=len)
        records.append(
            SeqRecord(Seq(longest_dna, generic_dna), id=attrs['name'], description="")
        )
        seen_centroids.update(attrs['centroid'])

    with open(output_dir + "pan_genome_reference.fa", 'w') as outfile:
        SeqIO.write(records, outfile, "fasta")
示例15: translate_sequences
# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def translate_sequences(sequence_dic):
    """Translate each coding sequence in a dict into a protein SeqRecord.

    Args:
        sequence_dic: Mapping of strain id to an object with a ``seq``
            attribute holding the coding sequence.

    Returns:
        list: One SeqRecord per input entry, in iteration order, with the
        strain id used as both id and description. A single trailing ``*``
        is removed from each translation.

    Raises:
        ValueError: If a sequence length is not a multiple of three.
    """
    protein_list = []
    for strain_id, sequence_record in sequence_dic.items():
        if len(sequence_record.seq) % 3 != 0:
            raise ValueError(
                "Coding sequence not divisible by 3, is it complete?!")

        protein_sequence = translate(str(sequence_record.seq))
        # endswith is safe for an empty translation, where indexing [-1]
        # would raise IndexError.
        if protein_sequence.endswith("*"):
            protein_sequence = protein_sequence[:-1]

        if "*" in protein_sequence:
            # An internal stop codon is reported but deliberately not fatal.
            print(sequence_record)
            print(protein_sequence)

        protein_list.append(
            SeqRecord(Seq(protein_sequence), id=strain_id, description=strain_id)
        )
    return protein_list