本文整理汇总了Python中Bio.Seq的典型用法代码示例。如果您正苦于以下问题：Python Bio.Seq的具体用法？Python Bio.Seq怎么用？Python Bio.Seq使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解Bio.Seq所在的包Bio的用法示例。
在下文中一共展示了Bio.Seq方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sanitise_sequence
# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def sanitise_sequence(record: Record) -> Record:
    """ Removes gap characters (-) from the record's sequence and replaces
        every remaining character that is not A, C, G, or T with N.

        The sequence is upper-cased first. If no A, C, G, or T is left at all,
        the record's skip attribute is set to "contains no sequence".

        Arguments:
            record: the secmet.Record to alter

        Returns:
            the same Record instance as given
    """
    # gaps are dropped entirely, anything unrecognised collapses to N
    cleaned = [base if base in "ACGT" else "N"
               for base in record.seq.upper()
               if base != "-"]
    record.seq = Seq("".join(cleaned), alphabet=record.seq.alphabet)
    # only A/C/G/T and N can be present, so "all N" (or empty) means no content
    if all(base == "N" for base in cleaned):
        record.skip = "contains no sequence"
    return record
示例2: ref
# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def ref(self, in_ref):
    """Set the reference sequence, reading it from file if a path is given.

    Parameters
    ----------
    in_ref : file name, str, Bio.Seq.Seq, Bio.SeqRecord.SeqRecord
        reference sequence. If it names an existing file, the file is read
        as fasta or, failing that, as genbank. A non-empty value is
        converted with ``seq2array`` and stored in ``self._ref``; an empty
        or ``None`` value leaves the object unchanged.

    Raises
    ------
    TypeError
        if ``in_ref`` names an existing file that can be parsed neither as
        fasta nor as genbank.
    """
    if in_ref and isfile(in_ref):
        # keep the path: in_ref is rebound to the parsed record on success
        ref_file = in_ref
        for fmt in ['fasta', 'genbank']:
            try:
                in_ref = SeqIO.read(ref_file, fmt)
            except Exception:
                # wrong format for this parser -- try the next one; unlike a
                # bare except this lets KeyboardInterrupt/SystemExit through
                continue
            self.logger("SequenceData: loaded reference sequence as %s format"%fmt,1)
            break
        else:
            raise TypeError('SequenceData.ref: reference sequence file %s could not be parsed, fasta and genbank formats are supported.' % (ref_file,))

    if in_ref:
        self._ref = seq2array(in_ref, fill_overhangs=False, word_length=self.word_length)
        self.full_length = self._ref.shape[0]
        # a new reference invalidates anything derived from a previous one
        self.compressed_to_full_sequence_map = None
        self.multiplicity = None
示例3: swissprot_seq
# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def swissprot_seq(organism = 9606, isoforms = False):
    """
    Queries the URL configured under ``urls.urls['uniprot_basic']`` for the
    reviewed entries of one organism and returns a dict mapping each
    identifier to a ``Seq`` object built from its sequence.
    If ``isoforms`` is true, the sequences returned by ``get_isoforms``
    are attached to the matching entries with ``add_seq``.
    """
    url = urls.urls['uniprot_basic']['url']
    query = {
        'query': 'organism:%s AND reviewed:yes' % str(organism),
        'format': 'tab',
        'columns': 'id,sequence'
    }
    c = curl.Curl(url, post = query, silent = False, timeout = 900)
    # the first line of the table is dropped (header row)
    rows = c.result.split('\n')[1:]
    result = {}
    for row in rows:
        fields = row.strip().split('\t')
        # anything that is not exactly (id, sequence) is ignored
        if len(fields) == 2:
            uniprot_id, sequence = fields
            result[uniprot_id] = Seq(uniprot_id, sequence)
    if isoforms:
        by_protein = get_isoforms(organism = organism)
        for unip, variants in iteritems(by_protein):
            for isof, seq in iteritems(variants):
                # isoforms of proteins missing from the main table are skipped
                if unip in result:
                    result[unip].add_seq(seq, isof)
    return result
示例4: get_biopython
# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def get_biopython(self, isoform = 1):
    """
    Returns one isoform of this protein as a ``Bio.SeqRecord.SeqRecord``.

    The record's ``id`` is ``self.protein`` and its annotations carry the
    isoform number under ``'isoform'`` and ``'protein'`` under
    ``'molecule_type'``.

    :param isoform: Isoform number; anything ``int()`` accepts.
    :return: The ``SeqRecord``, or ``None`` if biopython is not installed
        (a notice is written to stdout in that case).
    :raises ValueError: If the isoform is not a key of ``self.isof``.
    """
    isoform = int(isoform)
    if isoform not in self.isof:
        raise ValueError('No isoform %u available for protein `%s`.' % (
            isoform, self.protein))
    # only the imports are guarded, so errors raised while building the
    # record are not mistaken for a missing module
    try:
        import Bio.Seq
        import Bio.SeqRecord
    except ImportError:
        sys.stdout.write('\t:: Module `Bio` (biopython) '
                         'could not be imported.\n')
        sys.stdout.flush()
        return None
    # Bio.Alphabet was removed in Biopython 1.78; import it explicitly and
    # fall back to an alphabet-free Seq where it no longer exists
    try:
        from Bio.Alphabet import ProteinAlphabet
    except ImportError:
        seq = Bio.Seq.Seq(self.isof[isoform])
    else:
        seq = Bio.Seq.Seq(self.isof[isoform], ProteinAlphabet())
    srec = Bio.SeqRecord.SeqRecord(seq, id = self.protein)
    srec.annotations['isoform'] = isoform
    # the annotation that replaces the alphabet in current Biopython
    srec.annotations['molecule_type'] = 'protein'
    return srec
示例5: is_nucl_seq
# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def is_nucl_seq(sequence: Union[Seq, str]) -> bool:
    """ Decides from content alone whether a sequence looks like nucleotides.

        Arguments:
            sequence: the sequence to check, either a string or Bio.Seq

        Returns:
            True if fewer than 20% of the characters are something other than
            A, C, G, T, or N (case-insensitive)
    """
    lowered = str(sequence).lower()
    # strip every nucleotide character in one pass; what is left is "other"
    leftover = lowered.translate(str.maketrans("", "", "acgtn"))
    return len(leftover) < 0.2 * len(sequence)
示例6: write_out_informative_fasta
# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def write_out_informative_fasta(compress_seq, alignment, stripFile=None):
    """Write the informative alignment columns to 'informative_sites.fasta'.

    A position is kept when, ignoring '-' and 'N', it shows at least two
    different values and is not just two values of which one occurs once.

    Parameters
    ----------
    compress_seq : dict
        must provide 'sequences' (name -> {position: value}), 'reference'
        (indexable by position) and 'positions' (iterable of positions)
    alignment : str
        path whose directory receives the output file
    stripFile : str, optional
        if given, passed to ``load_mask_sites``; the positions it returns
        are excluded

    Returns
    -------
    str
        path of the FASTA file that was written
    """
    from Bio import SeqIO
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq

    sequences = compress_seq['sequences']
    ref = compress_seq['reference']
    positions = compress_seq['positions']

    # Sites to exclude from the initial tree build, if a mask file was given
    strip_pos = load_mask_sites(stripFile) if stripFile else []

    seqNames = list(sequences.keys())

    # If true, also writes a file mapping FASTA position to real position
    printPositionMap = False
    sites = []
    pos = []

    for key in positions:
        if key in strip_pos:
            continue
        column = []
        for name in sequences:
            # looping try/except is faster than list comprehension
            try:
                column.append(sequences[name][key])
            except KeyError:
                # sequence has no entry at this position: use the reference
                column.append(ref[key])
        # drop gaps/Ns to see whether the column is otherwise informative
        called = [value for value in column if value != '-' and value != 'N']
        values, counts = np.unique(called, return_counts=True)
        n_values = len(values)
        lone_variant = n_values == 2 and min(counts) == 1
        # not all -/N, not all the same base, and more than one differing base
        if n_values > 1 and not lone_variant:
            sites.append(column)
            pos.append("\t".join([str(len(pos)+1), str(key)]))

    # Rotating turns columns into per-sequence rows, but in reverse sequence
    # order, hence the reversed name list
    align = np.rot90(np.asarray(sites))
    seqNamesCorr = seqNames[::-1]
    toFasta = [SeqRecord(id=name, seq=Seq("".join(row)), description='')
               for name, row in zip(seqNamesCorr, align)]

    fasta_file = os.path.join(os.path.dirname(alignment), 'informative_sites.fasta')

    # now output this as fasta to read into raxml or iqtree
    SeqIO.write(toFasta, fasta_file, 'fasta')

    if printPositionMap:
        with open(fasta_file+".positions.txt", 'w', encoding='utf-8') as the_file:
            the_file.write("\n".join(pos))

    return fasta_file
示例7: __init__
# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def __init__(self, aln, ref=None, logger=None, convert_upper=True,
             sequence_length=None, compress=True, word_length=1, sequence_type=None,
             fill_overhangs=True, seq_multiplicity=None, ambiguous=None, **kwargs):
    """Construct a sequence data object.

    Parameters
    ----------
    aln : Bio.Align.MultipleSeqAlignment, str
        alignment or file name
    ref : Seq, str
        sequence or file name
    logger : callable, optional
        logging function; ``simple_logger`` is used when none is given
    convert_upper : bool, optional
        convert all sequences to upper case, default true
    sequence_length : None, optional
        length of the sequence, only necessary when no alignment or ref is given
    compress : bool, optional
        compress identical alignment columns into one
    word_length : int
        length of state (typically 1 A,C,G,T, but could be 3 for codons)
    sequence_type : optional
        stored unchanged as ``self.sequence_type``
    fill_overhangs : bool
        treat gaps at either end of sequence as missing data
    seq_multiplicity : dict
        store the multiplicity of sequence, for example read count in a deep sequencing experiment;
        an empty dict is used when none is given
    ambiguous : byte
        character signifying missing data
    **kwargs
        only ``additional_constant_sites`` is read here (default 0)
    """
    self.logger = logger if logger else simple_logger
    self._aln = None
    self._ref = None
    self.likely_alphabet = None
    self.compressed_to_full_sequence_map = None
    self.multiplicity = None
    self.is_sparse = None
    self.convert_upper = convert_upper
    self.compress = compress
    self.seq_multiplicity = seq_multiplicity or {} # possibly a dict mapping sequences to their read count/sample count
    self.additional_constant_sites = kwargs['additional_constant_sites'] if 'additional_constant_sites' in kwargs else 0

    # if not specified, this will be set as the alignment_length or reference length
    self._full_length = None
    self.full_length = sequence_length
    self._compressed_length = None
    self.word_length = word_length
    self.fill_overhangs = fill_overhangs
    self.ambiguous = ambiguous
    self.sequence_type = sequence_type
    # NOTE(review): ref and aln are assigned last, after every setting above;
    # presumably they are property setters that read those settings -- confirm
    # before reordering
    self.ref = ref
    self.aln = aln