当前位置: 首页>>代码示例>>Python>>正文


Python Bio.Seq方法代码示例

本文整理汇总了 Python 中 Bio.Seq 方法的典型用法代码示例。如果您正苦于以下问题：Python Bio.Seq 方法的具体用法是什么？Python Bio.Seq 怎么用？有哪些 Python Bio.Seq 的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在的 Bio 的用法示例。


在下文中一共展示了Bio.Seq方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sanitise_sequence

# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def sanitise_sequence(record: Record) -> Record:
    """ Normalises the sequence of a record so that it only contains the
        characters A, C, G, T, and N.

        The sequence is uppercased, gap characters (-) are dropped, and every
        remaining character that is not A, C, G, or T is replaced with N.
        If the sequence contains no A, C, G, or T at all, the record's skip
        attribute is set to "contains no sequence".

        Arguments:
            record: the secmet.Record to alter

        Returns:
            the same Record instance as given
    """
    # gaps are removed outright rather than converted
    ungapped = [char for char in record.seq.upper() if char != "-"]
    sanitised = "".join(char if char in "ACGT" else "N" for char in ungapped)
    record.seq = Seq(sanitised, alphabet=record.seq.alphabet)
    # a sequence made up solely of gaps and unknowns has nothing to analyse
    if not any(char in "ACGT" for char in ungapped):
        record.skip = "contains no sequence"
    return record
开发者ID:antismash,项目名称:antismash,代码行数:26,代码来源:record_processing.py

示例2: ref

# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def ref(self, in_ref):
    """
    Set the reference sequence.

    Parameters
    ----------
    in_ref : file name, str, Bio.Seq.Seq, Bio.SeqRecord.SeqRecord
        reference sequence. If it is the path of an existing file, the
        file is read with ``SeqIO.read`` (fasta is tried first, then
        genbank). The resulting (or directly given) sequence is converted
        with ``seq2array`` and stored in ``self._ref``. Falsy values are
        ignored and leave the object untouched.

    Raises
    ------
    TypeError
        if ``in_ref`` is an existing file that can be parsed neither as
        fasta nor as genbank.
    """
    from os import PathLike

    # Only path-like values are probed on disk: handing an arbitrary object
    # (e.g. a Seq or SeqRecord) to an os.stat-based check raises TypeError.
    if in_ref and isinstance(in_ref, (str, bytes, PathLike)) and isfile(in_ref):
        ref_file = in_ref
        read_from_file = False
        for fmt in ['fasta', 'genbank']:
            try:
                in_ref = SeqIO.read(ref_file, fmt)
            except Exception:
                # wrong format for this file, try the next one
                continue
            else:
                self.logger("SequenceData: loaded reference sequence as %s format"%fmt,1)
                read_from_file = True
                break
        if not read_from_file:
            raise TypeError('SequenceData.ref: reference sequence file %s could not be parsed, '
                            'fasta and genbank formats are supported.' % (ref_file,))

    if in_ref:
        self._ref = seq2array(in_ref, fill_overhangs=False, word_length=self.word_length)
        # the length of the reference defines the full sequence length
        self.full_length = self._ref.shape[0]
        # values derived from a previous reference are no longer valid
        self.compressed_to_full_sequence_map = None
        self.multiplicity = None

示例3: swissprot_seq

# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def swissprot_seq(organism = 9606, isoforms = False):
    """
    Downloads the reviewed UniProt sequences of one organism.

    The table is requested from the `uniprot_basic` URL with the columns
    `id` and `sequence`. Its first line is discarded (presumably the
    column header) and every line that consists of exactly two
    tab-separated fields yields one `Seq` object.

    :param organism: Organism identifier inserted into the query
        (default 9606).
    :param bool isoforms: If set, the sequences returned by
        `get_isoforms` are added to the already loaded entries with
        `add_seq`; isoforms of identifiers missing from the table
        are ignored.

    :return: Dict with the first column of the table as keys and the
        `Seq` objects as values.
    """

    url = urls.urls['uniprot_basic']['url']
    query = {
        'query': 'organism:%s AND reviewed:yes' % str(organism),
        'format': 'tab',
        'columns': 'id,sequence'
    }
    download = curl.Curl(url, post = query, silent = False, timeout = 900)

    # everything after the first line of the response
    rows = download.result.split('\n')[1:]
    records = (row.strip().split('\t') for row in rows)

    result = {
        rec[0]: Seq(rec[0], rec[1])
        for rec in records
        if len(rec) == 2
    }

    if not isoforms:

        return result

    for uniprot, variants in iteritems(get_isoforms(organism = organism)):

        for isof_id, isof_seq in iteritems(variants):

            if uniprot in result:

                result[uniprot].add_seq(isof_seq, isof_id)

    return result
开发者ID:saezlab,项目名称:pypath,代码行数:40,代码来源:seq.py

示例4: get_biopython

# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def get_biopython(self, isoform = 1):
    """
    Returns one isoform of this protein as a Biopython `SeqRecord`.

    :param int isoform: Key of the isoform in `self.isof`; it is
        converted with `int` before the lookup.

    :return: A `Bio.SeqRecord.SeqRecord` with `self.protein` as its id
        and the isoform number stored under the `isoform` annotation.
        If Biopython can not be imported, a message is written to
        stdout and `None` is returned.

    :raises ValueError: If the requested isoform is not in `self.isof`.
    """

    isoform = int(isoform)

    if isoform not in self.isof:

        raise ValueError('No isoform %u available for protein `%s`.' % (
            isoform, self.protein))

    try:
        import Bio.Seq
        import Bio.SeqRecord

    except ImportError:
        sys.stdout.write('\t:: Module `Bio` (biopython) '
            'could not be imported.\n')
        sys.stdout.flush()
        return None

    # `Bio.Alphabet` was removed in Biopython 1.78; importing it there
    # raises ImportError, and `Seq` no longer accepts an alphabet.
    try:
        from Bio.Alphabet import ProteinAlphabet

    except ImportError:
        ProteinAlphabet = None

    if ProteinAlphabet is None:
        seq = Bio.Seq.Seq(self.isof[isoform])

    else:
        seq = Bio.Seq.Seq(self.isof[isoform], ProteinAlphabet())

    srec = Bio.SeqRecord.SeqRecord(seq, id = self.protein)

    if ProteinAlphabet is None:
        # without alphabets the molecule type is carried by an annotation
        srec.annotations['molecule_type'] = 'protein'

    srec.annotations['isoform'] = isoform

    return srec
开发者ID:saezlab,项目名称:pypath,代码行数:29,代码来源:seq.py

示例5: is_nucl_seq

# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def is_nucl_seq(sequence: Union[Seq, str]) -> bool:
    """ Decides from its content whether a sequence is a nucleotide sequence.

        The check is case-insensitive and treats A, C, G, T, and N as
        nucleotide characters.

        Arguments:
            sequence: the sequence to check, either a string or Bio.Seq

        Returns:
            True if fewer than 20% of the characters are something other
            than a nucleotide character, otherwise False (which includes
            the empty sequence)
    """
    lowered = str(sequence).lower()
    non_nucleotides = sum(1 for char in lowered if char not in "acgtn")
    return non_nucleotides < 0.2 * len(sequence)
开发者ID:antismash,项目名称:antismash,代码行数:15,代码来源:record_processing.py

示例6: write_out_informative_fasta

# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def write_out_informative_fasta(compress_seq, alignment, stripFile=None):
    """Write the informative alignment columns to a FASTA file.

    For every position listed in ``compress_seq['positions']`` (and not
    excluded via `stripFile`) the state of each sequence is collected,
    falling back to the reference state where a sequence has no entry for
    that position. A column is kept when, after ignoring '-' and 'N', it
    shows more than one distinct state and is not merely two states of
    which one occurs only once.

    Parameters
    ----------
    compress_seq : dict
        must provide the keys 'sequences' (per-sequence mapping of position
        to state), 'reference' (indexable by position) and 'positions'
    alignment : str
        path of the alignment; only its directory is used, the output is
        written next to it as 'informative_sites.fasta'
    stripFile : str, optional
        passed to ``load_mask_sites`` to obtain positions to exclude

    Returns
    -------
    str
        path of the FASTA file written. Records appear in the reverse of the
        iteration order of ``compress_seq['sequences']``.
    """
    from Bio import SeqIO
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq

    sequences = compress_seq['sequences']
    ref = compress_seq['reference']
    positions = compress_seq['positions']

    # Sites to leave out of the initial tree build, if any were requested
    strip_pos = load_mask_sites(stripFile) if stripFile else []

    seqNames = list(sequences.keys())

    # If true, a file mapping FASTA position to real position is written too
    printPositionMap = False
    sites = []
    pos = []

    for key in positions:
        if key in strip_pos:
            continue

        # one state per sequence; a plain try/except loop is kept for speed
        column = []
        for states in sequences.values():
            try:
                column.append(states[key])
            except KeyError:
                column.append(ref[key])

        # gaps and Ns do not count towards informativeness
        called = [state for state in column if state not in ('-', 'N')]
        states_seen, counts = np.unique(called, return_counts=True)
        n_states = len(states_seen)

        # keep: three or more states, or two states that each occur at least twice
        if n_states > 2 or (n_states == 2 and min(counts) != 1):
            sites.append(column)
            pos.append("\t".join((str(len(pos) + 1), str(key))))

    # Rotating turns columns into rows, last sequence first, hence the
    # reversed name list.
    align = np.rot90(np.asarray(sites))
    seqNamesCorr = seqNames[::-1]
    toFasta = []
    for idx, seq_name in enumerate(seqNamesCorr):
        toFasta.append(
            SeqRecord(seq=Seq("".join(align[idx])), id=seq_name, description='')
        )

    fasta_file = os.path.join(os.path.dirname(alignment), 'informative_sites.fasta')

    # FASTA output, to be read by raxml or iqtree
    SeqIO.write(toFasta, fasta_file, 'fasta')

    if printPositionMap:
        with open(fasta_file+".positions.txt", 'w', encoding='utf-8') as the_file:
            the_file.write("\n".join(pos))

    return fasta_file
开发者ID:nextstrain,项目名称:augur,代码行数:58,代码来源:tree.py

示例7: __init__

# 需要导入模块: import Bio [as 别名]
# 或者: from Bio import Seq [as 别名]
def __init__(self, aln, ref=None, logger=None, convert_upper=True,
                 sequence_length=None, compress=True, word_length=1, sequence_type=None,
                 fill_overhangs=True, seq_multiplicity=None, ambiguous=None, **kwargs):
    """Construct a sequence data object.

    Parameters
    ----------
    aln : Bio.Align.MultipleSeqAlignment, str
        alignment or file name
    ref : Seq, str
        sequence or file name
    logger : callable, optional
        logging function; ``simple_logger`` is used when none is given
    convert_upper : bool, optional
        convert all sequences to upper case, default true
    sequence_length : None, optional
        length of the sequence, only necessary when no alignment or ref is given
    compress : bool, optional
        compress identical alignment columns into one
    word_length : int
        length of state (typically 1 A,C,G,T, but could be 3 for codons)
    sequence_type : optional
        stored unchanged as ``self.sequence_type``
    fill_overhangs : bool
        treat gaps at either end of sequence as missing data
    seq_multiplicity : dict
        store the multiplicity of sequence, for example read count in a
        deep sequencing experiment; an empty dict is used when omitted
    ambiguous : byte
        character signifying missing data
    **kwargs
        only ``additional_constant_sites`` is read here (default 0)
    """
    self.logger = logger or simple_logger

    # state that is filled in later starts out unset
    for unset in ('_aln', '_ref', 'likely_alphabet',
                  'compressed_to_full_sequence_map', 'multiplicity', 'is_sparse'):
        setattr(self, unset, None)

    self.convert_upper = convert_upper
    self.compress = compress
    # possibly a dict mapping sequences to their read count/sample count
    self.seq_multiplicity = seq_multiplicity if seq_multiplicity else {}
    self.additional_constant_sites = kwargs.get('additional_constant_sites', 0)

    # if not specified, this will be set as the alignment_length or reference length
    self._full_length = None
    self.full_length = sequence_length
    self._compressed_length = None
    self.word_length = word_length
    self.fill_overhangs = fill_overhangs
    self.ambiguous = ambiguous
    self.sequence_type = sequence_type

    # NOTE(review): reference and alignment are assigned last, after all
    # settings above are in place - presumably via setters that use them; confirm
    self.ref = ref
    self.aln = aln
开发者ID:neherlab,项目名称:treetime,代码行数:55,代码来源:sequence_data.py


注：本文中的 Bio.Seq 方法示例由纯净天空整理自 GitHub/MSDocs 等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。