当前位置: 首页>>代码示例>>Python>>正文


Python SeqRecord.SeqRecord方法代码示例

本文整理汇总了Python中Bio.SeqRecord.SeqRecord方法的典型用法代码示例。如果您正苦于以下问题:Python SeqRecord.SeqRecord方法的具体用法?Python SeqRecord.SeqRecord怎么用?Python SeqRecord.SeqRecord使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.SeqRecord的用法示例。


在下文中一共展示了SeqRecord.SeqRecord方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_mut_sequence

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def test_mut_sequence():
    """Check the basic error model's mutation of a 125-base poly-A read.

    Both the stdlib and NumPy random generators are seeded with 42 before
    the model is built, and the first ten mutated bases are compared with
    a fixed expected string.
    """
    random.seed(42)
    np.random.seed(42)

    err_mod = basic.BasicErrorModel()

    template = Seq('AAAAA' * 25, IUPAC.unambiguous_dna)
    read = SeqRecord(template, id='read_1', description='test read')
    # One phred score of 5 per base (125 bases in total).
    read.letter_annotations["phred_quality"] = [5] * 125

    read.seq = err_mod.mut_sequence(read, 'forward')
    observed_prefix = str(read.seq[:10])
    assert observed_prefix == 'AAAACAGAAA'
开发者ID:HadrienG,项目名称:InSilicoSeq,代码行数:18,代码来源:test_error_model.py

示例2: main

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def main(args):
    """Split every FASTA record into unambiguous chunks and write them out.

    Records are read from ``args.infile``. A record is skipped when
    ``args.discard`` is non-empty and any of its regular expressions matches
    the start of the record ID. The remaining sequences are upper-cased and
    split on runs of characters other than A, C, G and T; pieces strictly
    longer than ``args.minlength`` are written to ``args.outfile`` in FASTA
    format with IDs of the form ``<record id>_chunk_<n>`` (n starts at 1).

    Args:
        args: Namespace providing ``infile``, ``outfile``, ``discard``,
            ``minlength`` and ``debug``.
    """
    for record in SeqIO.parse(args.infile, 'fasta'):
        # Stop at the first matching pattern instead of counting all matches.
        if args.discard and any(re.match(rx, record.id) for rx in args.discard):
            continue

        printlog(args.debug, "DEBUG: convert to upper case", record.id)
        sequence = str(record.seq).upper()
        printlog(args.debug, "DEBUG: split seq by Ns", record.id)
        subseqs = [ss for ss in re.split('[^ACGT]+', sequence) if len(ss) > args.minlength]
        printlog(args.debug, "DEBUG: print subseqs", record.id)
        for chunk_number, subseq in enumerate(subseqs, start=1):
            subid = '{:s}_chunk_{:d}'.format(record.id, chunk_number)
            subrecord = SeqRecord(Seq(subseq), subid, '', '')
            SeqIO.write(subrecord, args.outfile, 'fasta')
开发者ID:kevlar-dev,项目名称:kevlar,代码行数:19,代码来源:prep-genome.py

示例3: cast_to_str

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def cast_to_str(obj):
    """Convert a plain string, Seq, or SeqRecord into a sequence string.

    Args:
        obj (str, Seq, SeqRecord): Sequence given as text or as a Biopython
            object

    Returns:
        str: The input itself when it is already a string, otherwise the
        string form of the Seq (or of the record's ``seq`` attribute)

    Raises:
        ValueError: If ``obj`` is none of the supported types

    """
    if isinstance(obj, str):
        return obj
    if isinstance(obj, Seq):
        return str(obj)
    if not isinstance(obj, SeqRecord):
        raise ValueError('Must provide a string, Seq, or SeqRecord object.')
    return str(obj.seq)
开发者ID:SBRG,项目名称:ssbio,代码行数:21,代码来源:utils.py

示例4: cast_to_seq

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def cast_to_seq(obj, alphabet=IUPAC.extended_protein):
    """Convert a string, Seq, or SeqRecord into a Seq object.

    Args:
        obj (str, Seq, SeqRecord): Sequence given as text or as a Biopython
            object
        alphabet: Alphabet passed to ``Seq`` when ``obj`` is a string; it is
            not applied to inputs that are already Seq or SeqRecord objects

    Returns:
        Seq: ``obj`` itself for a Seq, its ``seq`` attribute for a SeqRecord,
        or a new Seq built from the upper-cased string

    Raises:
        ValueError: If ``obj`` is none of the supported types

    """
    if isinstance(obj, Seq):
        return obj
    if isinstance(obj, SeqRecord):
        return obj.seq
    if not isinstance(obj, str):
        raise ValueError('Must provide a string, Seq, or SeqRecord object.')
    return Seq(obj.upper(), alphabet)
开发者ID:SBRG,项目名称:ssbio,代码行数:23,代码来源:utils.py

示例5: seq

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def seq(self, s):
    """Assign the sequence of this object from a str, Seq, or SeqRecord.

    A falsy ``s`` clears the stored sequence. Otherwise, assignment is
    refused when a sequence file is associated with the object. For a
    SeqRecord, the record's metadata is copied into every field of this
    object that still holds its placeholder or is empty. Inputs of any
    other exact type are ignored.

    Raises:
        ValueError: If ``s`` is truthy and ``self.sequence_file`` is set.
    """
    if not s:
        self._seq = None
        return

    if self.sequence_file:
        raise ValueError('{}: unable to set sequence, sequence file is associated with this object'.format(self.id))

    # Exact type comparison (not isinstance) so subclasses are not matched.
    given_type = type(s)
    if given_type == str or given_type == Seq:
        self._seq = ssbio.protein.sequence.utils.cast_to_seq(obj=s)
        return

    if given_type == SeqRecord:
        self._seq = s.seq
        # Only overwrite fields that still carry the placeholder text.
        if self.name == '<unknown name>':
            self.name = s.name
        if self.description == '<unknown description>':
            self.description = s.description
        # Fill in the remaining metadata containers only when empty here.
        for field in ('dbxrefs', 'features', 'annotations', 'letter_annotations'):
            if not getattr(self, field):
                setattr(self, field, getattr(s, field))
开发者ID:SBRG,项目名称:ssbio,代码行数:27,代码来源:seqprop.py

示例6: test_prune_seqs_matching_alignment

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def test_prune_seqs_matching_alignment(self):
    """Only the record absent from the alignment should survive pruning.

    Three records are built (seq1, seq2, seq3) and the alignment holds
    seq1 and seq3, so the expected result is seq2 alone, with its sequence
    unchanged.
    """
    bases_by_name = {"seq1": "GTAC", "seq2": "CGTT", "seq3": "TAGC"}
    sequence = {
        name: SeqRecord(Seq(bases), name=name)
        for name, bases in bases_by_name.items()
    }
    aligned_records = [
        SeqRecord(Seq(bases_by_name[name]), name=name)
        for name in ("seq1", "seq3")
    ]
    alignment = MultipleSeqAlignment(aligned_records)

    result = align.prune_seqs_matching_alignment(sequence.values(), alignment)

    assert [r.name for r in result] == ["seq2"]
    for kept in result:
        assert kept.seq == sequence[kept.name].seq
开发者ID:nextstrain,项目名称:augur,代码行数:19,代码来源:test_align.py

示例7: fake_alignment

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def fake_alignment(T):
    """
    Build a placeholder alignment with one 'ACGT' record per terminal of T.

    Intended to satisfy treetime when it is used only for naming nodes.
    This is lifted from refine.py and ideally could be imported.

    Parameters
    ----------
    T : <class 'Bio.Phylo.BaseTree.Tree'>

    Returns
    -------
    <class 'Bio.Align.MultipleSeqAlignment'>
    """
    from Bio import SeqRecord, Seq, Align

    # Every terminal gets the same dummy sequence; id and name both carry
    # the terminal's name.
    placeholder_records = [
        SeqRecord.SeqRecord(seq=Seq.Seq('ACGT'), id=leaf.name, name=leaf.name, description='')
        for leaf in T.get_terminals()
    ]
    return Align.MultipleSeqAlignment(placeholder_records)
开发者ID:nextstrain,项目名称:augur,代码行数:21,代码来源:import_beast.py

示例8: count_records

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def count_records(input_object, format='fasta'):
    """Count SeqRecord objects from a file in the specified format.

    When a file name is given, the file is opened here and closed again
    before returning. A file object supplied by the caller is left open.

    :param input_object: A file object or a file name.
    :param format: Input format (fasta by default).
    :returns: Number of records in input file.
    :rtype: int

    """
    if isinstance(input_object, str):
        # Plain "r": the legacy "U" flag was removed from open() in Python
        # 3.11, and universal newlines are the text-mode default anyway.
        with open(input_object, "r") as handle:
            return sum(1 for _ in SeqIO.parse(handle, format))
    return sum(1 for _ in SeqIO.parse(input_object, format))
开发者ID:nanoporetech,项目名称:wub,代码行数:18,代码来源:seq.py

示例9: make_intron_supercontig

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def make_intron_supercontig(contig_info, gene, prefix, add_N=False):
    """Concatenate the listed contigs into one supercontig and write it out.

    Contigs are loaded from ``../<gene>_contigs.fasta`` and the result is
    written in FASTA format to ``sequences/intron/<gene>_supercontig.fasta``
    with the ID ``<prefix>-<gene>`` and an empty description.

    Args:
        contig_info: Sequence of rows; element 0 of a row is the contig ID
            and element 5 is the strand marker, ``"(+)"`` or ``"(-)"``.
            Contigs marked ``"(-)"`` are reverse-complemented before being
            appended.
        gene: Gene name used to build the input and output paths.
        prefix: Prefix for the ID of the output record.
        add_N: When true, ten ``N`` characters are inserted between
            consecutive contigs (not after the final one).

    Exits the process with status 1 if a row has any other strand marker.
    """
    cap3contigs = SeqIO.to_dict(SeqIO.parse("../{}_contigs.fasta".format(gene), 'fasta'))
    intron_supercontig = SeqRecord(Seq(''))
    last_index = len(contig_info) - 1
    for index, hit in enumerate(contig_info):
        strand = hit[5]
        if strand == "(+)":
            intron_supercontig += cap3contigs[hit[0]]
        elif strand == "(-)":
            intron_supercontig += cap3contigs[hit[0]].reverse_complement()
        else:
            sys.stderr.write("Strandedness not found!")
            sys.exit(1)
        # Decide "last" by position: comparing rows by value would also skip
        # the spacer after an earlier row that happens to equal the final one.
        if add_N and index != last_index:
            intron_supercontig += "NNNNNNNNNN"
    intron_supercontig.id = '{}-{}'.format(prefix, gene)
    intron_supercontig.description = ''
    SeqIO.write(intron_supercontig, 'sequences/intron/{}_supercontig.fasta'.format(gene), 'fasta')
开发者ID:mossmatters,项目名称:HybPiper,代码行数:18,代码来源:intronerate.py

示例10: remove_exons

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def remove_exons(gff_filename, supercontig_filename, mode="all"):
    '''Given a supercontig and corresponding annotation, remove the exon sequences.

    Exon coordinates are taken from tab-separated lines of the GFF file whose
    third column is "exon" (columns four and five hold start and end). The
    pieces of the supercontig lying between consecutive exons, plus the tail
    after the last exon, are concatenated in file order.

    Args:
        gff_filename: Path to the annotation file.
        supercontig_filename: Path to a FASTA file with exactly one record.
        mode: Accepted for interface compatibility. NOTE(review): the
            "intron" mode described upstream is not implemented in this
            function; the argument is currently not consulted.

    Returns:
        A SeqRecord carrying the supercontig's ID, an empty description and
        the exon-free sequence.
    '''
    exon_bounds = []
    # Context manager so the annotation file is closed once it has been read.
    with open(gff_filename) as gff:
        for line in gff:
            fields = line.rstrip().split("\t")
            if len(fields) > 2 and fields[2] == "exon":
                exon_bounds.append((int(fields[3]), int(fields[4])))

    supercontig = SeqIO.read(supercontig_filename, 'fasta')
    exonless_contig = SeqRecord(Seq(''), id=supercontig.id)
    start = 0
    for exon_start, exon_end in exon_bounds:
        # The -1 stops the kept slice one position before the exon's
        # start coordinate.
        exonless_contig += supercontig[start:exon_start - 1]
        start = exon_end
    exonless_contig += supercontig[start:]
    exonless_contig.description = ''
    return exonless_contig
开发者ID:mossmatters,项目名称:HybPiper,代码行数:22,代码来源:intronerate.py

示例11: transeq

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def transeq(data):
    """Translate one reading frame of a nucleotide record into a protein record.

    Args:
        data: Indexable pair ``(record, frame)``. ``frame`` is converted with
            ``int`` and must be in 0-5: frames 0-2 read the forward strand
            after dropping 0, 1 or 2 leading bases; frames 3-5 read the
            reverse complement after dropping 0, 1 or 2 trailing bases.

    Returns:
        A SeqRecord holding the translated sequence, whose ID is the input
        record's ID followed by ``_strand<offset><plus|minus>``.

    Raises:
        ValueError: If the frame is outside 0-5 (previously this surfaced as
            an UnboundLocalError at the return statement).
    """
    frame = int(data[1])
    record = data[0]
    if not 0 <= frame <= 5:
        raise ValueError("frame must be between 0 and 5, got {}".format(frame))

    # Frames 0-2 are the plus strand, 3-5 the minus strand; the remainder is
    # the number of bases trimmed to shift the frame.
    on_minus_strand, offset = divmod(frame, 3)
    if on_minus_strand:
        template = reverse_complement(record.seq[:len(record.seq) - offset])
        strand_label = "minus"
    else:
        template = record.seq[offset:]
        strand_label = "plus"

    prot = translate_frameshifted(template)
    record_id = record.id + "_strand{}{}".format(offset, strand_label)
    return SeqRecord(Seq(prot, IUPAC.protein), id=record_id)
开发者ID:lfaino,项目名称:LoReAn,代码行数:24,代码来源:proteinAlign.py

示例12: single_fasta

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def single_fasta(ref, wd_split):
    """
    From a fasta file make single files with each sequence

    Records are renamed ``seq1``, ``seq2``, ... in input order. Each renamed
    record is written to ``<wd_split>/<new name>.fasta`` and also appended to
    ``<ref>.rename.fasta``.

    Args:
        ref: Path to the input FASTA file.
        wd_split: Directory that receives the per-sequence files.

    Returns:
        Tuple of (list of per-sequence file paths, dict mapping each new
        name to the original record ID, path of the renamed combined FASTA).
    """
    single_fasta_list = []
    dict_ref_name = {}
    ref_rename = ref + ".rename.fasta"
    # Both handles are managed so they are closed even if parsing or writing
    # fails part-way through.
    with open(ref, 'r') as fasta_file, open(ref_rename, "w") as fh:
        for count, record in enumerate(SeqIO.parse(fasta_file, "fasta"), start=1):
            new_name = "seq" + str(count)
            dict_ref_name[new_name] = record.id
            new_rec = SeqRecord(record.seq, new_name, '', '')
            fasta_name = wd_split + '/' + new_name + '.fasta'
            single_fasta_list.append(fasta_name)
            with open(fasta_name, "w") as output_handle:
                SeqIO.write(new_rec, output_handle, "fasta")
            SeqIO.write(new_rec, fh, "fasta")
    return single_fasta_list, dict_ref_name, ref_rename
开发者ID:lfaino,项目名称:LoReAn,代码行数:25,代码来源:multithreadLargeFasta.py

示例13: get_seq_record

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def get_seq_record(self):
    """
    Builds a SeqRecord describing this hit.

    The record's sequence is the genome contig HSP sequence, its id is the
    AMR gene id, and its description lists the isolate, contig coordinates,
    database gene coordinates, HSP/gene lengths, pid and plength.
    :return: A SeqRecord for this hit.
    """
    hsp_sequence = Seq(self.get_genome_contig_hsp_seq())
    gene_id = self.get_amr_gene_id()

    description_template = (
        'isolate: {}, contig: {}, contig_start: {}, contig_end: {}, database_gene_start: {},'
        ' database_gene_end: {}, hsp/length: {}/{}, pid: {:0.2f}%, plength: {:0.2f}%')
    # Values are listed in the same order as the placeholders above.
    description_values = (
        self.get_genome_id(),
        self.get_genome_contig_id(),
        self.get_genome_contig_start(),
        self.get_genome_contig_end(),
        self.get_amr_gene_start(),
        self.get_amr_gene_end(),
        self.get_hsp_length(),
        self.get_amr_gene_length(),
        self.get_pid(),
        self.get_plength(),
    )
    return SeqRecord(hsp_sequence, id=gene_id,
                     description=description_template.format(*description_values))
开发者ID:phac-nml,项目名称:staramr,代码行数:21,代码来源:AMRHitHSP.py

示例14: generate_pan_genome_reference

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def generate_pan_genome_reference(G, output_dir, split_paralogs=False):
    """Write one representative DNA sequence per graph node to a FASTA file.

    For every node the longest entry of its ``dna`` attribute is emitted
    under the node's ``name``. Unless ``split_paralogs`` is true, a node is
    skipped when its first centroid was already seen on an earlier node.
    The output path is ``output_dir`` directly followed by
    ``pan_genome_reference.fa`` (no separator is inserted).
    """
    seen_centroids = set()
    records = []

    for node in G.nodes():
        attributes = G.nodes[node]
        if not split_paralogs and attributes['centroid'][0] in seen_centroids:
            continue
        longest_dna = max(attributes['dna'], key=len)
        records.append(
            SeqRecord(Seq(longest_dna, generic_dna),
                      id=attributes['name'],
                      description=""))
        seen_centroids.update(attributes['centroid'])

    with open(output_dir + "pan_genome_reference.fa", 'w') as outfile:
        SeqIO.write(records, outfile, "fasta")
开发者ID:gtonkinhill,项目名称:panaroo,代码行数:23,代码来源:generate_output.py

示例15: translate_sequences

# 需要导入模块: from Bio import SeqRecord [as 别名]
# 或者: from Bio.SeqRecord import SeqRecord [as 别名]
def translate_sequences(sequence_dic):
    """Translate every coding sequence in a mapping into a protein record.

    Args:
        sequence_dic: Mapping from strain ID to a record exposing ``seq``.

    Returns:
        List of SeqRecord objects, one per entry, whose ``id`` and
        ``description`` are both the strain ID. A single trailing ``*`` is
        removed from each translation.

    Raises:
        ValueError: If a sequence length is not a multiple of three.
    """
    protein_list = []
    for strain_id in sequence_dic:
        sequence_record = sequence_dic[strain_id]
        if (len(sequence_record.seq) % 3) != 0:
            raise ValueError(
                "Coding sequence not divisible by 3, is it complete?!")
        protein_sequence = translate(str(sequence_record.seq))
        # endswith() is safe for an empty translation, where indexing [-1]
        # would raise IndexError.
        if protein_sequence.endswith("*"):
            protein_sequence = protein_sequence[:-1]
        if "*" in protein_sequence:
            # An internal stop codon is reported but deliberately tolerated.
            print(sequence_record)
            print(protein_sequence)
        protein_record = SeqRecord(Seq(protein_sequence),
                                   id=strain_id,
                                   description=strain_id)
        protein_list.append(protein_record)
    return protein_list
开发者ID:gtonkinhill,项目名称:panaroo,代码行数:21,代码来源:prokka.py


注:本文中的Bio.SeqRecord.SeqRecord方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。