当前位置: 首页>>代码示例>>Python>>正文


Python SeqFeature.SeqFeature类代码示例

本文整理汇总了Python中Bio.SeqFeature.SeqFeature的典型用法代码示例。如果您正苦于以下问题:Python SeqFeature类的具体用法?Python SeqFeature怎么用?Python SeqFeature使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了SeqFeature类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: t_write_from_recs

 def t_write_from_recs(self):
     """Write out GFF3 from SeqRecord inputs.
     """
     seq = Seq("GATCGATCGATCGATCGATC")
     rec = SeqRecord(seq, "ID1")
     qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"],
                   "ID": "gene1"}
     sub_qualifiers = {"source": "prediction"}
     top_feature = SeqFeature(FeatureLocation(0, 20), type="gene", strand=1,
                                                       qualifiers=qualifiers)
     top_feature.sub_features = [SeqFeature(FeatureLocation(0, 5), type="exon", strand=1,
                                            qualifiers=sub_qualifiers),
                                 SeqFeature(FeatureLocation(15, 20), type="exon", strand=1,
                                            qualifiers=sub_qualifiers)]
     rec.features = [top_feature]
     out_handle = StringIO.StringIO()
     GFF.write([rec], out_handle)
     wrote_info = out_handle.getvalue().split("\n")
     assert wrote_info[0] == "##gff-version 3"
     assert wrote_info[1] == "##sequence-region ID1 1 20"
     assert wrote_info[2].split("\t") == ['ID1', 'prediction', 'gene', '1',
                                          '20', '10.0', '+', '.',
                                          'other=Some,annotations;ID=gene1']
     assert wrote_info[3].split("\t") == ['ID1', 'prediction', 'exon', '1', '5',
                                          '.', '+', '.', 'Parent=gene1']
开发者ID:JCVI-Cloud,项目名称:VICVB,代码行数:25,代码来源:test_GFFSeqIOFeatureAdder.py

示例2: mergeRecords

def mergeRecords(file): #adapted from SeqHandler by NF Alikhan (github.com/happykhan/seqhandler)

#SeqHandler is a script for merging, converting and splitting sequence files (Genbank, EMBL, fasta and others). Please use it to merge multi-Genbank files before running bwast.py

    filetype = determineFileType(file) #determine file type
    readInMultifasta = open(file, "r")
    records = list(SeqIO.parse(readInMultifasta, filetype))

    mergingFile = records[0]
    from Bio.SeqFeature import SeqFeature, FeatureLocation
    contigs = SeqFeature(FeatureLocation(0, len(mergingFile) ), type="fasta_record",\
                strand=1)
    contigs.qualifiers["note"] = records[0].name #pull out contig number of first contig
    mergingFile.features.append(contigs) #append first contig to mergingFile
    for nextRecord in records[1:]:
        contigs = SeqFeature(FeatureLocation(len(mergingFile), len(mergingFile) + len(nextRecord)), type="fasta_record",\
                strand=1)
        contigs.qualifiers["note"] = nextRecord.name 
        mergingFile.features.append(contigs) #append subsequent contigs to mergingFile
        mergingFile += nextRecord
    mergingFile.name = records[0].name
    mergingFile.description = records[0].description
    mergingFile.annotations = records[0].annotations

    for feature in mergingFile.features:
        if feature.type == 'source':
            mergingFile.features.remove(feature)
    contigs = SeqFeature(FeatureLocation(0, len(mergingFile)), type="source", strand=1)
    mergingFile.features.insert(0,contigs)
    merged_file = re.sub(r"\.\w+$", r".merged.fa", file)
    out_handle = open(merged_file, "w")
    SeqIO.write(mergingFile, out_handle, filetype)
    return merged_file
开发者ID:bawee,项目名称:bwast,代码行数:33,代码来源:bwast.py

示例3: mergeMod

def mergeMod(args):

    filetype = args.inFormat
    # Load file as SeqRecord
    int_handle = open(args.input, "r")
    recs = list(SeqIO.parse(int_handle, filetype))
    # For each SeqRecord, I.e. complete gbk annotation obj in file
    fgbk = recs[0]
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    d = SeqFeature(FeatureLocation(0, len(fgbk)), type="fasta_record", strand=1)
    d.qualifiers["note"] = recs[0].name
    fgbk.features.append(d)
    for l in recs[1:]:
        d = SeqFeature(FeatureLocation(len(fgbk), len(fgbk) + len(l)), type="fasta_record", strand=1)
        d.qualifiers["note"] = l.name
        fgbk.features.append(d)
        fgbk += l
    fgbk.name = recs[0].name
    fgbk.description = recs[0].description
    fgbk.annotations = recs[0].annotations
    if args.accession != None:
        fgbk.name = args.accession
    if args.ver != None:
        fgbk.id = fgbk.name + "." + args.ver
    for f in fgbk.features:
        if f.type == "source":
            fgbk.features.remove(f)
    d = SeqFeature(FeatureLocation(0, len(fgbk)), type="source", strand=1)
    fgbk.features.insert(0, d)
    outtype = filetype
    if args.outFormat != None:
        outtype = args.outFormat
    out_handle = open(args.output, "w")
    SeqIO.write(fgbk, out_handle, outtype)
开发者ID:happykhan,项目名称:seqhandler,代码行数:35,代码来源:SeqHandler.py

示例4: _findGeneInSeq

    def _findGeneInSeq(self, record):
        """Extract gene sequence from larger sequence (e.g. genomes)
by searching features."""
        if not record.features:
            # if there aren't any features, just return the record
            return record
        for feature in record.features:
            feature_names = []
            if 'gene' in feature.qualifiers.keys():
                feature_names.extend(feature.qualifiers['gene'])
            if 'gene_synonym' in feature.qualifiers.keys():
                feature_names.extend(feature.qualifiers['gene_synonym'])
            if 'product' in feature.qualifiers.keys():
                feature_names.extend(feature.qualifiers['product'])
            gene_names = [e.lower() for e in self.gene_names]
            feature_names = [e.lower() for e in feature_names]
            if set(gene_names) & set(feature_names):
                try:
                    extractor = SeqFeature(feature.location)
                    found_seq = extractor.extract(record)
                except ValueError:
                    # catch value errors raised for sequences
                    #  with "fuzzy" positions
                    # TODO: what are fuzzy positions and can I use
                    #  them?
                    return record
                else:
                    return found_seq
        return record
开发者ID:DomBennett,项目名称:pG-lt,代码行数:29,代码来源:download_tools.py

示例5: make_protein_feature

def make_protein_feature(feature_name, feature_start, feature_end, feature_type):
    ''' Returns sequence feature, using start, end, name and type as input
    '''
    feature = SeqFeature(FeatureLocation(int(feature_start), int(feature_end)), type=feature_type)
    if feature_type == "Region":
        feature.qualifiers = {'name': [feature_name]}
    return feature
开发者ID:yklsorok,项目名称:prot_evol_toolkit,代码行数:7,代码来源:add_features.py

示例6: seqBlastToFeatures

 def seqBlastToFeatures(self, blastDB, blastExe, seqFile, blastType = "blastn",scoreMin = 1e-3, logFile = None):
     '''
     Blast sequence file against blast database 
     parse files into records.
     This function may not work as well on very large blast comparisons because 
     it does a full read of the result for the conversion to features.
     '''
     print ">blast %s %s %s %s" % (self.blastDB, self.blastExe, seqFile, blastType) 
     blastRecords = self.seqBlast(seqFile, blastType = "blastn", scoreMin = 1e-3, logFile = None)
     
     result = []
     index = 0
     for r in blastRecords:
         recordFeatures = []
         for alignment in r.alignments:
             name = alignment.title
             query = r.query
             for hsp in alignment.hsps:
                 if hsp.expect < scoreMin:
                     (ts,ss) = hsp.frame
                     strand = ss
                     start = hsp.sbjct_start
                     end = hsp.sbjct_end
                     location = FeatureLocation(start,end)
                     feature = SeqFeature(id=query,location=location,strand=strand)
                     aMatch = hsp.query + "\n" + hsp.match + "\n" + hsp.sbjct
                     feature.qualifiers["query"] = hsp.query
                     feature.qualifiers["subject"] = hsp.sbjct
                     feature.qualifiers["alignment"] = aMatch
                     recordFeatures.append(feature)
         result.append(recordFeatures)
         index = index + 1
         
     return result
开发者ID:bionomicron,项目名称:Redirector,代码行数:34,代码来源:SequenceTools.py

示例7: test_translation_checks_cds

 def test_translation_checks_cds(self):
     """Test that a CDS feature is subject to respective checks."""
     seq = Seq.Seq("GGTTACACTTACCGATAATGTCTCTGATGA", generic_dna)
     f = SeqFeature(FeatureLocation(0, 30), type="CDS")
     f.qualifiers['transl_table'] = [11]
     with self.assertRaises(TranslationError):
         f.translate(seq)
开发者ID:HuttonICS,项目名称:biopython,代码行数:7,代码来源:test_SeqFeature.py

示例8: to_seqfeature

 def to_seqfeature(self):
     """Create a SeqFeature from the ProteinDomain Object."""
     feat = SeqFeature(location=FeatureLocation(self.start, self.end),
                       id=self.value)
     if hasattr(self, 'confidence'):
         feat.qualifiers['confidence'] = self.confidence
     return feat
开发者ID:HuttonICS,项目名称:biopython,代码行数:7,代码来源:PhyloXML.py

示例9: annotate_dna_reference

def annotate_dna_reference(ref, protein):
    '''Annotate DNA reference with the protein secondary structures'''
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    annotation_table = parse_secondary_structure(protein)

    if protein == 'gagpol':
        start_protein = ref.annotation['gag'].location.nofuzzy_start
    elif protein == 'vpu':
        # Uniprot starts one aa downstream of us
        start_protein = ref.annotation[protein].location.nofuzzy_start + 3
    else:
        start_protein = ref.annotation[protein].location.nofuzzy_start

    features = []
    for _, datum in annotation_table.iterrows():
        start_dna = datum['start'] * 3 + start_protein
        end_dna = datum['end'] * 3 + start_protein

        # Notice ribosomal slippage site
        if protein == 'gagpol':
            if start_dna >= slippage_site:
                start_dna -= 1
            if end_dna >= slippage_site:
                end_dna -= 1

        anno = SeqFeature(FeatureLocation(start_dna, end_dna), strand=+1)
        anno.type = datum['feature']
        features.append(anno)

    return features
开发者ID:iosonofabio,项目名称:HIV_fitness_landscape,代码行数:31,代码来源:secondary_structure_test.py

示例10: parse

	def parse(self):
		with open(self._file) as handle:
			genbank = SeqRecord(Seq.UnknownSeq(0))
			header_pattern = re.compile(r"ref\|(?P<id>.*?)\|:(?P<start>[0-9]+)-(?P<end>[0-9]+)\|(?P<description>.*?)\|\s*\[gene=(?P<gene>\S+)\]\s*\[locus_tag=(?P<locus_tag>\S+)\]\s*")
			first = True			
			for record in SeqIO.parse(handle, "fasta"):
				header = record.description
				match = header_pattern.match(header)
				if not match:
					self.errors.append("Invalid header: >" + header)
					continue
				
				if first:
					first = False
					genbank.id = match.group("id")
					genbank.name = match.group("id")
				
				feature = SeqFeature(FeatureLocation(int(match.group("start")), int(match.group("end"))), type = "gene")
				feature.qualifiers = {"locus_tag": match.group("locus_tag"),
							"gene": match.group("gene"),
							"note": match.group("description"),
							"sequence": record.seq}
				genbank.features.append(feature)
			
			return genbank
		return None
开发者ID:CyanoFactory,项目名称:CyanoFactoryKB,代码行数:26,代码来源:fasta_to_genbank.py

示例11: get_longest

def get_longest(seq_record, gene2isoforms):
	l = []
	c = 0;

	chrom = adjust_name(seq_record.name);	
	
	for gene, isoforms in gene2isoforms.iteritems():
		longest = max(isoforms, key = lambda i: sum([len(x) for x in i]))
		
		if(args.format == 'bed'):
			compound_to_bed(longest, chrom, gene)
			
		elif(args.format == 'fasta'):
			if(len(longest) > 1):
				location = CompoundLocation(longest, operator = "join")
			else:
				location = longest[0];
				
			feature = SeqFeature(location=location, type='utr', strand = longest[0].strand)
			
			#print longest[0].strand
			
			f = feature.extract(seq_record)
			f.name = gene
			f.id = gene
			f.description = gene
			l.append(f);
	return l;	
开发者ID:afilipch,项目名称:nrlbio,代码行数:28,代码来源:get_longest_3utr.py

示例12: test_deletion__overlapping_features

    def test_deletion__overlapping_features(self):
        # Example based on intersection of nei and arbB genes in MG1655.
        before_overlap = 'GCCCTGGCTGCCAGCA'
        overlap = 'CTAG'
        after_overlap = 'GCCGACCGCTTCGG'
        raw_seq_str = before_overlap + overlap + after_overlap
        seq = Seq(raw_seq_str, generic_dna)
        seq_record = SeqRecord(seq)

        feature_1_loc = FeatureLocation(0,
                len(before_overlap) + len(overlap), strand=1)
        feature_1 = SeqFeature(feature_1_loc, type='CDS', id='1')
        seq_record.features.append(feature_1)

        feature_2_loc = FeatureLocation(len(before_overlap), len(raw_seq_str),
                strand=1)
        feature_2 = SeqFeature(feature_2_loc, type='CDS', id='2')
        seq_record.features.append(feature_2)

        maker = VCFToGenbankMaker(seq_record, None, None)
        maker._update_genome_record_for_variant(
                len(before_overlap), overlap, '')

        # Assert the sequence is correct.
        EXPECTED_SEQ = before_overlap + after_overlap
        self.assertEqual(EXPECTED_SEQ, str(seq_record.seq))

        # Assert the feature annotations are still correct.
        EXPECTED_FEATURE_1_SEQ = before_overlap
        self.assertEqual(EXPECTED_FEATURE_1_SEQ,
                str(feature_1.extract(seq_record.seq)))

        EXPECTED_FEATURE_2_SEQ = after_overlap
        self.assertEqual(EXPECTED_FEATURE_2_SEQ,
                str(feature_2.extract(seq_record.seq)))
开发者ID:churchlab,项目名称:reference_genome_maker,代码行数:35,代码来源:vcf_to_genbank_test.py

示例13: create_gene_feature

def create_gene_feature(gene_name, feature_location, feature_qualifiers):
    """Creates a minimal SeqFeature to represent a gene.
    """
    gene_feature = SeqFeature(feature_location, type='gene')
    gene_feature.qualifiers = {'gene': [gene_name]}
    gene_feature.qualifiers = dict(gene_feature.qualifiers.items() +
            feature_qualifiers.items())
    return gene_feature
开发者ID:churchlab,项目名称:recoli_c321d_genome_annotation,代码行数:8,代码来源:annotate_lambda_prophage.py

示例14: makeSeqObjectsForTblastnNeighbors

def makeSeqObjectsForTblastnNeighbors(tblastn_id, clusterrunid, cur, N=200000):
    """
    Given a tBBLSATn ID and a dictionary from sanitized contig IDs (which is what will be
    present in the TBLASTN id) to non-sanitized IDs (which are what is in the database),
    returns a list of seq objects INCLUDING the TBLASTN hit itself (so that we can show that
    on the region drawing).

    We pick an N large enough to get at least one gene and then pick the closest one and get
    all of its neighbors with a call to makeSeqFeaturesForGeneNeighbors() and just tack the TBLASTN
    onto it.
    """
    # Lets first get the contig and start/stop locations (which tell us teh strand) out of
    # the TBLASTN id. This returns a ValueError if it fails which the calling function can catch if needed.
    sanitizedToNot = getSanitizedContigList(cur)

    contig, start, stop = splitTblastn(tblastn_id)
    if contig in sanitizedToNot:
        contig = sanitizedToNot[contig]

    start = int(start)
    stop = int(stop)

    # Create a seq object for the TBLASTN hit itself
    if start < stop:
        strand = +1
    else:
        strand = -1
    tblastn_feature = SeqFeature(FeatureLocation(start, stop), strand=strand, id=tblastn_id)
    tblastn_feature.qualifiers["cluster_id"] = -1

    # Find the neighboring genes.
    neighboring_genes = getGenesInRegion(contig, start - N, stop + N, cur)
    if len(neighboring_genes) == 0:
        sys.stderr.write(
            "WARNING: No neighboring genes found for TBLASTN hit %s within %d nucleotides in contig %s\n"
            % (tblastn_id, N, contig)
        )
        return [tblastn_feature]
    else:
        neighboring_geneinfo = getGeneInfo(neighboring_genes, cur)

    # Find the closest gene to ours and get the clusters for those neighbors based on the specific clusterrunid
    minlen = N
    mingene = None
    minstrand = None
    for geneinfo in neighboring_geneinfo:
        genestart = int(geneinfo[5])
        geneend = int(geneinfo[6])
        distance = min(abs(genestart - start), abs(geneend - start), abs(genestart - stop), abs(geneend - stop))
        if distance < minlen:
            mingene = geneinfo[0]
            minlen = distance

    neighboring_features = makeSeqFeaturesForGeneNeighbors(mingene, clusterrunid, cur)
    # Add the TBLASTN itself and return it.
    neighboring_features.append(tblastn_feature)
    return neighboring_features
开发者ID:BioPunk,项目名称:clusterDbAnalysis,代码行数:57,代码来源:db_makeNeighborhoodTree.py

示例15: get_gene_and_201bp_upstream

def get_gene_and_201bp_upstream(genefeature,genomeseq):
    mystart = genefeature.location.start
    myend = genefeature.location.end
    mystrand = genefeature.location.strand
    if mystrand == 1:
        newfeature = SeqFeature(FeatureLocation(mystart-201,myend),strand=mystrand)
    elif mystrand == -1:
        newfeature = SeqFeature(FeatureLocation(mystart,myend+201),strand=mystrand)
    return newfeature.extract(genomeseq)
开发者ID:rohanmaddamsetti,项目名称:STLE-analysis,代码行数:9,代码来源:align_replaced_mutations.py


注:本文中的Bio.SeqFeature.SeqFeature类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。