当前位置: 首页>>代码示例>>Python>>正文


Python SeqFeature.extract方法代码示例

本文整理汇总了Python中Bio.SeqFeature.SeqFeature.extract方法的典型用法代码示例。如果您正苦于以下问题:Python SeqFeature.extract方法的具体用法?Python SeqFeature.extract怎么用?Python SeqFeature.extract使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.SeqFeature.SeqFeature的用法示例。


在下文中一共展示了SeqFeature.extract方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_deletion__overlapping_features

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
    def test_deletion__overlapping_features(self):
        # Example based on intersection of nei and arbB genes in MG1655.
        before_overlap = 'GCCCTGGCTGCCAGCA'
        overlap = 'CTAG'
        after_overlap = 'GCCGACCGCTTCGG'
        raw_seq_str = before_overlap + overlap + after_overlap
        seq = Seq(raw_seq_str, generic_dna)
        seq_record = SeqRecord(seq)

        feature_1_loc = FeatureLocation(0,
                len(before_overlap) + len(overlap), strand=1)
        feature_1 = SeqFeature(feature_1_loc, type='CDS', id='1')
        seq_record.features.append(feature_1)

        feature_2_loc = FeatureLocation(len(before_overlap), len(raw_seq_str),
                strand=1)
        feature_2 = SeqFeature(feature_2_loc, type='CDS', id='2')
        seq_record.features.append(feature_2)

        maker = VCFToGenbankMaker(seq_record, None, None)
        maker._update_genome_record_for_variant(
                len(before_overlap), overlap, '')

        # Assert the sequence is correct.
        EXPECTED_SEQ = before_overlap + after_overlap
        self.assertEqual(EXPECTED_SEQ, str(seq_record.seq))

        # Assert the feature annotations are still correct.
        EXPECTED_FEATURE_1_SEQ = before_overlap
        self.assertEqual(EXPECTED_FEATURE_1_SEQ,
                str(feature_1.extract(seq_record.seq)))

        EXPECTED_FEATURE_2_SEQ = after_overlap
        self.assertEqual(EXPECTED_FEATURE_2_SEQ,
                str(feature_2.extract(seq_record.seq)))
开发者ID:churchlab,项目名称:reference_genome_maker,代码行数:37,代码来源:vcf_to_genbank_test.py

示例2: test_update_genome_record_for_variant__overlapping_features

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
    def test_update_genome_record_for_variant__overlapping_features(self):
        """Tests handling a record that lands in a region of overlapping
        features.
        """
        # Example based on intersection of nei and arbB genes in MG1655.
        before_overlap = 'GCCCTGGCTGCCAGCA'
        overlap = 'CTAG'
        after_overlap = 'GCCGACCGCTTCGG'
        raw_seq_str = before_overlap + overlap + after_overlap
        seq = Seq(raw_seq_str, generic_dna)
        seq_record = SeqRecord(seq)

        feature_1_loc = FeatureLocation(0,
                len(before_overlap) + len(overlap), strand=1)
        feature_1 = SeqFeature(feature_1_loc, type='CDS', id='1')
        seq_record.features.append(feature_1)

        feature_2_loc = FeatureLocation(len(before_overlap), len(raw_seq_str),
                strand=1)
        feature_2 = SeqFeature(feature_2_loc, type='CDS', id='2')
        seq_record.features.append(feature_2)

        maker = VCFToGenbankMaker(seq_record, None, None)
        overlap_replacement = 'TTAA'
        maker._update_genome_record_for_variant(len(before_overlap), overlap,
                overlap_replacement)

        # Features changed, so requery them.
        feature_1 = None
        feature_2 = None
        for feature in seq_record.features:
            if feature.id == '1':
                feature_1 = feature
            elif feature.id == '2':
                feature_2 = feature
        assert feature_1
        assert feature_2

        # Assert the sequence is correct.
        EXPECTED_SEQ = before_overlap + overlap_replacement + after_overlap
        self.assertEqual(EXPECTED_SEQ, str(seq_record.seq))

        # Feature added to represent swap.
        # self.assertEqual(3, len(seq_record.features))

        # Assert the feature annotations are still correct.
        EXPECTED_FEATURE_1_SEQ = before_overlap + overlap_replacement
        self.assertEqual(EXPECTED_FEATURE_1_SEQ,
                str(feature_1.extract(seq_record.seq)))

        EXPECTED_FEATURE_2_SEQ = overlap_replacement + after_overlap
        self.assertEqual(EXPECTED_FEATURE_2_SEQ,
                str(feature_2.extract(seq_record.seq)))
开发者ID:churchlab,项目名称:reference_genome_maker,代码行数:55,代码来源:vcf_to_genbank_test.py

示例3: get_longest

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def get_longest(seq_record, gene2isoforms):
	l = []
	c = 0;

	chrom = adjust_name(seq_record.name);	
	
	for gene, isoforms in gene2isoforms.iteritems():
		longest = max(isoforms, key = lambda i: sum([len(x) for x in i]))
		
		if(args.format == 'bed'):
			compound_to_bed(longest, chrom, gene)
			
		elif(args.format == 'fasta'):
			if(len(longest) > 1):
				location = CompoundLocation(longest, operator = "join")
			else:
				location = longest[0];
				
			feature = SeqFeature(location=location, type='utr', strand = longest[0].strand)
			
			#print longest[0].strand
			
			f = feature.extract(seq_record)
			f.name = gene
			f.id = gene
			f.description = gene
			l.append(f);
	return l;	
开发者ID:afilipch,项目名称:nrlbio,代码行数:30,代码来源:get_longest_3utr.py

示例4: _findGeneInSeq

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
    def _findGeneInSeq(self, record):
        """Extract gene sequence from larger sequence (e.g. genomes)
by searching features."""
        if not record.features:
            # if there aren't any features, just return the record
            return record
        for feature in record.features:
            feature_names = []
            if 'gene' in feature.qualifiers.keys():
                feature_names.extend(feature.qualifiers['gene'])
            if 'gene_synonym' in feature.qualifiers.keys():
                feature_names.extend(feature.qualifiers['gene_synonym'])
            if 'product' in feature.qualifiers.keys():
                feature_names.extend(feature.qualifiers['product'])
            gene_names = [e.lower() for e in self.gene_names]
            feature_names = [e.lower() for e in feature_names]
            if set(gene_names) & set(feature_names):
                try:
                    extractor = SeqFeature(feature.location)
                    found_seq = extractor.extract(record)
                except ValueError:
                    # catch value errors raised for sequences
                    #  with "fuzzy" positions
                    # TODO: what are fuzzy positions and can I use
                    #  them?
                    return record
                else:
                    return found_seq
        return record
开发者ID:DomBennett,项目名称:pG-lt,代码行数:31,代码来源:download_tools.py

示例5: get_gene_and_201bp_upstream

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def get_gene_and_201bp_upstream(genefeature,genomeseq):
    mystart = genefeature.location.start
    myend = genefeature.location.end
    mystrand = genefeature.location.strand
    if mystrand == 1:
        newfeature = SeqFeature(FeatureLocation(mystart-201,myend),strand=mystrand)
    elif mystrand == -1:
        newfeature = SeqFeature(FeatureLocation(mystart,myend+201),strand=mystrand)
    return newfeature.extract(genomeseq)
开发者ID:rohanmaddamsetti,项目名称:STLE-analysis,代码行数:11,代码来源:align_replaced_mutations.py

示例6: find_cds

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def find_cds ():
    seq_des = str(record_dict[keys].description).split("|")
    for i in seq_des:
        if re.match("CDS", i):
            feature, cds_start, cds_end = re.split(":|-", i)
    cds_feature = SeqFeature(FeatureLocation(int(cds_start)-1,int(cds_end)-1),
                type=str(feature))
    cds_sequence = cds_feature.extract(record_dict[keys].seq)
    print cds_sequence.translate()
    return cds_start, cds_end, cds_sequence
开发者ID:bluecerebudgerigar,项目名称:randomproteinsnp,代码行数:12,代码来源:randomproteinsnp12.py

示例7: retrieveCompositeSequence

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def retrieveCompositeSequence(seq_record,seqList) :
    # true seq 
    listePosition = list()
    for node in seqList :
        seq,coord = node.split(":")
        start,end = coord.split("..")
        listePosition.append(int(float(start)))
        listePosition.append(int(float(end)))

    start = min(listePosition)
    end = max(listePosition)
    f = SeqFeature(FeatureLocation(start,end))
    seq = f.extract(seq_record)
    seqId = seq_record.id+"|"+str(start)+"_"+str(end)
    return SeqRecord(seq=seq.seq,id=seqId,description="")
开发者ID:raphael-upmc,项目名称:network,代码行数:17,代码来源:componentDetection.py

示例8: retrievePartialSequence

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def retrievePartialSequence(seqList,seq2coord,gi2domain,gi2phylum,gi2species,seqId2seqRecord) :
    # true seq 
    seq_set = set()
    for node in seqList :
        seq,coord = node.split(":")
        seq_set.add(seq)
#    print len(seq_set)

    # get fasta
    seqListExtracted = list()


    for seqId in seq_set :
        if seqId in seqId2seqRecord :
#            print seq_record.id
            seq_record = seqId2seqRecord[seqId]
            for node in seqList :
#                print "\t"+node.split(":")[0]
                if seq_record.id == node.split(":")[0] :
                    # extracting subseq thanks to coordinates
#                    print "\t\t"+seq_record.id+"\t"+node+"\t"+str(seq2coord[node])
                    start = seq2coord[node][0]
                    end = seq2coord[node][1]
                    f = SeqFeature(FeatureLocation(int(start),int(end) ) )
                    seq = f.extract(seq_record)
 #                   print seq
                    gi = node.split(":")[0]
                    species = gi2species[gi]
                    phylum = gi2phylum[gi]
                    seqId = gi+"|"+phylum+"|"+species+"|"+node.split(":")[1].replace("..","_")
                    
                    seqRecord = SeqRecord(seq=seq.seq,id=seqId,description="")
                    seqListExtracted.append(seqRecord)
                    seqList.remove(node)
                    break
                else :
                    continue
        else :
            continue
    return seqListExtracted
开发者ID:raphael-upmc,项目名称:network,代码行数:42,代码来源:componentDetection.py

示例9: extract_sequences_one_sample

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def extract_sequences_one_sample(args):
  '''
    Function for extracting protein sequences given annotated regions and produce fasta files that can be used for clustering
  '''
  (fasta_path,annotation_path,sample, output_dir)=args
  domainInfo=load_annotation_pfam(annotation_path)
  print "Generating domain fasta sequences for "+sample+" ..."
  from Bio import SeqIO
  from Bio.SeqFeature import SeqFeature, FeatureLocation
  from Bio.SeqRecord import SeqRecord 
  (annot,start,stop,strand,evalue)=domainInfo
  record_dict=index_fasta(fasta_path)
  recordlist=[]
  outfilename=output_dir +'/forClustering/'+ sample +'.fasta'
  outhandle=open(outfilename,'w')
  for domainID in annot.keys():
      for i in range(len(annot[domainID])):
          domain=annot[domainID][i]
          try:
              seq=record_dict[domain]
          except KeyError:
             print "Error: " + domain + " not in fasta file.\n"
             break    
          a=start[domainID][i]
          b=stop[domainID][i]
          seq_strand=strand[domainID][i]
          seq_evalue=evalue[domainID][i]
          if seq_strand in '+':
             domain_feature = SeqFeature(FeatureLocation(a-1, b-1), type="domain", strand=1)
          elif seq_strand in '-':
               domain_feature = SeqFeature(FeatureLocation(a-1, b-1), type="domain", strand=-1)
          feature_seq = domain_feature.extract(seq)
          feature_seq.id=feature_seq.id+' '+domainID+' '+seq_evalue 
          recordlist.append(feature_seq)
  
  
  SeqIO.write(recordlist, outhandle, "fasta")
  outhandle.close()
  
  print "Done"
开发者ID:tobbeost,项目名称:hirbin,代码行数:42,代码来源:clusterBinsToSubbins.py

示例10: test_get_codon_feature_location__reverse_strand

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
    def test_get_codon_feature_location__reverse_strand(self):
        FEATURE_SEQ_RAW = reverse_complement('ATGTTTGGGTAG')
        SEQ = Seq('CCCCCC' + FEATURE_SEQ_RAW + 'AGTA', generic_dna)
        seq_record = SeqRecord(SEQ)
        FEATURE_1_ID = '1'
        FEATURE_1_LOC = FeatureLocation(6, 18)
        feature_1 = SeqFeature(FEATURE_1_LOC, type='CDS',
                id=FEATURE_1_ID, strand=-1)
        add_feature_to_seq_record(seq_record, feature_1)

        # Sanity check for the feature sequence.
        feature_seq = str(feature_1.extract(seq_record.seq))
        self.assertEqual('ATG', feature_seq[0:3])
        self.assertEqual('TTT', feature_seq[3:6])

        CODON_0_FEATURE_LOCATION = get_codon_feature_location(feature_1, 0)
        self.assertEqual(15, CODON_0_FEATURE_LOCATION.start)
        self.assertEqual(18, CODON_0_FEATURE_LOCATION.end)

        CODON_1_FEATURE_LOCATION = get_codon_feature_location(feature_1, 1)
        self.assertEqual(12, CODON_1_FEATURE_LOCATION.start)
        self.assertEqual(15, CODON_1_FEATURE_LOCATION.end)
开发者ID:churchlab,项目名称:recoli_c321d_genome_annotation,代码行数:24,代码来源:biopython_util_test.py

示例11: annotate_like_HXB2

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def annotate_like_HXB2(refname, VERBOSE=0):
    '''Annotate copying from HXB2'''
    hxb2 = load_custom_reference('HXB2', 'gb')
    ref = load_custom_reference(refname, 'fasta')
    refs = str(ref.seq)

    def get_sublocation(sublocation):
        hxb2_seq = sublocation.extract(hxb2)
        ref_seq = trim_to_refseq(refs, hxb2_seq).replace('-', '')
        start = refs.find(ref_seq)
        end = start + len(ref_seq)
        return FeatureLocation(start, end, strand=+1)

    for fea in hxb2.features:
        if VERBOSE >= 1:
            print fea.id
        loc = [get_sublocation(loc) for loc in fea.location.parts]
        if len(loc) == 1:
            loc = loc[0]
        else:
            loc = CompoundLocation(loc)

        feature = SeqFeature(loc, type=fea.type, id=fea.id)

        # Test length of old and new
        if fea.id not in ["LTR5'", "LTR3'", 'V4']:
            L1 = len(fea.extract(hxb2))
            L2 = len(feature.extract(ref))
            s = str(L2)+' vs '+str(L1)
            if 1.0 * L2 / L1 < 0.9:
                raise ValueError('Feature: '+fea.id+' is too short: '+s)
            elif 1.0 * L2 / L1 > 1.1:
                raise ValueError('Feature: '+fea.id+' is too long: '+s)

        ref.features.append(feature)

    return ref
开发者ID:iosonofabio,项目名称:hivwholeseq,代码行数:39,代码来源:annotate_like_HXB2.py

示例12: TSS

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
        # if we had a TSS (not ever gene does)
        if tss:
            # open the fasta file sequence.fasta
            for seq_record in SeqIO.parse("sequence.fasta", "fasta"):
                # check from genestart to TSS is forward or tss to gene start if backwards with -1 or 1 strange appropraitely
                if direction is "-":
                    # print(tss)

                    test_feature = SeqFeature(FeatureLocation(int(geneStarts[i]), int(tss)), type="gene", strand=-1)
                else:
                    test_feature = SeqFeature(FeatureLocation(int(tss), int(geneStarts[i])), type="gene", strand=1)

                # test_feature=SeqFeature(FeatureLocation(40417,TSS),type="gene",strand=strandDirection)

                # example_seq now contains the sequence which we need to search through
                example_seq = test_feature.extract(seq_record)

                print (example_seq.seq)

                # need to transcribe the sequence
                sequence = str(example_seq.seq.transcribe())
                print sequence

                letters = ["U", "A", "C", "G"]

                # default the stalling position to x
                stallingPosition = ["x", "x", "x", "x"]

                # for each letter in UACG
                for letter in letters:
开发者ID:SiFTW,项目名称:transcriptionalStalling,代码行数:32,代码来源:stallingNucleotide.py

示例13: SeqFeature

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
                                 type=typ,
                                 id=name)

            else:
                fea = SeqFeature(CompoundLocation([FeatureLocation(edge[0], edge[1], strand=+1)
                                                   for edge in edges]),
                                 type=typ,
                                 id=name)
            
            seqnew.features.append(fea)

    
    print 'Sanity checks'
    for fea in seqnew.features:
        if fea.type in ('gene', 'protein'):
            feaseq = fea.extract(seqnew)

            # vpr has an additional T in HXB2
            if fea.id == 'vpr':
                assert ((len(feaseq) - 1) % 3) == 0
                T_pos = 5771
                prot = (feaseq[: T_pos - fea.location.nofuzzy_start].seq + \
                        feaseq[T_pos + 1 - fea.location.nofuzzy_start:].seq).translate()
            else:
                assert (len(feaseq) % 3) == 0
                prot = feaseq.seq.translate()


            # These genes contain premature stops in HXB2
            if fea.id in ('tat', 'nef'):
                assert prot[-1] == '*'
开发者ID:iosonofabio,项目名称:hivwholeseq,代码行数:33,代码来源:annotate_HXB2.py

示例14: gene2features

# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def gene2features(r, gene, gene2position, gene2product, start, end, gcode, partialyes, verbose):
    """
    """
    contig, CDSs, gffstrand, function, frames = gene2position[gene]
    if gffstrand in ('1','+'):
        strand = +1
    else:
        strand = -1
        CDSs.reverse()
    '''#add stop codon if not partial seq
    if strand==1 and CDSs[-1][1]+3 <= len(r.seq):
            CDSs[-1][1] += 3
    elif strand==-1 and CDSs[0][0]-3 > 0:
        CDSs[0][0] -= 3'''
    cdsloc, mrnaloc = get_locations(CDSs, start, end, strand)
    #add gene
    geneid = gene #".".join(gene.split('.')[:-1])
    #get product
    product = "hypothetical protein"
    if geneid in gene2product:
        product = gene2product[geneid]    
    if gene.endswith('.t1'):
        sf = SeqFeature(FeatureLocation(BeforePosition(start-1),AfterPosition(end)), strand=strand, type='gene', id=geneid)
        sf.qualifiers={"locus_tag": geneid, "gene": geneid, "product": product}
        r.features.append(sf)
    #get mRNA sf
    sf = SeqFeature(mrnaloc, type='mRNA', id=gene)
    sf.qualifiers={"locus_tag": geneid, "gene": geneid, "product": product} #"protein_id": gene
    r.features.append(sf)
    #get CDS sf
    sf = SeqFeature(cdsloc, type='CDS', id=gene)
    #get translation
    seq = sf.extract(r.seq)
    aa = str(seq.translate(table=gcode))
    #solve non-triplets issue
    if len(seq) % 3:
        if strand==1:
            end   -= len(seq) % 3
        else:
            start += len(seq) % 3
    ##check for partial sequence - no M as first or no * as last aa
    partial = 0
    #both ends partial
    if aa[0]!="M" and aa[-1]!="*":
        partial = 1
        sf.location = FeatureLocation(BeforePosition(start-1),AfterPosition(end))
    #left end partial
    elif aa[0]!="M" and strand==1 or aa[-1]!="*" and strand==-1:
        partial = 1                
        sf.location = FeatureLocation(BeforePosition(start-1),end)
    #right end partial
    elif aa[-1]!="*" and strand==1 or aa[0]!="M" and strand==-1:
        partial = 1
        sf.location = FeatureLocation(start-1,AfterPosition(end))
    #strip stop codon
    aa = aa.strip("*")
    #replace internal stop codons by X
    if "*" in aa:
        if verbose:
            sys.stderr.write("[Warning] Stop codon(s) in: %s. Skipped!\n" % gene)
        return r
        #aa = aa.replace("*","X")
    sf.qualifiers = {'transl_table': gcode, "locus_tag": geneid, "gene": geneid, "product": product, "translation": aa} #"protein_id": gene,
    if function:
        sf.qualifiers['note'] = function
    #inform about partial entries
    if partial:
        #skip if not partial are allowed
        if not partialyes:
            return r
        if aa[0]!="M":
            sf.qualifiers['codon_start'] = 1
        sf.qualifiers['product']    += ", partial cds"
        if verbose:
            sys.stderr.write("[Warning] Partial sequence: %s\n" % (gene,))
            #sys.stderr.write("[Warning] Partial sequence: %s %s\n" % (gene,sf))
    #add to features
    r.features.append(sf)
    return r 
开发者ID:jpmtavares,项目名称:bin,代码行数:81,代码来源:gtf2embl.py


注:本文中的Bio.SeqFeature.SeqFeature.extract方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。