本文整理汇总了Python中Bio.SeqFeature.SeqFeature.extract方法的典型用法代码示例。如果您正苦于以下问题:Python SeqFeature.extract方法的具体用法?Python SeqFeature.extract怎么用?Python SeqFeature.extract使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.SeqFeature.SeqFeature
的用法示例。
在下文中一共展示了SeqFeature.extract方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_deletion__overlapping_features
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def test_deletion__overlapping_features(self):
# Example based on intersection of nei and arbB genes in MG1655.
before_overlap = 'GCCCTGGCTGCCAGCA'
overlap = 'CTAG'
after_overlap = 'GCCGACCGCTTCGG'
raw_seq_str = before_overlap + overlap + after_overlap
seq = Seq(raw_seq_str, generic_dna)
seq_record = SeqRecord(seq)
feature_1_loc = FeatureLocation(0,
len(before_overlap) + len(overlap), strand=1)
feature_1 = SeqFeature(feature_1_loc, type='CDS', id='1')
seq_record.features.append(feature_1)
feature_2_loc = FeatureLocation(len(before_overlap), len(raw_seq_str),
strand=1)
feature_2 = SeqFeature(feature_2_loc, type='CDS', id='2')
seq_record.features.append(feature_2)
maker = VCFToGenbankMaker(seq_record, None, None)
maker._update_genome_record_for_variant(
len(before_overlap), overlap, '')
# Assert the sequence is correct.
EXPECTED_SEQ = before_overlap + after_overlap
self.assertEqual(EXPECTED_SEQ, str(seq_record.seq))
# Assert the feature annotations are still correct.
EXPECTED_FEATURE_1_SEQ = before_overlap
self.assertEqual(EXPECTED_FEATURE_1_SEQ,
str(feature_1.extract(seq_record.seq)))
EXPECTED_FEATURE_2_SEQ = after_overlap
self.assertEqual(EXPECTED_FEATURE_2_SEQ,
str(feature_2.extract(seq_record.seq)))
示例2: test_update_genome_record_for_variant__overlapping_features
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def test_update_genome_record_for_variant__overlapping_features(self):
"""Tests handling a record that lands in a region of overlapping
features.
"""
# Example based on intersection of nei and arbB genes in MG1655.
before_overlap = 'GCCCTGGCTGCCAGCA'
overlap = 'CTAG'
after_overlap = 'GCCGACCGCTTCGG'
raw_seq_str = before_overlap + overlap + after_overlap
seq = Seq(raw_seq_str, generic_dna)
seq_record = SeqRecord(seq)
feature_1_loc = FeatureLocation(0,
len(before_overlap) + len(overlap), strand=1)
feature_1 = SeqFeature(feature_1_loc, type='CDS', id='1')
seq_record.features.append(feature_1)
feature_2_loc = FeatureLocation(len(before_overlap), len(raw_seq_str),
strand=1)
feature_2 = SeqFeature(feature_2_loc, type='CDS', id='2')
seq_record.features.append(feature_2)
maker = VCFToGenbankMaker(seq_record, None, None)
overlap_replacement = 'TTAA'
maker._update_genome_record_for_variant(len(before_overlap), overlap,
overlap_replacement)
# Features changed, so requery them.
feature_1 = None
feature_2 = None
for feature in seq_record.features:
if feature.id == '1':
feature_1 = feature
elif feature.id == '2':
feature_2 = feature
assert feature_1
assert feature_2
# Assert the sequence is correct.
EXPECTED_SEQ = before_overlap + overlap_replacement + after_overlap
self.assertEqual(EXPECTED_SEQ, str(seq_record.seq))
# Feature added to represent swap.
# self.assertEqual(3, len(seq_record.features))
# Assert the feature annotations are still correct.
EXPECTED_FEATURE_1_SEQ = before_overlap + overlap_replacement
self.assertEqual(EXPECTED_FEATURE_1_SEQ,
str(feature_1.extract(seq_record.seq)))
EXPECTED_FEATURE_2_SEQ = overlap_replacement + after_overlap
self.assertEqual(EXPECTED_FEATURE_2_SEQ,
str(feature_2.extract(seq_record.seq)))
示例3: get_longest
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def get_longest(seq_record, gene2isoforms):
l = []
c = 0;
chrom = adjust_name(seq_record.name);
for gene, isoforms in gene2isoforms.iteritems():
longest = max(isoforms, key = lambda i: sum([len(x) for x in i]))
if(args.format == 'bed'):
compound_to_bed(longest, chrom, gene)
elif(args.format == 'fasta'):
if(len(longest) > 1):
location = CompoundLocation(longest, operator = "join")
else:
location = longest[0];
feature = SeqFeature(location=location, type='utr', strand = longest[0].strand)
#print longest[0].strand
f = feature.extract(seq_record)
f.name = gene
f.id = gene
f.description = gene
l.append(f);
return l;
示例4: _findGeneInSeq
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def _findGeneInSeq(self, record):
"""Extract gene sequence from larger sequence (e.g. genomes)
by searching features."""
if not record.features:
# if there aren't any features, just return the record
return record
for feature in record.features:
feature_names = []
if 'gene' in feature.qualifiers.keys():
feature_names.extend(feature.qualifiers['gene'])
if 'gene_synonym' in feature.qualifiers.keys():
feature_names.extend(feature.qualifiers['gene_synonym'])
if 'product' in feature.qualifiers.keys():
feature_names.extend(feature.qualifiers['product'])
gene_names = [e.lower() for e in self.gene_names]
feature_names = [e.lower() for e in feature_names]
if set(gene_names) & set(feature_names):
try:
extractor = SeqFeature(feature.location)
found_seq = extractor.extract(record)
except ValueError:
# catch value errors raised for sequences
# with "fuzzy" positions
# TODO: what are fuzzy positions and can I use
# them?
return record
else:
return found_seq
return record
示例5: get_gene_and_201bp_upstream
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def get_gene_and_201bp_upstream(genefeature,genomeseq):
mystart = genefeature.location.start
myend = genefeature.location.end
mystrand = genefeature.location.strand
if mystrand == 1:
newfeature = SeqFeature(FeatureLocation(mystart-201,myend),strand=mystrand)
elif mystrand == -1:
newfeature = SeqFeature(FeatureLocation(mystart,myend+201),strand=mystrand)
return newfeature.extract(genomeseq)
示例6: find_cds
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def find_cds ():
seq_des = str(record_dict[keys].description).split("|")
for i in seq_des:
if re.match("CDS", i):
feature, cds_start, cds_end = re.split(":|-", i)
cds_feature = SeqFeature(FeatureLocation(int(cds_start)-1,int(cds_end)-1),
type=str(feature))
cds_sequence = cds_feature.extract(record_dict[keys].seq)
print cds_sequence.translate()
return cds_start, cds_end, cds_sequence
示例7: retrieveCompositeSequence
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def retrieveCompositeSequence(seq_record,seqList) :
# true seq
listePosition = list()
for node in seqList :
seq,coord = node.split(":")
start,end = coord.split("..")
listePosition.append(int(float(start)))
listePosition.append(int(float(end)))
start = min(listePosition)
end = max(listePosition)
f = SeqFeature(FeatureLocation(start,end))
seq = f.extract(seq_record)
seqId = seq_record.id+"|"+str(start)+"_"+str(end)
return SeqRecord(seq=seq.seq,id=seqId,description="")
示例8: retrievePartialSequence
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def retrievePartialSequence(seqList,seq2coord,gi2domain,gi2phylum,gi2species,seqId2seqRecord) :
# true seq
seq_set = set()
for node in seqList :
seq,coord = node.split(":")
seq_set.add(seq)
# print len(seq_set)
# get fasta
seqListExtracted = list()
for seqId in seq_set :
if seqId in seqId2seqRecord :
# print seq_record.id
seq_record = seqId2seqRecord[seqId]
for node in seqList :
# print "\t"+node.split(":")[0]
if seq_record.id == node.split(":")[0] :
# extracting subseq thanks to coordinates
# print "\t\t"+seq_record.id+"\t"+node+"\t"+str(seq2coord[node])
start = seq2coord[node][0]
end = seq2coord[node][1]
f = SeqFeature(FeatureLocation(int(start),int(end) ) )
seq = f.extract(seq_record)
# print seq
gi = node.split(":")[0]
species = gi2species[gi]
phylum = gi2phylum[gi]
seqId = gi+"|"+phylum+"|"+species+"|"+node.split(":")[1].replace("..","_")
seqRecord = SeqRecord(seq=seq.seq,id=seqId,description="")
seqListExtracted.append(seqRecord)
seqList.remove(node)
break
else :
continue
else :
continue
return seqListExtracted
示例9: extract_sequences_one_sample
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def extract_sequences_one_sample(args):
'''
Function for extracting protein sequences given annotated regions and produce fasta files that can be used for clustering
'''
(fasta_path,annotation_path,sample, output_dir)=args
domainInfo=load_annotation_pfam(annotation_path)
print "Generating domain fasta sequences for "+sample+" ..."
from Bio import SeqIO
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.SeqRecord import SeqRecord
(annot,start,stop,strand,evalue)=domainInfo
record_dict=index_fasta(fasta_path)
recordlist=[]
outfilename=output_dir +'/forClustering/'+ sample +'.fasta'
outhandle=open(outfilename,'w')
for domainID in annot.keys():
for i in range(len(annot[domainID])):
domain=annot[domainID][i]
try:
seq=record_dict[domain]
except KeyError:
print "Error: " + domain + " not in fasta file.\n"
break
a=start[domainID][i]
b=stop[domainID][i]
seq_strand=strand[domainID][i]
seq_evalue=evalue[domainID][i]
if seq_strand in '+':
domain_feature = SeqFeature(FeatureLocation(a-1, b-1), type="domain", strand=1)
elif seq_strand in '-':
domain_feature = SeqFeature(FeatureLocation(a-1, b-1), type="domain", strand=-1)
feature_seq = domain_feature.extract(seq)
feature_seq.id=feature_seq.id+' '+domainID+' '+seq_evalue
recordlist.append(feature_seq)
SeqIO.write(recordlist, outhandle, "fasta")
outhandle.close()
print "Done"
示例10: test_get_codon_feature_location__reverse_strand
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def test_get_codon_feature_location__reverse_strand(self):
FEATURE_SEQ_RAW = reverse_complement('ATGTTTGGGTAG')
SEQ = Seq('CCCCCC' + FEATURE_SEQ_RAW + 'AGTA', generic_dna)
seq_record = SeqRecord(SEQ)
FEATURE_1_ID = '1'
FEATURE_1_LOC = FeatureLocation(6, 18)
feature_1 = SeqFeature(FEATURE_1_LOC, type='CDS',
id=FEATURE_1_ID, strand=-1)
add_feature_to_seq_record(seq_record, feature_1)
# Sanity check for the feature sequence.
feature_seq = str(feature_1.extract(seq_record.seq))
self.assertEqual('ATG', feature_seq[0:3])
self.assertEqual('TTT', feature_seq[3:6])
CODON_0_FEATURE_LOCATION = get_codon_feature_location(feature_1, 0)
self.assertEqual(15, CODON_0_FEATURE_LOCATION.start)
self.assertEqual(18, CODON_0_FEATURE_LOCATION.end)
CODON_1_FEATURE_LOCATION = get_codon_feature_location(feature_1, 1)
self.assertEqual(12, CODON_1_FEATURE_LOCATION.start)
self.assertEqual(15, CODON_1_FEATURE_LOCATION.end)
示例11: annotate_like_HXB2
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def annotate_like_HXB2(refname, VERBOSE=0):
'''Annotate copying from HXB2'''
hxb2 = load_custom_reference('HXB2', 'gb')
ref = load_custom_reference(refname, 'fasta')
refs = str(ref.seq)
def get_sublocation(sublocation):
hxb2_seq = sublocation.extract(hxb2)
ref_seq = trim_to_refseq(refs, hxb2_seq).replace('-', '')
start = refs.find(ref_seq)
end = start + len(ref_seq)
return FeatureLocation(start, end, strand=+1)
for fea in hxb2.features:
if VERBOSE >= 1:
print fea.id
loc = [get_sublocation(loc) for loc in fea.location.parts]
if len(loc) == 1:
loc = loc[0]
else:
loc = CompoundLocation(loc)
feature = SeqFeature(loc, type=fea.type, id=fea.id)
# Test length of old and new
if fea.id not in ["LTR5'", "LTR3'", 'V4']:
L1 = len(fea.extract(hxb2))
L2 = len(feature.extract(ref))
s = str(L2)+' vs '+str(L1)
if 1.0 * L2 / L1 < 0.9:
raise ValueError('Feature: '+fea.id+' is too short: '+s)
elif 1.0 * L2 / L1 > 1.1:
raise ValueError('Feature: '+fea.id+' is too long: '+s)
ref.features.append(feature)
return ref
示例12: TSS
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
# if we had a TSS (not ever gene does)
if tss:
# open the fasta file sequence.fasta
for seq_record in SeqIO.parse("sequence.fasta", "fasta"):
# check from genestart to TSS is forward or tss to gene start if backwards with -1 or 1 strange appropraitely
if direction is "-":
# print(tss)
test_feature = SeqFeature(FeatureLocation(int(geneStarts[i]), int(tss)), type="gene", strand=-1)
else:
test_feature = SeqFeature(FeatureLocation(int(tss), int(geneStarts[i])), type="gene", strand=1)
# test_feature=SeqFeature(FeatureLocation(40417,TSS),type="gene",strand=strandDirection)
# example_seq now contains the sequence which we need to search through
example_seq = test_feature.extract(seq_record)
print (example_seq.seq)
# need to transcribe the sequence
sequence = str(example_seq.seq.transcribe())
print sequence
letters = ["U", "A", "C", "G"]
# default the stalling position to x
stallingPosition = ["x", "x", "x", "x"]
# for each letter in UACG
for letter in letters:
示例13: SeqFeature
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
type=typ,
id=name)
else:
fea = SeqFeature(CompoundLocation([FeatureLocation(edge[0], edge[1], strand=+1)
for edge in edges]),
type=typ,
id=name)
seqnew.features.append(fea)
print 'Sanity checks'
for fea in seqnew.features:
if fea.type in ('gene', 'protein'):
feaseq = fea.extract(seqnew)
# vpr has an additional T in HXB2
if fea.id == 'vpr':
assert ((len(feaseq) - 1) % 3) == 0
T_pos = 5771
prot = (feaseq[: T_pos - fea.location.nofuzzy_start].seq + \
feaseq[T_pos + 1 - fea.location.nofuzzy_start:].seq).translate()
else:
assert (len(feaseq) % 3) == 0
prot = feaseq.seq.translate()
# These genes contain premature stops in HXB2
if fea.id in ('tat', 'nef'):
assert prot[-1] == '*'
示例14: gene2features
# 需要导入模块: from Bio.SeqFeature import SeqFeature [as 别名]
# 或者: from Bio.SeqFeature.SeqFeature import extract [as 别名]
def gene2features(r, gene, gene2position, gene2product, start, end, gcode, partialyes, verbose):
"""
"""
contig, CDSs, gffstrand, function, frames = gene2position[gene]
if gffstrand in ('1','+'):
strand = +1
else:
strand = -1
CDSs.reverse()
'''#add stop codon if not partial seq
if strand==1 and CDSs[-1][1]+3 <= len(r.seq):
CDSs[-1][1] += 3
elif strand==-1 and CDSs[0][0]-3 > 0:
CDSs[0][0] -= 3'''
cdsloc, mrnaloc = get_locations(CDSs, start, end, strand)
#add gene
geneid = gene #".".join(gene.split('.')[:-1])
#get product
product = "hypothetical protein"
if geneid in gene2product:
product = gene2product[geneid]
if gene.endswith('.t1'):
sf = SeqFeature(FeatureLocation(BeforePosition(start-1),AfterPosition(end)), strand=strand, type='gene', id=geneid)
sf.qualifiers={"locus_tag": geneid, "gene": geneid, "product": product}
r.features.append(sf)
#get mRNA sf
sf = SeqFeature(mrnaloc, type='mRNA', id=gene)
sf.qualifiers={"locus_tag": geneid, "gene": geneid, "product": product} #"protein_id": gene
r.features.append(sf)
#get CDS sf
sf = SeqFeature(cdsloc, type='CDS', id=gene)
#get translation
seq = sf.extract(r.seq)
aa = str(seq.translate(table=gcode))
#solve non-triplets issue
if len(seq) % 3:
if strand==1:
end -= len(seq) % 3
else:
start += len(seq) % 3
##check for partial sequence - no M as first or no * as last aa
partial = 0
#both ends partial
if aa[0]!="M" and aa[-1]!="*":
partial = 1
sf.location = FeatureLocation(BeforePosition(start-1),AfterPosition(end))
#left end partial
elif aa[0]!="M" and strand==1 or aa[-1]!="*" and strand==-1:
partial = 1
sf.location = FeatureLocation(BeforePosition(start-1),end)
#right end partial
elif aa[-1]!="*" and strand==1 or aa[0]!="M" and strand==-1:
partial = 1
sf.location = FeatureLocation(start-1,AfterPosition(end))
#strip stop codon
aa = aa.strip("*")
#replace internal stop codons by X
if "*" in aa:
if verbose:
sys.stderr.write("[Warning] Stop codon(s) in: %s. Skipped!\n" % gene)
return r
#aa = aa.replace("*","X")
sf.qualifiers = {'transl_table': gcode, "locus_tag": geneid, "gene": geneid, "product": product, "translation": aa} #"protein_id": gene,
if function:
sf.qualifiers['note'] = function
#inform about partial entries
if partial:
#skip if not partial are allowed
if not partialyes:
return r
if aa[0]!="M":
sf.qualifiers['codon_start'] = 1
sf.qualifiers['product'] += ", partial cds"
if verbose:
sys.stderr.write("[Warning] Partial sequence: %s\n" % (gene,))
#sys.stderr.write("[Warning] Partial sequence: %s %s\n" % (gene,sf))
#add to features
r.features.append(sf)
return r