本文整理汇总了Python中Bio.SeqFeature.SeqFeature类的典型用法代码示例。如果您正苦于以下问题:Python SeqFeature类的具体用法?Python SeqFeature怎么用?Python SeqFeature使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SeqFeature类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: t_write_from_recs
def t_write_from_recs(self):
"""Write out GFF3 from SeqRecord inputs.
"""
seq = Seq("GATCGATCGATCGATCGATC")
rec = SeqRecord(seq, "ID1")
qualifiers = {"source": "prediction", "score": 10.0, "other": ["Some", "annotations"],
"ID": "gene1"}
sub_qualifiers = {"source": "prediction"}
top_feature = SeqFeature(FeatureLocation(0, 20), type="gene", strand=1,
qualifiers=qualifiers)
top_feature.sub_features = [SeqFeature(FeatureLocation(0, 5), type="exon", strand=1,
qualifiers=sub_qualifiers),
SeqFeature(FeatureLocation(15, 20), type="exon", strand=1,
qualifiers=sub_qualifiers)]
rec.features = [top_feature]
out_handle = StringIO.StringIO()
GFF.write([rec], out_handle)
wrote_info = out_handle.getvalue().split("\n")
assert wrote_info[0] == "##gff-version 3"
assert wrote_info[1] == "##sequence-region ID1 1 20"
assert wrote_info[2].split("\t") == ['ID1', 'prediction', 'gene', '1',
'20', '10.0', '+', '.',
'other=Some,annotations;ID=gene1']
assert wrote_info[3].split("\t") == ['ID1', 'prediction', 'exon', '1', '5',
'.', '+', '.', 'Parent=gene1']
示例2: mergeRecords
def mergeRecords(file): #adapted from SeqHandler by NF Alikhan (github.com/happykhan/seqhandler)
#SeqHandler is a script for merging, converting and splitting sequence files (Genbank, EMBL, fasta and others). Please use it to merge multi-Genbank files before running bwast.py
filetype = determineFileType(file) #determine file type
readInMultifasta = open(file, "r")
records = list(SeqIO.parse(readInMultifasta, filetype))
mergingFile = records[0]
from Bio.SeqFeature import SeqFeature, FeatureLocation
contigs = SeqFeature(FeatureLocation(0, len(mergingFile) ), type="fasta_record",\
strand=1)
contigs.qualifiers["note"] = records[0].name #pull out contig number of first contig
mergingFile.features.append(contigs) #append first contig to mergingFile
for nextRecord in records[1:]:
contigs = SeqFeature(FeatureLocation(len(mergingFile), len(mergingFile) + len(nextRecord)), type="fasta_record",\
strand=1)
contigs.qualifiers["note"] = nextRecord.name
mergingFile.features.append(contigs) #append subsequent contigs to mergingFile
mergingFile += nextRecord
mergingFile.name = records[0].name
mergingFile.description = records[0].description
mergingFile.annotations = records[0].annotations
for feature in mergingFile.features:
if feature.type == 'source':
mergingFile.features.remove(feature)
contigs = SeqFeature(FeatureLocation(0, len(mergingFile)), type="source", strand=1)
mergingFile.features.insert(0,contigs)
merged_file = re.sub(r"\.\w+$", r".merged.fa", file)
out_handle = open(merged_file, "w")
SeqIO.write(mergingFile, out_handle, filetype)
return merged_file
示例3: mergeMod
def mergeMod(args):
filetype = args.inFormat
# Load file as SeqRecord
int_handle = open(args.input, "r")
recs = list(SeqIO.parse(int_handle, filetype))
# For each SeqRecord, I.e. complete gbk annotation obj in file
fgbk = recs[0]
from Bio.SeqFeature import SeqFeature, FeatureLocation
d = SeqFeature(FeatureLocation(0, len(fgbk)), type="fasta_record", strand=1)
d.qualifiers["note"] = recs[0].name
fgbk.features.append(d)
for l in recs[1:]:
d = SeqFeature(FeatureLocation(len(fgbk), len(fgbk) + len(l)), type="fasta_record", strand=1)
d.qualifiers["note"] = l.name
fgbk.features.append(d)
fgbk += l
fgbk.name = recs[0].name
fgbk.description = recs[0].description
fgbk.annotations = recs[0].annotations
if args.accession != None:
fgbk.name = args.accession
if args.ver != None:
fgbk.id = fgbk.name + "." + args.ver
for f in fgbk.features:
if f.type == "source":
fgbk.features.remove(f)
d = SeqFeature(FeatureLocation(0, len(fgbk)), type="source", strand=1)
fgbk.features.insert(0, d)
outtype = filetype
if args.outFormat != None:
outtype = args.outFormat
out_handle = open(args.output, "w")
SeqIO.write(fgbk, out_handle, outtype)
示例4: _findGeneInSeq
def _findGeneInSeq(self, record):
"""Extract gene sequence from larger sequence (e.g. genomes)
by searching features."""
if not record.features:
# if there aren't any features, just return the record
return record
for feature in record.features:
feature_names = []
if 'gene' in feature.qualifiers.keys():
feature_names.extend(feature.qualifiers['gene'])
if 'gene_synonym' in feature.qualifiers.keys():
feature_names.extend(feature.qualifiers['gene_synonym'])
if 'product' in feature.qualifiers.keys():
feature_names.extend(feature.qualifiers['product'])
gene_names = [e.lower() for e in self.gene_names]
feature_names = [e.lower() for e in feature_names]
if set(gene_names) & set(feature_names):
try:
extractor = SeqFeature(feature.location)
found_seq = extractor.extract(record)
except ValueError:
# catch value errors raised for sequences
# with "fuzzy" positions
# TODO: what are fuzzy positions and can I use
# them?
return record
else:
return found_seq
return record
示例5: make_protein_feature
def make_protein_feature(feature_name, feature_start, feature_end, feature_type):
''' Returns sequence feature, using start, end, name and type as input
'''
feature = SeqFeature(FeatureLocation(int(feature_start), int(feature_end)), type=feature_type)
if feature_type == "Region":
feature.qualifiers = {'name': [feature_name]}
return feature
示例6: seqBlastToFeatures
def seqBlastToFeatures(self, blastDB, blastExe, seqFile, blastType = "blastn",scoreMin = 1e-3, logFile = None):
'''
Blast sequence file against blast database
parse files into records.
This function may not work as well on very large blast comparisons because
it does a full read of the result for the conversion to features.
'''
print ">blast %s %s %s %s" % (self.blastDB, self.blastExe, seqFile, blastType)
blastRecords = self.seqBlast(seqFile, blastType = "blastn", scoreMin = 1e-3, logFile = None)
result = []
index = 0
for r in blastRecords:
recordFeatures = []
for alignment in r.alignments:
name = alignment.title
query = r.query
for hsp in alignment.hsps:
if hsp.expect < scoreMin:
(ts,ss) = hsp.frame
strand = ss
start = hsp.sbjct_start
end = hsp.sbjct_end
location = FeatureLocation(start,end)
feature = SeqFeature(id=query,location=location,strand=strand)
aMatch = hsp.query + "\n" + hsp.match + "\n" + hsp.sbjct
feature.qualifiers["query"] = hsp.query
feature.qualifiers["subject"] = hsp.sbjct
feature.qualifiers["alignment"] = aMatch
recordFeatures.append(feature)
result.append(recordFeatures)
index = index + 1
return result
示例7: test_translation_checks_cds
def test_translation_checks_cds(self):
"""Test that a CDS feature is subject to respective checks."""
seq = Seq.Seq("GGTTACACTTACCGATAATGTCTCTGATGA", generic_dna)
f = SeqFeature(FeatureLocation(0, 30), type="CDS")
f.qualifiers['transl_table'] = [11]
with self.assertRaises(TranslationError):
f.translate(seq)
示例8: to_seqfeature
def to_seqfeature(self):
"""Create a SeqFeature from the ProteinDomain Object."""
feat = SeqFeature(location=FeatureLocation(self.start, self.end),
id=self.value)
if hasattr(self, 'confidence'):
feat.qualifiers['confidence'] = self.confidence
return feat
示例9: annotate_dna_reference
def annotate_dna_reference(ref, protein):
'''Annotate DNA reference with the protein secondary structures'''
from Bio.SeqFeature import SeqFeature, FeatureLocation
annotation_table = parse_secondary_structure(protein)
if protein == 'gagpol':
start_protein = ref.annotation['gag'].location.nofuzzy_start
elif protein == 'vpu':
# Uniprot starts one aa downstream of us
start_protein = ref.annotation[protein].location.nofuzzy_start + 3
else:
start_protein = ref.annotation[protein].location.nofuzzy_start
features = []
for _, datum in annotation_table.iterrows():
start_dna = datum['start'] * 3 + start_protein
end_dna = datum['end'] * 3 + start_protein
# Notice ribosomal slippage site
if protein == 'gagpol':
if start_dna >= slippage_site:
start_dna -= 1
if end_dna >= slippage_site:
end_dna -= 1
anno = SeqFeature(FeatureLocation(start_dna, end_dna), strand=+1)
anno.type = datum['feature']
features.append(anno)
return features
示例10: parse
def parse(self):
with open(self._file) as handle:
genbank = SeqRecord(Seq.UnknownSeq(0))
header_pattern = re.compile(r"ref\|(?P<id>.*?)\|:(?P<start>[0-9]+)-(?P<end>[0-9]+)\|(?P<description>.*?)\|\s*\[gene=(?P<gene>\S+)\]\s*\[locus_tag=(?P<locus_tag>\S+)\]\s*")
first = True
for record in SeqIO.parse(handle, "fasta"):
header = record.description
match = header_pattern.match(header)
if not match:
self.errors.append("Invalid header: >" + header)
continue
if first:
first = False
genbank.id = match.group("id")
genbank.name = match.group("id")
feature = SeqFeature(FeatureLocation(int(match.group("start")), int(match.group("end"))), type = "gene")
feature.qualifiers = {"locus_tag": match.group("locus_tag"),
"gene": match.group("gene"),
"note": match.group("description"),
"sequence": record.seq}
genbank.features.append(feature)
return genbank
return None
示例11: get_longest
def get_longest(seq_record, gene2isoforms):
l = []
c = 0;
chrom = adjust_name(seq_record.name);
for gene, isoforms in gene2isoforms.iteritems():
longest = max(isoforms, key = lambda i: sum([len(x) for x in i]))
if(args.format == 'bed'):
compound_to_bed(longest, chrom, gene)
elif(args.format == 'fasta'):
if(len(longest) > 1):
location = CompoundLocation(longest, operator = "join")
else:
location = longest[0];
feature = SeqFeature(location=location, type='utr', strand = longest[0].strand)
#print longest[0].strand
f = feature.extract(seq_record)
f.name = gene
f.id = gene
f.description = gene
l.append(f);
return l;
示例12: test_deletion__overlapping_features
def test_deletion__overlapping_features(self):
# Example based on intersection of nei and arbB genes in MG1655.
before_overlap = 'GCCCTGGCTGCCAGCA'
overlap = 'CTAG'
after_overlap = 'GCCGACCGCTTCGG'
raw_seq_str = before_overlap + overlap + after_overlap
seq = Seq(raw_seq_str, generic_dna)
seq_record = SeqRecord(seq)
feature_1_loc = FeatureLocation(0,
len(before_overlap) + len(overlap), strand=1)
feature_1 = SeqFeature(feature_1_loc, type='CDS', id='1')
seq_record.features.append(feature_1)
feature_2_loc = FeatureLocation(len(before_overlap), len(raw_seq_str),
strand=1)
feature_2 = SeqFeature(feature_2_loc, type='CDS', id='2')
seq_record.features.append(feature_2)
maker = VCFToGenbankMaker(seq_record, None, None)
maker._update_genome_record_for_variant(
len(before_overlap), overlap, '')
# Assert the sequence is correct.
EXPECTED_SEQ = before_overlap + after_overlap
self.assertEqual(EXPECTED_SEQ, str(seq_record.seq))
# Assert the feature annotations are still correct.
EXPECTED_FEATURE_1_SEQ = before_overlap
self.assertEqual(EXPECTED_FEATURE_1_SEQ,
str(feature_1.extract(seq_record.seq)))
EXPECTED_FEATURE_2_SEQ = after_overlap
self.assertEqual(EXPECTED_FEATURE_2_SEQ,
str(feature_2.extract(seq_record.seq)))
示例13: create_gene_feature
def create_gene_feature(gene_name, feature_location, feature_qualifiers):
"""Creates a minimal SeqFeature to represent a gene.
"""
gene_feature = SeqFeature(feature_location, type='gene')
gene_feature.qualifiers = {'gene': [gene_name]}
gene_feature.qualifiers = dict(gene_feature.qualifiers.items() +
feature_qualifiers.items())
return gene_feature
示例14: makeSeqObjectsForTblastnNeighbors
def makeSeqObjectsForTblastnNeighbors(tblastn_id, clusterrunid, cur, N=200000):
"""
Given a tBBLSATn ID and a dictionary from sanitized contig IDs (which is what will be
present in the TBLASTN id) to non-sanitized IDs (which are what is in the database),
returns a list of seq objects INCLUDING the TBLASTN hit itself (so that we can show that
on the region drawing).
We pick an N large enough to get at least one gene and then pick the closest one and get
all of its neighbors with a call to makeSeqFeaturesForGeneNeighbors() and just tack the TBLASTN
onto it.
"""
# Lets first get the contig and start/stop locations (which tell us teh strand) out of
# the TBLASTN id. This returns a ValueError if it fails which the calling function can catch if needed.
sanitizedToNot = getSanitizedContigList(cur)
contig, start, stop = splitTblastn(tblastn_id)
if contig in sanitizedToNot:
contig = sanitizedToNot[contig]
start = int(start)
stop = int(stop)
# Create a seq object for the TBLASTN hit itself
if start < stop:
strand = +1
else:
strand = -1
tblastn_feature = SeqFeature(FeatureLocation(start, stop), strand=strand, id=tblastn_id)
tblastn_feature.qualifiers["cluster_id"] = -1
# Find the neighboring genes.
neighboring_genes = getGenesInRegion(contig, start - N, stop + N, cur)
if len(neighboring_genes) == 0:
sys.stderr.write(
"WARNING: No neighboring genes found for TBLASTN hit %s within %d nucleotides in contig %s\n"
% (tblastn_id, N, contig)
)
return [tblastn_feature]
else:
neighboring_geneinfo = getGeneInfo(neighboring_genes, cur)
# Find the closest gene to ours and get the clusters for those neighbors based on the specific clusterrunid
minlen = N
mingene = None
minstrand = None
for geneinfo in neighboring_geneinfo:
genestart = int(geneinfo[5])
geneend = int(geneinfo[6])
distance = min(abs(genestart - start), abs(geneend - start), abs(genestart - stop), abs(geneend - stop))
if distance < minlen:
mingene = geneinfo[0]
minlen = distance
neighboring_features = makeSeqFeaturesForGeneNeighbors(mingene, clusterrunid, cur)
# Add the TBLASTN itself and return it.
neighboring_features.append(tblastn_feature)
return neighboring_features
示例15: get_gene_and_201bp_upstream
def get_gene_and_201bp_upstream(genefeature,genomeseq):
mystart = genefeature.location.start
myend = genefeature.location.end
mystrand = genefeature.location.strand
if mystrand == 1:
newfeature = SeqFeature(FeatureLocation(mystart-201,myend),strand=mystrand)
elif mystrand == -1:
newfeature = SeqFeature(FeatureLocation(mystart,myend+201),strand=mystrand)
return newfeature.extract(genomeseq)