本文整理匯總了Python中pyfasta.Fasta.sequence方法的典型用法代碼示例。如果您正苦於以下問題:Python Fasta.sequence方法的具體用法?Python Fasta.sequence怎麼用?Python Fasta.sequence使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pyfasta.Fasta的用法示例。
在下文中一共展示了Fasta.sequence方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: parse_sequences
# 需要導入模塊: from pyfasta import Fasta [as 別名]
# 或者: from pyfasta.Fasta import sequence [as 別名]
def parse_sequences(sites, size, fasta_file):
    """Attach the extended binding-site sequence to every region in ``sites``.

    For each region the interval ``ext_start``..``ext_end`` on ``chr`` is
    fetched from ``fasta_file``, upper-cased, reverse-complemented when the
    region lies on the '-' strand, and stored under the ``ext_seq`` key.

    Args:
        sites: iterable of region dicts providing the keys ``chr``,
            ``ext_start``, ``ext_end`` and ``strand``; every dict is modified
            in place.
        size: not read by this function; kept so existing callers still work.
        fasta_file: path of the genome FASTA file handed to ``pyfasta.Fasta``.

    Returns:
        The ``sites`` object that was passed in, now carrying ``ext_seq``.
    """
    # pyfasta is needed to fetch sequences from the genome FASTA file.
    from pyfasta import Fasta

    print("INFO: Begin to fetch sequences....")
    # Key records by the first whitespace-delimited token of the header line.
    genome = Fasta(fasta_file, key_fn=lambda key: key.split()[0])
    for reg in sites:
        on_minus_strand = reg["strand"] == '-'
        start = reg["ext_start"]
        end = reg["ext_end"]
        # If the motif is on the negative strand, shift the region by +1 to
        # account for zero-based half-open intervals.
        if on_minus_strand:
            start += 1
            end += 1
        # Original author's note: passing 'strand' to f.sequence did not work
        # (suspected pyfasta bug), so the strand is handled manually below.
        seq = genome.sequence(
            {"chr": reg["chr"], "start": start, "stop": end},
            one_based=False,
        ).upper()
        if on_minus_strand:
            seq = reverse_complement(seq)
        reg["ext_seq"] = seq
    print("INFO: Finished sequences.")
    # The original returned the undefined name `regions`; the regions that
    # were just annotated are the ones in `sites`.
    return sites
示例2: Reference
# 需要導入模塊: from pyfasta import Fasta [as 別名]
# 或者: from pyfasta.Fasta import sequence [as 別名]
class Reference(object):
    """Thin wrapper around a pyfasta ``Fasta`` object for interval lookups."""

    def __init__(self, genome_fasta):
        """Open ``genome_fasta`` through pyfasta.

        Record keys are the first whitespace-delimited token of each header.

        @see: https://pypi.python.org/pypi/pyfasta
        """
        self.fasta = Fasta(
            genome_fasta,
            key_fn=lambda header: header.split()[0],
        )

    def get_sequence_from_iv(self, iv):
        """Return the sequence pyfasta reports for the interval ``iv``.

        ``iv`` must expose ``chrom``, ``start``, ``end`` and ``strand``; they
        are forwarded as a pyfasta feature dict with ``one_based=False``.
        """
        query = dict(
            chr=iv.chrom,
            start=iv.start,
            stop=iv.end,
            strand=iv.strand,
        )
        return self.fasta.sequence(query, one_based=False)
示例3: main
# 需要導入模塊: from pyfasta import Fasta [as 別名]
# 或者: from pyfasta.Fasta import sequence [as 別名]
def main(gff_file, outdir,
         fasta_file="/home/gturco/src/find_cns_gturco/pipeline/data/arabidopsis.fasta"):
    """Extract features whose parent has no CDS and write their sequences.

    Steps:
      1. Read ``gff_file`` and group the tab-split lines by the name captured
         from ``parent=...`` in the last column (case-insensitive).
      2. Drop every parent that has at least one line whose third column is
         ``CDS``.
      3. Write the remaining lines to ``<outdir>/at_non_cds.gff``, skipping
         ChrC/ChrM, ``exon`` features and repeats of the same
         (column 1, column 4, column 5) triple. Column 1 is upper-cased with
         "CHR" removed and the last column is replaced by the parent name.
      4. Re-read that file with ``read_gff`` and write one record per feature
         to ``<outdir>/at_rnas.fasta``.

    Args:
        gff_file: path of the input GFF file.
        outdir: directory that receives the two output files.
        fasta_file: genome FASTA opened with ``pyfasta.Fasta``; defaults to the
            path that used to be hard-coded here.
    """
    parent_re = re.compile("parent=([^.;]+)", re.I)
    cds_parents = set()
    non_cds_feats = collections.defaultdict(list)

    # `with` closes the input handle, which the original left open.
    with open(gff_file) as gff_in:
        for raw in gff_in:
            fields = raw.split("\t")
            match = parent_re.search(fields[-1])
            if not match:
                continue
            fname = match.group(1)
            non_cds_feats[fname].append(fields)
            if fields[2].upper() == "CDS":
                cds_parents.add(fname)

    # A parent with any CDS line is not a non-CDS feature.
    for fname in cds_parents:
        del non_cds_feats[fname]

    seen = set()
    with open(outdir + "/at_non_cds.gff", "w") as rna_out:
        for fname in sorted(non_cds_feats):
            for feat in non_cds_feats[fname]:
                if feat[0] in ("ChrC", "ChrM"):
                    continue
                if feat[2] == "exon":
                    continue
                # The key uses the chromosome name as it appears in the
                # input, i.e. before it is normalised below.
                key = (feat[0], feat[3], feat[4])
                if key in seen:
                    continue
                seen.add(key)
                feat[0] = feat[0].upper().replace("CHR", "")
                # Replacing the last column also drops the trailing newline,
                # so exactly one is appended when writing.
                feat[-1] = fname
                rna_out.write("\t".join(feat) + "\n")

    gff = read_gff(outdir + "/at_non_cds.gff")
    fasta = Fasta(fasta_file)
    with open(outdir + "/at_rnas.fasta", "w") as fa_out:
        # NOTE(review): assumes read_gff returns a dict of dicts
        # (chromosome -> name -> feature) -- confirm against read_gff.
        for feature_list in gff.values():
            for feature in feature_list.values():
                seq = fasta.sequence(feature)
                # NOTE(review): the header is written as "> name" (with a
                # space), exactly as before; confirm downstream readers
                # expect that.
                fa_out.write("> %s\n%s\n" % (feature["name"], seq))
示例4: main
# 需要導入模塊: from pyfasta import Fasta [as 別名]
# 或者: from pyfasta.Fasta import sequence [as 別名]
def main(gff_file, fasta_file, parents, children):
    """Print one FASTA record per parent feature, built from its children.

    Args:
        gff_file: GFF path; a GFFutils database ``<gff_file>.db`` is created
            next to it when it does not exist yet.
        fasta_file: FASTA path opened with ``pyfasta.Fasta``.
        parents: comma-separated feature types to use as parents.
        children: comma-separated feature types whose sequences are joined.

    For every parent, the first-level children of a wanted type are fetched,
    ordered by start (reversed when the parent is on the '-' strand), and
    their sequences are concatenated and printed under ``>parent_id``.
    Parents without a matching child produce a warning on stderr.
    """
    db_file = gff_file + ".db"
    if not op.exists(db_file):
        GFFutils.create_gffdb(gff_file, db_file)

    genome = Fasta(fasta_file)
    db = GFFutils.GFFDB(db_file)

    parent_types = set(parents.split(','))
    per_type = [db.features_of_type(ftype) for ftype in parent_types]
    child_types = set(children.split(','))

    for feat in itertools.chain.from_iterable(per_type):
        pieces = []
        for kid in db.children(feat.id, 1):
            if kid.featuretype in child_types:
                location = dict(chr=kid.chrom, start=kid.start,
                                stop=kid.stop, strand=kid.strand)
                pieces.append((genome.sequence(location), kid))

        if not pieces:
            sys.stderr.write("[warning] %s has no children with type %s\n"
                             % (feat.id, ','.join(child_types)))
            continue

        # Ascending position first, then flip for negative-strand parents.
        pieces.sort(key=lambda pair: pair[1].start)
        if feat.strand == '-':
            pieces.reverse()

        print(">%s" % feat.id)
        print(''.join(pair[0] for pair in pieces))
示例5: MutateFasta
# 需要導入模塊: from pyfasta import Fasta [as 別名]
# 或者: from pyfasta.Fasta import sequence [as 別名]
class MutateFasta(object):
    """Build mutated or context-annotated sequences from a FASTA reference."""

    def __init__(self, fasta):
        """Open ``fasta`` with pyfasta, keyed by the first word of each header."""
        self.fasta = Fasta(fasta, key_fn=lambda key: key.split()[0])
        # self.chroms = [str(i+1) for i in range(22)] + ['X', 'Y']  # , 'MT']

    def generate_seq(self, records, offset=None):
        """Return the reference sequence with the given variants applied.

        Args:
            records: sequence of dicts with the keys ``chrom``, ``pos``,
                ``ref`` and ``alt``. Positions are passed to ``slice_fasta``
                and are therefore 1-based.
                NOTE(review): records are presumably sorted by ``pos`` and
                non-overlapping; this is not checked here.
            offset: optional ``(chrom, prev_pos, last_pos)``; output covers
                positions ``prev_pos + 1`` .. ``last_pos`` of ``chrom``.
                Without it, the chromosome of the first record is used from
                its first base to its end.

        Returns:
            The mutated sequence as a string, or None when neither records
            nor an offset were supplied.

        Records are skipped when they are on another chromosome, lack
        ``ref``/``alt``, have a first ``ref`` base that disagrees with the
        reference, or cannot be classified by ``_classify_mut``.
        """
        if not records and not offset:
            return None
        records = records or []

        if offset:
            chrom, prev_pos, last_pos = offset[0], offset[1], offset[2]
        else:
            chrom = records[0]['chrom']
            prev_pos = 0
            last_pos = len(self.fasta[chrom])

        parts = []
        for r in records:
            # Filter on chromosome before touching the reference, so records
            # for other (possibly unknown) chromosomes never trigger a lookup.
            if r['chrom'] != chrom:
                continue
            if not (r['ref'] and r['alt']):
                continue
            if r['ref'][0] != self.slice_fasta(chrom, r['pos'], r['pos']):
                continue

            classified = self._classify_mut(r['ref'], r['alt'])
            if classified is None:
                # Equal-length multi-base substitutions are not supported.
                continue
            mut_type, sub_seq = classified

            if mut_type == 'snv':
                parts.append(self.slice_fasta(chrom, prev_pos + 1, r['pos'] - 1))
                parts.append(sub_seq)
                prev_pos = r['pos']
            elif mut_type == 'del':
                parts.append(self.slice_fasta(chrom, prev_pos + 1, r['pos']))
                # Resume after the deleted bases. The original advanced
                # prev_pos relative to its old value, which re-emitted them.
                prev_pos = r['pos'] + len(sub_seq)
            elif mut_type == 'ins':
                parts.append(self.slice_fasta(chrom, prev_pos + 1, r['pos']))
                parts.append(sub_seq)
                prev_pos = r['pos']

        # Remainder of the reference after the last applied variant.
        if prev_pos + 1 <= last_pos:
            parts.append(self.slice_fasta(chrom, prev_pos + 1, last_pos))
        return ''.join(parts)

    def generate_contexted_seq(self, r):
        """Cut a gene record into labelled ``[sequence, label]`` pieces.

        ``r`` supplies ``chrom``, ``txStart``, ``txEnd``, ``cdsStart``,
        ``cdsEnd``, ``exonCount``, ``exonStarts`` and ``exonEnds``. Labels are
        'utr', 'exon' or 'intron'; pieces are returned in genomic order.

        TODO: support - strand genes. (currently only supports + strand genes...)
        NOTE: refFlat is stored in 0-based coordinate, hence the ``+ 1`` on
        every start before the 1-based ``slice_fasta`` call.
        NOTE(review): for a single-exon record the first-exon and last-exon
        pieces overlap; confirm whether that is intended.
        """
        cut = self.slice_fasta
        chrom = r['chrom']
        exon_starts = r['exonStarts']
        exon_ends = r['exonEnds']
        exon_count = r['exonCount']
        cons = []

        # 5'UTR + 1st exon
        cons.append([cut(chrom, r['txStart'] + 1, r['cdsStart']), 'utr'])
        cons.append([cut(chrom, r['cdsStart'] + 1, exon_ends[0]), 'exon'])
        if exon_count > 1:
            cons.append([cut(chrom, exon_ends[0] + 1, exon_starts[1]), 'intron'])

        # Inner exons, each followed by the intron leading to the next exon
        for i, exon_start in enumerate(exon_starts):
            if i == 0 or i + 1 == exon_count:
                continue
            cons.append([cut(chrom, exon_start + 1, exon_ends[i]), 'exon'])
            cons.append([cut(chrom, exon_ends[i] + 1, exon_starts[i + 1]), 'intron'])

        # Last exon + 3'UTR
        cons.append([cut(chrom, exon_starts[exon_count - 1] + 1, r['cdsEnd']), 'exon'])
        cons.append([cut(chrom, r['cdsEnd'] + 1, r['txEnd']), 'utr'])
        return cons

    def slice_fasta(self, chrom, start, stop):
        """Return ``chrom[start..stop]`` from the reference (``one_based=True``)."""
        return self.fasta.sequence(
            {'chr': str(chrom), 'start': int(start), 'stop': int(stop)},
            one_based=True,
        )

    def _classify_mut(self, ref, alt):
        """Classify a ref/alt pair and return ``(type, affected_bases)``.

        Returns None for pairs that are none of SNV, insertion or deletion
        (for example equal-length multi-base substitutions).

        Raises:
            AssertionError: an insertion or deletion whose first ``ref`` and
                ``alt`` bases differ.

        >>> m = MutateFasta.__new__(MutateFasta)  # no FASTA file needed
        >>> m._classify_mut('A','G')
        ('snv', 'G')
        >>> m._classify_mut('G','GAA')
        ('ins', 'AA')
        >>> m._classify_mut('TTA','T')
        ('del', 'TA')
        """
        if len(ref) == len(alt) == 1:
            return 'snv', alt
        if len(ref) == len(alt):
            return None
        # Raised explicitly (not via `assert`) so the check survives `-O`
        # while callers still see the same exception type.
        if ref[0] != alt[0]:
            raise AssertionError('{0} {1}'.format(ref, alt))
        if len(ref) < len(alt):
            return 'ins', alt[1:]
        return 'del', ref[1:]