当前位置: 首页>>代码示例>>Python>>正文


Python Fasta.sequence方法代码示例

本文整理汇总了Python中pyfasta.Fasta.sequence方法的典型用法代码示例。如果您正苦于以下问题:Python Fasta.sequence方法的具体用法?Python Fasta.sequence怎么用?Python Fasta.sequence使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyfasta.Fasta的用法示例。


在下文中一共展示了Fasta.sequence方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse_sequences

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import sequence [as 别名]
def parse_sequences(sites, size, fasta_file):
    """Attach the extended binding-site sequence to every input region.

    For each region the interval ``ext_start``..``ext_end`` on ``chr`` is
    fetched from the genome FASTA, upper-cased and, for regions on the '-'
    strand, reverse-complemented.  The result is stored in place on the
    region dict under the key ``"ext_seq"``.

    Args:
        sites: list of region dicts providing the keys "chr", "ext_start",
            "ext_end" and "strand".
        size: not used by this function; kept so existing callers keep
            working.
        fasta_file: path of the genome FASTA file handed to pyfasta.

    Returns:
        The ``sites`` object that was passed in, each region now carrying
        an "ext_seq" entry.
    """
    from pyfasta import Fasta  # Fasta package is needed to fetch sequences from genome fasta file

    # Parenthesised single-argument print behaves identically on Python 2.
    print("INFO: Begin to fetch sequences....")

    # Index records by the first whitespace-delimited token of the header.
    f = Fasta(fasta_file, key_fn=lambda key: key.split()[0])

    for reg in sites:
        start = reg["ext_start"]
        end = reg["ext_end"]
        on_minus_strand = reg["strand"] == '-'

        # if motif on negative strand, shift region by +1 to account for
        # zero based half-open intervals
        if on_minus_strand:
            start += 1
            end += 1

        # Note, the 'strand':reg["strand"] argument for f.sequence does not
        # work, there seems to be a bug in the pyfasta/fasta.py code, so the
        # reverse complement is taken manually below.
        seq = f.sequence({"chr": reg["chr"], "start": start, "stop": end},
                         one_based=False)
        seq = seq.upper()

        # if motif on negative strand, convert seq to reverse complement
        if on_minus_strand:
            seq = reverse_complement(seq)

        # add sequence to region dict
        reg["ext_seq"] = seq

    print("INFO: Finished sequences.")
    # The original returned the undefined name `regions`; the annotated
    # input is what callers are meant to receive.
    return sites
开发者ID:ComputationalSystemsBiology,项目名称:ExoProfiler,代码行数:34,代码来源:5primeCounter.py

示例2: Reference

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import sequence [as 别名]
class Reference(object):
    """Thin wrapper around a pyfasta ``Fasta`` object for interval lookups."""

    def __init__(self, genome_fasta):
        """Open ``genome_fasta`` with pyfasta.

        @see: https://pypi.python.org/pypi/pyfasta
        """
        def first_token(header):
            # Use first value before whitespace as keys
            return header.split()[0]

        self.fasta = Fasta(genome_fasta, key_fn=first_token)

    def get_sequence_from_iv(self, iv):
        """Return the sequence pyfasta reports for interval ``iv``.

        ``iv`` must expose ``chrom``, ``start``, ``end`` and ``strand``;
        the coordinates are passed to pyfasta with ``one_based=False``.
        """
        query = dict(
            chr=iv.chrom,
            start=iv.start,
            stop=iv.end,
            strand=iv.strand,
        )
        return self.fasta.sequence(query, one_based=False)
开发者ID:henmt,项目名称:2015,代码行数:11,代码来源:reference.py

示例3: main

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import sequence [as 别名]
def main(gff_file, outdir,
         fasta_path="/home/gturco/src/find_cns_gturco/pipeline/data/arabidopsis.fasta"):
    """Extract features whose parent has no CDS and write their sequences.

    Steps:
      1. Read ``gff_file`` and group every line carrying a ``parent=``
         attribute by that parent name; parents with at least one CDS line
         are discarded.
      2. Write the remaining features (skipping ChrC/ChrM, "exon" rows and
         repeated (seqid, start, end) triples) to
         ``<outdir>/at_non_cds.gff``.  The seqid is upper-cased with "CHR"
         removed and the last column is replaced by the parent name.
      3. Re-read that file with ``read_gff`` and write one FASTA record per
         feature to ``<outdir>/at_rnas.fasta``.

    Args:
        gff_file: path of the tab-separated GFF input.
        outdir: existing directory that receives both output files.
        fasta_path: genome FASTA opened with pyfasta; defaults to the path
            that used to be hard-coded here.
    """
    name = re.compile("parent=([^.;]+)", re.I)

    cds_parents = set()
    non_cds_feats = collections.defaultdict(list)
    # `with` closes the input handle, which the original left open.
    with open(gff_file) as gff_handle:
        for line in gff_handle:
            line = line.split("\t")
            match = name.search(line[-1])
            if not match:
                continue
            fname = match.group(1)
            non_cds_feats[fname].append(line)
            if line[2].upper() == "CDS":
                cds_parents.add(fname)

    # Any parent that owns a CDS is protein coding: drop all of its features.
    for fname in cds_parents:
        del non_cds_feats[fname]

    seen = set()
    non_cds_gff = outdir + "/at_non_cds.gff"
    with open(non_cds_gff, "w") as rna_out:
        for k, feat_list in sorted(non_cds_feats.items()):
            for feat in feat_list:
                if feat[0] in ("ChrC", "ChrM"):
                    continue
                if feat[2] == "exon":
                    continue
                # De-duplicate on the untouched seqid plus start/end.
                key = (feat[0], feat[3], feat[4])
                if key in seen:
                    continue
                seen.add(key)
                feat[0] = feat[0].upper().replace("CHR", "")
                # The attribute column (which still holds the newline) is
                # replaced by the bare parent name.
                feat[-1] = k
                rna_out.write("\t".join(feat) + "\n")

    gff = read_gff(non_cds_gff)
    fasta = Fasta(fasta_path)
    with open(outdir + "/at_rnas.fasta", "w") as fa_out:
        # NOTE(review): read_gff presumably returns {seqid: {name: feature}};
        # only the inner feature dicts are used here.
        for _seqid, feature_list in gff.iteritems():
            for _fname, feature in feature_list.iteritems():
                seq = fasta.sequence(feature)
                # Header keeps the original "> name" layout (with a space).
                fa_out.write("> " + str(feature["name"]) + "\n")
                fa_out.write(str(seq) + "\n")
开发者ID:gturco,项目名称:find_cns,代码行数:53,代码来源:arabidopsis_rna.py

示例4: main

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import sequence [as 别名]
def main(gff_file, fasta_file, parents, children):
    """Print one FASTA record per parent feature, built from its children.

    ``parents`` and ``children`` are comma-separated feature types.  For
    every feature of a parent type, the sequences of its level-1 children
    of the requested types are fetched from ``fasta_file``, ordered by
    start (reversed for '-' strand parents), concatenated and printed to
    stdout.  Parents without matching children get a warning on stderr.
    A GFFutils database ``<gff_file>.db`` is created when it is missing.
    """
    db_path = gff_file + ".db"
    if not op.exists(db_path):
        GFFutils.create_gffdb(gff_file, db_path)

    genome = Fasta(fasta_file)
    db = GFFutils.GFFDB(db_path)

    parent_types = set(parents.split(','))
    wanted_children = set(children.split(','))
    # The list is built eagerly so every type is queried up front.
    parent_feats = itertools.chain.from_iterable(
        [db.features_of_type(ftype) for ftype in parent_types])

    for feat in parent_feats:
        pieces = []
        for child in db.children(feat.id, 1):
            if child.featuretype in wanted_children:
                region = dict(chr=child.chrom, start=child.start,
                              stop=child.stop, strand=child.strand)
                pieces.append((genome.sequence(region), child))

        if not pieces:
            sys.stderr.write("[warning] %s has no children with type %s"
                             % (feat.id, ','.join(wanted_children)) + "\n")
            continue

        # sort children in incremental position
        pieces.sort(key=lambda pair: pair[1].start)
        # reverse children if negative strand
        if feat.strand == '-':
            pieces.reverse()

        print(">%s" % feat.id)
        print(''.join(pair[0] for pair in pieces))
开发者ID:Nicholas-NVS,项目名称:bio-pipeline,代码行数:39,代码来源:gff_loader.py

示例5: MutateFasta

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import sequence [as 别名]
class MutateFasta(object):
    """Build mutated or annotated sequences on top of a pyfasta genome."""

    def __init__(self, fasta):
        """Open ``fasta`` with pyfasta, keyed by the first header token."""
        self.fasta = Fasta(fasta, key_fn=lambda key: key.split()[0])
        # self.chroms = [str(i+1) for i in range(22)] + ['X', 'Y']  # , 'MT']

    def generate_seq(self, records, offset=None):
        """Return the reference sequence with the given variants applied.

        Args:
            records: list of dicts with keys 'chrom', 'pos' (1-based),
                'ref' and 'alt'.  Records are applied in the order given;
                they are expected to be sorted by position.
            offset: optional ``(chrom, start, end)`` triple.  When given,
                the output covers 1-based positions ``start + 1 .. end`` of
                ``chrom``; otherwise the whole chromosome of the first
                record is used.

        Returns:
            The mutated sequence as a string, or None when both ``records``
            and ``offset`` are empty.

        Records are silently skipped when they lie on another chromosome,
        have an empty ref/alt, disagree with the reference base at 'pos',
        or are of a kind ``_classify_mut`` cannot classify.
        """
        if not records and not offset:
            return

        chrom = offset[0] if offset else records[0]['chrom']
        prev_pos = offset[1] if offset else 0
        last_pos = offset[2] if offset else len(self.fasta[chrom])

        parts = []
        for r in records or ():
            # Cheap filters first so the FASTA is only queried for
            # records that can actually be applied.
            if r['chrom'] != chrom:
                continue
            if not (r['ref'] and r['alt']):
                continue
            if r['ref'][0] != self.slice_fasta(r['chrom'], r['pos'], r['pos']):
                continue

            classified = self._classify_mut(r['ref'], r['alt'])
            if classified is None:
                # Unsupported variant (e.g. equal-length multi-base
                # substitution): leave the reference untouched.
                continue
            mut_type, sub_seq = classified

            if mut_type == 'snv':
                parts.append(self.slice_fasta(chrom, prev_pos + 1, r['pos'] - 1))
                parts.append(sub_seq)
                prev_pos = r['pos']

            elif mut_type == 'del':
                parts.append(self.slice_fasta(chrom, prev_pos + 1, r['pos']))
                # Resume after the deleted bases, which follow the anchor
                # base at r['pos'].  (The original added the length to the
                # old prev_pos, re-emitting bases already written.)
                prev_pos = r['pos'] + len(sub_seq)

            elif mut_type == 'ins':
                parts.append(self.slice_fasta(chrom, prev_pos + 1, r['pos']))
                parts.append(sub_seq)
                prev_pos = r['pos']

        # Remainder of the reference after the last applied variant.
        if prev_pos + 1 <= last_pos:
            parts.append(self.slice_fasta(chrom, prev_pos + 1, last_pos))

        return ''.join(parts)

    def generate_contexted_seq(self, r):
        """Split a gene record into labelled sequence segments.

        Args:
            r: dict with keys 'chrom', 'txStart', 'txEnd', 'cdsStart',
                'cdsEnd', 'exonCount', 'exonStarts' and 'exonEnds'.

        Returns:
            A list of ``[sequence, label]`` pairs in transcript order,
            where label is 'utr', 'exon' or 'intron'.
        """
        cons = []
        chrom = r['chrom']

        # TODO: support - strand genes. (currently only supports + strand genes...)

        # NOTE: refFlat is stored in 0-based coordinate, hence the "+ 1" on
        # every start before the 1-based slice.

        # 5'UTR
        cons.append([self.slice_fasta(chrom, r['txStart'] + 1, r['cdsStart']), 'utr'])

        if r['exonCount'] > 1:
            # 1st exon (from the CDS start) and the intron that follows it
            cons.append([self.slice_fasta(chrom, r['cdsStart'] + 1, r['exonEnds'][0]), 'exon'])
            cons.append([self.slice_fasta(chrom, r['exonEnds'][0] + 1, r['exonStarts'][1]), 'intron'])

            # Internal exons, each followed by its intron
            for i, exon_start in enumerate(r['exonStarts']):
                if i == 0 or i + 1 == r['exonCount']:
                    continue

                cons.append([self.slice_fasta(chrom, exon_start + 1, r['exonEnds'][i]), 'exon'])
                cons.append([self.slice_fasta(chrom, r['exonEnds'][i] + 1, r['exonStarts'][i + 1]), 'intron'])

            # last exon, up to the CDS end
            cons.append([self.slice_fasta(chrom, r['exonStarts'][r['exonCount'] - 1] + 1, r['cdsEnd']), 'exon'])
        else:
            # Single-exon gene: the coding part stops at cdsEnd.  Running
            # to exonEnds[0] (as the original did) would overlap the
            # 3'UTR segment appended below.
            cons.append([self.slice_fasta(chrom, r['cdsStart'] + 1, r['cdsEnd']), 'exon'])

        # 3'UTR
        cons.append([self.slice_fasta(chrom, r['cdsEnd'] + 1, r['txEnd']), 'utr'])

        return cons

    def slice_fasta(self, chrom, start, stop):
        """Return ``chrom[start..stop]`` using 1-based coordinates."""
        return self.fasta.sequence({'chr': str(chrom), 'start': int(start), 'stop': int(stop)}, one_based=True)

    def _classify_mut(self, ref, alt):
        """Classify a ref/alt pair and return ``(kind, changed_bases)``.

        Examples (shown without ``self``)::

            _classify_mut('A', 'G')    -> ('snv', 'G')
            _classify_mut('G', 'GAA')  -> ('ins', 'AA')
            _classify_mut('TTA', 'T')  -> ('del', 'TA')

        Insertions and deletions must share their first base, otherwise an
        AssertionError is raised.  Equal-length alleles longer than one
        base are not supported and yield None.
        """
        if len(ref) == len(alt) == 1:
            return 'snv', alt
        elif len(ref) < len(alt):
            assert ref[0] == alt[0], '{0} {1}'.format(ref, alt)
            return 'ins', alt[1:]
        elif len(ref) > len(alt):
            assert ref[0] == alt[0], '{0} {1}'.format(ref, alt)
            return 'del', ref[1:]
        return None
开发者ID:knmkr,项目名称:pergenie,代码行数:94,代码来源:mutate_fasta.py


注:本文中的pyfasta.Fasta.sequence方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。