当前位置: 首页>>代码示例>>Python>>正文


Python flatfeature.Bed类代码示例

本文整理汇总了Python中flatfeature.Bed的典型用法代码示例。如果您正苦于以下问题:Python Bed类的具体用法?Python Bed怎么用?Python Bed使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Bed类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: TestAssign

class TestAssign(unittest.TestCase):
    def setUp(self):
        self.cns_filename = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.cns.txt"
        self.pairsfile = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.pairs.txt"
        self.qbed = Bed("data/rice_v6_sorghum_v1/rice_v6.bed") ;self.qbed.fill_dict()
        self.sbed = Bed("data/rice_v6_sorghum_v1/sorghum_v1.bed") ;self.sbed.fill_dict()
        self.cns_dict, self.evalue_dict = get_cns_dict(self.cns_filename)
        self.qpair_map, self.spair_map = make_pair_maps(self.pairsfile, "pair", self.qbed, self.sbed)

    def test_get_cns_dict(self):
        """test for test_get_cns_dict"""
        #print self.cns_dict.keys()
        print "keys!",  self.evalue_dict.keys()


    def test_assign(self):
      assign(self.cns_dict, self.qbed, self.sbed, self.qpair_map, self.spair_map)
    
    def test_cns_fmt_dict(self):
      for cns, qfeat, sfeat in assign(self.cns_dict, self.qbed, self.sbed, self.qpair_map, self.spair_map):
        d = cns_fmt_dict(cns, qfeat, sfeat, self.evalue_dict)
        print "dddddddd", d
        
    def test_main(self):
      pass
开发者ID:PMSeitzer,项目名称:find_cns,代码行数:25,代码来源:test_assign.py

示例2: setUp

 def setUp(self):
     self.cns_filename = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.cns.txt"
     self.pairsfile = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.pairs.txt"
     self.qbed = Bed("data/rice_v6_sorghum_v1/rice_v6.bed") ;self.qbed.fill_dict()
     self.sbed = Bed("data/rice_v6_sorghum_v1/sorghum_v1.bed") ;self.sbed.fill_dict()
     self.cns_dict, self.evalue_dict = get_cns_dict(self.cns_filename)
     self.qpair_map, self.spair_map = make_pair_maps(self.pairsfile, "pair", self.qbed, self.sbed)
开发者ID:PMSeitzer,项目名称:find_cns,代码行数:7,代码来源:test_assign.py

示例3: utr_present

def utr_present(cns_pck,query_bed_path, UTR):
  "checks to see if qaccn has utr region"
  db = MySQLdb.connect(host="127.0.0.1", user="root", db = "rice_gene_table")
  cursor = db.cursor()
  cns_handle = open(cns_pck)
  cns_pickle = pickle.load(cns_handle)
  query_bed = Bed(query_bed_path)
  for cns in cns_pickle:
    qfeat = query_bed.accn(cns['qaccn'])
    if qfeat['strand'] == "+":
      end = qfeat['end']
      start = qfeat["start"]
    else:
      end = qfeat['start']
      start = qfeat["end"]
    if UTR == 3:
      if end == min(qfeat['locs'])[0] or end == max(qfeat['locs'])[1]:
        stmt = "update MUSA_GENE_LIST_copy set MUSA_GENE_LIST_copy.3_UTR = 'ND' where MUSA_GENE_LIST_copy.Rice_MSU6_genes = '{0}'".format(cns['qaccn'])
        print stmt
        cursor.execute(stmt)
    elif UTR == 5:
      if start == min(qfeat['locs'])[0] or start == max(qfeat['locs'])[1]:
        stmt = "update MUSA_GENE_LIST_copy set MUSA_GENE_LIST_copy.5_UTR = 'ND' where MUSA_GENE_LIST_copy.Rice_MSU6_genes = '{0}'".format(cns['qaccn'])
        print stmt
        cursor.execute(stmt)
开发者ID:jschnable,项目名称:find_cns,代码行数:25,代码来源:cns_location.py

示例4: setUp

 def setUp(self):
     self.qallbed = Bed("data/rice_v6_setaria64/rice_v6.all.bed", "data/rice_v6_setaria64/rice_v6.fasta")
     self.qallbed.fill_dict()
     self.sallbed = Bed("data/rice_v6_setaria64/setaria64.all.bed", "data/rice_v6_setaria64/setaria64.fasta")
     self.sallbed.fill_dict()
     self.saccn = self.sallbed.accn("Si000834m")
     blastfh = open("blast_res")
     self.blast = blastfh.read()
     self.d, self.pseudo = group_cds(self.blast, self.saccn)
开发者ID:gturco,项目名称:pseudo,代码行数:9,代码来源:psudo_test.py

示例5: main

def main(bedfile,seqfile, gene_list):
    print "position,gene,element"
    b = Bed(bedfile)
    f = Fasta(seqfile)
    for gene_name in gene_list:
        gene = b.accn(gene_name)
        promf, promr = get_prom(f, gene)
        print gene_name
        mf = find_seq(promf)
        mr = find_seq(promr)
        make_graph(mf,mr, gene_name)
开发者ID:gturco,项目名称:FFL_tools,代码行数:11,代码来源:find_TERE.py

示例6: setUp

    def setUp(self):
        handle = open("/Users/gturco/code/freeling_lab/find_cns_gturco/pipeline/tests/blast_3.txt")
        fh = handle.readlines()
        self.blast_str = " , ".join(fh)
        self.unmasked_fasta = Fasta("/Users/gturco/find_cns/maize_v2_UM.fasta")

        self.qbed = Bed("/Users/gturco/rice_maize/rice_v6.bed")
        self.qbed.fill_dict()
        self.sbed = Bed("/Users/gturco/maize/maize_v2.bed", "/Users/gturco/maize/maize_v2.fasta")
        self.sbed.fill_dict()
        self.sfeat = self.sbed.accn("GRMZM2G086714")
        self.qfeat = self.qbed.accn("Os09g27050")
开发者ID:gturco,项目名称:find_cns,代码行数:12,代码来源:test_find_cns_maize.py

示例7: TestPseudo

class TestPseudo(unittest.TestCase):
    def setUp(self):
        self.qallbed = Bed("data/rice_v6_setaria64/rice_v6.all.bed", "data/rice_v6_setaria64/rice_v6.fasta")
        self.qallbed.fill_dict()
        self.sallbed = Bed("data/rice_v6_setaria64/setaria64.all.bed", "data/rice_v6_setaria64/setaria64.fasta")
        self.sallbed.fill_dict()
        self.saccn = self.sallbed.accn("Si000834m")
        blastfh = open("blast_res")
        self.blast = blastfh.read()
        self.d, self.pseudo = group_cds(self.blast, self.saccn)

    def test_group_cds_1(self):
        self.assertEqual(len(self.d.keys()), 4)
        total_values = []
        for key in self.d.keys():
            values = len(self.d[key])
            total_values.append(values)
        self.assertEqual(sum(total_values), 38)

    def test_group_cds_2(self):
        blast_2fh = open("blast_2")
        blast_2 = blast_2fh.read()

        d, pseudo = group_cds(blast_2, self.sallbed.accn("Si002524m"))

        self.assertEqual(len(d.keys()), 5)
        for key in d.keys():
            # logging.info('key: {0}'.format(key))

            self.assertEqual(1, len(d[key]))

    def test_append_to_included_groups(self):
        locs = [1, 2, 3, 4]
        group_dict = {(2, 5): [], (3, 6): [], (9, 8): []}

        result_dict = append_to_included_groups(locs, group_dict)
        expected = {(2, 5): [(1, 2, 3, 4)], (3, 6): [(1, 2, 3, 4)], (9, 8): []}

        self.assertEquals(expected, result_dict)

    def test_remove_crossing_hit(self):
        qaccn = self.qallbed.accn("Os01g01890")
        for group_key in self.d.keys():
            exon_hits = self.d[group_key]
            non_crossing = remove_crossing_hits(exon_hits, qaccn, self.saccn)
            if len(non_crossing) > 1:
                mid, start, stop = bites(non_crossing)

    def test_find_orf(self):
        qaccn = self.qallbed.accn("Os01g01295")
        orf = find_orf(self.qallbed, qaccn)
        self.assertEqual(orf + 1, 141084)

    def test_find_orf_neg(self):
        saccn = self.sallbed.accn("Si001539m")
        orf = find_orf(self.sallbed, saccn)
        self.assertEqual(orf, 7662777)
开发者ID:gturco,项目名称:pseudo,代码行数:57,代码来源:psudo_test.py

示例8: write_new_bed

def write_new_bed(gene_list, old_bed, missed_genes,out_file):
    merge_fh = open(out_file,"wb")
    hit_list = [hit for hit,qaccn in missed_genes]
    for i,gene in enumerate(old_bed):
        if gene["accn"] in hit_list: continue
        new_line = Bed.row_string(gene)
        merge_fh.write("{0}\n".format(new_line))
    for i,new_gene in enumerate(gene_list):
        ### merge overlapping here
        updated_feat = gene_list[new_gene]
        if len(updated_feat["locs"]) > 1:
            updated_feat = merge_feats(updated_feat)
        new_line = Bed.row_string(updated_feat)
        merge_fh.write("{0}\n".format(new_line))
开发者ID:gturco,项目名称:co-anno,代码行数:14,代码来源:merge.py

示例9: parse_dups

def parse_dups(dups_file, flat):
    #####THIS ONLY WORKS IF WE CHANGE QUOTA
    flat.fill_dict()
    dup_dic = {}
    seen = []

    for line in open(dups_file):
        line = line.strip().split("\t")
        parent = line[0]
        dups = line[1:]
        
        all = [Bed.row_to_dict(flat.d[f]) for f in list(set(line))]
        all.sort(key=operator.itemgetter('start'))
        dup_start = all[0]
        dup_end = all[-1]
        dup_dic[parent] = 'P'
        seen += [parent]
        for dup in dups:
            if dup in seen: continue
            seen.append(dup)
            dup_dic[dup] = parent
        # so here, there are all the genes that arent part of the local dup
        # array, but we want to mark them with 'I'
        intervening = flat.get_features_in_region(dup_start['seqid'], dup_start['start'], dup_end['end'])
        for ii in intervening:
            if ii['accn'] == parent or ii['accn'] == dup_end: continue
            if not ii['accn'] in dup_dic.keys():
                dup_dic[ii['accn']] = 'I'
    return dup_dic
开发者ID:gturco,项目名称:find_cns,代码行数:29,代码来源:make_genelist_gt.py

示例10: main

def main(cns_path, fmt, query_bed_path, subject_bed_path):
  cns_dic = cns_to_dic(cns_path,fmt)
  query_bed = Bed(query_bed_path)
  subject_bed = Bed(subject_bed_path)
  utr_dict = {}
  for cns in cns_dic:
    cns['qstop'] = int(cns['qstop'])
    cns['qstart'] = int(cns['qstart'])
    cns['sstop'] = int(cns['sstop'])
    cns['sstart'] = int(cns['sstart'])
 
    qfeat = query_bed.accn(cns['qaccn'])
    sfeat = subject_bed.accn(cns['saccn']) 
    qgene_space_start = min(qfeat['locs'])[0]
    qgene_space_end = max(qfeat['locs'])[1]
    qgene_space_poly = LineString([(0.0, qgene_space_start), (0.0, qgene_space_end)])
    qgene_poly = LineString([(0.0, qfeat['start']), (0.0, qfeat['end'])])
    sgene_poly = LineString([(0.0, sfeat['start']), (0.0, sfeat['end'])])
    # if intron of one dont need to check other
    qcns = LineString([(0,cns['qstart']),(0,cns['qstop'])])
    scns = LineString([(0,cns['sstart']),(0,cns['sstop'])])
    cns_type(cns,qgene_space_poly, qgene_poly, sgene_poly, scns, qcns,qgene_space_start,qfeat)
    create_utr_list(utr_dict,qfeat, cns,"q")
    create_utr_list(utr_dict,sfeat, cns,"s")
  for cns in cns_dic:
    if cns['type'] == "5-prox_dist":
      qgene_start = min(utr_dict[cns['qaccn']])
      qgene_stop =  max(utr_dict[cns['qaccn']])
      # sstart = min(utr_dict[cns['saccn']])
      # sstop =  max(utr_dict[cns['saccn']])
      five_diff_pos = abs(qgene_start - cns["qstop"])
      five_diff_neg = abs(qgene_stop - cns["qstart"])
      if five_diff_pos <=1000 and cns["qstrand"] == "+" or five_diff_neg <=1000 and cns["qstrand"] == "-":
        cns["type"] = "5-proximal"
      elif five_diff_pos >1000 and cns["qstrand"] == "+" or five_diff_neg >1000 and cns["qstrand"] == "-":
        cns["type"] = "5-distal"
    elif cns['type'] == "3-prox_dist":
      qgene_start = min(utr_dict[cns['qaccn']])
      qgene_stop =  max(utr_dict[cns['qaccn']])
      three_diff_pos =  abs(cns["qstart"] - qgene_stop)
      three_diff_neg =  abs(cns["qstop"] - qgene_start)
      if three_diff_pos <=1000 and cns["qstrand"] == "+" or three_diff_neg <=1000 and cns["qstrand"] == "-":
        cns["type"] = "3-proximal"
      elif three_diff_pos > 1000 and cns["qstrand"] == "+" or three_diff_neg > 1000 and cns["qstrand"] == "-":
        cns["type"] = "3-distal"
  return cns_dic
开发者ID:gturco,项目名称:find_cns,代码行数:46,代码来源:cns_location_csv.py

示例11: main

def main(cns_file,bedpath,fastapath):
    genespace = get_genespace(cns_file)
    bed = Bed(bedpath)
    f = Fasta(fastapath)
    handles = ['3_utr','5_utr','intronic','5_prox','5_distal','3_prox','3_distal']
    fhs = open_files(handles)
    for gene in genespace.keys():
        #cnsspace = genespace[gene]
        try:
            accn = bed.accn(gene)
        except KeyError: continue
        cnsspace = [(max(0,accn['start'] - 12000), accn['end'] + 12000)]
        #print "GENESPACE {0}".format(cnsspace)
        locs = accn['locs']
        locs.sort()
        cnsspace.sort()
        write_to_pos_fasta(bed,accn,locs,cnsspace,fhs,f)
开发者ID:gturco,项目名称:random_bio_tools,代码行数:17,代码来源:control_locations.py

示例12: LocalDups

class LocalDups(object):
    def __init__(self,filename,bed):
        self.filename = filename
        self.bed = Bed(bed)
        self.bed.fill_dict()

    def get_order_dups(self):
        d = {}
        for line in open(self.filename):
            dupline = DupLine(line)
            dups = dupline.get_order(self.bed)
            d[dups[0]['accn']] = "P"
            for dup in dups[1:]:
                d[dup['accn']] = dups[0]['accn']
            intervening = dupline.get_interving_genes(self.bed)
            for i in intervening:
                if i in d.keys():continue
                d[i] = "I"
        self.filename.close()
        return d

    def write_ordered(self,out_fh):
        """write localdups to outfile"""
        localdup_fh = open(out_fh, "w")
        d = {}
        for line in open(self.filename):
            dupline = DupLine(line)
            dups = dupline.get_order(self.bed)
            line = "{0}\n".format("\t".join(dups))
            localdup_fh.write(line)
        localdup_fh.close()


    def get_dups(self):
        d = {}
        for  line in open(self.filename):
            dupline = DupLine(line)
            d[dupline.parent] = 'P'
            for dup in dupline.children:
                d[dup] = dupline.parent
            intervening = dupline.get_interving_genes(self.bed)
            for i in intervening:
                if i in d.keys(): continue
                d[i] = "I"
        self.filename.close()
        return d
开发者ID:gturco,项目名称:find_cns,代码行数:46,代码来源:cleanup.py

示例13: write_genelist

def write_genelist(q_or_s, outfile, flat, pairs, orthos, mcnss, link_fmt, this_org, other_org,
        other_flat, dups, local_dups):
    # used in the link_fmt
    qorg, sorg = this_org, other_org

    fmt = "%(accn)s\t%(seqid)s\t%(start)i\t%(end)i\t%(ortholog)s\t%(ortho_cns)s\t"
    fmt +="%(regional_dup_info)s\t%(local_dup_info)s\t%(strand)s\t"
    fmt += "%(new_gene_info)s\t%(link)s"
    header = fmt.replace('%(', '').replace(')s','').replace(')i','')

    outdir = op.dirname(flat.path)
    annos = dict([kv.rstrip().split(",") for kv in open("%s/%s_protein_rna.anno" % (outdir, q_or_s))])
    if flat.path == other_flat.path:
        annos.update(dict([kv.rstrip().split(",") for kv in open("%s/s_protein_rna.anno" % (outdir,))]))

    out = open(outfile, 'w')
    print >>sys.stderr, "writing genelist to %s" % (outfile,)
    print >>out, header.replace('ortho_', other_org + '_')

    same_org = this_org == other_org
    for feat in flat:

        these_pairs = pairs.get(feat['accn'], [])
        cnss = mcnss.get(feat['accn'], [])

        ortholog, other_pairs = split_pairs(feat, [other_flat.d[t] for t in these_pairs], orthos, q_or_s=='s')
        ortho_cns, non_ortho_cns = split_cns(cnss, orthos, q_or_s=='s')
        regional_dup_info = dups.get(feat['accn'], '')
        local_dup_info = local_dups.get(feat['accn'], '')

        if ortholog:
            ortho = ortholog[0]
            link = link_fmt % dict(qorg=qorg, sorg=sorg,
                                   accn1=ortho['accn'], accn2=feat['accn']
                                  )
        else:
            link = ''

        new_gene_info = ""
        if feat['accn'].endswith(("_cns_protein", "_cns_rna")):
            try:
                new_gene_info = annos[feat['accn']]
            except KeyError: # from coannoation of previous run.
                pass

        ortholog = len(ortholog) and ",".join([o["accn"] for o in ortholog]) or ""
        if len(ortho_cns) > 0 and len(ortholog) == 0:
           print >>sys.stderr, "\nBAD", feat, "\n", ortho_cns, "\nthese:", these_pairs, "\nother:", other_pairs, "\n\n"
           # fell right on the edge of a syntenic block. the cns got in, but not the gene.
           #1/0

        other_pairs = ",".join([o["accn"] for o in other_pairs])
        fmt_dict = locals()
        fmt_dict.update(Bed.row_to_dict(feat))
        fmt_dict.update({'ortho_cns': len(ortho_cns) if ortholog else "",
                         'ortho_NON_cns_count': len(non_ortho_cns) if
                         other_pairs else ""})
        print >>out, fmt % fmt_dict
开发者ID:gturco,项目名称:find_cns,代码行数:58,代码来源:make_genelist_gt.py

示例14: merge_same_hits

def merge_same_hits(missed, fh_match, org_bed):
    """ groups genes that hit more then once """
    d = {}
    handle = open(fh_match)
    matches = handle.read()
    org_bed_path = org_bed.path
    path = org_bed_path.split('/')
    dirc = '/'.join(path[:-1])
    org = path[-1]
    fh = open('{0}/missed_from_{1}'.format(dirc,org), "wb")
    for match in matches.split('\n')[:-1]:
        qaccn,saccn = match.split('\t')
        #create dictionary
        try:
            seqid = missed.accn(qaccn)['seqid']
            haccn = missed.accn(qaccn)
        except KeyError: continue
        #if near_gene(haccn,org_bed)==True: continue
        if (seqid,saccn) not in d.keys():
            #append whole dict to keys
            d[(seqid,saccn)]= missed.accn(qaccn)
        else:
            #else add locs to exsting one
            gene_start = min(d[(seqid,saccn)]['locs'])[0]
            gene_end = max(d[(seqid,saccn)]['locs'])[1]
            missed_end = missed.accn(qaccn)['locs'][0][1]
            missed_start = missed.accn(qaccn)['locs'][0][0]
            if missed_end < gene_start:
                # if no intervening genes and they are close together...
                intervening_genes = get_intervening_genes(missed_end,gene_start,seqid, org_bed, d[(seqid,saccn)]['accn'])
                if intervening_genes is False:
                    d[(seqid,saccn)]['locs'] =  d[(seqid,saccn)]['locs'] + missed.accn(qaccn)['locs']
                    d[(seqid,saccn)]['start'] = missed_start
                    if 'Os' in qaccn:
		    	        d[seqid,saccn]['accn'] = qaccn
                else:
                    d[(seqid,qaccn)] = missed.accn(qaccn)
            elif gene_end < missed_start:
                intervening_genes = get_intervening_genes(gene_end,missed_start,seqid, org_bed,d[(seqid,saccn)]["accn"])
                if intervening_genes is False:
                    d[(seqid,saccn)]['locs'] =  d[(seqid,saccn)]['locs'] + missed.accn(qaccn)['locs']
                    d[(seqid,saccn)]['end'] = missed_end
                    if 'Os' in qaccn:
                        d[seqid,saccn]['accn'] = qaccn
                else:
                    d[(seqid,qaccn)]= missed.accn(qaccn)
            else:
                d[(seqid,saccn)]['locs'] =  d[(seqid,saccn)]['locs'] + missed.accn(qaccn)['locs']
        
    for key in d.keys():
        new_row = d[key]['locs'].sort()
        row = d[key]
        print >>fh, Bed.row_string(row)
开发者ID:gturco,项目名称:find_cns,代码行数:53,代码来源:merge.py

示例15: merge

def merge(org_bed, missed, merge_file):
    """creates blast.all file and updates everything"""
    merge_fh = open(merge_file, "w")
    #cds_missed = missed[missed['ftype'] == 'CDS']
    #count = org_bed.shape[0] + missed[missed['ftype'] !='CDS'].shape[0]
    new_rows = []
    seen_accns = {}
    # CDS added to existing gene.
    for row_missed in missed:
        if row_missed['accn'] in seen_accns: continue
        try:
            org_bed_row = org_bed.accn(row_missed['accn'])
             # it's a CDS
        except KeyError:
            #its a new gene
            new_rows.append(row_missed)
            seen_accns[row_missed['accn']] = True
            continue
        locs_interval = Intersecter()
        [locs_interval.add_interval(Feature(start,stop)) for start,stop in org_bed_row['locs']]
        for missed_start,missed_end in row_missed['locs']:
            if len(locs_interval.find(missed_start,missed_end)) > 0:
#                print >>sys.stderr, org_bed_row['accn']
                locs_intersects = [(l.start,l.stop) for l in locs_interval.find(missed_start,missed_end)]
                [org_bed_row['locs'].remove(locs_intersect) for locs_intersect in locs_intersects]
                locs_intersects = set(locs_intersects)
		locs_intersects.add((missed_start,missed_end))
                locs_start = min([start for start,end in locs_intersects])
                locs_end = max([end for start,end in locs_intersects])
                org_bed_row['locs'] = org_bed_row['locs'] + [(locs_start,locs_end)]
                row_missed['locs'].remove((missed_start,missed_end))

        org_bed_row['locs'] = org_bed_row['locs'] + row_missed['locs']
        #print >>sys.stderr, "{0},{1}".format(row_missed['accn'], locs)
        org_bed_row['locs'].sort()
        org_bed_row['start'] = min(min([start for start,end in org_bed_row['locs']]), org_bed_row['start'])
        org_bed_row['end'] = max(max([end for start,end in org_bed_row['locs']]), org_bed_row['end'])
        new_rows.append(org_bed_row)
        seen_accns[org_bed_row['accn']] =True

    for org_bed_rw in org_bed:
        if org_bed_rw['accn'] not in seen_accns:
            new_rows.append(org_bed_rw)
            seen_accns[org_bed_rw['accn']] =True

    def row_cmp(a,b):
        return cmp(a['seqid'], b['seqid']) or cmp(a['start'], b['start'])


    new_rows.sort(cmp=row_cmp)
    #print >>merge_fh, "\t".join(Bed.names)
    for i, row in enumerate(new_rows):
        print >>merge_fh, Bed.row_string(row)
开发者ID:gturco,项目名称:find_cns,代码行数:53,代码来源:merge.py


注:本文中的flatfeature.Bed类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。