本文整理汇总了Python中flatfeature.Bed类的典型用法代码示例。如果您正苦于以下问题:Python Bed类的具体用法?Python Bed怎么用?Python Bed使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Bed类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TestAssign
class TestAssign(unittest.TestCase):
def setUp(self):
self.cns_filename = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.cns.txt"
self.pairsfile = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.pairs.txt"
self.qbed = Bed("data/rice_v6_sorghum_v1/rice_v6.bed") ;self.qbed.fill_dict()
self.sbed = Bed("data/rice_v6_sorghum_v1/sorghum_v1.bed") ;self.sbed.fill_dict()
self.cns_dict, self.evalue_dict = get_cns_dict(self.cns_filename)
self.qpair_map, self.spair_map = make_pair_maps(self.pairsfile, "pair", self.qbed, self.sbed)
def test_get_cns_dict(self):
"""test for test_get_cns_dict"""
#print self.cns_dict.keys()
print "keys!", self.evalue_dict.keys()
def test_assign(self):
assign(self.cns_dict, self.qbed, self.sbed, self.qpair_map, self.spair_map)
def test_cns_fmt_dict(self):
for cns, qfeat, sfeat in assign(self.cns_dict, self.qbed, self.sbed, self.qpair_map, self.spair_map):
d = cns_fmt_dict(cns, qfeat, sfeat, self.evalue_dict)
print "dddddddd", d
def test_main(self):
pass
示例2: setUp
def setUp(self):
self.cns_filename = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.cns.txt"
self.pairsfile = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.pairs.txt"
self.qbed = Bed("data/rice_v6_sorghum_v1/rice_v6.bed") ;self.qbed.fill_dict()
self.sbed = Bed("data/rice_v6_sorghum_v1/sorghum_v1.bed") ;self.sbed.fill_dict()
self.cns_dict, self.evalue_dict = get_cns_dict(self.cns_filename)
self.qpair_map, self.spair_map = make_pair_maps(self.pairsfile, "pair", self.qbed, self.sbed)
示例3: utr_present
def utr_present(cns_pck,query_bed_path, UTR):
"checks to see if qaccn has utr region"
db = MySQLdb.connect(host="127.0.0.1", user="root", db = "rice_gene_table")
cursor = db.cursor()
cns_handle = open(cns_pck)
cns_pickle = pickle.load(cns_handle)
query_bed = Bed(query_bed_path)
for cns in cns_pickle:
qfeat = query_bed.accn(cns['qaccn'])
if qfeat['strand'] == "+":
end = qfeat['end']
start = qfeat["start"]
else:
end = qfeat['start']
start = qfeat["end"]
if UTR == 3:
if end == min(qfeat['locs'])[0] or end == max(qfeat['locs'])[1]:
stmt = "update MUSA_GENE_LIST_copy set MUSA_GENE_LIST_copy.3_UTR = 'ND' where MUSA_GENE_LIST_copy.Rice_MSU6_genes = '{0}'".format(cns['qaccn'])
print stmt
cursor.execute(stmt)
elif UTR == 5:
if start == min(qfeat['locs'])[0] or start == max(qfeat['locs'])[1]:
stmt = "update MUSA_GENE_LIST_copy set MUSA_GENE_LIST_copy.5_UTR = 'ND' where MUSA_GENE_LIST_copy.Rice_MSU6_genes = '{0}'".format(cns['qaccn'])
print stmt
cursor.execute(stmt)
示例4: setUp
def setUp(self):
self.qallbed = Bed("data/rice_v6_setaria64/rice_v6.all.bed", "data/rice_v6_setaria64/rice_v6.fasta")
self.qallbed.fill_dict()
self.sallbed = Bed("data/rice_v6_setaria64/setaria64.all.bed", "data/rice_v6_setaria64/setaria64.fasta")
self.sallbed.fill_dict()
self.saccn = self.sallbed.accn("Si000834m")
blastfh = open("blast_res")
self.blast = blastfh.read()
self.d, self.pseudo = group_cds(self.blast, self.saccn)
示例5: main
def main(bedfile,seqfile, gene_list):
print "position,gene,element"
b = Bed(bedfile)
f = Fasta(seqfile)
for gene_name in gene_list:
gene = b.accn(gene_name)
promf, promr = get_prom(f, gene)
print gene_name
mf = find_seq(promf)
mr = find_seq(promr)
make_graph(mf,mr, gene_name)
示例6: setUp
def setUp(self):
handle = open("/Users/gturco/code/freeling_lab/find_cns_gturco/pipeline/tests/blast_3.txt")
fh = handle.readlines()
self.blast_str = " , ".join(fh)
self.unmasked_fasta = Fasta("/Users/gturco/find_cns/maize_v2_UM.fasta")
self.qbed = Bed("/Users/gturco/rice_maize/rice_v6.bed")
self.qbed.fill_dict()
self.sbed = Bed("/Users/gturco/maize/maize_v2.bed", "/Users/gturco/maize/maize_v2.fasta")
self.sbed.fill_dict()
self.sfeat = self.sbed.accn("GRMZM2G086714")
self.qfeat = self.qbed.accn("Os09g27050")
示例7: TestPseudo
class TestPseudo(unittest.TestCase):
def setUp(self):
self.qallbed = Bed("data/rice_v6_setaria64/rice_v6.all.bed", "data/rice_v6_setaria64/rice_v6.fasta")
self.qallbed.fill_dict()
self.sallbed = Bed("data/rice_v6_setaria64/setaria64.all.bed", "data/rice_v6_setaria64/setaria64.fasta")
self.sallbed.fill_dict()
self.saccn = self.sallbed.accn("Si000834m")
blastfh = open("blast_res")
self.blast = blastfh.read()
self.d, self.pseudo = group_cds(self.blast, self.saccn)
def test_group_cds_1(self):
self.assertEqual(len(self.d.keys()), 4)
total_values = []
for key in self.d.keys():
values = len(self.d[key])
total_values.append(values)
self.assertEqual(sum(total_values), 38)
def test_group_cds_2(self):
blast_2fh = open("blast_2")
blast_2 = blast_2fh.read()
d, pseudo = group_cds(blast_2, self.sallbed.accn("Si002524m"))
self.assertEqual(len(d.keys()), 5)
for key in d.keys():
# logging.info('key: {0}'.format(key))
self.assertEqual(1, len(d[key]))
def test_append_to_included_groups(self):
locs = [1, 2, 3, 4]
group_dict = {(2, 5): [], (3, 6): [], (9, 8): []}
result_dict = append_to_included_groups(locs, group_dict)
expected = {(2, 5): [(1, 2, 3, 4)], (3, 6): [(1, 2, 3, 4)], (9, 8): []}
self.assertEquals(expected, result_dict)
def test_remove_crossing_hit(self):
qaccn = self.qallbed.accn("Os01g01890")
for group_key in self.d.keys():
exon_hits = self.d[group_key]
non_crossing = remove_crossing_hits(exon_hits, qaccn, self.saccn)
if len(non_crossing) > 1:
mid, start, stop = bites(non_crossing)
def test_find_orf(self):
qaccn = self.qallbed.accn("Os01g01295")
orf = find_orf(self.qallbed, qaccn)
self.assertEqual(orf + 1, 141084)
def test_find_orf_neg(self):
saccn = self.sallbed.accn("Si001539m")
orf = find_orf(self.sallbed, saccn)
self.assertEqual(orf, 7662777)
示例8: write_new_bed
def write_new_bed(gene_list, old_bed, missed_genes,out_file):
merge_fh = open(out_file,"wb")
hit_list = [hit for hit,qaccn in missed_genes]
for i,gene in enumerate(old_bed):
if gene["accn"] in hit_list: continue
new_line = Bed.row_string(gene)
merge_fh.write("{0}\n".format(new_line))
for i,new_gene in enumerate(gene_list):
### merge overlapping here
updated_feat = gene_list[new_gene]
if len(updated_feat["locs"]) > 1:
updated_feat = merge_feats(updated_feat)
new_line = Bed.row_string(updated_feat)
merge_fh.write("{0}\n".format(new_line))
示例9: parse_dups
def parse_dups(dups_file, flat):
#####THIS ONLY WORKS IF WE CHANGE QUOTA
flat.fill_dict()
dup_dic = {}
seen = []
for line in open(dups_file):
line = line.strip().split("\t")
parent = line[0]
dups = line[1:]
all = [Bed.row_to_dict(flat.d[f]) for f in list(set(line))]
all.sort(key=operator.itemgetter('start'))
dup_start = all[0]
dup_end = all[-1]
dup_dic[parent] = 'P'
seen += [parent]
for dup in dups:
if dup in seen: continue
seen.append(dup)
dup_dic[dup] = parent
# so here, there are all the genes that arent part of the local dup
# array, but we want to mark them with 'I'
intervening = flat.get_features_in_region(dup_start['seqid'], dup_start['start'], dup_end['end'])
for ii in intervening:
if ii['accn'] == parent or ii['accn'] == dup_end: continue
if not ii['accn'] in dup_dic.keys():
dup_dic[ii['accn']] = 'I'
return dup_dic
示例10: main
def main(cns_path, fmt, query_bed_path, subject_bed_path):
cns_dic = cns_to_dic(cns_path,fmt)
query_bed = Bed(query_bed_path)
subject_bed = Bed(subject_bed_path)
utr_dict = {}
for cns in cns_dic:
cns['qstop'] = int(cns['qstop'])
cns['qstart'] = int(cns['qstart'])
cns['sstop'] = int(cns['sstop'])
cns['sstart'] = int(cns['sstart'])
qfeat = query_bed.accn(cns['qaccn'])
sfeat = subject_bed.accn(cns['saccn'])
qgene_space_start = min(qfeat['locs'])[0]
qgene_space_end = max(qfeat['locs'])[1]
qgene_space_poly = LineString([(0.0, qgene_space_start), (0.0, qgene_space_end)])
qgene_poly = LineString([(0.0, qfeat['start']), (0.0, qfeat['end'])])
sgene_poly = LineString([(0.0, sfeat['start']), (0.0, sfeat['end'])])
# if intron of one dont need to check other
qcns = LineString([(0,cns['qstart']),(0,cns['qstop'])])
scns = LineString([(0,cns['sstart']),(0,cns['sstop'])])
cns_type(cns,qgene_space_poly, qgene_poly, sgene_poly, scns, qcns,qgene_space_start,qfeat)
create_utr_list(utr_dict,qfeat, cns,"q")
create_utr_list(utr_dict,sfeat, cns,"s")
for cns in cns_dic:
if cns['type'] == "5-prox_dist":
qgene_start = min(utr_dict[cns['qaccn']])
qgene_stop = max(utr_dict[cns['qaccn']])
# sstart = min(utr_dict[cns['saccn']])
# sstop = max(utr_dict[cns['saccn']])
five_diff_pos = abs(qgene_start - cns["qstop"])
five_diff_neg = abs(qgene_stop - cns["qstart"])
if five_diff_pos <=1000 and cns["qstrand"] == "+" or five_diff_neg <=1000 and cns["qstrand"] == "-":
cns["type"] = "5-proximal"
elif five_diff_pos >1000 and cns["qstrand"] == "+" or five_diff_neg >1000 and cns["qstrand"] == "-":
cns["type"] = "5-distal"
elif cns['type'] == "3-prox_dist":
qgene_start = min(utr_dict[cns['qaccn']])
qgene_stop = max(utr_dict[cns['qaccn']])
three_diff_pos = abs(cns["qstart"] - qgene_stop)
three_diff_neg = abs(cns["qstop"] - qgene_start)
if three_diff_pos <=1000 and cns["qstrand"] == "+" or three_diff_neg <=1000 and cns["qstrand"] == "-":
cns["type"] = "3-proximal"
elif three_diff_pos > 1000 and cns["qstrand"] == "+" or three_diff_neg > 1000 and cns["qstrand"] == "-":
cns["type"] = "3-distal"
return cns_dic
示例11: main
def main(cns_file,bedpath,fastapath):
genespace = get_genespace(cns_file)
bed = Bed(bedpath)
f = Fasta(fastapath)
handles = ['3_utr','5_utr','intronic','5_prox','5_distal','3_prox','3_distal']
fhs = open_files(handles)
for gene in genespace.keys():
#cnsspace = genespace[gene]
try:
accn = bed.accn(gene)
except KeyError: continue
cnsspace = [(max(0,accn['start'] - 12000), accn['end'] + 12000)]
#print "GENESPACE {0}".format(cnsspace)
locs = accn['locs']
locs.sort()
cnsspace.sort()
write_to_pos_fasta(bed,accn,locs,cnsspace,fhs,f)
示例12: LocalDups
class LocalDups(object):
def __init__(self,filename,bed):
self.filename = filename
self.bed = Bed(bed)
self.bed.fill_dict()
def get_order_dups(self):
d = {}
for line in open(self.filename):
dupline = DupLine(line)
dups = dupline.get_order(self.bed)
d[dups[0]['accn']] = "P"
for dup in dups[1:]:
d[dup['accn']] = dups[0]['accn']
intervening = dupline.get_interving_genes(self.bed)
for i in intervening:
if i in d.keys():continue
d[i] = "I"
self.filename.close()
return d
def write_ordered(self,out_fh):
"""write localdups to outfile"""
localdup_fh = open(out_fh, "w")
d = {}
for line in open(self.filename):
dupline = DupLine(line)
dups = dupline.get_order(self.bed)
line = "{0}\n".format("\t".join(dups))
localdup_fh.write(line)
localdup_fh.close()
def get_dups(self):
d = {}
for line in open(self.filename):
dupline = DupLine(line)
d[dupline.parent] = 'P'
for dup in dupline.children:
d[dup] = dupline.parent
intervening = dupline.get_interving_genes(self.bed)
for i in intervening:
if i in d.keys(): continue
d[i] = "I"
self.filename.close()
return d
示例13: write_genelist
def write_genelist(q_or_s, outfile, flat, pairs, orthos, mcnss, link_fmt, this_org, other_org,
other_flat, dups, local_dups):
# used in the link_fmt
qorg, sorg = this_org, other_org
fmt = "%(accn)s\t%(seqid)s\t%(start)i\t%(end)i\t%(ortholog)s\t%(ortho_cns)s\t"
fmt +="%(regional_dup_info)s\t%(local_dup_info)s\t%(strand)s\t"
fmt += "%(new_gene_info)s\t%(link)s"
header = fmt.replace('%(', '').replace(')s','').replace(')i','')
outdir = op.dirname(flat.path)
annos = dict([kv.rstrip().split(",") for kv in open("%s/%s_protein_rna.anno" % (outdir, q_or_s))])
if flat.path == other_flat.path:
annos.update(dict([kv.rstrip().split(",") for kv in open("%s/s_protein_rna.anno" % (outdir,))]))
out = open(outfile, 'w')
print >>sys.stderr, "writing genelist to %s" % (outfile,)
print >>out, header.replace('ortho_', other_org + '_')
same_org = this_org == other_org
for feat in flat:
these_pairs = pairs.get(feat['accn'], [])
cnss = mcnss.get(feat['accn'], [])
ortholog, other_pairs = split_pairs(feat, [other_flat.d[t] for t in these_pairs], orthos, q_or_s=='s')
ortho_cns, non_ortho_cns = split_cns(cnss, orthos, q_or_s=='s')
regional_dup_info = dups.get(feat['accn'], '')
local_dup_info = local_dups.get(feat['accn'], '')
if ortholog:
ortho = ortholog[0]
link = link_fmt % dict(qorg=qorg, sorg=sorg,
accn1=ortho['accn'], accn2=feat['accn']
)
else:
link = ''
new_gene_info = ""
if feat['accn'].endswith(("_cns_protein", "_cns_rna")):
try:
new_gene_info = annos[feat['accn']]
except KeyError: # from coannoation of previous run.
pass
ortholog = len(ortholog) and ",".join([o["accn"] for o in ortholog]) or ""
if len(ortho_cns) > 0 and len(ortholog) == 0:
print >>sys.stderr, "\nBAD", feat, "\n", ortho_cns, "\nthese:", these_pairs, "\nother:", other_pairs, "\n\n"
# fell right on the edge of a syntenic block. the cns got in, but not the gene.
#1/0
other_pairs = ",".join([o["accn"] for o in other_pairs])
fmt_dict = locals()
fmt_dict.update(Bed.row_to_dict(feat))
fmt_dict.update({'ortho_cns': len(ortho_cns) if ortholog else "",
'ortho_NON_cns_count': len(non_ortho_cns) if
other_pairs else ""})
print >>out, fmt % fmt_dict
示例14: merge_same_hits
def merge_same_hits(missed, fh_match, org_bed):
""" groups genes that hit more then once """
d = {}
handle = open(fh_match)
matches = handle.read()
org_bed_path = org_bed.path
path = org_bed_path.split('/')
dirc = '/'.join(path[:-1])
org = path[-1]
fh = open('{0}/missed_from_{1}'.format(dirc,org), "wb")
for match in matches.split('\n')[:-1]:
qaccn,saccn = match.split('\t')
#create dictionary
try:
seqid = missed.accn(qaccn)['seqid']
haccn = missed.accn(qaccn)
except KeyError: continue
#if near_gene(haccn,org_bed)==True: continue
if (seqid,saccn) not in d.keys():
#append whole dict to keys
d[(seqid,saccn)]= missed.accn(qaccn)
else:
#else add locs to exsting one
gene_start = min(d[(seqid,saccn)]['locs'])[0]
gene_end = max(d[(seqid,saccn)]['locs'])[1]
missed_end = missed.accn(qaccn)['locs'][0][1]
missed_start = missed.accn(qaccn)['locs'][0][0]
if missed_end < gene_start:
# if no intervening genes and they are close together...
intervening_genes = get_intervening_genes(missed_end,gene_start,seqid, org_bed, d[(seqid,saccn)]['accn'])
if intervening_genes is False:
d[(seqid,saccn)]['locs'] = d[(seqid,saccn)]['locs'] + missed.accn(qaccn)['locs']
d[(seqid,saccn)]['start'] = missed_start
if 'Os' in qaccn:
d[seqid,saccn]['accn'] = qaccn
else:
d[(seqid,qaccn)] = missed.accn(qaccn)
elif gene_end < missed_start:
intervening_genes = get_intervening_genes(gene_end,missed_start,seqid, org_bed,d[(seqid,saccn)]["accn"])
if intervening_genes is False:
d[(seqid,saccn)]['locs'] = d[(seqid,saccn)]['locs'] + missed.accn(qaccn)['locs']
d[(seqid,saccn)]['end'] = missed_end
if 'Os' in qaccn:
d[seqid,saccn]['accn'] = qaccn
else:
d[(seqid,qaccn)]= missed.accn(qaccn)
else:
d[(seqid,saccn)]['locs'] = d[(seqid,saccn)]['locs'] + missed.accn(qaccn)['locs']
for key in d.keys():
new_row = d[key]['locs'].sort()
row = d[key]
print >>fh, Bed.row_string(row)
示例15: merge
def merge(org_bed, missed, merge_file):
"""creates blast.all file and updates everything"""
merge_fh = open(merge_file, "w")
#cds_missed = missed[missed['ftype'] == 'CDS']
#count = org_bed.shape[0] + missed[missed['ftype'] !='CDS'].shape[0]
new_rows = []
seen_accns = {}
# CDS added to existing gene.
for row_missed in missed:
if row_missed['accn'] in seen_accns: continue
try:
org_bed_row = org_bed.accn(row_missed['accn'])
# it's a CDS
except KeyError:
#its a new gene
new_rows.append(row_missed)
seen_accns[row_missed['accn']] = True
continue
locs_interval = Intersecter()
[locs_interval.add_interval(Feature(start,stop)) for start,stop in org_bed_row['locs']]
for missed_start,missed_end in row_missed['locs']:
if len(locs_interval.find(missed_start,missed_end)) > 0:
# print >>sys.stderr, org_bed_row['accn']
locs_intersects = [(l.start,l.stop) for l in locs_interval.find(missed_start,missed_end)]
[org_bed_row['locs'].remove(locs_intersect) for locs_intersect in locs_intersects]
locs_intersects = set(locs_intersects)
locs_intersects.add((missed_start,missed_end))
locs_start = min([start for start,end in locs_intersects])
locs_end = max([end for start,end in locs_intersects])
org_bed_row['locs'] = org_bed_row['locs'] + [(locs_start,locs_end)]
row_missed['locs'].remove((missed_start,missed_end))
org_bed_row['locs'] = org_bed_row['locs'] + row_missed['locs']
#print >>sys.stderr, "{0},{1}".format(row_missed['accn'], locs)
org_bed_row['locs'].sort()
org_bed_row['start'] = min(min([start for start,end in org_bed_row['locs']]), org_bed_row['start'])
org_bed_row['end'] = max(max([end for start,end in org_bed_row['locs']]), org_bed_row['end'])
new_rows.append(org_bed_row)
seen_accns[org_bed_row['accn']] =True
for org_bed_rw in org_bed:
if org_bed_rw['accn'] not in seen_accns:
new_rows.append(org_bed_rw)
seen_accns[org_bed_rw['accn']] =True
def row_cmp(a,b):
return cmp(a['seqid'], b['seqid']) or cmp(a['start'], b['start'])
new_rows.sort(cmp=row_cmp)
#print >>merge_fh, "\t".join(Bed.names)
for i, row in enumerate(new_rows):
print >>merge_fh, Bed.row_string(row)