本文整理汇总了Python中flatfeature.Bed.fill_dict方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.fill_dict方法的具体用法?Python Bed.fill_dict怎么用?Python Bed.fill_dict使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类flatfeature.Bed的用法示例。
在下文中一共展示了Bed.fill_dict方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TestAssign
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
class TestAssign(unittest.TestCase):
    """Exercise CNS assignment on the rice v6 / sorghum v1 fixture files.

    None of the tests assert on results; they only check that the calls
    run without raising.
    """

    def setUp(self):
        """Load the BED files, the CNS/e-value dicts and the pair maps."""
        self.cns_filename = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.cns.txt"
        self.pairsfile = "data/rice_v6_sorghum_v1/rice_v6_sorghum_v1.pairs.txt"
        self.qbed = Bed("data/rice_v6_sorghum_v1/rice_v6.bed")
        self.qbed.fill_dict()
        self.sbed = Bed("data/rice_v6_sorghum_v1/sorghum_v1.bed")
        self.sbed.fill_dict()
        self.cns_dict, self.evalue_dict = get_cns_dict(self.cns_filename)
        self.qpair_map, self.spair_map = make_pair_maps(
            self.pairsfile, "pair", self.qbed, self.sbed
        )

    def test_get_cns_dict(self):
        """Print the keys of the e-value dict built by get_cns_dict."""
        # Single pre-formatted argument: same output on Python 2 and 3.
        print("keys! %s" % (self.evalue_dict.keys(),))

    def test_assign(self):
        """Run assign() to completion over the fixture data."""
        # assign() is consumed with a for-loop elsewhere; if it is a
        # generator, calling it without iterating would execute nothing,
        # so the result is materialised to make sure the code really runs.
        list(assign(self.cns_dict, self.qbed, self.sbed,
                    self.qpair_map, self.spair_map))

    def test_cns_fmt_dict(self):
        """Format every assigned CNS with cns_fmt_dict and print the last one."""
        for cns, qfeat, sfeat in assign(self.cns_dict, self.qbed, self.sbed,
                                        self.qpair_map, self.spair_map):
            d = cns_fmt_dict(cns, qfeat, sfeat, self.evalue_dict)
        # NOTE(review): raises NameError if assign() yields nothing, exactly
        # as the original did.
        print("dddddddd %s" % (d,))

    def test_main(self):
        """Placeholder; nothing is tested yet."""
        pass
示例2: main
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
def main(cnsfile, qbed_file, sbed_file, pairsfile, pairs_fmt):
    """Assign CNSs to gene pairs, print them as CSV and write GFF files.

    For every (cns, qfeat, sfeat) yielded by ``assign`` a row is printed to
    stdout and passed to ``write_gff``.  The query GFF path is derived from
    ``qbed_file`` by replacing ".bed" with "_cns.gff"; the subject GFF is
    derived the same way from ``sbed_file`` unless both BED paths are equal,
    in which case a single GFF file is shared.

    Args:
        cnsfile: path handed to ``get_cns_dict``.
        qbed_file: path of the query BED file.
        sbed_file: path of the subject BED file.
        pairsfile: path handed to ``make_pair_maps``.
        pairs_fmt: pair format name handed to ``make_pair_maps``.

    Raises:
        AssertionError: if a derived GFF path equals its BED path (the BED
            file would otherwise be overwritten).
    """
    qcns_file = qbed_file.replace(".bed", "_cns.gff")
    # Raised explicitly (rather than via `assert`) so that the guard against
    # truncating the input BED file is still active under `python -O`.
    if qcns_file == qbed_file:
        raise AssertionError("cannot derive a GFF name from %r" % (qbed_file,))

    qcns_gff = open(qcns_file, 'w')
    scns_gff = None
    try:
        qcns_gff.write("##gff-version 3\n")
        if sbed_file != qbed_file:
            scns_file = sbed_file.replace(".bed", "_cns.gff")
            if scns_file == sbed_file:
                raise AssertionError(
                    "cannot derive a GFF name from %r" % (sbed_file,))
            scns_gff = open(scns_file, 'w')
            scns_gff.write("##gff-version 3\n")
        else:
            scns_gff = qcns_gff

        qbed = Bed(qbed_file)
        qbed.fill_dict()
        sbed = Bed(sbed_file)
        sbed.fill_dict()

        cnsdict = get_cns_dict(cnsfile)
        qpair_map, spair_map = make_pair_maps(pairsfile, pairs_fmt, qbed, sbed)
        out = sys.stdout

        fmt = "%(cns_id)s,%(qaccn)s,%(qchr)s,%(qstart)i,%(qstop)i,%(qstrand)s," + \
              "%(saccn)s,%(schr)s,%(sstart)i,%(sstop)i,%(sstrand)s"

        # The header is the format string with the %-markers stripped.
        header = fmt.replace("%(", "").replace(")s", "").replace(")i", "")
        out.write("#" + header + "\n")

        for cns, qfeat, sfeat in assign(cnsdict, qbed, sbed, qpair_map, spair_map):
            d = cns_fmt_dict(cns, qfeat, sfeat)
            d['cns_id'] = cns_id(d)
            # Keep subject coordinates ordered start <= stop.
            if d['sstop'] < d['sstart']:
                d['sstop'], d['sstart'] = d['sstart'], d['sstop']
            out.write(fmt % d + "\n")
            write_gff(d, qcns_gff, scns_gff)
    finally:
        # Close the output handles even if assignment fails part-way; the
        # original never closed them.
        if scns_gff is not None and scns_gff is not qcns_gff:
            scns_gff.close()
        qcns_gff.close()
示例3: TestPseudo
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
class TestPseudo(unittest.TestCase):
    """Tests for grouping blast hits and locating ORFs (rice v6 / setaria64)."""

    def setUp(self):
        """Load both BED+FASTA pairs and group the hits stored in 'blast_res'."""
        self.qallbed = Bed("data/rice_v6_setaria64/rice_v6.all.bed",
                           "data/rice_v6_setaria64/rice_v6.fasta")
        self.qallbed.fill_dict()
        self.sallbed = Bed("data/rice_v6_setaria64/setaria64.all.bed",
                           "data/rice_v6_setaria64/setaria64.fasta")
        self.sallbed.fill_dict()
        self.saccn = self.sallbed.accn("Si000834m")
        # Context manager so the handle is closed (the original leaked it).
        with open("blast_res") as blastfh:
            self.blast = blastfh.read()
        self.d, self.pseudo = group_cds(self.blast, self.saccn)

    def test_group_cds_1(self):
        """The fixture groups into 4 keys holding 38 hits in total."""
        self.assertEqual(len(self.d.keys()), 4)
        total_hits = sum(len(self.d[key]) for key in self.d.keys())
        self.assertEqual(total_hits, 38)

    def test_group_cds_2(self):
        """'blast_2' groups into 5 keys with exactly one hit each."""
        with open("blast_2") as blast_2fh:
            blast_2 = blast_2fh.read()
        d, pseudo = group_cds(blast_2, self.sallbed.accn("Si002524m"))
        self.assertEqual(len(d.keys()), 5)
        for key in d.keys():
            self.assertEqual(1, len(d[key]))

    def test_append_to_included_groups(self):
        """locs are appended (as a tuple) only to the expected group keys."""
        locs = [1, 2, 3, 4]
        group_dict = {(2, 5): [], (3, 6): [], (9, 8): []}
        result_dict = append_to_included_groups(locs, group_dict)
        expected = {(2, 5): [(1, 2, 3, 4)], (3, 6): [(1, 2, 3, 4)], (9, 8): []}
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(expected, result_dict)

    def test_remove_crossing_hit(self):
        """remove_crossing_hits/bites run on every group without raising."""
        qaccn = self.qallbed.accn("Os01g01890")
        for group_key in self.d.keys():
            exon_hits = self.d[group_key]
            non_crossing = remove_crossing_hits(exon_hits, qaccn, self.saccn)
            if len(non_crossing) > 1:
                # NOTE(review): the result is unpacked but never asserted on.
                mid, start, stop = bites(non_crossing)

    def test_find_orf(self):
        """find_orf on Os01g01295 returns 141083 (compared as orf + 1)."""
        qaccn = self.qallbed.accn("Os01g01295")
        orf = find_orf(self.qallbed, qaccn)
        self.assertEqual(orf + 1, 141084)

    def test_find_orf_neg(self):
        """find_orf on Si001539m returns 7662777."""
        saccn = self.sallbed.accn("Si001539m")
        orf = find_orf(self.sallbed, saccn)
        self.assertEqual(orf, 7662777)
示例4: main
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
def main(cnsfile, qbed_file, sbed_file, qorg, sorg, padding):
    """Write one CSV row per assigned CNS, including a 'link' column, to stdout.

    Both BED files are loaded and indexed with ``fill_dict``, the CNS dict is
    read from ``cnsfile``, and each (cns, qfeat, sfeat) produced by ``assign``
    is formatted through ``cns_fmt_dict``.  The 'link' value comes from
    ``assign_url`` and is built from the CNS coordinates, the two organism
    arguments and ``padding``.
    """
    query_bed = Bed(qbed_file)
    query_bed.fill_dict()
    subject_bed = Bed(sbed_file)
    subject_bed.fill_dict()

    cns_lookup = get_cns_dict(cnsfile)
    stream = sys.stdout

    row_template = (
        "%(qaccn)s,%(qchr)s,%(qstart)i,%(qstop)i,%(qstrand)s,"
        "%(saccn)s,%(schr)s,%(sstart)i,%(sstop)i,%(sstrand)s,%(link)s"
    )

    # Header line: the row template with its %-format markers removed.
    column_names = row_template
    for marker in ("%(", ")s", ")i"):
        column_names = column_names.replace(marker, "")
    stream.write("#" + column_names + "\n")

    for cns, qfeat, sfeat in assign(cns_lookup, query_bed, subject_bed):
        row = cns_fmt_dict(cns, qfeat, sfeat)
        row['link'] = assign_url(
            cns.sstart, cns.schr, cns.qstart, cns.qchr, sorg, qorg, padding
        )
        stream.write(row_template % row + "\n")
示例5: main
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
def main(cnsfile, qbed_file, sbed_file, pairsfile, pck, qorg, sorg, padding):
    """Write one CSV row per assigned CNS, with flanking accessions, to stdout.

    Each item produced by ``assign`` is a 5-tuple
    (cns, saccn, saccn_l, saccn_r, qfeat); it is formatted through
    ``cns_fmt_dict`` and given a 'link' value from ``assign_url``.
    """
    query_bed = Bed(qbed_file)
    query_bed.fill_dict()
    subject_bed = Bed(sbed_file)
    subject_bed.fill_dict()

    cns_lookup = get_cns_dict(cnsfile)
    # The whole return value of make_pair_maps is passed on to assign().
    qpair_map = make_pair_maps(pairsfile, 'pair', query_bed, subject_bed)
    stream = sys.stdout

    row_template = (
        "%(saccn)s,%(saccnL)s,%(saccnR)s,%(schr)s,%(sstart)i,%(sstop)i,"
        "%(qaccn)s,%(qchr)s,%(qstart)i,%(qstop)i,%(link)s"
    )

    # Header line: the row template with its %-format markers removed.
    column_names = row_template
    for marker in ("%(", ")s", ")i"):
        column_names = column_names.replace(marker, "")
    stream.write("#" + column_names + "\n")

    for cns, saccn, saccn_l, saccn_r, qfeat in assign(cns_lookup, query_bed, qpair_map):
        row = cns_fmt_dict(cns, qfeat, saccn, saccn_l, saccn_r)
        row['link'] = assign_url(
            cns.sstart, cns.schr, cns.qstart, cns.qchr, qfeat,
            pck, subject_bed, query_bed, sorg, qorg, padding
        )
        stream.write(row_template % row + "\n")
示例6: LocalDups
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
class LocalDups(object):
    """Read a local-duplicates file and classify the genes it mentions.

    Each line of the file is parsed with ``DupLine``; gene lookups go through
    a ``Bed`` object built from the ``bed`` argument.
    """

    def __init__(self, filename, bed):
        """Store the path of the dups file and load/index the BED file.

        Args:
            filename: path of the local-duplicates file (re-opened by every
                method that reads it).
            bed: argument passed to ``Bed``.
        """
        self.filename = filename
        self.bed = Bed(bed)
        self.bed.fill_dict()

    def get_order_dups(self):
        """Return a dict classifying genes, using the ordered duplicates.

        For each line the first ordered duplicate maps to "P", every later
        duplicate maps to the first duplicate's accession, and intervening
        genes not already present map to "I".
        """
        d = {}
        # `self.filename` is a path, so the handle opened here is the thing
        # to close; the original called `.close()` on the path string, which
        # raised AttributeError before the result could be returned.
        with open(self.filename) as dup_fh:
            for line in dup_fh:
                dupline = DupLine(line)
                dups = dupline.get_order(self.bed)
                parent_accn = dups[0]['accn']
                d[parent_accn] = "P"
                for dup in dups[1:]:
                    d[dup['accn']] = parent_accn
                for gene in dupline.get_interving_genes(self.bed):
                    # Never overwrite a parent/duplicate classification.
                    d.setdefault(gene, "I")
        return d

    def write_ordered(self, out_fh):
        """Write the ordered duplicates of every line, tab-separated.

        Args:
            out_fh: path of the output file (despite the name, a path and
                not a file handle; it is passed to ``open``).
        """
        with open(out_fh, "w") as localdup_fh:
            with open(self.filename) as dup_fh:
                for line in dup_fh:
                    dupline = DupLine(line)
                    dups = dupline.get_order(self.bed)
                    # NOTE(review): get_order_dups indexes these items with
                    # ['accn'], so joining them directly may fail unless they
                    # are strings -- confirm against DupLine.get_order.
                    localdup_fh.write("{0}\n".format("\t".join(dups)))

    def get_dups(self):
        """Return a dict classifying genes, using each line's parent/children.

        The line's parent maps to "P", each child maps to the parent, and
        intervening genes not already present map to "I".
        """
        d = {}
        # See get_order_dups: close the opened handle, not the path string.
        with open(self.filename) as dup_fh:
            for line in dup_fh:
                dupline = DupLine(line)
                d[dupline.parent] = 'P'
                for dup in dupline.children:
                    d[dup] = dupline.parent
                for gene in dupline.get_interving_genes(self.bed):
                    d.setdefault(gene, "I")
        return d
示例7: TestMaize
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
class TestMaize(unittest.TestCase):
    """Run parse_blast on a stored rice/maize blast result.

    NOTE(review): every fixture path in setUp is an absolute path under
    /Users/gturco, so these tests only run on that machine.
    """

    def setUp(self):
        """Read the blast fixture and load the rice and maize BED/FASTA data."""
        # Context manager so the handle is closed (the original leaked it).
        with open("/Users/gturco/code/freeling_lab/find_cns_gturco/pipeline/tests/blast_3.txt") as handle:
            blast_lines = handle.readlines()
        # Lines keep their trailing newlines and are joined with " , ".
        self.blast_str = " , ".join(blast_lines)
        self.unmasked_fasta = Fasta("/Users/gturco/find_cns/maize_v2_UM.fasta")

        self.qbed = Bed("/Users/gturco/rice_maize/rice_v6.bed")
        self.qbed.fill_dict()
        self.sbed = Bed("/Users/gturco/maize/maize_v2.bed", "/Users/gturco/maize/maize_v2.fasta")
        self.sbed.fill_dict()
        self.sfeat = self.sbed.accn("GRMZM2G086714")
        self.qfeat = self.qbed.accn("Os09g27050")

    def test_get_cmd(self):
        """Placeholder: only defines two fasta paths, asserts nothing."""
        # NOTE(review): these locals are never used; the test body appears
        # unfinished.
        sfasta = "data/rice_v6_maize_v2/maize_v2_split/2.fasta"
        qfasta = "data/rice_v6_maize_v2/rice_v6_split/4.fasta"

    def test_parse_balse(self):
        """Call parse_blast with orientation -1 and print the result."""
        orientation = -1
        cns = parse_blast(
            self.blast_str, orientation, self.qfeat, self.sfeat,
            self.qbed, self.sbed, 12000, 26000, self.unmasked_fasta
        )
        print(cns)
示例8: str
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
# Command-line entry: parse options, load the BED/FASTA inputs, call main().
import optparse

parser = optparse.OptionParser("usage: %prog [options] ")
# `choices` makes optparse reject anything but "F"/"T" at parse time.  The
# original checked this later with `assert options.mask in 'FT'`, a substring
# test that also accepted "" and "FT" and that is stripped under `python -O`.
parser.add_option("-F", dest="mask", help="blast mask simple sequence [default: F]",
                  default="F", choices=("F", "T"))
parser.add_option("-n", dest="ncpu", help="parallelize to this many cores", type='int', default=8)
parser.add_option("-q", dest="qfasta", help="path to genomic query fasta")
parser.add_option("--qbed", dest="qbed", help="query bed file")
parser.add_option("-s", dest="sfasta", help="path to genomic subject fasta")
parser.add_option("--sbed", dest="sbed", help="subject bed file")
parser.add_option("-p", dest="pairs", help="the pairs file. output from dagchainer")
choices = ("dag", "cluster", "pair", 'qa', 'raw')
parser.add_option("--pair_fmt", dest="pair_fmt", default='raw',
                  help="format of the pairs, one of: %s" % str(choices),
                  choices=choices)
parser.add_option("--qpad", dest="qpad", type='int', default=12000,
                  help="how far from the end of the query gene to look for cnss")
parser.add_option("--spad", dest="spad", type='int', default=26000,
                  help="how far from the end of the subject gene to look for cnss")
parser.add_option("--UMfasta", dest="unmasked_fasta", help="path to unmasked fasta file file")
(options, _) = parser.parse_args()

# The four BED/FASTA paths are mandatory; show the help text and exit if
# any of them is missing.
if not (options.qfasta and options.sfasta and options.sbed and options.qbed):
    sys.exit(parser.print_help())

qbed = Bed(options.qbed, options.qfasta)
qbed.fill_dict()
sbed = Bed(options.sbed, options.sfasta)
sbed.fill_dict()
# NOTE(review): --UMfasta and -p are not checked above, so omitting them
# passes None on to Fasta()/main().
unmasked_fasta = Fasta(options.unmasked_fasta)

main(qbed, sbed, options.pairs, options.qpad, options.spad, unmasked_fasta,
     options.pair_fmt, options.mask, options.ncpu)
示例9: int
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
spos = sbed[raw.pos_b]
key = (raw.seqid_a, raw.seqid_b)
if not key in trees: trees[key] = []
qpos = (qpos['start'] + qpos['end']) / 2
spos = (spos['start'] + spos['end']) / 2
trees[key].append((int(qpos), int(spos)))
for k in trees:
trees[k] = cKDTree(trees[k])
return trees
if __name__ == "__main__":
    # Command-line entry: parse options, load both BED files, run main()
    # and write the updated pairs.
    import optparse

    parser = optparse.OptionParser()
    parser.add_option("--qbed", dest="qbed", help="query bed file")
    parser.add_option("--sbed", dest="sbed", help="subject bed file")
    parser.add_option("--cns", dest="cns", help="path to raw cns")
    parser.add_option("--dist", dest="dist", type='int', help="max dist from gene to cns", default=12000)
    parser.add_option("--paralogy", dest="paralogy", help="path to paralogy file")
    parser.add_option("--orthology", dest="orthology", help="path to orthology file")
    options, args = parser.parse_args()

    # All four options are required.  The original condition ended in
    # `options.cns, options.orthology`, which built a 2-tuple; a non-empty
    # tuple is always true, so the help/exit branch could never run.
    if not (options.sbed and options.qbed and options.cns and options.orthology):
        sys.exit(parser.print_help())

    qbed = Bed(options.qbed)
    qbed.fill_dict()
    sbed = Bed(options.sbed)
    sbed.fill_dict()

    qbed_new, sbed_new, new_pairs = main(qbed, sbed, options.cns, options.dist, options.orthology)
    write_new_pairs(options.paralogy, options.orthology, qbed, qbed_new, sbed, sbed_new, new_pairs)
示例10: main
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
def main(qbed_path, sbed_path, cnsfile, dist, orthology_path):
"""
here, we remove cnss that have been called proteins/rnas from
the cns list, and add them to the bed files.
AND have to do the preliminary assignment of cnss that remain to the new-genes
that _were_ cnss. the proper assignment is then handled in assign.py
"""
qcns_file = qbed_path.replace(".bed", "_cns.gff")
assert qcns_file != qbed_path
qcns_gff = open(qcns_file, 'w')
print >>qcns_gff, "##gff-version 3"
if sbed_path != qbed_path:
scns_file = sbed_path.replace(".bed", "_cns.gff")
assert scns_file != sbed_path
scns_gff = open(scns_file, 'w')
print >>scns_gff, "##gff-version 3"
else: scns_gff = qcns_gff
qrawbed = RawBed(qbed_path)
srawbed = RawBed(sbed_path)
ortho_trees = read_orthos_to_trees(orthology_path, qrawbed,srawbed)
qbed = Bed(qbed_path); qbed.fill_dict()
sbed = Bed(sbed_path); sbed.fill_dict()
name, ext = op.splitext(cnsfile)
real_cns_fh = open("%s.real%s" % (name, ext), "w")
print >>sys.stderr, "writing to:", real_cns_fh.name
outdir = op.dirname(cnsfile)
print >>real_cns_fh, "#qseqid,qaccn,sseqid,saccn,qstart,qend,sstart,send,eval"
crna = read_cns_to_rna(outdir)
cpro = read_cns_to_protein_exons(outdir)
#cns_items = list(parse_raw_cns(cnsfile))
proteins = collections.defaultdict(list)
rnas = collections.defaultdict(list)
real_cns_items = []
for cnsi in CNS.parse_raw_line(cnsfile):
cns_id = cnsi.cns_id
cns = cnsi.to_dict()
key = (cns['qseqid'], cns['sseqid'])
if cns_id in cpro:
proteins[key].append((cns, cpro[cns_id]))
elif cns_id in crna:
rnas[key].append((cns, crna[cns_id]))
else:
real_cns_items.append((cns_id, cns))
p_trees = fill_tree(proteins)
r_trees = fill_tree(rnas)
def assign_new_names(prs, protein_or_rna):
    """Rename the CNS dicts in ``prs`` as new genes and regroup them.

    Every ``gnew`` dict is modified in place: 'qaccn'/'saccn' become
    "<seqid>_<start>_<end>_cns_<protein_or_rna>" and 'qstrand'/'sstrand' are
    set to the strand of the accessions the dict originally referred to,
    looked up in ``qbed.d`` / ``sbed.d`` from the enclosing scope.

    Returns a dict mapping each seqid pair of ``prs`` to a list of
    (gnew, info) tuples.
    """
    renamed = {}
    for seqid_pair, entries in prs.iteritems():
        bucket = renamed.setdefault(seqid_pair, [])
        # Iterate over a copy of the list, as the original did.
        for gnew, info in list(entries):
            base_names = (
                "%(qseqid)s_%(qstart)i_%(qend)i_cns" % gnew,
                "%(sseqid)s_%(sstart)i_%(send)i_cns" % gnew,
            )
            # The same suffix on both names marks them as a pair.
            suffix = "_%s" % (protein_or_rna)
            new_qname, new_sname = [base + suffix for base in base_names]

            # Look up the strands before the accessions are overwritten.
            try:
                qstrand = qbed.d[gnew['qaccn']]['strand']
                sstrand = sbed.d[gnew['saccn']]['strand']
            except:
                # Dump the offending record to stderr, then re-raise.
                sys.stderr.write("%s\n" % (gnew,))
                raise

            for field, value in (('qaccn', new_qname), ('saccn', new_sname),
                                 ('qstrand', qstrand), ('sstrand', sstrand)):
                gnew[field] = value
            bucket.append((gnew, info))
    return renamed
nproteins = assign_new_names(proteins, "protein")
nrnas = assign_new_names(rnas, "rna")
cns_seen = {}
# go through the remaining cnss, print and assign them to the new
# genes (previously cnss) in within dist.
for cns_id, cns in real_cns_items:
print >>real_cns_fh, cns_to_str(cns)
key = (cns['qseqid'], cns['sseqid'])
for pnew, info in get_new(cns, p_trees, key, nproteins, dist + 1000):
cns['qaccn'] = pnew['qaccn']
cns['saccn'] = pnew['saccn']
cns_str = cns_to_str(cns)
if cns_str in cns_seen: continue
cns_seen[cns_str] = 1
print >>real_cns_fh, cns_str
for rnew, info in get_new(cns, r_trees, key, nrnas, dist + 1000):
cns['qaccn'] = rnew['qaccn']
cns['saccn'] = rnew['saccn']
cns_str = cns_to_str(cns)
if cns_str in cns_seen: continue
cns_seen[cns_str] = 1
print >>real_cns_fh, cns_str
#.........这里部分代码省略.........
示例11: test_main
# 需要导入模块: from flatfeature import Bed [as 别名]
# 或者: from flatfeature.Bed import fill_dict [as 别名]
def test_main(self):
    """Run main() on the fixture BED/FASTA pair and print what it returns."""
    query_bed = Bed(self.qbed, self.qfasta)
    query_bed.fill_dict()
    subject_bed = Bed(self.sbed, self.sfasta)
    subject_bed.fill_dict()
    result = main(query_bed, subject_bed, self.pairs, 12000, 12000,
                  "pair", self.blast_path, "T", 2)
    print(result)