本文整理汇总了Python中jcvi.utils.grouper.Grouper.joined方法的典型用法代码示例。如果您正苦于以下问题:Python Grouper.joined方法的具体用法?Python Grouper.joined怎么用?Python Grouper.joined使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类jcvi.utils.grouper.Grouper
的用法示例。
在下文中一共展示了Grouper.joined方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: tandem_main
# 需要导入模块: from jcvi.utils.grouper import Grouper [as 别名]
# 或者: from jcvi.utils.grouper.Grouper import joined [as 别名]
def tandem_main(blast_file, cds_file, bed_file, N=3, P=50, is_self=True, \
evalue=.01, strip_name=".", ofile=sys.stderr, genefam=False):
if genefam:
N = 1e5
# get the sizes for the CDS first
f = Fasta(cds_file)
sizes = dict(f.itersizes())
# retrieve the locations
bed = Bed(bed_file)
order = bed.order
if is_self:
# filter the blast file
g = Grouper()
fp = open(blast_file)
for row in fp:
b = BlastLine(row)
query_len = sizes[b.query]
subject_len = sizes[b.subject]
if b.hitlen < min(query_len, subject_len)*P/100.:
continue
query = gene_name(b.query, strip_name)
subject = gene_name(b.subject, strip_name)
qi, q = order[query]
si, s = order[subject]
if abs(qi - si) <= N and b.evalue <= evalue:
if genefam:
g.join(query, subject)
elif q.seqid == s.seqid:
g.join(query, subject)
else:
homologs = Grouper()
fp = open(blast_file)
for row in fp:
b = BlastLine(row)
query_len = sizes[b.query]
subject_len = sizes[b.subject]
if b.hitlen < min(query_len, subject_len)*P/100.:
continue
if b.evalue > evalue:
continue
query = gene_name(b.query, strip_name)
subject = gene_name(b.subject, strip_name)
homologs.join(query, subject)
if genefam:
g = homologs
else:
g = Grouper()
for i, atom in enumerate(bed):
for x in range(1, N+1):
if all([i-x >= 0, bed[i-x].seqid == atom.seqid, \
homologs.joined(bed[i-x].accn, atom.accn)]):
leni = sizes[bed[i].accn]
lenx = sizes[bed[i-x].accn]
if abs(leni - lenx) > max(leni, lenx)*(1-P/100.):
continue
g.join(bed[i-x].accn, atom.accn)
# dump the grouper
fw = must_open(ofile, "w")
ngenes, nfamilies = 0, 0
families = []
for group in sorted(g):
if len(group) >= 2:
print >>fw, ",".join(sorted(group))
ngenes += len(group)
nfamilies += 1
families.append(sorted(group))
longest_family = max(families, key=lambda x: len(x))
# generate reports
print >>sys.stderr, "Proximal paralogues (dist=%d):" % N
print >>sys.stderr, "Total %d genes in %d families" % (ngenes, nfamilies)
print >>sys.stderr, "Longest families (%d): %s" % (len(longest_family),
",".join(longest_family))
return families