This article collects typical usage examples of the Python method CGAT.Bed.readAndIndex. If you are unsure what Bed.readAndIndex does, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples for CGAT.Bed, the module this method belongs to.
The following lists 7 code examples of the Bed.readAndIndex method, sorted by popularity by default.
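Before the examples, here is a minimal usage sketch distilled from them, showing the two-step pattern they all share: build an index from an open BED file, then query it per contig. The file name intervals.bed.gz and the contig name chr1 are placeholder values for illustration, not taken from the examples themselves.

from CGAT import Bed
from CGAT import IOTools

# Build an in-memory interval index of a BED file, keyed by contig.
# "intervals.bed.gz" is a hypothetical file name used only for this sketch.
index = Bed.readAndIndex(IOTools.openFile("intervals.bed.gz"))

# Query the index for one contig; find() yields (start, end, value) tuples
# for every interval overlapping the requested range.
try:
    overlaps = list(index["chr1"].find(1000, 2000))
except KeyError:
    # the contig is absent from the BED file
    overlaps = []

In the examples below, passing per_track=True returns a separate index per track, while with_values=True (or with_values="name") determines what is attached as the value element of each returned tuple.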
Example 1: annotateCpGIslands
# Required import: from CGAT import Bed [as alias]
# Or alternatively: from CGAT.Bed import readAndIndex [as alias]
def annotateCpGIslands(infiles, outfile):
    '''annotate transcript by absence/presence of CpG islands
    '''
    cpgfile, tssfile = infiles
    cpg = Bed.readAndIndex(IOTools.openFile(cpgfile))

    extension_upstream = PARAMS["cpg_search_upstream"]
    extension_downstream = PARAMS["cpg_search_downstream"]

    c = E.Counter()
    outf = IOTools.openFile(outfile, "w")
    outf.write("transcript_id\tstrand\tstart\tend\trelative_start\trelative_end\n")

    for tss in Bed.iterator(IOTools.openFile(tssfile)):
        c.tss_total += 1

        if tss.strand == "+":
            start, end = tss.start - extension_upstream, tss.start + extension_downstream
        else:
            start, end = tss.end - extension_downstream, tss.end + extension_upstream

        try:
            matches = list(cpg[tss.contig].find(start, end))
        except KeyError:
            c.promotor_without_matches += 1
            continue

        if len(matches) == 0:
            c.promotor_without_matches += 1
            continue

        c.promotor_output += 1
        for match in matches:
            c.matches_total += 1
            genome_start, genome_end, x = match
            l = genome_end - genome_start

            # get relative location of match
            if tss.strand == "+":
                relative_start = genome_start - tss.start
            else:
                relative_start = tss.end - genome_end

            relative_end = relative_start + l

            outf.write("\t".join(map(str, (
                tss.name, tss.strand,
                genome_start, genome_end,
                relative_start, relative_end))) + "\n")
            c.matches_output += 1

    outf.close()

    with IOTools.openFile(outfile + ".summary", "w") as outf:
        outf.write("category\tcounts\n")
        outf.write(c.asTable() + "\n")

    E.info(c)
Example 2: main
# Required import: from CGAT import Bed [as alias]
# Or alternatively: from CGAT.Bed import readAndIndex [as alias]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: bed2graph.py 2861 2010-02-23 17:36:32Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-o", "--output-section", dest="output", type="choice",
                      choices=("full", "name"),
                      help="output either ``full`` overlapping entries, only the ``name``s. [default=%default].")

    parser.set_defaults(
        output="full",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("two arguments required")

    if args[0] == "-":
        infile1 = options.stdin
    else:
        infile1 = IOTools.openFile(args[0], "r")
    infile2 = IOTools.openFile(args[1], "r")

    idx = Bed.readAndIndex(infile2, with_values=True)

    output = options.output
    outfile = options.stdout

    if output == "name":
        outfile.write("name1\tname2\n")
        outf = lambda x: x.fields[0]
    else:
        outf = str

    for bed in Bed.iterator(infile1):
        try:
            overlaps = idx[bed.contig].find(bed.start, bed.end)
        except (KeyError, IndexError):
            # ignore missing contig and zero length intervals
            continue

        for o in overlaps:
            outfile.write("\t".join((outf(bed), outf(o[2]))) + "\n")

    E.Stop()
Example 3: __init__
# Required import: from CGAT import Bed [as alias]
# Or alternatively: from CGAT.Bed import readAndIndex [as alias]
def __init__(self, filename, *args, **kwargs):
    assert filename is not None, "please supply filename for CounterOverlap"

    Counter.__init__(self, *args, **kwargs)

    self.filename = filename

    E.info("reading intervals from %s" % self.filename)

    self.index = Bed.readAndIndex(IOTools.openFile(self.filename, "r"),
                                  per_track=True)

    E.info("read intervals for %s tracks" % len(self.index))

    self.tracks = self.index.keys()
    self.headers = []
    for track in self.tracks:
        self.headers.extend(["%s_nover" % track, "%s_bases" % track])
Example 4: buildIndex
# Required import: from CGAT import Bed [as alias]
# Or alternatively: from CGAT.Bed import readAndIndex [as alias]
def buildIndex(self, filename):
    return Bed.readAndIndex(IOTools.openFile(filename, "r"))
Example 5: __init__
# Required import: from CGAT import Bed [as alias]
# Or alternatively: from CGAT.Bed import readAndIndex [as alias]
def __init__(self, filename):
    self.mIndices = Bed.readAndIndex(IOTools.openFile(filename, "r"),
                                     per_track=True)
Example 6: main
# Required import: from CGAT import Bed [as alias]
# Or alternatively: from CGAT.Bed import readAndIndex [as alias]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=["ave_dist", "min_dist", "corr"],
                      default="min_dist",
                      help="Method for calculating similarity between profiles")
    parser.add_option("-s", "--spread", dest="spread", type="int",
                      default=10,
                      help="Amount to spread each tag by")
    parser.add_option("-k", "--keep-dist", dest="keep_dist",
                      action="store_true",
                      help="Keep the distribution of tag depths")
    parser.add_option("-r", "--rands", dest="rands", type="int",
                      default=100,
                      help="Number of randomisations to use for calculating"
                           " mean and stdev of distance")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    profile1_file, profile2_file = args
    profile1_file = pysam.AlignmentFile(profile1_file)

    if profile2_file.endswith("bed") or profile2_file.endswith("bed.gz"):
        profile2_file = Bed.readAndIndex(profile2_file, with_values=True)
        profile2_counter = bed_counter
    else:
        profile2_file = pysam.AlignmentFile(profile2_file)
        profile2_counter = iCLIP.count_intervals

    if options.method == "min_dist":
        distance_func = iCLIP.findMinDistance
    elif options.method == "ave_dist":
        distance_func = iCLIP.calcAverageDistance
    else:
        def distance_func(profile1, profile2):
            return iCLIP.corr_profile(profile1, profile2, options.spread,
                                      profile2_ready=True)

    for exon in GTF.iterator(options.stdin):
        if exon.feature != "exon":
            continue

        contig = exon.contig
        strand = exon.strand
        transcript_id = exon.transcript_id
        start = exon.start
        end = exon.end

        profile1 = iCLIP.count_intervals(profile1_file,
                                         [(start, end)],
                                         contig=contig,
                                         strand=strand)
        profile2 = profile2_counter(profile2_file,
                                    [(start, end)],
                                    contig=contig,
                                    strand=strand)

        if profile1.sum() == 0 or profile2.sum() == 0:
            z = "NA"
            distance = "NA"
            options.stdout.write(
                "%(contig)s\t%(start)i\t%(end)i\t%(transcript_id)s\t%(strand)s\t%(distance)s\t%(z)s\n" % locals())
            continue

        if options.method == "corr":
            profile2 = iCLIP.spread(profile2, options.spread)

        distance = distance_func(profile1, profile2)

        rands = iCLIP.rand_apply(profile=profile1,
                                 exon=exon,
                                 n=options.rands,
                                 func=distance_func,
                                 keep_dist=options.keep_dist,
                                 profile2=profile2)

        z = (distance - rands.mean()) / rands.std()

        options.stdout.write(
            "%(contig)s\t%(start)i\t%(end)i\t%(transcript_id)s\t%(strand)s\t%(distance).3f\t%(z).2f\n" % locals())

    # write footer and output benchmark information.
    E.Stop()
Example 7: countInteractions
# Required import: from CGAT import Bed [as alias]
# Or alternatively: from CGAT.Bed import readAndIndex [as alias]
def countInteractions(reads, digest, probe_fragments, outfile,
                      metrics_file):

    reads = pysam.AlignmentFile(reads)
    digest_intervals = Bed.readAndIndex(
        IOTools.openFile(digest), with_values="name")
    probe_fragments = Bed.readAndIndex(
        IOTools.openFile(probe_fragments), with_values="name")

    c = collections.Counter()
    results = collections.defaultdict(int)
    contigs = digest_intervals.keys()
    rejects = pysam.AlignmentFile(outfile + ".rejects.bam", "wb",
                                  template=reads)

    E.debug("Starting Counting Run...")

    for fragment in readBundle(reads):

        if c["fragments"] % 1000 == 0:
            E.debug("\t".join(["%s:\t%s" % x for x in c.iteritems()]))

        c["fragments"] += 1
        c["reads"] += len(fragment)

        def _get_read_position(aln):
            if aln.is_reverse:
                c = aln.cigartuples[::-1]
            else:
                c = aln.cigartuples
            pos = 0
            for op, length in c:
                if op == 0:
                    return pos
                else:
                    pos += length
            return pos

        def _get_first(alignments):
            '''find alignment that maps earliest part of read'''
            slist = sorted(alignments, key=lambda x: _get_read_position(x))
            return (slist[0], slist[1:])

        def _get_probes(read):
            try:
                m = list(
                    probe_fragments[reads.getrname(read.reference_id)].find(
                        read.pos, read.aend))
            except KeyError:
                return []
            return m

        first_read = [aln for aln in fragment if aln.is_read1]
        second_read = [aln for aln in fragment if aln.is_read2]

        if len(first_read) == 0 or len(second_read) == 0:
            c["Unpaired"] += 1
            continue

        assert len(first_read) + len(second_read) == len(fragment)

        primary_aln, other_aln = zip(_get_first(first_read),
                                     _get_first(second_read))
        other_aln = sum(other_aln, [])

        probe_matches = [_get_probes(read) for read in primary_aln]

        if len(sum(probe_matches, [])) == 0:
            c["no_probe"] += 1
            for read in fragment:
                rejects.write(read)
            continue

        other_matches = set(sum([
            list(digest_intervals[reads.getrname(read.reference_id)].find(
                read.pos, read.aend))
            for read in other_aln
            if reads.getrname(read.reference_id) in contigs], []))

        primary_matches = set(sum([
            list(digest_intervals[reads.getrname(read.reference_id)].find(
                read.pos, read.aend))
            for read in primary_aln
            if reads.getrname(read.reference_id) in contigs], []))

        if len(primary_matches) > 2:
            c["multi-hit"] += 1
            continue

        if not all([match in primary_matches for match in other_matches]):
            c["multi-hit"] += 1
            continue

        primary_matches = list(primary_matches)

        if len(primary_matches) == 2:
            results[(primary_matches[0][2], primary_matches[1][2])] += 1
            results[(primary_matches[1][2], primary_matches[0][2])] += 1
        elif len(primary_matches) == 1:
            results[(primary_matches[0][2], primary_matches[0][2])] += 1
        else:
#.........the remainder of this example is omitted.........