本文整理汇总了Python中toolshed.nopen函数的典型用法代码示例。如果您正苦于以下问题:Python nopen函数的具体用法?Python nopen怎么用?Python nopen使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了nopen函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main(prefix, name_re, min_samples, methylation_files):
name_re = re.compile(r"%s" % name_re)
if not prefix.endswith((".", "/")): prefix += "."
fhm = nopen('{prefix}methylation.txt.gz'.format(prefix=prefix), 'w')
fhme = nopen('{prefix}methylated.txt.gz'.format(prefix=prefix), 'w')
fhc = nopen('{prefix}counts.txt.gz'.format(prefix=prefix), 'w')
def source_from_fname(fname):
try:
return name_re.search(fname).groups(0)[0]
except:
return op.basename(fname)
iterables = [gen_iterable(f, source_from_fname) for f in methylation_files]
sources = [source_from_fname(f) for f in methylation_files]
fmt = "{chrom}:{start}\t{vals}\n"
fhm.write("probe\t%s" % "\t".join(sources) + "\n")
fhc.write("probe\t%s" % "\t".join(sources) + "\n")
fhme.write("probe\t%s" % "\t".join(sources) + "\n")
for chrom, start, end, values, counts, meths in bed_merge(iterables, sources):
if sum(tryfloat(v) > 0 for v in values) < min_samples: continue
vals = "\t".join(values)
fhm.write(fmt.format(chrom=chrom, start=start, vals=vals))
counts = "\t".join(counts)
fhc.write(fmt.format(chrom=chrom, start=start, vals=counts))
meths = "\t".join(meths)
fhme.write(fmt.format(chrom=chrom, start=start, vals=meths))
示例2: main
def main(args):
tags = {}
if args.verbose:
sys.stderr.write(">> reading in tag sequences...\n")
with nopen(args.tags) as fasta:
for name, seq in read_fasta(fasta):
tags[name] = seq
i = 0
for fx in args.reads:
if args.verbose:
sys.stderr.write(">> processing %s...\n" % op.basename(fx))
# process either fasta or fastq.
if ".fasta" in fx or ".fa" in fx:
with nopen(fx) as fa:
for f_id, f_seq in read_fasta(fa):
i += 1
if i % 1000000 == 0 and args.verbose:
sys.stderr.write(">> processed %d reads...\n" % i)
print_record(tags, f_id, f_seq)
else:
with nopen(fx) as fq:
for f_id, f_seq, f_qual in read_fastq(fq):
i += 1
if i % 1000000 == 0 and args.verbose:
sys.stderr.write(">> processed %d reads...\n" % i)
print_record(tags, f_id, f_seq)
示例3: main
def main(args):
# fields from issake
fields = "contig_id length reads avg_coverage seed v_region j_region".split()
# the only fields i believe make any sense to keep
out = "id v_region j_region length reads avg_coverage percent_of_total sequence".split()
# total reads used in assembly
total = 0.
with nopen(args.fasta_in) as fasta:
for name, seq in read_fasta(fasta):
name = name.replace("size","").replace("cov","").replace("read","").replace("seed:","")
d = dict(zip(fields, name.split("|")))
total += int(d['reads'])
with nopen(args.fasta_in) as fasta,\
open(args.fasta_out, 'wb') as fasta_out,\
open(args.meta, 'wb') as meta:
# print header
meta.write("\t".join(out) + "\n")
for i, (name, seq) in enumerate(read_fasta(fasta)):
# remove some text from iSSAKE output
name = name.replace("size","").replace("cov","").replace("read","").replace("seed:","")
d = dict(zip(fields, name.split("|")))
# want to shorten the read names
d['id'] = "contig_%d" % i
d['percent_of_total'] = "%.6g" % (100 * (int(d['reads']) / total))
d['sequence'] = seq
meta.write("\t".join(map(str, [d[o] for o in out])) + "\n")
write_fasta(fasta_out, d['id'], seq.upper())
示例4: _set_structure
def _set_structure(self, structure):
"""
here, we want to intersect the query and subject bed files with the
structure.bed file and give each set of intervals in query and bed
that fall within (or have any overlap with) a unique, fake chromosome
so that all shuffling is within that chromosome.
in order to do this, we also have to create a fake genome file that
contains the lengths of those chromosomes.
"""
if structure in (None, ""): return
self.chrom = True # has to be by chromosome.
n_query_before = sum(1 for _ in nopen(self.query))
n_subject_before = sum(1 for _ in nopen(self.subject))
new_genome = open(mktemp(suffix='.fake_genome'), 'w')
structure = "<(cut -f 1-3 %s)" % structure
seen_segs = {}
for bed in ('query', 'subject', 'exclude', 'include'):
bed_path = getattr(self, "_" + bed, getattr(self, bed))
if not bed_path: continue
new_fh = open(mktemp(suffix='%s.fake' % bed), 'w')
for toks in reader("|bedtools intersect -wo -a %s -b '%s' \
| sort -k4,4 -k5,5g" % (structure, bed_path), header=False):
gtoks, btoks = toks[:3], toks[3:-1] # drop the bp overlap
new_chrom = "_".join(gtoks)
gtoks[1:] = map(int, gtoks[1:])
btoks[1:3] = map(int, btoks[1:3])
glen = gtoks[2] - gtoks[1] # fake chrom length.
if new_chrom.startswith('chr'): new_chrom = new_chrom[3:]
if not new_chrom in seen_segs:
# save it in the genome file.
print >>new_genome, "\t".join((new_chrom, str(glen)))
seen_segs[new_chrom] = True
# with partial overlap, we'll have a negative start or an
# end outside the genome... for now, just truncate.
# adjust the interval to its location the new chrom.
btoks[0] = new_chrom
btoks[1] = max(0, btoks[1] - gtoks[1]) # don't let it go below 0
# chop to end of fake chrom.
btoks[2] = min(btoks[2] - gtoks[1], glen - 1)
assert 0 <= btoks[1] <= btoks[2] < glen
btoks[1:3] = map(str, btoks[1:3])
print >>new_fh, "\t".join(btoks)
new_fh.close()
setattr(self, bed, new_fh.name)
new_genome.close()
self.genome_file = new_genome.name
示例5: extend_bed
def extend_bed(fin, fout, bases):
# we're extending both a.bed and b.bed by this distance
# so divide by 2.
bases /= 2
with nopen(fout, 'w') as fh:
for toks in (l.rstrip("\r\n").split("\t") for l in nopen(fin)):
toks[1] = max(0, int(toks[1]) - bases)
toks[2] = max(0, int(toks[2]) + bases)
if toks[1] > toks[2]: # negative distances
toks[1] = toks[2] = (toks[1] + toks[2]) / 2
assert toks[1] <= toks[2]
print >>fh, "\t".join(map(str, toks))
return fh.name
示例6: readfx
def readfx(fastx):
with nopen(fastx) as fp:
last = None
while True:
if not last:
for l in fp:
if l[0] in '>@':
last = l[:-1]
break
if not last: break
name, seqs, last = last[1:].partition(" ")[0], [], None
for l in fp:
if l[0] in '@+>':
last = l[:-1]
break
seqs.append(l[:-1])
if not last or last[0] != '+':
yield name, ''.join(seqs), None
if not last: break
else:
seq, leng, seqs = ''.join(seqs), 0, []
for l in fp:
seqs.append(l[:-1])
leng += len(l) - 1
if leng >= len(seq):
last = None
yield name, seq, ''.join(seqs);
break
if last:
yield name, seq, None
break
示例7: main
def main():
p = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
p.add_argument("-p", dest="pvals", help="BED containing all the p values"
" used to generate `regions`")
p.add_argument("-r", dest="regions", help="BED containing all the regions")
p.add_argument("-s", dest="step", type=int, default=50,
help="step size for acf calculation. should be the same "
" value as the step sent to -d arg for acf")
p.add_argument("--mlog", dest="mlog", action="store_true",
default=False, help="do the correlation on the -log10 of"
"the p-values. Default is to do it on the raw values")
p.add_argument("-N", dest="N", help="number of simulations to perform",
type=int, default=0)
p.add_argument("-c", dest="c", help="column number containing the p-value"
" of interest", type=int, default=-1)
p.add_argument("-z", dest="z", help="use z-score correction",
action="store_true")
args = p.parse_args()
if not (args.regions and args.pvals):
import sys
sys.exit(not p.print_help())
from toolshed import nopen
header = nopen(args.regions).next()
if header.startswith("#") or (not header.split("\t")[2].isdigit()):
print "%s\tslk_p\tslk_sidak_p" % (header.rstrip("\r\n"),)
return run(args)
示例8: intersect
def intersect(ref, xref, peaks):
if xref:
xref = xref_to_dict(xref)
# group the output by chr->gene->start
cmd = ("|bedtools intersect -wb -a {peaks} -b {ref} "
"| sort -k1,1 -k8,8 -k2,2n").format(**locals())
cols = ['chrom','start','stop','peak','_chrom',
'_start','_stop','gene','_score','strand']
tmp = open(tempfile.mkstemp(suffix=".bed")[1], 'wb')
for g in grouper(nopen(cmd), cols):
negs = []
for i, l in enumerate(unique_everseen(\
g, lambda t: ret_item(t, cols, 'peak')), start=1):
l = lparser(l, cols)
# negative stranded sites
if l['strand'] == "-":
# need to count through them up, saving l each time
negs.append(l)
continue
# positive stranded sites
print >>tmp, "\t".join(get_out(l, i, xref))
for i, l in izip(count(len(negs), -1), negs):
print >>tmp, "\t".join(get_out(l, i, xref))
tmp.close()
return tmp.name
示例9: local_shuffle
def local_shuffle(bed, loc='500000'):
"""
Randomize the location of each interval in `bed` by moving its
start location to within `loc` bp of its current location or to
its containing interval in `loc`.
Arguments:
bed - input bed file
loc - shuffle intervals to within this distance (+ or -).
If not an integer, then this should be a BED file containing
regions such that each interval in `bed` is shuffled within
its containing interval in `loc`
"""
from random import randint
if str(loc).isdigit():
dist = abs(int(loc))
with nopen(bed) as fh:
for toks in (l.rstrip('\r\n').split('\t') for l in fh):
d = randint(-dist, dist)
toks[1:3] = [str(max(0, int(bloc) + d)) for bloc in toks[1:3]]
print "\t".join(toks)
else:
# we are using dist as the windows within which to shuffle
assert os.path.exists(loc)
bed4 = mktemp()
with open(bed4, 'w') as fh:
# this step is so we don't have to track the number of columns in A
for toks in reader(bed, header=False):
fh.write("%s\t%s\n" % ("\t".join(toks[:3]), SEP.join(toks)))
missing = 0
# we first find the b-interval that contains each a-interval by
# using bedtools intersect
for toks in reader("|bedtools intersect -wao -a {bed4} -b {loc}"
.format(**locals()), header=False):
ajoin = toks[:4]
a = ajoin[3].split(SEP) # extract the full interval
b = toks[4:]
if int(b[-1]) == 0:
missing += 1
continue
assert a[0] == b[0], ('chroms dont match', a, b)
alen = int(a[2]) - int(a[1])
# doesn't care if the new interval is completely contained in b
astart = randint(int(b[1]), int(b[2]))
# subtract half the time.
aend = (astart - alen) if randint(0, 1) == 0 and astart > alen \
else (astart + alen)
a[1], a[2] = map(str, (astart, aend) if astart < aend
else (aend, astart))
print "\t".join(a)
if missing > 0:
print >> sys.stderr, ("found {missing} intervals in {bed} that "
" were not contained in {loc}"
.format(**locals()))
示例10: main
def main():
p = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
p.add_argument("-p", dest="pvals", help="BED containing all the p values"
" used to generate `regions`")
p.add_argument("-r", dest="regions", help="BED containing all the regions")
p.add_argument("-s", "--step", dest="step", type=int, default=50,
help="step size for acf calculation. should be the same "
" value as the step sent to -d arg for acf")
p.add_argument("-c", dest="c", help="column number containing the p-value"
" of interest", type=str, default=-1)
p.add_argument("-z", dest="z", help="use z-score correction",
action="store_true")
args = p.parse_args()
if not (args.regions and args.pvals):
import sys
sys.exit(not p.print_help())
header = ts.nopen(args.regions).next()
if header.startswith("#") or (not header.split("\t")[2].isdigit()):
print "%s\tslk_p\tslk_sidak_p" % (header.rstrip("\r\n"),)
header = ts.header(args.pvals)
if args.c in header:
args.c = header.index(args.c) + 1
else:
args.c = int(args.c)
return run(args)
示例11: run_metric
def run_metric(cmd, metric=None):
"""
Metric can be a string, e.g. "wc -l" or a python callable that consumes
lines of input and returns a single value.
e.g.
def mymetric(fh):
val = 0
for line in fh:
val += float(line.split("\t")[4])
return val
The lines sent to the metric function will be the result of bedtools
intersect -wa -- so that both the -a and -b intervals will be present
in each line.
"""
if metric is None:
cmd, metric = cmd
if isinstance(metric, basestring):
return float(run("%s | %s" % (cmd, metric)))
else:
proc = nopen("|%s" % cmd, mode=None)
res = metric(proc.stdout)
check_proc(proc, cmd)
assert isinstance(res, (int, float))
return res
示例12: read_regions
def read_regions(fregions):
if not fregions: return None
regions = {}
for toks in (l.split("\t") for l in ts.nopen(fregions) if l[0] != "#"):
if not toks[0] in regions: regions[toks[0]] = []
regions[toks[0]].append((int(toks[1]), int(toks[2])))
return regions
示例13: tofile
def tofile(fiter, fname):
fh = nopen(fname, "w")
for line in fiter:
print >>fh, line.rstrip("\r\n")
fh.close()
atexit.register(os.unlink, fname)
return fname
示例14: protein
def protein(self):
from toolshed import nopen
l = "http://genome.ucsc.edu/cgi-bin/hgGene?hgg_do_getProteinSeq=1&hgg_gene="
url = l + self.name
seq = [x.strip() for x in nopen(url) if x.strip() and
not ">" in x]
return "".join(seq)
示例15: main
def main():
p = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
p.add_argument('attrs', help='Cytoscape attributes file of overlapping features')
args = p.parse_args()
attrsfile = nopen(args.attrs)
# remove header
attrsfile.readline()
uniqid = os.path.basename(args.attrs).split(".", 1)[0]
previous_feature = None
for line in attrsfile:
attr = line.rstrip("\r\n").split("=", 1)
# everything before the "="
cyto_info = attr[0].strip()
feature = attr[1].strip().upper()
fields = (cyto_info, "= True\n")
if previous_feature and previous_feature == feature:
fileout.write(" ".join(map(str, fields)))
else:
fileout = open("%s.%s.eda" % (uniqid, feature), 'w')
fileout.write("feature%s\n" % feature)
fileout.write(" ".join(map(str, fields)))
previous_feature = feature