当前位置: 首页>>代码示例>>Python>>正文


Python toolshed.nopen函数代码示例

本文整理汇总了Python中toolshed.nopen函数的典型用法代码示例。如果您正苦于以下问题:Python nopen函数的具体用法?Python nopen怎么用?Python nopen使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了nopen函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main(prefix, name_re, min_samples, methylation_files):
    name_re = re.compile(r"%s" % name_re)
    if not prefix.endswith((".", "/")): prefix += "."
    fhm = nopen('{prefix}methylation.txt.gz'.format(prefix=prefix), 'w')
    fhme = nopen('{prefix}methylated.txt.gz'.format(prefix=prefix), 'w')
    fhc = nopen('{prefix}counts.txt.gz'.format(prefix=prefix), 'w')

    def source_from_fname(fname):
        try:
            return name_re.search(fname).groups(0)[0]
        except:
            return op.basename(fname)

    iterables = [gen_iterable(f, source_from_fname) for f in methylation_files]
    sources = [source_from_fname(f) for f in methylation_files]

    fmt = "{chrom}:{start}\t{vals}\n"
    fhm.write("probe\t%s" % "\t".join(sources) + "\n")
    fhc.write("probe\t%s" % "\t".join(sources) + "\n")
    fhme.write("probe\t%s" % "\t".join(sources) + "\n")
    for chrom, start, end, values, counts, meths in bed_merge(iterables, sources):
        if sum(tryfloat(v) > 0 for v in values) < min_samples: continue
        vals = "\t".join(values)
        fhm.write(fmt.format(chrom=chrom, start=start, vals=vals))
        counts = "\t".join(counts)
        fhc.write(fmt.format(chrom=chrom, start=start, vals=counts))
        meths = "\t".join(meths)
        fhme.write(fmt.format(chrom=chrom, start=start, vals=meths))
开发者ID:brentp,项目名称:clustermodel,代码行数:28,代码来源:meth-matrix.py

示例2: main

def main(args):
    tags = {}
    if args.verbose:
        sys.stderr.write(">> reading in tag sequences...\n")
    with nopen(args.tags) as fasta:
        for name, seq in read_fasta(fasta):
            tags[name] = seq
    i = 0
    for fx in args.reads:
        if args.verbose:
            sys.stderr.write(">> processing %s...\n" % op.basename(fx))
        # process either fasta or fastq.
        if ".fasta" in fx or ".fa" in fx:
            with nopen(fx) as fa:
                for f_id, f_seq in read_fasta(fa):
                    i += 1
                    if i % 1000000 == 0 and args.verbose:
                        sys.stderr.write(">> processed %d reads...\n" % i)
                    print_record(tags, f_id, f_seq)
        else:
            with nopen(fx) as fq:
                for f_id, f_seq, f_qual in read_fastq(fq):
                    i += 1
                    if i % 1000000 == 0 and args.verbose:
                        sys.stderr.write(">> processed %d reads...\n" % i)
                    print_record(tags, f_id, f_seq)
开发者ID:brwnj,项目名称:repertoire,代码行数:26,代码来源:find_seeds.py

示例3: main

def main(args):
    # fields from issake
    fields = "contig_id length reads avg_coverage seed v_region j_region".split()
    # the only fields i believe make any sense to keep
    out = "id v_region j_region length reads avg_coverage percent_of_total sequence".split()
    # total reads used in assembly
    total = 0.
    with nopen(args.fasta_in) as fasta:
        for name, seq in read_fasta(fasta):
            name = name.replace("size","").replace("cov","").replace("read","").replace("seed:","")
            d = dict(zip(fields, name.split("|")))
            total += int(d['reads'])
    with nopen(args.fasta_in) as fasta,\
            open(args.fasta_out, 'wb') as fasta_out,\
            open(args.meta, 'wb') as meta:
        # print header
        meta.write("\t".join(out) + "\n")
        for i, (name, seq) in enumerate(read_fasta(fasta)):
            # remove some text from iSSAKE output
            name = name.replace("size","").replace("cov","").replace("read","").replace("seed:","")
            d = dict(zip(fields, name.split("|")))
            # want to shorten the read names
            d['id'] = "contig_%d" % i
            d['percent_of_total'] = "%.6g" % (100 * (int(d['reads']) / total))
            d['sequence'] = seq
            meta.write("\t".join(map(str, [d[o] for o in out])) + "\n")
            write_fasta(fasta_out, d['id'], seq.upper())
开发者ID:brwnj,项目名称:repertoire,代码行数:27,代码来源:reads2meta.py

示例4: _set_structure

    def _set_structure(self, structure):
        """
        here, we want to intersect the query and subject bed files with the
        structure.bed file and give each set of intervals in query and bed
        that fall within (or have any overlap with) a unique, fake chromosome
        so that all shuffling is within that chromosome.
        in order to do this, we also have to create a fake genome file that
        contains the lengths of those chromosomes.
        """
        if structure in (None, ""): return
        self.chrom = True # has to be by chromosome.

        n_query_before = sum(1 for _ in nopen(self.query))
        n_subject_before = sum(1 for _ in nopen(self.subject))

        new_genome = open(mktemp(suffix='.fake_genome'), 'w')
        structure = "<(cut -f 1-3 %s)" % structure
        seen_segs = {}
        for bed in ('query', 'subject', 'exclude', 'include'):
            bed_path = getattr(self, "_" + bed, getattr(self, bed))
            if not bed_path: continue
            new_fh = open(mktemp(suffix='%s.fake' % bed), 'w')
            for toks in reader("|bedtools intersect -wo -a %s -b '%s' \
                    | sort -k4,4 -k5,5g" % (structure, bed_path), header=False):
                gtoks, btoks = toks[:3], toks[3:-1] # drop the bp overlap
                new_chrom = "_".join(gtoks)

                gtoks[1:] = map(int, gtoks[1:])
                btoks[1:3] = map(int, btoks[1:3])

                glen = gtoks[2] - gtoks[1] # fake chrom length.
                if new_chrom.startswith('chr'): new_chrom = new_chrom[3:]
                if not new_chrom in seen_segs:
                    # save it in the genome file.
                    print >>new_genome, "\t".join((new_chrom, str(glen)))
                seen_segs[new_chrom] = True

                # with partial overlap, we'll have a negative start or an
                # end outside the genome... for now, just truncate.

                # adjust the interval to its location the new chrom.
                btoks[0] = new_chrom
                btoks[1] = max(0, btoks[1] - gtoks[1]) # don't let it go below 0
                # chop to end of fake chrom.
                btoks[2] = min(btoks[2] - gtoks[1], glen - 1)
                assert 0 <= btoks[1] <= btoks[2] < glen
                btoks[1:3] = map(str, btoks[1:3])
                print >>new_fh, "\t".join(btoks)
            new_fh.close()
            setattr(self, bed, new_fh.name)
        new_genome.close()
        self.genome_file = new_genome.name
开发者ID:brentp,项目名称:shuffler,代码行数:52,代码来源:shuffler.py

示例5: extend_bed

def extend_bed(fin, fout, bases):
    # we're extending both a.bed and b.bed by this distance
    # so divide by 2.
    bases /= 2
    with nopen(fout, 'w') as fh:
        for toks in (l.rstrip("\r\n").split("\t") for l in nopen(fin)):
            toks[1] = max(0, int(toks[1]) - bases)
            toks[2] = max(0, int(toks[2]) + bases)
            if toks[1] > toks[2]:  # negative distances
                toks[1] = toks[2] = (toks[1] + toks[2]) / 2
            assert toks[1] <= toks[2]
            print >>fh, "\t".join(map(str, toks))
    return fh.name
开发者ID:daler,项目名称:poverlap,代码行数:13,代码来源:poverlap.py

示例6: readfx

def readfx(fastx):
    with nopen(fastx) as fp:
        last = None
        while True:
            if not last:
                for l in fp:
                    if l[0] in '>@':
                        last = l[:-1]
                        break
            if not last: break
            name, seqs, last = last[1:].partition(" ")[0], [], None
            for l in fp:
                if l[0] in '@+>':
                    last = l[:-1]
                    break
                seqs.append(l[:-1])
            if not last or last[0] != '+':
                yield name, ''.join(seqs), None
                if not last: break
            else:
                seq, leng, seqs = ''.join(seqs), 0, []
                for l in fp:
                    seqs.append(l[:-1])
                    leng += len(l) - 1
                    if leng >= len(seq):
                        last = None
                        yield name, seq, ''.join(seqs);
                        break
                if last:
                    yield name, seq, None
                    break
开发者ID:hjanime,项目名称:scgc_scripts,代码行数:31,代码来源:fastx.py

示例7: main

def main():
    p = argparse.ArgumentParser(description=__doc__,
                   formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("-p", dest="pvals", help="BED containing all the p values"
                  " used to generate `regions`")
    p.add_argument("-r", dest="regions", help="BED containing all the regions")
    p.add_argument("-s", dest="step", type=int, default=50,
            help="step size for acf calculation. should be the same "
            " value as the step sent to -d arg for acf")
    p.add_argument("--mlog", dest="mlog", action="store_true",
                   default=False, help="do the correlation on the -log10 of"
                   "the p-values. Default is to do it on the raw values")
    p.add_argument("-N", dest="N", help="number of simulations to perform",
                   type=int, default=0)
    p.add_argument("-c", dest="c", help="column number containing the p-value"
                   " of interest", type=int, default=-1)
    p.add_argument("-z", dest="z", help="use z-score correction",
                    action="store_true")
    args = p.parse_args()
    if not (args.regions and args.pvals):
        import sys
        sys.exit(not p.print_help())
    from toolshed import nopen
    header = nopen(args.regions).next()
    if header.startswith("#") or (not header.split("\t")[2].isdigit()):
        print "%s\tslk_p\tslk_sidak_p" % (header.rstrip("\r\n"),)
    return run(args)
开发者ID:ngthomas,项目名称:combined-pvalues,代码行数:27,代码来源:region_p.py

示例8: intersect

def intersect(ref, xref, peaks):
    if xref:
        xref = xref_to_dict(xref)
    # group the output by chr->gene->start
    cmd = ("|bedtools intersect -wb -a {peaks} -b {ref} "
                "| sort -k1,1 -k8,8 -k2,2n").format(**locals())
    cols = ['chrom','start','stop','peak','_chrom',
                '_start','_stop','gene','_score','strand']
    tmp = open(tempfile.mkstemp(suffix=".bed")[1], 'wb')
    for g in grouper(nopen(cmd), cols):
        negs = []
        for i, l in enumerate(unique_everseen(\
                            g, lambda t: ret_item(t, cols, 'peak')), start=1):
            l = lparser(l, cols)
            # negative stranded sites
            if l['strand'] == "-":
                # need to count through them up, saving l each time
                negs.append(l)
                continue
            # positive stranded sites
            print >>tmp, "\t".join(get_out(l, i, xref))
        for i, l in izip(count(len(negs), -1), negs):
            print >>tmp, "\t".join(get_out(l, i, xref))
    tmp.close()
    return tmp.name
开发者ID:jayhesselberth,项目名称:polya,代码行数:25,代码来源:merge_sites.py

示例9: local_shuffle

def local_shuffle(bed, loc='500000'):
    """
    Randomize the location of each interval in `bed` by moving its
    start location to within `loc` bp of its current location or to
    its containing interval in `loc`.

    Arguments:
        bed - input bed file
        loc - shuffle intervals to within this distance (+ or -).
               If not an integer, then this should be a BED file containing
               regions such that each interval in `bed` is shuffled within
               its containing interval in `loc`
    """
    from random import randint
    if str(loc).isdigit():
        dist = abs(int(loc))
        with nopen(bed) as fh:
            for toks in (l.rstrip('\r\n').split('\t') for l in fh):
                d = randint(-dist, dist)
                toks[1:3] = [str(max(0, int(bloc) + d)) for bloc in toks[1:3]]
                print "\t".join(toks)
    else:
        # we are using dist as the windows within which to shuffle
        assert os.path.exists(loc)
        bed4 = mktemp()
        with open(bed4, 'w') as fh:
            # this step is so we don't have to track the number of columns in A
            for toks in reader(bed, header=False):
                fh.write("%s\t%s\n" % ("\t".join(toks[:3]), SEP.join(toks)))

        missing = 0
        # we first find the b-interval that contains each a-interval by
        # using bedtools intersect
        for toks in reader("|bedtools intersect -wao -a {bed4} -b {loc}"
                           .format(**locals()), header=False):
            ajoin = toks[:4]
            a = ajoin[3].split(SEP)  # extract the full interval
            b = toks[4:]

            if int(b[-1]) == 0:
                missing += 1
                continue
            assert a[0] == b[0], ('chroms dont match', a, b)

            alen = int(a[2]) - int(a[1])
            # doesn't care if the new interval is completely contained in b
            astart = randint(int(b[1]), int(b[2]))

            # subtract half the time.
            aend = (astart - alen) if randint(0, 1) == 0 and astart > alen \
                else (astart + alen)

            a[1], a[2] = map(str, (astart, aend) if astart < aend
                             else (aend, astart))

            print "\t".join(a)
        if missing > 0:
            print >> sys.stderr, ("found {missing} intervals in {bed} that "
                                  " were not contained in {loc}"
                                  .format(**locals()))
开发者ID:astatham,项目名称:poverlap,代码行数:60,代码来源:poverlap.py

示例10: main

def main():
    p = argparse.ArgumentParser(description=__doc__,
                   formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("-p", dest="pvals", help="BED containing all the p values"
                  " used to generate `regions`")
    p.add_argument("-r", dest="regions", help="BED containing all the regions")
    p.add_argument("-s", "--step", dest="step", type=int, default=50,
            help="step size for acf calculation. should be the same "
            " value as the step sent to -d arg for acf")
    p.add_argument("-c", dest="c", help="column number containing the p-value"
                   " of interest", type=str, default=-1)
    p.add_argument("-z", dest="z", help="use z-score correction",
                    action="store_true")
    args = p.parse_args()
    if not (args.regions and args.pvals):
        import sys
        sys.exit(not p.print_help())
    header = ts.nopen(args.regions).next()
    if header.startswith("#") or (not header.split("\t")[2].isdigit()):
        print "%s\tslk_p\tslk_sidak_p" % (header.rstrip("\r\n"),)

    header = ts.header(args.pvals)
    if args.c in header:
        args.c = header.index(args.c) + 1
    else:
        args.c = int(args.c)
    return run(args)
开发者ID:JinfengChen,项目名称:combined-pvalues,代码行数:27,代码来源:region_p.py

示例11: run_metric

def run_metric(cmd, metric=None):
    """
    Metric can be a string, e.g. "wc -l" or a python callable that consumes
    lines of input and returns a single value.
    e.g.

    def mymetric(fh):
        val = 0
        for line in fh:
            val += float(line.split("\t")[4])
        return val

    The lines sent to the metric function will be the result of bedtools
    intersect -wa -- so that both the -a and -b intervals will be present
    in each line.
    """

    if metric is None:
        cmd, metric = cmd
    if isinstance(metric, basestring):
        return float(run("%s | %s" % (cmd, metric)))
    else:
        proc = nopen("|%s" % cmd, mode=None)
        res = metric(proc.stdout)
        check_proc(proc, cmd)
        assert isinstance(res, (int, float))
        return res
开发者ID:astatham,项目名称:poverlap,代码行数:27,代码来源:poverlap.py

示例12: read_regions

def read_regions(fregions):
    if not fregions: return None
    regions = {}
    for toks in (l.split("\t") for l in ts.nopen(fregions) if l[0] != "#"):
        if not toks[0] in regions: regions[toks[0]] = []
        regions[toks[0]].append((int(toks[1]), int(toks[2])))
    return regions
开发者ID:JinfengChen,项目名称:combined-pvalues,代码行数:7,代码来源:manhattan.py

示例13: tofile

def tofile(fiter, fname):
    fh = nopen(fname, "w")
    for line in fiter:
        print >>fh, line.rstrip("\r\n")
    fh.close()
    atexit.register(os.unlink, fname)
    return fname
开发者ID:brentp,项目名称:shuffler,代码行数:7,代码来源:__main__.py

示例14: protein

 def protein(self):
     from toolshed import nopen
     l = "http://genome.ucsc.edu/cgi-bin/hgGene?hgg_do_getProteinSeq=1&hgg_gene="
     url = l + self.name
     seq = [x.strip() for x in nopen(url) if x.strip() and
             not ">" in x]
     return "".join(seq)
开发者ID:brentp,项目名称:cruzdb,代码行数:7,代码来源:models.py

示例15: main

def main():
    p = argparse.ArgumentParser(description=__doc__,
            formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument('attrs', help='Cytoscape attributes file of overlapping features')
    args = p.parse_args()
    
    attrsfile = nopen(args.attrs)
    # remove header
    attrsfile.readline()
    
    uniqid = os.path.basename(args.attrs).split(".", 1)[0]
    
    previous_feature = None
    
    for line in attrsfile:
        attr = line.rstrip("\r\n").split("=", 1)
        
        # everything before the "="
        cyto_info = attr[0].strip()
        feature = attr[1].strip().upper()
        fields = (cyto_info, "= True\n")
        
        if previous_feature and previous_feature == feature:
            fileout.write(" ".join(map(str, fields)))
        else:
            fileout = open("%s.%s.eda" % (uniqid, feature), 'w')
            fileout.write("feature%s\n" % feature)
            fileout.write(" ".join(map(str, fields)))
            previous_feature = feature
开发者ID:brwnj,项目名称:cu_projects,代码行数:29,代码来源:features_to_files.py


注:本文中的toolshed.nopen函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。