

Python toolz.partition_all Function Code Examples

This article collects typical usage examples of the Python toolz.partition_all function. If you have been wondering what exactly partition_all does, how to call it, or what it looks like in real code, the curated examples here should help.


Below are 15 code examples of the partition_all function, sorted by popularity by default.
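Before the examples, a quick reminder of the function itself: partition_all(n, seq) lazily breaks an iterable into tuples of at most n elements, and only the final tuple may be shorter. At the interpreter:

>>> from toolz import partition_all
>>> list(partition_all(3, range(10)))
[(0, 1, 2), (3, 4, 5), (6, 7, 8), (9,)]

Unlike toolz.partition, it never drops or pads the leftover elements, which is why it is the natural choice for chunking streams of unknown length, as in the examples below.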

Example 1: cb_filter

def cb_filter(fastq, bc1, bc2, cores, nedit):
    ''' Filters reads with non-matching barcodes
    Expects formatted fastq files.
    '''

    bc1 = set(cb.strip() for cb in bc1)
    if bc2:
        bc2 = set(cb.strip() for cb in bc2)

    if nedit == 0:
        filter_cb = partial(exact_barcode_filter, bc1=bc1, bc2=bc2)
    else:
        bc1hash = MutationHash(bc1, nedit)
        bc2hash = None
        if bc2:
            bc2hash = MutationHash(bc2, nedit)
        filter_cb = partial(correcting_barcode_filter, bc1hash=bc1hash,
                            bc2hash=bc2hash)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, stream_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_cb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: roryk, Project: umis, Lines: 26, Source file: umis.py
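The two-level chunking in cb_filter (fixed-size chunks of reads, then cores-sized batches of chunks fed to Pool.map) recurs in several examples below. Here is a minimal self-contained sketch of the same pattern; process_chunk and filter_stream are hypothetical stand-ins for the project-specific filter functions, not part of umis:

import multiprocessing
import sys

import toolz as tz

def process_chunk(chunk):
    # Stand-in for filter_cb: keep only items that pass some test.
    return [item for item in chunk if item % 2 == 0]

def filter_stream(stream, cores=4):
    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, stream)     # fixed-size units of work
    bigchunks = tz.partition_all(cores, chunks)  # one chunk per worker per map() call
    for bigchunk in bigchunks:
        for chunk in p.map(process_chunk, list(bigchunk)):
            for item in chunk:
                sys.stdout.write('%d\n' % item)

if __name__ == '__main__':
    filter_stream(iter(range(100000)))

Batching the chunks into groups of cores keeps at most one round of work per worker in flight, so the input stream is never fully materialized in memory.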

Example 2: partial_reduce

def partial_reduce(func, x, split_every, keepdims=False, dtype=None, name=None):
    """Partial reduction across multiple axes.

    Parameters
    ----------
    func : function
    x : Array
    split_every : dict
        Maximum reduction block sizes in each dimension.

    Examples
    --------
    Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th
    dimension, and 3 blocks in the 2nd dimension:

    >>> partial_reduce(np.min, x, {0: 1, 2: 3})    # doctest: +SKIP
    """
    name = name or 'p_reduce-' + tokenize(func, x, split_every, keepdims, dtype)
    parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n)
             in enumerate(x.numblocks)]
    keys = product(*map(range, map(len, parts)))
    out_chunks = [tuple(1 for p in partition_all(split_every[i], c)) if i
                  in split_every else c for (i, c) in enumerate(x.chunks)]
    if not keepdims:
        out_axis = [i for i in range(x.ndim) if i not in split_every]
        getter = lambda k: get(out_axis, k)
        keys = map(getter, keys)
        out_chunks = list(getter(out_chunks))
    dsk = {}
    for k, p in zip(keys, product(*parts)):
        decided = dict((i, j[0]) for (i, j) in enumerate(p) if len(j) == 1)
        dummy = dict(i for i in enumerate(p) if i[0] not in decided)
        g = lol_tuples((x.name,), range(x.ndim), decided, dummy)
        dsk[(name,) + k] = (func, g)
    return Array(merge(dsk, x.dask), name, out_chunks, dtype=dtype)
Developer: jcorbin, Project: dask, Lines: 35, Source file: reductions.py

Example 3: mb_filter

def mb_filter(fastq, cores):
    ''' Filters umis with non-ACGT bases
    Expects formatted fastq files.
    '''
    filter_mb = partial(umi_filter)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_mb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: vals, Project: umis, Lines: 13, Source file: umis.py

Example 4: fastqtransform

def fastqtransform(transform, fastq1, fastq2, separate_cb, demuxed_cb,
                   dual_index, cores, min_length):
    ''' Transform input reads to the tagcounts compatible read layout using
    regular expressions as defined in a transform file. Outputs new format to
    stdout.
    '''
    if dual_index and separate_cb:
        read_template = '{name}:CELL_{CB1}-{CB2}:UMI_{MB}\n{seq}\n+\n{qual}\n'
    else:
        read_template = '{name}:CELL_{CB}:UMI_{MB}\n{seq}\n+\n{qual}\n'

    transform = json.load(open(transform))
    read1_regex = re.compile(transform['read1'])
    read2_regex = re.compile(transform['read2']) if fastq2 else None

    fastq1_fh = open(fastq1)
    if fastq1.endswith('gz'):
        fastq1_fh = gzip.GzipFile(fileobj=fastq1_fh)

    fastq_file1 = stream_fastq(fastq1_fh)

    if fastq2:
        fastq2_fh = open(fastq2)
        if fastq2.endswith('gz'):
            fastq2_fh = gzip.GzipFile(fileobj=fastq2_fh)

        fastq_file2 = stream_fastq(fastq2_fh)

    else:
        fastq_file2 = itertools.cycle((None,))

    transform = partial(transformer, read1_regex=read1_regex,
                          read2_regex=read2_regex, paired=fastq2)
    p = multiprocessing.Pool(cores)

    # Python 2 idiom: itertools.izip; on Python 3 this would be the builtin zip()
    chunks = tz.partition_all(10000, itertools.izip(fastq_file1, fastq_file2))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(transform, list(bigchunk)):
            for read1_dict in chunk:
                if dual_index:
                    if not separate_cb:
                        read1_dict['CB'] = read1_dict['CB1'] + read1_dict['CB2']

                if demuxed_cb:
                    read1_dict['CB'] = demuxed_cb

                # Deal with spaces in read names
                read1_dict['name'] = read1_dict['name'].partition(' ')[0]
                if len(read1_dict['seq']) >= min_length:
                    sys.stdout.write(read_template.format(**read1_dict))
Developer: flying-sheep, Project: umis, Lines: 51, Source file: umis.py

Example 5: add_uid

def add_uid(fastq, cores):
    ''' Adds UID:[samplebc cellbc umi] to readname for umi-tools deduplication
    Expects formatted fastq files with correct sample and cell barcodes.
    '''

    uids = partial(append_uids)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(uids, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: vals, Project: umis, Lines: 14, Source file: umis.py

Example 6: iterator_to_DataFrame_chunks

def iterator_to_DataFrame_chunks(seq, chunksize=1024, **kwargs):
    seq2 = partition_all(chunksize, seq)

    if kwargs.get('add_index'):
        mkindex = _add_index
    else:
        mkindex = _ignore_index

    try:
        first, rest = next(seq2), seq2
    except StopIteration:
        def _():
            yield convert(pd.DataFrame, [], **kwargs)
    else:
        df = convert(pd.DataFrame, first, **kwargs)
        df1, n1 = mkindex(df, 0)

        def _():
            n = n1
            yield df1
            for i in rest:
                df = convert(pd.DataFrame, i, **kwargs)
                df, n = mkindex(df, n)
                yield df
    return chunks(pd.DataFrame)(_)
Developer: EGQM, Project: odo, Lines: 25, Source file: convert.py

Example 7: iterator_to_DataFrame_chunks

def iterator_to_DataFrame_chunks(seq, chunksize=1024, **kwargs):
    seq2 = partition_all(chunksize, seq)

    add_index = kwargs.get('add_index', False)
    if not add_index:
        # Simple, we can dispatch to dask...
        f = lambda d: convert(pd.DataFrame, d, **kwargs)
        data = [partial(f, d) for d in seq2]
        if not data:
            data = [convert(pd.DataFrame, [], **kwargs)]
        return chunks(pd.DataFrame)(data)

    # TODO: Decide whether we should support the `add_index` flag at all.
    # If so, we need to post-process the converted DataFrame objects sequentially,
    # so we can't parallelize the process.
    try:
        first, rest = next(seq2), seq2
    except StopIteration:
        def _():
            yield convert(pd.DataFrame, [], **kwargs)
    else:
        df = convert(pd.DataFrame, first, **kwargs)
        df1, n1 = _add_index(df, 0)

        def _():
            n = n1
            yield df1
            for i in rest:
                df = convert(pd.DataFrame, i, **kwargs)
                df, n = _add_index(df, n)
                yield df
    return chunks(pd.DataFrame)(_)
Developer: jdmcbr, Project: odo, Lines: 32, Source file: convert.py
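Stripped of odo's dispatch machinery, the core of both iterator_to_DataFrame_chunks variants is just partition_all feeding a DataFrame constructor. A rough sketch of that idea, assuming a generator of dicts (seq and frames are illustrative names):

import pandas as pd
from toolz import partition_all

seq = ({'a': i, 'b': i * 2} for i in range(2500))
frames = [pd.DataFrame(list(chunk)) for chunk in partition_all(1024, seq)]
# Three frames of 1024, 1024, and 452 rows.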

Example 8: test_broken_worker_during_computation

def test_broken_worker_during_computation(c, s, a, b):
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    n.start(0)

    start = time()
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < start + 5

    L = c.map(inc, range(256))
    for i in range(8):
        L = c.map(add, *zip(*partition_all(2, L)))

    from random import random
    yield gen.sleep(random() / 2)
    with ignoring(OSError):
        n.process.terminate()
    yield gen.sleep(random() / 2)
    with ignoring(OSError):
        n.process.terminate()

    result = yield c._gather(L)
    assert isinstance(result[0], int)

    yield n._close()
Developer: dask, Project: distributed, Lines: 25, Source file: test_worker_failure.py
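The line L = c.map(add, *zip(*partition_all(2, L))) performs one level of a pairwise tree reduction: partition_all(2, L) pairs the futures up, zip(*...) transposes the pairs into two argument sequences, and map applies add across them, halving L each round (256 -> 128 -> ... -> 1 after eight rounds). The same idea in plain Python, without the distributed client:

from operator import add
from toolz import partition_all

L = list(range(256))
while len(L) > 1:
    # Pair neighbours and sum each pair; len(L) is a power of two,
    # so no odd leftover 1-tuple ever reaches add().
    L = [add(*pair) for pair in partition_all(2, L)]
assert L[0] == sum(range(256))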

Example 9: append_iterator_to_table

def append_iterator_to_table(t, rows, dshape=None, **kwargs):
    assert not isinstance(t, type)
    rows = iter(rows)

    # We see if the sequence is of tuples or dicts
    # If tuples then we coerce them to dicts
    try:
        row = next(rows)
    except StopIteration:
        return
    rows = chain([row], rows)
    if isinstance(row, (tuple, list)):
        if dshape and isinstance(dshape.measure, datashape.Record):
            names = dshape.measure.names
            if set(names) != set(discover(t).measure.names):
                raise ValueError("Column names of incoming data don't match "
                                 "column names of existing SQL table\n"
                                 "Names in SQL table: %s\n"
                                 "Names from incoming data: %s\n" %
                                 (discover(t).measure.names, names))
        else:
            names = discover(t).measure.names
        rows = (dict(zip(names, row)) for row in rows)

    engine = t.bind
    with engine.connect() as conn:
        for chunk in partition_all(1000, rows):  # TODO: 1000 is hardcoded
            conn.execute(t.insert(), chunk)

    return t
Developer: pieterdavid, Project: odo, Lines: 30, Source file: sql.py
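The final loop is the key use of partition_all here: executemany-style inserts in bounded batches rather than one giant statement. A minimal sketch against an in-memory SQLite database, assuming SQLAlchemy 1.x connection semantics (2.0 would also need an explicit commit); the table and column names are illustrative:

import sqlalchemy as sa
from toolz import partition_all

engine = sa.create_engine('sqlite://')
metadata = sa.MetaData()
t = sa.Table('points', metadata,
             sa.Column('x', sa.Integer),
             sa.Column('y', sa.Integer))
metadata.create_all(engine)

rows = ({'x': i, 'y': i * i} for i in range(5000))
with engine.connect() as conn:
    for chunk in partition_all(1000, rows):  # bounded memory per batch
        conn.execute(t.insert(), list(chunk))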

Example 10: into

def into(a, b, **kwargs):
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Developer: pgnepal, Project: blaze, Lines: 8, Source file: bcolz.py

Example 11: into

def into(a, b, **kwargs):
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = ctable([into(np.ndarray(0), c2) for c2 in zip(*chunk)], **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Developer: holdenk, Project: blaze, Lines: 8, Source file: bcolz.py

Example 12: execute

def execute(file_name):
    categories = ['distinguished', 'removal_reason']
    f = load(file_name)
    batches = partition_all(200000, f)
    df, frames = peek(map(to_df, batches))
    castra = Castra('./subreddit_dumps/'+file_name+'.castra',
                    template = df, categories = categories)
    castra.extend_sequence(frames, freq = '3h')
Developer: JherezTaylor, Project: Datamining-Reddit, Lines: 8, Source file: make_subreddit_castra.py

Example 13: cb_filter

def cb_filter(fastq, bc1, bc2, cores):
    ''' Filters reads with non-matching barcodes
    Expects formatted fastq files.
    '''

    bc1 = set(cb.strip() for cb in bc1)
    if bc2:
        bc2 = set(cb.strip() for cb in bc2)

    filter_cb = partial(cb_filterer, bc1=bc1, bc2=bc2)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, stream_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_cb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: Teichlab, Project: umis, Lines: 18, Source file: umis.py

Example 14: sb_filter

def sb_filter(fastq, bc, cores, nedit):
    ''' Filters reads with non-matching sample barcodes
    Expects formatted fastq files.
    '''
    barcodes = set(sb.strip() for sb in bc)
    if nedit == 0:
        filter_sb = partial(exact_sample_filter2, barcodes=barcodes)
    else:
        barcodehash = MutationHash(barcodes, nedit)
        filter_sb = partial(correcting_sample_filter2, barcodehash=barcodehash)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_sb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: vals, Project: umis, Lines: 18, Source file: umis.py

Example 15: into

def into(a, b, **kwargs):
    kwargs = keyfilter(carray_keywords.__contains__, kwargs)
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Developer: leolujuyi, Project: blaze, Lines: 9, Source file: bcolz.py


Note: The toolz.partition_all examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many programmers; copyright of the source code belongs to the original authors. Refer to each project's license before distributing or using the code. Do not repost without permission.