This article collects typical usage examples of the Python function toolz.partition_all: what partition_all does, how to use it, and what it looks like in real code. The curated examples below may help answer those questions.
The following sections present 15 code examples of partition_all.
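As a quick orientation before the project-level examples, here is a minimal interactive sketch of what partition_all itself does (the data is illustrative only): it splits an iterable into tuples of at most n elements, with the final tuple possibly shorter, and it does so lazily.

>>> from toolz import partition_all
>>> # Break a sequence into groups of at most 3; the last group may be smaller.
>>> list(partition_all(3, range(8)))
[(0, 1, 2), (3, 4, 5), (6, 7)]
>>> # The result is a lazy iterator, so it also works on streaming input.
>>> batches = partition_all(2, iter([1, 2, 3, 4, 5]))
>>> next(batches)
(1, 2)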
Example 1: cb_filter
def cb_filter(fastq, bc1, bc2, cores, nedit):
    ''' Filters reads with non-matching barcodes
    Expects formatted fastq files.
    '''
    bc1 = set(cb.strip() for cb in bc1)
    if bc2:
        bc2 = set(cb.strip() for cb in bc2)

    if nedit == 0:
        filter_cb = partial(exact_barcode_filter, bc1=bc1, bc2=bc2)
    else:
        bc1hash = MutationHash(bc1, nedit)
        bc2hash = None
        if bc2:
            bc2hash = MutationHash(bc2, nedit)
        filter_cb = partial(correcting_barcode_filter, bc1hash=bc1hash,
                            bc2hash=bc2hash)

    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, stream_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_cb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
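Examples 3, 4, 5, 13 and 14 below reuse the same two-level chunking idiom seen here: an inner partition_all groups reads into fixed-size work units, and an outer partition_all groups those units into one batch per pool.map call. A stripped-down sketch of the idiom, assuming a trivial placeholder worker (process_chunk and process_stream are illustrative names, not part of the original project):

import multiprocessing
import sys

import toolz as tz

def process_chunk(chunk):
    # Placeholder worker: the real examples pass a partial() of a filter or
    # transform function here; this one just passes reads through unchanged.
    return list(chunk)

def process_stream(reads, cores=4):
    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, reads)      # work units of up to 10,000 reads
    bigchunks = tz.partition_all(cores, chunks)  # one work unit per worker per map call
    for bigchunk in bigchunks:
        for chunk in p.map(process_chunk, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)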
Example 2: partial_reduce
def partial_reduce(func, x, split_every, keepdims=False, dtype=None, name=None):
    """Partial reduction across multiple axes.

    Parameters
    ----------
    func : function
    x : Array
    split_every : dict
        Maximum reduction block sizes in each dimension.

    Example
    -------
    Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th
    dimension, and 3 blocks in the 2nd dimension:

    >>> partial_reduce(np.min, x, {0: 1, 2: 3})  # doctest: +SKIP
    """
    name = name or 'p_reduce-' + tokenize(func, x, split_every, keepdims, dtype)
    parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n)
             in enumerate(x.numblocks)]
    keys = product(*map(range, map(len, parts)))
    out_chunks = [tuple(1 for p in partition_all(split_every[i], c)) if i
                  in split_every else c for (i, c) in enumerate(x.chunks)]
    if not keepdims:
        out_axis = [i for i in range(x.ndim) if i not in split_every]
        getter = lambda k: get(out_axis, k)
        keys = map(getter, keys)
        out_chunks = list(getter(out_chunks))
    dsk = {}
    for k, p in zip(keys, product(*parts)):
        decided = dict((i, j[0]) for (i, j) in enumerate(p) if len(j) == 1)
        dummy = dict(i for i in enumerate(p) if i[0] not in decided)
        g = lol_tuples((x.name,), range(x.ndim), decided, dummy)
        dsk[(name,) + k] = (func, g)
    return Array(merge(dsk, x.dask), name, out_chunks, dtype=dtype)
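To see how partition_all drives the grouping in partial_reduce, take a hypothetical axis with 7 blocks and a split_every entry of 3 for that axis (numbers chosen for illustration): the block indices are grouped into reduction units of at most 3 blocks, and each unit contributes one output block of size 1.

>>> from toolz import partition_all
>>> numblocks, split_every = 7, 3   # hypothetical axis: 7 blocks, merge at most 3 per step
>>> list(partition_all(split_every, range(numblocks)))
[(0, 1, 2), (3, 4, 5), (6,)]
>>> tuple(1 for part in partition_all(split_every, range(numblocks)))
(1, 1, 1)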
Example 3: mb_filter
def mb_filter(fastq, cores):
    ''' Filters umis with non-ACGT bases
    Expects formatted fastq files.
    '''
    filter_mb = partial(umi_filter)
    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_mb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Example 4: fastqtransform
def fastqtransform(transform, fastq1, fastq2, separate_cb, demuxed_cb,
                   dual_index, cores, min_length):
    ''' Transform input reads to the tagcounts compatible read layout using
    regular expressions as defined in a transform file. Outputs new format to
    stdout.
    '''
    if dual_index and separate_cb:
        read_template = '{name}:CELL_{CB1}-{CB2}:UMI_{MB}\n{seq}\n+\n{qual}\n'
    else:
        read_template = '{name}:CELL_{CB}:UMI_{MB}\n{seq}\n+\n{qual}\n'

    transform = json.load(open(transform))
    read1_regex = re.compile(transform['read1'])
    read2_regex = re.compile(transform['read2']) if fastq2 else None

    fastq1_fh = open(fastq1)
    if fastq1.endswith('gz'):
        fastq1_fh = gzip.GzipFile(fileobj=fastq1_fh)

    fastq_file1 = stream_fastq(fastq1_fh)

    if fastq2:
        fastq2_fh = open(fastq2)
        if fastq2.endswith('gz'):
            fastq2_fh = gzip.GzipFile(fileobj=fastq2_fh)
        fastq_file2 = stream_fastq(fastq2_fh)
    else:
        fastq_file2 = itertools.cycle((None,))

    transform = partial(transformer, read1_regex=read1_regex,
                        read2_regex=read2_regex, paired=fastq2)
    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, itertools.izip(fastq_file1, fastq_file2))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(transform, list(bigchunk)):
            for read1_dict in chunk:
                if dual_index:
                    if not separate_cb:
                        read1_dict['CB'] = read1_dict['CB1'] + read1_dict['CB2']

                if demuxed_cb:
                    read1_dict['CB'] = demuxed_cb

                # Deal with spaces in read names
                read1_dict['name'] = read1_dict['name'].partition(' ')[0]

                if len(read1_dict['seq']) >= min_length:
                    sys.stdout.write(read_template.format(**read1_dict))
Example 5: add_uid
def add_uid(fastq, cores):
    ''' Adds UID:[samplebc cellbc umi] to readname for umi-tools deduplication
    Expects formatted fastq files with correct sample and cell barcodes.
    '''
    uids = partial(append_uids)
    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(uids, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Example 6: iterator_to_DataFrame_chunks
def iterator_to_DataFrame_chunks(seq, chunksize=1024, **kwargs):
    seq2 = partition_all(chunksize, seq)

    if kwargs.get('add_index'):
        mkindex = _add_index
    else:
        mkindex = _ignore_index

    try:
        first, rest = next(seq2), seq2
    except StopIteration:
        def _():
            yield convert(pd.DataFrame, [], **kwargs)
    else:
        df = convert(pd.DataFrame, first, **kwargs)
        df1, n1 = mkindex(df, 0)

        def _():
            n = n1
            yield df1
            for i in rest:
                df = convert(pd.DataFrame, i, **kwargs)
                df, n = mkindex(df, n)
                yield df

    return chunks(pd.DataFrame)(_)
Example 7: iterator_to_DataFrame_chunks
def iterator_to_DataFrame_chunks(seq, chunksize=1024, **kwargs):
    seq2 = partition_all(chunksize, seq)

    add_index = kwargs.get('add_index', False)
    if not add_index:
        # Simple, we can dispatch to dask...
        f = lambda d: convert(pd.DataFrame, d, **kwargs)
        data = [partial(f, d) for d in seq2]
        if not data:
            data = [convert(pd.DataFrame, [], **kwargs)]
        return chunks(pd.DataFrame)(data)

    # TODO: Decide whether we should support the `add_index` flag at all.
    # If so, we need to post-process the converted DataFrame objects sequentially,
    # so we can't parallelize the process.
    try:
        first, rest = next(seq2), seq2
    except StopIteration:
        def _():
            yield convert(pd.DataFrame, [], **kwargs)
    else:
        df = convert(pd.DataFrame, first, **kwargs)
        df1, n1 = _add_index(df, 0)

        def _():
            n = n1
            yield df1
            for i in rest:
                df = convert(pd.DataFrame, i, **kwargs)
                df, n = _add_index(df, n)
                yield df

    return chunks(pd.DataFrame)(_)
Example 8: test_broken_worker_during_computation
def test_broken_worker_during_computation(c, s, a, b):
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    n.start(0)

    start = time()
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < start + 5

    L = c.map(inc, range(256))
    for i in range(8):
        L = c.map(add, *zip(*partition_all(2, L)))

    from random import random
    yield gen.sleep(random() / 2)
    with ignoring(OSError):
        n.process.terminate()
    yield gen.sleep(random() / 2)
    with ignoring(OSError):
        n.process.terminate()

    result = yield c._gather(L)
    assert isinstance(result[0], int)

    yield n._close()
Example 9: append_iterator_to_table
def append_iterator_to_table(t, rows, dshape=None, **kwargs):
    assert not isinstance(t, type)
    rows = iter(rows)

    # We see if the sequence is of tuples or dicts
    # If tuples then we coerce them to dicts
    try:
        row = next(rows)
    except StopIteration:
        return
    rows = chain([row], rows)
    if isinstance(row, (tuple, list)):
        if dshape and isinstance(dshape.measure, datashape.Record):
            names = dshape.measure.names
            if set(names) != set(discover(t).measure.names):
                raise ValueError("Column names of incoming data don't match "
                                 "column names of existing SQL table\n"
                                 "Names in SQL table: %s\n"
                                 "Names from incoming data: %s\n" %
                                 (discover(t).measure.names, names))
        else:
            names = discover(t).measure.names
        rows = (dict(zip(names, row)) for row in rows)

    engine = t.bind
    with engine.connect() as conn:
        for chunk in partition_all(1000, rows):  # TODO: 1000 is hardcoded
            conn.execute(t.insert(), chunk)

    return t
Example 10: into
def into(a, b, **kwargs):
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Example 11: into
def into(a, b, **kwargs):
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = ctable([into(np.ndarray(0), c2) for c2 in zip(*chunk)], **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Example 12: execute
def execute(file_name):
    categories = ['distinguished', 'removal_reason']
    f = load(file_name)
    batches = partition_all(200000, f)
    df, frames = peek(map(to_df, batches))
    castra = Castra('./subreddit_dumps/' + file_name + '.castra',
                    template=df, categories=categories)
    castra.extend_sequence(frames, freq='3h')
Example 13: cb_filter
def cb_filter(fastq, bc1, bc2, cores):
    ''' Filters reads with non-matching barcodes
    Expects formatted fastq files.
    '''
    bc1 = set(cb.strip() for cb in bc1)
    if bc2:
        bc2 = set(cb.strip() for cb in bc2)

    filter_cb = partial(cb_filterer, bc1=bc1, bc2=bc2)

    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, stream_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_cb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Example 14: sb_filter
def sb_filter(fastq, bc, cores, nedit):
    ''' Filters reads with non-matching sample barcodes
    Expects formatted fastq files.
    '''
    barcodes = set(sb.strip() for sb in bc)
    if nedit == 0:
        filter_sb = partial(exact_sample_filter2, barcodes=barcodes)
    else:
        barcodehash = MutationHash(barcodes, nedit)
        filter_sb = partial(correcting_sample_filter2, barcodehash=barcodehash)

    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_sb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Example 15: into
def into(a, b, **kwargs):
    kwargs = keyfilter(carray_keywords.__contains__, kwargs)
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a