This article collects typical usage examples of the Python toolz.concat function. If you have been wondering how to use concat, how to call it, or what real-world code using it looks like, the curated examples below may help.
The following shows 15 code examples of the concat function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
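Before the examples, here is a minimal sketch of what toolz.concat does on its own: it lazily chains an iterable of iterables into a single flat iterator. The sample data below is made up purely for illustration.

from toolz import concat

# concat takes an iterable of iterables and yields their elements in order,
# producing a lazy iterator rather than a new list.
pieces = [[1, 2], (3, 4), range(5, 7)]   # hypothetical sample data
flat = concat(pieces)

print(list(flat))   # [1, 2, 3, 4, 5, 6]

# A pattern that recurs in the examples below: flatten nested results,
# then materialize them with list() or iterate over them directly.
nested = [['a', 'b'], ['c']]
assert list(concat(nested)) == ['a', 'b', 'c']

Note that toolz also provides concatv, which takes the inner iterables as separate positional arguments instead of a single outer iterable.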
Example 1: test_read_bytes_delimited
def test_read_bytes_delimited(s3, blocksize):
    _, values = read_bytes(test_bucket_name+'/test/accounts*',
                           blocksize=blocksize, delimiter=b'\n', s3=s3)
    _, values2 = read_bytes(test_bucket_name+'/test/accounts*',
                            blocksize=blocksize, delimiter=b'foo', s3=s3)
    assert ([a.key for a in concat(values)] !=
            [b.key for b in concat(values2)])

    results = compute(*concat(values))
    res = [r for r in results if r]
    assert all(r.endswith(b'\n') for r in res)
    ourlines = b''.join(res).split(b'\n')
    testlines = b"".join(files[k] for k in sorted(files)).split(b'\n')
    assert ourlines == testlines

    # delimiter not at the end
    d = b'}'
    _, values = read_bytes(test_bucket_name+'/test/accounts*',
                           blocksize=blocksize, delimiter=d, s3=s3)
    results = compute(*concat(values))
    res = [r for r in results if r]
    # All should end in } except EOF
    assert sum(r.endswith(b'}') for r in res) == len(res) - 2
    ours = b"".join(res)
    test = b"".join(files[v] for v in sorted(files))
    assert ours == test
Example 2: scatter
def scatter(kd, control, colors=['orange', 'blue'], **kwargs):
    """Show a jittered scatterplot of the measurements.

    Parameters
    ----------
    kd : list of list of float
        The list of `trf_quantify` results for all AUKB knockdown
        images in the dataset. (Each result is itself a list.)
    control : list of list of float
        The list of `trf_quantify` results for all control images in
        the dataset.
    colors : list of two matplotlib colorspecs, optional
        The colors corresponding to AUKB-KD (0) and control (1) data
        points on the scatterplot.

    Additional Parameters
    ---------------------
    **kwargs : keyword arguments
        Additional keyword arguments passed directly to
        ``plt.scatter``.

    Returns
    -------
    fig : matplotlib axes
        The returned value from the call to ``plt.scatter``.
    """
    xs = list(tz.concat([i + 0.2 * np.random.randn(n)
                         for i, n in enumerate(map(len, kd + control))]))
    color_vector = ([colors[0]] * sum(map(len, kd)) +
                    [colors[1]] * sum(map(len, control)))
    ys = list(tz.concat(kd + control))
    fig = plt.scatter(xs, ys, c=color_vector, **kwargs)
    plt.xlim(0, max(xs) + 1)
    plt.ylim(0, max(ys) + 1)
    return fig
Example 3: compute_up
def compute_up(expr, data, scope=None, **kwargs):
    data = lower_column(data)
    grouper = compute(
        expr.grouper,
        scope,
        post_compute=False,
        return_type='native',
        **kwargs
    )

    app = expr.apply
    reductions = [
        compute(
            val,
            data,
            post_compute=None,
            return_type='native',
        ).label(name)
        for val, name in zip(app.values, app.fields)
    ]

    froms = list(unique(chain(get_all_froms(grouper),
                              concat(map(get_all_froms, reductions)))))
    inner_cols = list(getattr(grouper, 'inner_columns', [grouper]))
    grouper_cols = inner_cols[:]
    inner_cols.extend(concat(
        getattr(getattr(r, 'element', None), 'inner_columns', [r])
        for r in reductions
    ))
    wheres = unify_wheres([grouper] + reductions)
    sel = unify_froms(sa.select(inner_cols, whereclause=wheres), froms)
    return sel.group_by(*grouper_cols)
Example 4: test_read_bytes_delimited
def test_read_bytes_delimited():
    with filetexts(files, mode='b'):
        for bs in [5, 15, 45, 1500]:
            _, values = read_bytes('.test.accounts*',
                                   blocksize=bs, delimiter=b'\n')
            _, values2 = read_bytes('.test.accounts*',
                                    blocksize=bs, delimiter=b'foo')
            assert ([a.key for a in concat(values)] !=
                    [b.key for b in concat(values2)])

            results = compute(*concat(values))
            res = [r for r in results if r]
            assert all(r.endswith(b'\n') for r in res)
            ourlines = b''.join(res).split(b'\n')
            testlines = b"".join(files[k] for k in sorted(files)).split(b'\n')
            assert ourlines == testlines

            # delimiter not at the end
            d = b'}'
            _, values = read_bytes('.test.accounts*', blocksize=bs, delimiter=d)
            results = compute(*concat(values))
            res = [r for r in results if r]
            # All should end in } except EOF
            assert sum(r.endswith(b'}') for r in res) == len(res) - 2
            ours = b"".join(res)
            test = b"".join(files[v] for v in sorted(files))
            assert ours == test
Example 5: diagnostic_yield
def diagnostic_yield(self, metric='completeness', cutoff=1,
                     superblock_ids=None, group_id=None, sample_ids=None):
    """Calculate diagnostic yield."""
    # extract column to filter on
    metric_column = getattr(BlockData, metric)

    # set up the base query for all blocks
    total_query = self.total_count(BlockData)

    if superblock_ids:
        # apply the superblock filter on the Block class level
        total_query = total_query.join(BlockData.parent)\
                                 .filter(Block.superblock_id.in_(superblock_ids))

    # extend base query to include only passed blocks
    pass_query = total_query.filter(metric_column >= cutoff)

    # optionally limit query
    queries = [limit_query(query, group=group_id, samples=sample_ids)
               for query in (total_query, pass_query)]

    # group multiple queries by sample ID (first column)
    metrics = groupby(get(0), concat(queries))

    # iterate over all values, concat different query results, and keep
    # only the unique values (excluding second sample_id)
    combined = (unique(concat(values)) for values in itervalues(metrics))

    # calculate diagnostic yield by simple division
    for sample_id, group_id, total, covered in combined:
        yield sample_id, group_id, (covered / total)
Example 6: test_modification_time_read_bytes
def test_modification_time_read_bytes():
    with s3_context('compress', files) as s3:
        _, a = read_bytes('compress/test/accounts.*', s3=s3)
        _, b = read_bytes('compress/test/accounts.*', s3=s3)

        assert [aa._key for aa in concat(a)] == [bb._key for bb in concat(b)]

    with s3_context('compress', valmap(double, files)) as s3:
        _, c = read_bytes('compress/test/accounts.*', s3=s3)

        assert [aa._key for aa in concat(a)] != [cc._key for cc in concat(c)]
Example 7: start
def start(self):
    self.status = 'running'
    logger.debug("Start Progress Plugin")
    self._start()
    if not self.keys or not any(v for v in self.keys.values()):
        self.stop()
    elif all(k in self.scheduler.exceptions_blame for k in
             concat(self.keys.values())):
        key = next(k for k in concat(self.keys.values()) if k in
                   self.scheduler.exceptions_blame)
        self.stop(exception=True, key=key)
Example 8: compute_up
def compute_up(expr, args, **kwargs):
    from_objs = list(unique(concat(map(get_all_froms, args))))
    if len(from_objs) > 1:
        # TODO: how do you do this in sql? please send help
        raise ValueError('only columns from the same table can be merged')

    cols = list(unique(concat(map(get_unsafe_inner_columns, args, expr.args))))
    sel = sa.select(cols, from_obj=from_objs[0])
    where = unify_wheres(args)
    if where is not None:
        sel = sel.where(where)
    return sel
Example 9: render_tabular
def render_tabular(api, options=None):
    """Entry point for the tabular reporter interface."""
    # determine separator
    separator = options.get('report.separator', '\t')
    human = options.get('report.human')
    panel = options.get('report.panel')
    samples = options.get('report.samples')
    group = options.get('report.group')

    # read gene panel file if it has been set
    if panel:
        superblock_ids = [line.rstrip() for line in panel]
    else:
        superblock_ids = None

    # get sample ID, group and cutoff from metadata
    sample_query = limit_query(api.samples(), group=group, samples=samples)
    metadata = ((sample.id, sample.group_id, sample.cutoff)
                for sample in sample_query)

    # get the data
    base_query = limit_query(api.average_metrics(superblock_ids=superblock_ids),
                             group=group,
                             samples=samples)

    queries = [metadata,
               base_query,
               api.diagnostic_yield(superblock_ids=superblock_ids,
                                    group_id=group, sample_ids=samples),
               api.sex_checker(group_id=group, sample_ids=samples)]

    # group multiple queries by sample ID (first column)
    key_metrics = groupby(get(0), concat(queries))

    # get the column names dynamically from the query
    headers = concatv(['sample_id', 'group_id', 'cutoff'],
                      (column['name'] for column
                       in base_query.column_descriptions),
                      ['diagnostic yield', 'gender'])

    unique_headers = unique(headers)

    # iterate over all values, concat different query results, and keep
    # only the unique values (excluding second sample_id)
    data = (unique(concat(values)) for values in itervalues(key_metrics))

    if human:
        # export key_metrics in a more human friendly format
        return tabulate(data, unique_headers)

    # yield headers
    return '\n'.join(cons('#' + separator.join(unique_headers),
                          stringify_list(data, separator=separator)))
Example 10: compile_components
def compile_components(summary, schema):
    """Given a ``Summary`` object and a table schema, return 5 sub-functions.

    Parameters
    ----------
    summary : Summary
        The expression describing the aggregations to be computed.

    Returns
    -------
    A tuple of the following functions:

    ``create(shape)``
        Takes the aggregate shape, and returns a tuple of initialized numpy
        arrays.

    ``info(df)``
        Takes a dataframe, and returns preprocessed 1D numpy arrays of the
        needed columns.

    ``append(i, x, y, *aggs_and_cols)``
        Appends the ``i``th row of the table to the ``(x, y)`` bin, given the
        base arrays and columns in ``aggs_and_cols``. This does the bulk of
        the work.

    ``combine(base_tuples)``
        Combine a list of base tuples into a single base tuple. This forms
        the reducing step in a reduction tree.

    ``finalize(aggs)``
        Given a tuple of base numpy arrays, returns the finalized ``dynd``
        array.
    """
    paths, reds = zip(*preorder_traversal(summary))

    # List of base reductions (actually computed)
    bases = list(unique(concat(r._bases for r in reds)))
    dshapes = [b.out_dshape(schema) for b in bases]
    # List of tuples of (append, base, input columns, temps)
    calls = [_get_call_tuples(b, d) for (b, d) in zip(bases, dshapes)]
    # List of unique column names needed
    cols = list(unique(concat(pluck(2, calls))))
    # List of temps needed
    temps = list(pluck(3, calls))

    create = make_create(bases, dshapes)
    info = make_info(cols)
    append = make_append(bases, cols, calls)
    combine = make_combine(bases, dshapes, temps)
    finalize = make_finalize(bases, summary, schema)

    return create, info, append, combine, finalize
Example 11: test_join
def test_join():
    cities = TableSymbol('cities', schema='{id: int, city: string}')
    j = join(t, cities, 'id')

    city_data = [[1, 'NYC'], [1, 'Chicago'], [5, 'Paris']]

    assert set(concat(compute(join(cities, t, 'id')[['name', 'city']],
                              {t: c, cities: city_data}))) == \
        set((('Alice', 'NYC'), ('Alice', 'Chicago'), ('Edith', 'Paris')))

    assert set(concat(compute(join(t, cities, 'id')[['name', 'city']],
                              {t: c, cities: city_data}))) == \
        set((('Alice', 'NYC'), ('Alice', 'Chicago'), ('Edith', 'Paris')))
Example 12: test_chunk_datetime
def test_chunk_datetime():
    data = [[1, 'Alice', 100, datetime.datetime(2014, 10, 1, 1, 1, 1)],
            [2, 'Bob', 200, datetime.datetime(2014, 10, 1, 1, 1, 1)],
            [3, 'Alice', -300, datetime.datetime(2014, 10, 1, 1, 1, 1)],
            [4, 'Charlie', 400, datetime.datetime(2014, 10, 1, 1, 1, 1)],
            [5, 'Edith', 200, datetime.datetime(2014, 10, 1, 1, 1, 1)]]

    t = Symbol('t', 'var * {id: int, name: string, amount: int, when: datetime}')

    c = ChunkIterable(data, chunksize=2)
    assert list(concat(compute(t.when.day, c))) == [1] * 5
    assert list(concat(compute(t.when.date, c))) == \
        [datetime.date(2014, 10, 1)] * 5
Example 13: test_deterministic_key_names
def test_deterministic_key_names(hdfs):
    data = b'abc\n' * int(1e3)
    fn = '%s/file' % basedir
    with hdfs.open(fn, 'wb', replication=1) as fil:
        fil.write(data)

    _, x = read_bytes('hdfs://%s/*' % basedir, delimiter=b'\n', sample=False)
    _, y = read_bytes('hdfs://%s/*' % basedir, delimiter=b'\n', sample=False)
    _, z = read_bytes('hdfs://%s/*' % basedir, delimiter=b'c', sample=False)

    assert [f.key for f in concat(x)] == [f.key for f in concat(y)]
    assert [f.key for f in concat(x)] != [f.key for f in concat(z)]
Example 14: test_join
def test_join():
    cities = symbol('cities', dshape='var * {id: int, city: string}')
    j = join(t, cities, 'id')

    city_data = [[1, 'NYC'], [1, 'Chicago'], [5, 'Paris']]

    assert set(concat(compute(j[['name', 'city']],
                              {t: c, cities: city_data}))) == \
        set((('Alice', 'NYC'), ('Alice', 'Chicago'), ('Edith', 'Paris')))

    assert set(concat(compute(j[['name', 'city']],
                              {t: c, cities: city_data}))) == \
        set((('Alice', 'NYC'), ('Alice', 'Chicago'), ('Edith', 'Paris')))
Example 15: test_deterministic_key_names
def test_deterministic_key_names(e, s, a, b):
    with make_hdfs() as (hdfs, basedir):
        data = b'abc\n' * int(1e3)
        fn = '%s/file' % basedir
        with hdfs.open(fn, 'wb', replication=1) as f:
            f.write(data)

        _, x = read_bytes('hdfs://%s/*' % basedir, delimiter=b'\n')
        _, y = read_bytes('hdfs://%s/*' % basedir, delimiter=b'\n')
        _, z = read_bytes('hdfs://%s/*' % basedir, delimiter=b'c')

        assert [f.key for f in concat(x)] == [f.key for f in concat(y)]
        assert [f.key for f in concat(x)] != [f.key for f in concat(z)]