This page collects and organizes typical usage examples of the partial function from the Python toolz library. If you have been wondering what partial does, how to call it, or what it looks like in real code, the curated examples below may help. 15 code examples of the partial function are shown, ordered by popularity by default.
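Before the examples, a minimal sketch of what partial itself does. In the toolz versions these examples target, toolz.partial is simply functools.partial re-exported, so the sketch imports it from functools; the power function is made up for illustration.

from functools import partial  # toolz.partial is functools.partial re-exported in these versions

def power(base, exponent):
    return base ** exponent

square = partial(power, exponent=2)   # freeze the keyword argument
assert square(4) == 16
assert partial(power, 2)(10) == 1024  # positional arguments can be frozen too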
Example 1: extract_and_capitalize_headlines_from_corpus
def extract_and_capitalize_headlines_from_corpus(corpus_dir, docids):
    """
    Iterate through the given documents in `corpus_dir`,
    extract their headlines, capitalize them and yield them.

    Parameters
    ----------
    corpus_dir: string
    docids: list of string
        the documents to be processed

    Returns
    -------
    generator of (docid, headlines): (str, list<list<str>>)
    """
    get_tokens = partial(map, partial(get_in, ["token"]))
    get_features = partial(get_in, ["features"])
    make_capitalized_title_new = lambda words: make_capitalized_title(title_words=words)

    for docid in docids:
        p = Path(corpus_dir) / Path(docid)
        auxil_p = p.with_suffix(".auxil")
        paf_p = p.with_suffix(".paf")
        if auxil_p.exists() and paf_p.exists():
            try:
                titles, _ = separate_title_from_body(str(auxil_p), str(paf_p))
            except Exception as e:
                yield (e, None)
                continue  # skip this document if title extraction failed

            # pipeline:
            # -> get features
            # -> get tokens
            # -> capitalize headline
            yield (None,
                   (p.name,
                    list(map(compose(make_capitalized_title_new,
                                     get_tokens,
                                     get_features),
                             titles))))
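The pattern worth noting above is building small accessors with partial and chaining them with compose. A minimal, self-contained sketch, assuming a title record shaped roughly like the ones this function processes:

from functools import partial
from toolz import compose, get_in

# Hypothetical title record, shaped like the ones the pipeline above expects.
title = {"features": [{"token": "the"}, {"token": "quick"}, {"token": "fox"}]}

get_features = partial(get_in, ["features"])            # record -> list of feature dicts
get_tokens = partial(map, partial(get_in, ["token"]))   # feature dicts -> token strings

# compose applies right to left: first get_features, then get_tokens.
assert list(compose(get_tokens, get_features)(title)) == ["the", "quick", "fox"]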
Example 2: read_csv
def read_csv(fn, *args, **kwargs):
    chunksize = kwargs.pop('chunksize', 2**16)
    categorize = kwargs.pop('categorize', None)
    index = kwargs.pop('index', None)
    if index and categorize is None:
        categorize = True

    header = kwargs.get('header', 1)
    nlines = linecount(fn) - header
    nchunks = int(ceil(1.0 * nlines / chunksize))

    read = next(read_csv_names)

    blockdivs = tuple(range(chunksize, nlines, chunksize))

    one_chunk = pd.read_csv(fn, *args, nrows=100, **kwargs)

    cols = []
    if categorize or index:
        if categorize:
            category_columns = [c for c in one_chunk.dtypes.index
                                if one_chunk.dtypes[c] == 'O']
        else:
            category_columns = []
        cols = category_columns + ([index] if index else [])
        d = read_csv(fn, *args, **merge(kwargs,
                                        dict(chunksize=chunksize,
                                             usecols=cols,
                                             categorize=False,
                                             parse_dates=None)))
        categories = [d[c].drop_duplicates() for c in category_columns]

        if index:
            quantiles = d[index].quantiles(np.linspace(0, 100, nchunks + 1)[1:-1])
            result = compute(quantiles, *categories)
            quantiles, categories = result[0], result[1:]
        else:
            categories = compute(*categories)
        categories = dict(zip(category_columns, categories))

    kwargs['chunksize'] = chunksize
    load = {(read, -1): (partial(pd.read_csv, *args, **kwargs), fn)}
    load.update(dict(((read, i), (get_chunk, (read, i - 1), chunksize * i))
                     for i in range(nchunks)))

    name = next(names)
    dsk = dict(((name, i), (getitem, (read, i), 0))
               for i in range(nchunks))

    result = DataFrame(merge(dsk, load), name, one_chunk.columns, blockdivs)

    if categorize:
        func = partial(categorize_block, categories=categories)
        result = result.map_blocks(func, columns=result.columns)

    if index:
        result = set_partition(result, index, quantiles)

    return result
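The load dictionary stores dask-style tasks, tuples whose first element is a callable; partial is what lets the keyword arguments ride along inside that callable. A rough sketch of the idea (the file name and the tiny executor are made up):

from functools import partial
import pandas as pd

# A dask-style task: (callable, argument), with keyword arguments frozen by partial.
task = (partial(pd.read_csv, sep=",", nrows=1000), "data.csv")  # hypothetical file

def run_task(task):
    func, *args = task
    return func(*args)

# run_task(task) would behave like pd.read_csv("data.csv", sep=",", nrows=1000)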
Example 3: ccds_to_bed
def ccds_to_bed(ccds_stream):
    """Convert CCDS dump to Chanjo-style BED stream.

    Main entry point for the default Chanjo converter (ccds). It converts
    a sorted (start, chrom) CCDS database to the Chanjo BED-format.

    Args:
        ccds_stream (file): file handle to read CCDS lines from

    Yields:
        Interval: interval with merged block and superblock ids
    """
    return pipe(
        ccds_stream,
        filter(grep('Public')),                           # keep only 'Public' tx
        map(text_type.rstrip),                            # strip \n and spaces
        map(split(sep='\t')),                             # split into list
        map(extract_intervals),                           # convert to Interval
        concat,                                           # flatten
        map(rename_sex_interval),                         # rename sex contigs
        partial(lazy_groupby, key=attrgetter('contig')),  # group by contig
        pluck(1),                                         # extract second item
        map(groupby(attrgetter('name'))),                 # non-lazy group by id
        map(valmap(merge_related_elements)),              # group intervals
        map(itervalues),                                  # extract values
        map(partial(sorted, key=attrgetter('start'))),    # sort by start pos
        concat                                            # flatten
    )
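Each stage passed to pipe must accept exactly one argument, which is why sorted and lazy_groupby are wrapped in partial above. A self-contained sketch of the same idea, using plain (contig, start) tuples instead of Interval objects:

from functools import partial
from operator import itemgetter
from toolz import pipe, concat

intervals = [[("chr1", 30), ("chr1", 10), ("chr2", 5)]]   # one group of (contig, start) pairs

result = pipe(
    intervals,
    partial(map, partial(sorted, key=itemgetter(1))),  # sort each group by start position
    concat,                                            # flatten back into one stream
    list,
)
assert result == [("chr1", 10), ("chr1", 30), ("chr2", 5)]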
Example 4: test_to_tree_slice
def test_to_tree_slice(serial):
    t = symbol('t', 'var * {name: string, amount: int32}')
    expr = t[:5]
    expr2 = pipe(expr,
                 partial(to_tree, names={t: 't'}),
                 serial.dumps,
                 serial.loads,
                 partial(from_tree, namespace={'t': t}))
    assert expr.isidentical(expr2)
Example 5: working_datetime_ranges_of_date
def working_datetime_ranges_of_date(d,
                                    special_working_hours={},
                                    week_working_hours={},
                                    merge_tomorrow=True):
    """
    Return a list of datetime tuples (datetime ranges) describing the
    contiguous working periods of the given date. If `merge_tomorrow` is
    set, check whether the first period of tomorrow is contiguous with
    the last period of today and, if so, merge the two.
    """
    # curried on the working-hours configuration
    whs_by_date = partial(working_hours_of_date,
                          special_working_hours=special_working_hours,
                          week_working_hours=week_working_hours)
    # curried on the date
    whs_to_dt_ranges = partial(working_hours_to_datetime_ranges, d)

    today_working_hours = whs_by_date(d)

    if not len(today_working_hours):
        return []

    if not merge_tomorrow:
        return whs_to_dt_ranges(today_working_hours)

    tomorrow_working_hours = whs_by_date(tomorrow(d))
    if are_working_hours_contiguous(today_working_hours,
                                    tomorrow_working_hours):
        # The last range of today becomes a range merged from the last
        # period of today and the first period of tomorrow.
        next_day = tomorrow(d)

        # When tomorrow's working hours end at 00:00 they must be
        # (00:00, 00:00), since they are contiguous with today's hours.
        # In that case add one more day, because the period really ends
        # at 00:00 of the day after; this covers 24/7-like schedules.
        if tomorrow_working_hours[0][1] == time(0):
            next_day = tomorrow(next_day)

        last_period = (
            datetime.combine(d, today_working_hours[-1][0]),
            datetime.combine(next_day, tomorrow_working_hours[0][1])
        )
        return whs_to_dt_ranges(today_working_hours[:-1]) + [last_period]

    return whs_to_dt_ranges(today_working_hours)
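The two partial calls are the heart of this function: one freezes the working-hours configuration, the other freezes the date, so the rest of the body only deals with one-argument helpers. A stripped-down sketch of that pattern, with a made-up stand-in for working_hours_of_date:

import datetime
from functools import partial

def working_hours_of_date(d, special_working_hours={}, week_working_hours={}):
    # Hypothetical stand-in: special hours for a date win over the weekly schedule.
    return special_working_hours.get(d, week_working_hours.get(d.weekday(), []))

week = {0: [("09:00", "17:00")]}   # Mondays only
whs_by_date = partial(working_hours_of_date, week_working_hours=week)  # freeze the configuration

assert whs_by_date(datetime.date(2024, 1, 1)) == [("09:00", "17:00")]  # 2024-01-01 is a Monday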
Example 6: __getattr__
def __getattr__(self, key):
    if key == '_hash':
        raise AttributeError()
    try:
        return _attr_cache[(self, key)]
    except:
        pass
    try:
        result = object.__getattribute__(self, key)
    except AttributeError:
        fields = dict(zip(map(valid_identifier, self.fields),
                          self.fields))
        if self.fields and key in fields:
            if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                result = self
            else:
                result = self[fields[key]]
        else:
            d = toolz.merge(schema_methods(self.dshape.measure),
                            dshape_methods(self.dshape))
            if key in d:
                func = d[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = functools.update_wrapper(partial(func, self),
                                                      func)
            else:
                raise
    _attr_cache[(self, key)] = result
    return result
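partial(func, self) here is essentially a hand-rolled bound method, and functools.update_wrapper copies the original function's name and docstring onto it so the result introspects nicely. A toy sketch of that pattern outside of blaze (the describe function and Wrapper class are made up):

import functools
from functools import partial

def describe(obj, verbose=False):
    """Return a short description of obj."""
    if verbose:
        return "%s: %r" % (type(obj).__name__, obj)
    return repr(obj)

class Wrapper(object):
    def __getattr__(self, key):
        if key == "describe":
            # Bind self as the first argument and keep describe's metadata.
            return functools.update_wrapper(partial(describe, self), describe)
        raise AttributeError(key)

w = Wrapper()
assert w.describe.__doc__ == "Return a short description of obj."
assert w.describe(verbose=True).startswith("Wrapper")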
Example 7: get
def get(dsk, keys, optimizations=[fuse], num_workers=cpu_count):
    """ Multiprocessed get function appropriate for Bags """
    pool = _globals['pool']

    if pool is None:
        pool = multiprocessing.Pool(psutil.cpu_count())
        cleanup = True
    else:
        cleanup = False

    manager = multiprocessing.Manager()
    queue = manager.Queue()

    apply_async = dill_apply_async(pool.apply_async)

    # Optimize Dask
    dsk2 = pipe(dsk, partial(cull, keys=keys), *optimizations)

    try:
        # Run
        result = get_async(apply_async, cpu_count, dsk2, keys,
                           queue=queue)
    finally:
        if cleanup:
            pool.close()

    return result
Example 8: format_results
def format_results(terminal_width, key_list, separator, text_list,
                   left_align=True, min_factor=3, **kwargs):
    """Returns formatted results in two columns.
    """
    key_width = max(map(len, key_list))
    separator_length = len(separator)
    desc_wrap = toolz.identity

    if terminal_width:
        if key_width / terminal_width > .5:
            key_width = terminal_width // 2 - 3
        text_width = terminal_width - key_width - separator_length
        if text_width * min_factor > terminal_width:
            desc_wrap = toolz.compose(
                ('\n' + ' ' * (key_width + separator_length)).join,
                toolz.partial(textwrap.wrap, width=text_width, **kwargs),
            )

    if left_align:
        fmt = '%-*s%s%s'
    else:
        fmt = '%*s%s%s'

    for key, text in zip(key_list, text_list):
        text = desc_wrap(text)
        if len(key) > key_width:
            yield fmt % (key_width, key, separator, '')
            yield fmt % (key_width, '', ' ' * separator_length, text)
        else:
            yield fmt % (key_width, key, separator, text)
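desc_wrap is a nice standalone use of partial: textwrap.wrap is frozen at the computed column width, then composed with a joiner that indents continuation lines under the text column. A minimal sketch with fixed widths (the widths themselves are arbitrary):

import textwrap
import toolz

key_width, separator_length, text_width = 10, 2, 30

desc_wrap = toolz.compose(
    ('\n' + ' ' * (key_width + separator_length)).join,  # indent continuation lines
    toolz.partial(textwrap.wrap, width=text_width),      # wrap text to the column width
)

# Continuation lines end up aligned under the text column.
print(desc_wrap("a fairly long description that needs to be wrapped"))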
Example 9: trim_internal
def trim_internal(x, axes, boundary=None):
    """ Trim sides from each block

    This couples well with the overlap operation, which may leave excess
    data on each block.

    See also
    --------
    dask.array.chunk.trim
    dask.array.map_blocks
    """
    boundary = coerce_boundary(x.ndim, boundary)

    olist = []
    for i, bd in enumerate(x.chunks):
        bdy = boundary.get(i, 'none')
        ilist = []
        for j, d in enumerate(bd):
            if bdy != 'none':
                d = d - axes.get(i, 0) * 2
            else:
                d = d - axes.get(i, 0) if j != 0 else d
                d = d - axes.get(i, 0) if j != len(bd) - 1 else d
            ilist.append(d)
        olist.append(tuple(ilist))
    chunks = tuple(olist)

    return map_blocks(partial(_trim, axes=axes, boundary=boundary),
                      x, chunks=chunks, dtype=x.dtype)
Example 10: is_date_a_fixed_closing_date
def is_date_a_fixed_closing_date(d, fixed_closing_days=[]):
    """
    Check whether the date is in the given list of dates, ignoring the
    year when comparing.
    """
    return d in filter(None, map(partial(date_with_year, d.year),
                                 fixed_closing_days))
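date_with_year is not shown on this page, but the pattern is clear: partial pins the year of the date being checked, and the resulting one-argument function is mapped over the closing days. A self-contained sketch with a hypothetical date_with_year helper:

from datetime import date
from functools import partial

def date_with_year(year, d):
    """Hypothetical helper: return d moved to `year`, or None if invalid (e.g. Feb 29)."""
    try:
        return d.replace(year=year)
    except ValueError:
        return None

fixed_closing_days = [date(2000, 1, 1), date(2000, 12, 25)]   # the year part is ignored
d = date(2024, 12, 25)

same_year = partial(date_with_year, d.year)   # freeze the year of the date being checked
assert d in filter(None, map(same_year, fixed_closing_days))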
Example 11: test_inline_ignores_curries_and_partials
def test_inline_ignores_curries_and_partials():
    dsk = {'x': 1, 'y': 2,
           'a': (partial(add, 1), 'x'),
           'b': (inc, 'a')}

    result = inline_functions(dsk, fast_functions=set([add]))

    assert 'a' not in set(result.keys())
Example 12: ghost_internal
def ghost_internal(x, axes):
    """ Share boundaries between neighboring blocks

    Parameters
    ----------
    x: da.Array
        A dask array
    axes: dict
        The size of the shared boundary per axis

    The axes dict informs how many cells to overlap between neighboring
    blocks: {0: 2, 2: 5} means share two cells along axis 0 and five cells
    along axis 2.
    """
    dims = list(map(len, x.blockdims))
    expand_key2 = partial(expand_key, dims=dims)
    interior_keys = pipe(x._keys(), flatten,
                         map(expand_key2), map(flatten),
                         concat, list)
    interior_slices = dict((k, fractional_slice(k, axes))
                           for k in interior_keys)

    shape = (3,) * x.ndim
    name = next(ghost_names)
    ghost_blocks = dict(((name,) + k[1:],
                         (rec_concatenate, (concrete, expand_key2(k))))
                        for k in interior_keys)
    blockdims = [[bds[0] + axes.get(i, 0)]
                 + [bd + axes.get(i, 0) * 2 for bd in bds[1:-1]]
                 + [bds[-1] + axes.get(i, 0)]
                 for i, bds in enumerate(x.blockdims)]
    return Array(merge(interior_slices, ghost_blocks, x.dask),
                 name, blockdims=blockdims)
Example 13: __getattr__
def __getattr__(self, key):
    if key in dir(self._accessor):
        if isinstance(getattr(self._accessor, key), property):
            return self._property_map(key)
        else:
            return partial(self._function_map, key)
    else:
        raise AttributeError(key)
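Here partial manufactures a method-like callable on the fly: the attribute name is frozen as the first argument of _function_map, and the caller supplies the rest. A toy sketch of the same dispatch idea (the Accessor and Proxy classes are made up):

from functools import partial

class Accessor(object):
    """Toy stand-in for a pandas-style accessor."""
    def upper(self, value):
        return value.upper()

class Proxy(object):
    def __init__(self, value):
        self._accessor = Accessor()
        self._value = value

    def _function_map(self, key, *args, **kwargs):
        # Look the method up by name and apply it to the wrapped value.
        return getattr(self._accessor, key)(self._value, *args, **kwargs)

    def __getattr__(self, key):
        if key in dir(self._accessor):
            return partial(self._function_map, key)   # freeze the method name
        raise AttributeError(key)

assert Proxy("abc").upper() == "ABC"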
Example 14: hash_join
def hash_join(lhs, left_on, rhs, right_on, how="inner",
              npartitions=None, suffixes=("_x", "_y")):
    """ Join two DataFrames on particular columns with hash join

    This shuffles both datasets on the joined column and then performs an
    embarrassingly parallel join partition-by-partition.

    >>> hash_join(a, 'id', rhs, 'id', how='left', npartitions=10)  # doctest: +SKIP
    """
    if npartitions is None:
        npartitions = max(lhs.npartitions, rhs.npartitions)

    lhs2 = shuffle(lhs, left_on, npartitions)
    rhs2 = shuffle(rhs, right_on, npartitions)

    if isinstance(left_on, Index):
        left_on = None
        left_index = True
    else:
        left_index = False

    if isinstance(right_on, Index):
        right_on = None
        right_index = True
    else:
        right_index = False

    # dummy result
    dummy = pd.merge(lhs._pd, rhs._pd, how, None,
                     left_on=left_on, right_on=right_on,
                     left_index=left_index, right_index=right_index,
                     suffixes=suffixes)

    merger = partial(_pdmerge, suffixes=suffixes,
                     default_left_columns=list(lhs.columns),
                     default_right_columns=list(rhs.columns))

    if isinstance(left_on, list):
        left_on = (list, tuple(left_on))
    if isinstance(right_on, list):
        right_on = (list, tuple(right_on))

    token = tokenize(lhs, left_on, rhs, right_on, left_index, right_index,
                     how, npartitions, suffixes)
    name = "hash-join-" + token

    dsk = dict(((name, i), (merger, (lhs2._name, i), (rhs2._name, i),
                            how, left_on, right_on, left_index, right_index))
               for i in range(npartitions))

    divisions = [None] * (npartitions + 1)
    return DataFrame(toolz.merge(lhs2.dask, rhs2.dask, dsk),
                     name, dummy, divisions)
Example 15: test_get_with_dill
def test_get_with_dill():
    with scheduler_and_workers() as (s, (a, b)):
        c = Client(s.address_to_clients)

        dsk = {'x': 1, 'y': (partial(add, 1), 'x')}
        keys = 'y'

        assert c.get(dsk, keys) == 2

        c.close()
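The graph stores partial(add, 1) directly as the callable of a task, which is what lets the scheduler serialize it (via dill here) and call it on a worker. A minimal sketch of how such a graph is evaluated, using a toy recursive evaluator rather than a real scheduler:

from functools import partial
from operator import add

dsk = {'x': 1, 'y': (partial(add, 1), 'x')}

def evaluate(dsk, key):
    """Toy evaluator for dask-style graphs, where a task is (callable, *argument_keys)."""
    value = dsk[key]
    if isinstance(value, tuple) and callable(value[0]):
        func = value[0]
        args = (evaluate(dsk, k) for k in value[1:])
        return func(*args)
    return value

assert evaluate(dsk, 'y') == 2   # partial(add, 1) applied to the value of 'x'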