This page collects and organizes typical usage examples of the partial function from the Python toolz library. If you have been wondering what partial does, how to call it, or what it looks like in real code, the curated examples below may help. 15 code examples of the partial function are shown, ordered by popularity by default.
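Before the examples, a minimal sketch of what partial itself does. In the toolz versions these examples target, toolz.partial is simply functools.partial re-exported, so the sketch imports it from functools; the power function is made up for illustration.

from functools import partial  # toolz.partial is functools.partial re-exported in these versions

def power(base, exponent):
    return base ** exponent

square = partial(power, exponent=2)   # freeze the keyword argument
assert square(4) == 16
assert partial(power, 2)(10) == 1024  # positional arguments can be frozen too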
Example 1: extract_and_capitalize_headlines_from_corpus
def extract_and_capitalize_headlines_from_corpus(corpus_dir, docids):
    """
    Iterate through the given documents in `corpus_dir`,
    extract their headlines, capitalize them and yield them.

    Parameters
    ----------
    corpus_dir: string
    docids: list of string
        the documents to be processed

    Returns
    -------
    generator of (docid, headlines): (str, list<list<str>>)
    """
    get_tokens = partial(map, partial(get_in, ["token"]))
    get_features = partial(get_in, ["features"])
    make_capitalized_title_new = lambda words: make_capitalized_title(title_words=words)

    for docid in docids:
        p = Path(corpus_dir) / Path(docid)
        auxil_p = p.with_suffix(".auxil")
        paf_p = p.with_suffix(".paf")
        if auxil_p.exists() and paf_p.exists():
            try:
                titles, _ = separate_title_from_body(str(auxil_p), str(paf_p))
            except Exception as e:
                yield (e, None)
                continue  # skip this document if title extraction failed

            # pipeline:
            # -> get features
            # -> get tokens
            # -> capitalize headline
            yield (None,
                   (p.name,
                    list(map(compose(make_capitalized_title_new,
                                     get_tokens,
                                     get_features),
                             titles))))
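The pattern worth noting above is building small accessors with partial and chaining them with compose. A minimal, self-contained sketch, assuming a title record shaped roughly like the ones this function processes:

from functools import partial
from toolz import compose, get_in

# Hypothetical title record, shaped like the ones the pipeline above expects.
title = {"features": [{"token": "the"}, {"token": "quick"}, {"token": "fox"}]}

get_features = partial(get_in, ["features"])            # record -> list of feature dicts
get_tokens = partial(map, partial(get_in, ["token"]))   # feature dicts -> token strings

# compose applies right to left: first get_features, then get_tokens.
assert list(compose(get_tokens, get_features)(title)) == ["the", "quick", "fox"]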
Example 2: read_csv
def read_csv(fn, *args, **kwargs):
    chunksize = kwargs.pop('chunksize', 2**16)
    categorize = kwargs.pop('categorize', None)
    index = kwargs.pop('index', None)
    if index and categorize is None:
        categorize = True

    header = kwargs.get('header', 1)
    nlines = linecount(fn) - header
    nchunks = int(ceil(1.0 * nlines / chunksize))

    read = next(read_csv_names)

    blockdivs = tuple(range(chunksize, nlines, chunksize))

    one_chunk = pd.read_csv(fn, *args, nrows=100, **kwargs)

    cols = []
    if categorize or index:
        if categorize:
            category_columns = [c for c in one_chunk.dtypes.index
                                if one_chunk.dtypes[c] == 'O']
        else:
            category_columns = []
        cols = category_columns + ([index] if index else [])
        d = read_csv(fn, *args, **merge(kwargs,
                                        dict(chunksize=chunksize,
                                             usecols=cols,
                                             categorize=False,
                                             parse_dates=None)))
        categories = [d[c].drop_duplicates() for c in category_columns]

        if index:
            quantiles = d[index].quantiles(np.linspace(0, 100, nchunks + 1)[1:-1])
            result = compute(quantiles, *categories)
            quantiles, categories = result[0], result[1:]
        else:
            categories = compute(*categories)
        categories = dict(zip(category_columns, categories))

    kwargs['chunksize'] = chunksize
    load = {(read, -1): (partial(pd.read_csv, *args, **kwargs), fn)}
    load.update(dict(((read, i), (get_chunk, (read, i - 1), chunksize * i))
                     for i in range(nchunks)))

    name = next(names)
    dsk = dict(((name, i), (getitem, (read, i), 0))
               for i in range(nchunks))

    result = DataFrame(merge(dsk, load), name, one_chunk.columns, blockdivs)

    if categorize:
        func = partial(categorize_block, categories=categories)
        result = result.map_blocks(func, columns=result.columns)

    if index:
        result = set_partition(result, index, quantiles)

    return result
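The load dictionary stores dask-style tasks, tuples whose first element is a callable; partial is what lets the keyword arguments ride along inside that callable. A rough sketch of the idea (the file name and the tiny executor are made up):

from functools import partial
import pandas as pd

# A dask-style task: (callable, argument), with keyword arguments frozen by partial.
task = (partial(pd.read_csv, sep=",", nrows=1000), "data.csv")  # hypothetical file

def run_task(task):
    func, *args = task
    return func(*args)

# run_task(task) would behave like pd.read_csv("data.csv", sep=",", nrows=1000)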
Example 3: ccds_to_bed
def ccds_to_bed(ccds_stream):
    """Convert CCDS dump to Chanjo-style BED stream.

    Main entry point for the default Chanjo converter (ccds). It converts
    a sorted (start, chrom) CCDS database to the Chanjo BED-format.

    Args:
        ccds_stream (file): file handle to read CCDS lines from

    Yields:
        Interval: interval with merged block and superblock ids
    """
    return pipe(
        ccds_stream,
        filter(grep('Public')),                           # keep only 'Public' tx
        map(text_type.rstrip),                            # strip \n and spaces
        map(split(sep='\t')),                             # split into list
        map(extract_intervals),                           # convert to Interval
        concat,                                           # flatten
        map(rename_sex_interval),                         # rename sex contigs
        partial(lazy_groupby, key=attrgetter('contig')),  # group by contig
        pluck(1),                                         # extract second item
        map(groupby(attrgetter('name'))),                 # non-lazy group by id
        map(valmap(merge_related_elements)),              # group intervals
        map(itervalues),                                  # extract values
        map(partial(sorted, key=attrgetter('start'))),    # sort by start pos
        concat                                            # flatten
    )
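Each stage passed to pipe must accept exactly one argument, which is why sorted and lazy_groupby are wrapped in partial above. A self-contained sketch of the same idea, using plain (contig, start) tuples instead of Interval objects:

from functools import partial
from operator import itemgetter
from toolz import pipe, concat

intervals = [[("chr1", 30), ("chr1", 10), ("chr2", 5)]]   # one group of (contig, start) pairs

result = pipe(
    intervals,
    partial(map, partial(sorted, key=itemgetter(1))),  # sort each group by start position
    concat,                                            # flatten back into one stream
    list,
)
assert result == [("chr1", 10), ("chr1", 30), ("chr2", 5)]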
Example 4: test_to_tree_slice
def test_to_tree_slice(serial):
    t = symbol('t', 'var * {name: string, amount: int32}')
    expr = t[:5]
    expr2 = pipe(expr,
                 partial(to_tree, names={t: 't'}),
                 serial.dumps,
                 serial.loads,
                 partial(from_tree, namespace={'t': t}))
    assert expr.isidentical(expr2)
Example 5: working_datetime_ranges_of_date
def working_datetime_ranges_of_date(d,
                                    special_working_hours={},
                                    week_working_hours={},
                                    merge_tomorrow=True):
    """
    Return a list of datetime tuples (datetime ranges) describing the
    contiguous working periods of the given date. If `merge_tomorrow` is
    set, check whether the first period of tomorrow is contiguous with
    the last period of today and, if so, merge the two.
    """
    # curried on the working-hours configuration
    whs_by_date = partial(working_hours_of_date,
                          special_working_hours=special_working_hours,
                          week_working_hours=week_working_hours)
    # curried on the date
    whs_to_dt_ranges = partial(working_hours_to_datetime_ranges, d)

    today_working_hours = whs_by_date(d)

    if not len(today_working_hours):
        return []

    if not merge_tomorrow:
        return whs_to_dt_ranges(today_working_hours)

    tomorrow_working_hours = whs_by_date(tomorrow(d))
    if are_working_hours_contiguous(today_working_hours,
                                    tomorrow_working_hours):
        # The last range of today becomes a range merged from the last
        # period of today and the first period of tomorrow.
        next_day = tomorrow(d)

        # When tomorrow's working hours end at 00:00 they must be
        # (00:00, 00:00), since they are contiguous with today's hours.
        # In that case add one more day, because the period really ends
        # at 00:00 of the day after; this covers 24/7-like schedules.
        if tomorrow_working_hours[0][1] == time(0):
            next_day = tomorrow(next_day)

        last_period = (
            datetime.combine(d, today_working_hours[-1][0]),
            datetime.combine(next_day, tomorrow_working_hours[0][1])
        )
        return whs_to_dt_ranges(today_working_hours[:-1]) + [last_period]

    return whs_to_dt_ranges(today_working_hours)
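The two partial calls are the heart of this function: one freezes the working-hours configuration, the other freezes the date, so the rest of the body only deals with one-argument helpers. A stripped-down sketch of that pattern, with a made-up stand-in for working_hours_of_date:

import datetime
from functools import partial

def working_hours_of_date(d, special_working_hours={}, week_working_hours={}):
    # Hypothetical stand-in: special hours for a date win over the weekly schedule.
    return special_working_hours.get(d, week_working_hours.get(d.weekday(), []))

week = {0: [("09:00", "17:00")]}   # Mondays only
whs_by_date = partial(working_hours_of_date, week_working_hours=week)  # freeze the configuration

assert whs_by_date(datetime.date(2024, 1, 1)) == [("09:00", "17:00")]  # 2024-01-01 is a Monday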
Example 6: __getattr__
def __getattr__(self, key):
    if key == '_hash':
        raise AttributeError()
    try:
        return _attr_cache[(self, key)]
    except:
        pass
    try:
        result = object.__getattribute__(self, key)
    except AttributeError:
        fields = dict(zip(map(valid_identifier, self.fields),
                          self.fields))
        if self.fields and key in fields:
            if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                result = self
            else:
                result = self[fields[key]]
        else:
            d = toolz.merge(schema_methods(self.dshape.measure),
                            dshape_methods(self.dshape))
            if key in d:
                func = d[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = functools.update_wrapper(partial(func, self),
                                                      func)
            else:
                raise
    _attr_cache[(self, key)] = result
    return result
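partial(func, self) here is essentially a hand-rolled bound method, and functools.update_wrapper copies the original function's name and docstring onto it so the result introspects nicely. A toy sketch of that pattern outside of blaze (the describe function and Wrapper class are made up):

import functools
from functools import partial

def describe(obj, verbose=False):
    """Return a short description of obj."""
    if verbose:
        return "%s: %r" % (type(obj).__name__, obj)
    return repr(obj)

class Wrapper(object):
    def __getattr__(self, key):
        if key == "describe":
            # Bind self as the first argument and keep describe's metadata.
            return functools.update_wrapper(partial(describe, self), describe)
        raise AttributeError(key)

w = Wrapper()
assert w.describe.__doc__ == "Return a short description of obj."
assert w.describe(verbose=True).startswith("Wrapper")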
Example 7: get
def get(dsk, keys, optimizations=[fuse], num_workers=cpu_count):
    """ Multiprocessed get function appropriate for Bags """
    pool = _globals['pool']

    if pool is None:
        pool = multiprocessing.Pool(psutil.cpu_count())
        cleanup = True
    else:
        cleanup = False

    manager = multiprocessing.Manager()
    queue = manager.Queue()

    apply_async = dill_apply_async(pool.apply_async)

    # Optimize Dask
    dsk2 = pipe(dsk, partial(cull, keys=keys), *optimizations)

    try:
        # Run
        result = get_async(apply_async, cpu_count, dsk2, keys,
                           queue=queue)
    finally:
        if cleanup:
            pool.close()

    return result
Example 8: format_results
def format_results(terminal_width, key_list, separator, text_list,
                   left_align=True, min_factor=3, **kwargs):
    """Returns formatted results in two columns.
    """
    key_width = max(map(len, key_list))
    separator_length = len(separator)
    desc_wrap = toolz.identity

    if terminal_width:
        if key_width / terminal_width > .5:
            key_width = terminal_width // 2 - 3
        text_width = terminal_width - key_width - separator_length
        if text_width * min_factor > terminal_width:
            desc_wrap = toolz.compose(
                ('\n' + ' ' * (key_width + separator_length)).join,
                toolz.partial(textwrap.wrap, width=text_width, **kwargs),
            )

    if left_align:
        fmt = '%-*s%s%s'
    else:
        fmt = '%*s%s%s'

    for key, text in zip(key_list, text_list):
        text = desc_wrap(text)
        if len(key) > key_width:
            yield fmt % (key_width, key, separator, '')
            yield fmt % (key_width, '', ' ' * separator_length, text)
        else:
            yield fmt % (key_width, key, separator, text)
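desc_wrap is a nice standalone use of partial: textwrap.wrap is frozen at the computed column width, then composed with a joiner that indents continuation lines under the text column. A minimal sketch with fixed widths (the widths themselves are arbitrary):

import textwrap
import toolz

key_width, separator_length, text_width = 10, 2, 30

desc_wrap = toolz.compose(
    ('\n' + ' ' * (key_width + separator_length)).join,  # indent continuation lines
    toolz.partial(textwrap.wrap, width=text_width),      # wrap text to the column width
)

# Continuation lines end up aligned under the text column.
print(desc_wrap("a fairly long description that needs to be wrapped"))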
Example 9: trim_internal
def trim_internal(x, axes, boundary=None):
    """ Trim sides from each block

    This couples well with the overlap operation, which may leave excess
    data on each block.

    See also
    --------
    dask.array.chunk.trim
    dask.array.map_blocks
    """
    boundary = coerce_boundary(x.ndim, boundary)

    olist = []
    for i, bd in enumerate(x.chunks):
        bdy = boundary.get(i, 'none')
        ilist = []
        for j, d in enumerate(bd):
            if bdy != 'none':
                d = d - axes.get(i, 0) * 2
            else:
                d = d - axes.get(i, 0) if j != 0 else d
                d = d - axes.get(i, 0) if j != len(bd) - 1 else d
            ilist.append(d)
        olist.append(tuple(ilist))
    chunks = tuple(olist)

    return map_blocks(partial(_trim, axes=axes, boundary=boundary),
                      x, chunks=chunks, dtype=x.dtype)
Example 10: is_date_a_fixed_closing_date
def is_date_a_fixed_closing_date(d, fixed_closing_days=[]):
    """
    Check whether the date is in the given list of dates, ignoring the
    year when comparing.
    """
    return d in filter(None, map(partial(date_with_year, d.year),
                                 fixed_closing_days))
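date_with_year is not shown on this page, but the pattern is clear: partial pins the year of the date being checked, and the resulting one-argument function is mapped over the closing days. A self-contained sketch with a hypothetical date_with_year helper:

from datetime import date
from functools import partial

def date_with_year(year, d):
    """Hypothetical helper: return d moved to `year`, or None if invalid (e.g. Feb 29)."""
    try:
        return d.replace(year=year)
    except ValueError:
        return None

fixed_closing_days = [date(2000, 1, 1), date(2000, 12, 25)]   # the year part is ignored
d = date(2024, 12, 25)

same_year = partial(date_with_year, d.year)   # freeze the year of the date being checked
assert d in filter(None, map(same_year, fixed_closing_days))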
Example 11: test_inline_ignores_curries_and_partials
def test_inline_ignores_curries_and_partials():
    dsk = {'x': 1, 'y': 2,
           'a': (partial(add, 1), 'x'),
           'b': (inc, 'a')}

    result = inline_functions(dsk, fast_functions=set([add]))

    assert 'a' not in set(result.keys())
Example 12: ghost_internal
def ghost_internal(x, axes):
    """ Share boundaries between neighboring blocks

    Parameters
    ----------
    x: da.Array
        A dask array
    axes: dict
        The size of the shared boundary per axis

    The axes dict informs how many cells to overlap between neighboring
    blocks: {0: 2, 2: 5} means share two cells along axis 0 and five cells
    along axis 2.
    """
    dims = list(map(len, x.blockdims))
    expand_key2 = partial(expand_key, dims=dims)
    interior_keys = pipe(x._keys(), flatten,
                         map(expand_key2), map(flatten),
                         concat, list)
    interior_slices = dict((k, fractional_slice(k, axes))
                           for k in interior_keys)

    shape = (3,) * x.ndim
    name = next(ghost_names)
    ghost_blocks = dict(((name,) + k[1:],
                         (rec_concatenate, (concrete, expand_key2(k))))
                        for k in interior_keys)
    blockdims = [[bds[0] + axes.get(i, 0)]
                 + [bd + axes.get(i, 0) * 2 for bd in bds[1:-1]]
                 + [bds[-1] + axes.get(i, 0)]
                 for i, bds in enumerate(x.blockdims)]
    return Array(merge(interior_slices, ghost_blocks, x.dask),
                 name, blockdims=blockdims)
Example 13: __getattr__
def __getattr__(self, key):
    if key in dir(self._accessor):
        if isinstance(getattr(self._accessor, key), property):
            return self._property_map(key)
        else:
            return partial(self._function_map, key)
    else:
        raise AttributeError(key)
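Here partial manufactures a method-like callable on the fly: the attribute name is frozen as the first argument of _function_map, and the caller supplies the rest. A toy sketch of the same dispatch idea (the Accessor and Proxy classes are made up):

from functools import partial

class Accessor(object):
    """Toy stand-in for a pandas-style accessor."""
    def upper(self, value):
        return value.upper()

class Proxy(object):
    def __init__(self, value):
        self._accessor = Accessor()
        self._value = value

    def _function_map(self, key, *args, **kwargs):
        # Look the method up by name and apply it to the wrapped value.
        return getattr(self._accessor, key)(self._value, *args, **kwargs)

    def __getattr__(self, key):
        if key in dir(self._accessor):
            return partial(self._function_map, key)   # freeze the method name
        raise AttributeError(key)

assert Proxy("abc").upper() == "ABC"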
Example 14: hash_join
def hash_join(lhs, left_on, rhs, right_on, how="inner",
              npartitions=None, suffixes=("_x", "_y")):
    """ Join two DataFrames on particular columns with hash join

    This shuffles both datasets on the joined column and then performs an
    embarrassingly parallel join partition-by-partition.

    >>> hash_join(a, 'id', rhs, 'id', how='left', npartitions=10)  # doctest: +SKIP
    """
    if npartitions is None:
        npartitions = max(lhs.npartitions, rhs.npartitions)

    lhs2 = shuffle(lhs, left_on, npartitions)
    rhs2 = shuffle(rhs, right_on, npartitions)

    if isinstance(left_on, Index):
        left_on = None
        left_index = True
    else:
        left_index = False

    if isinstance(right_on, Index):
        right_on = None
        right_index = True
    else:
        right_index = False

    # dummy result
    dummy = pd.merge(lhs._pd, rhs._pd, how, None,
                     left_on=left_on, right_on=right_on,
                     left_index=left_index, right_index=right_index,
                     suffixes=suffixes)

    merger = partial(_pdmerge, suffixes=suffixes,
                     default_left_columns=list(lhs.columns),
                     default_right_columns=list(rhs.columns))

    if isinstance(left_on, list):
        left_on = (list, tuple(left_on))
    if isinstance(right_on, list):
        right_on = (list, tuple(right_on))

    token = tokenize(lhs, left_on, rhs, right_on, left_index, right_index,
                     how, npartitions, suffixes)
    name = "hash-join-" + token

    dsk = dict(((name, i), (merger, (lhs2._name, i), (rhs2._name, i),
                            how, left_on, right_on, left_index, right_index))
               for i in range(npartitions))

    divisions = [None] * (npartitions + 1)
    return DataFrame(toolz.merge(lhs2.dask, rhs2.dask, dsk),
                     name, dummy, divisions)
Example 15: test_get_with_dill
def test_get_with_dill():
    with scheduler_and_workers() as (s, (a, b)):
        c = Client(s.address_to_clients)

        dsk = {'x': 1, 'y': (partial(add, 1), 'x')}
        keys = 'y'

        assert c.get(dsk, keys) == 2

        c.close()
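The graph stores partial(add, 1) directly as the callable of a task, which is what lets the scheduler serialize it (via dill here) and call it on a worker. A minimal sketch of how such a graph is evaluated, using a toy recursive evaluator rather than a real scheduler:

from functools import partial
from operator import add

dsk = {'x': 1, 'y': (partial(add, 1), 'x')}

def evaluate(dsk, key):
    """Toy evaluator for dask-style graphs, where a task is (callable, *argument_keys)."""
    value = dsk[key]
    if isinstance(value, tuple) and callable(value[0]):
        func = value[0]
        args = (evaluate(dsk, k) for k in value[1:])
        return func(*args)
    return value

assert evaluate(dsk, 'y') == 2   # partial(add, 1) applied to the value of 'x'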