This article collects typical usage examples of the Python method boltons.iterutils.chunked: what iterutils.chunked does, how to call it, and how it is used in real projects. For more background, see the documentation of the boltons.iterutils module it belongs to.
Below are 6 code examples of iterutils.chunked drawn from open-source projects, ordered roughly by popularity.
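Before diving into the project code, here is a minimal, self-contained sketch of what chunked does: it slices an iterable into fixed-size lists, with any remainder kept in the final chunk.

from boltons.iterutils import chunked

nums = list(range(10))
print(chunked(nums, 3))        # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
print(chunked(nums, size=4))   # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]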
Example 1: _get_crossval_split
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def _get_crossval_split(stimuli, fixations, split_count, included_splits, random=True, stratified_attributes=None):
    if stratified_attributes is not None:
        return _get_stratified_crossval_split(stimuli, fixations, split_count, included_splits, random=random, stratified_attributes=stratified_attributes)
    inds = list(range(len(stimuli)))
    if random:
        print("Using random shuffles for crossvalidation")
        rst = np.random.RandomState(seed=42)
        rst.shuffle(inds)
    inds = list(inds)
    size = int(np.ceil(len(inds) / split_count))
    chunks = chunked(inds, size=size)
    inds = []
    for split_nr in included_splits:
        inds.extend(chunks[split_nr])
    stimuli, fixations = create_subset(stimuli, fixations, inds)
    return stimuli, fixations
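The pysaliency-specific objects (stimuli, fixations, create_subset) are not available outside that project, but the fold-building logic can be illustrated on plain indices. The sketch below shows the same pattern, not pysaliency's API; crossval_fold_indices is a hypothetical helper name.

import numpy as np
from boltons.iterutils import chunked

def crossval_fold_indices(n_items, split_count, included_splits, shuffle=True, seed=42):
    # Hypothetical helper illustrating the fold construction above (not pysaliency's API).
    inds = list(range(n_items))
    if shuffle:
        np.random.RandomState(seed=seed).shuffle(inds)
    size = int(np.ceil(len(inds) / split_count))  # ceil so every index lands in some fold
    chunks = chunked(inds, size=size)             # at most split_count folds
    selected = []
    for split_nr in included_splits:
        selected.extend(chunks[split_nr])
    return selected

# indices belonging to folds 0 and 1 of a 5-fold split over 103 items
print(len(crossval_fold_indices(103, split_count=5, included_splits=[0, 1])))  # 42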
Example 2: __init__
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def __init__(self, data_source, batch_size=1, ratio_used=1.0, shuffle=True):
    self.ratio_used = ratio_used
    self.shuffle = shuffle
    shapes = data_source.get_shapes()
    unique_shapes = sorted(set(shapes))
    shape_indices = [[] for shape in unique_shapes]
    for k, shape in enumerate(shapes):
        shape_indices[unique_shapes.index(shape)].append(k)
    if self.shuffle:
        for indices in shape_indices:
            random.shuffle(indices)
    self.batches = sum([chunked(indices, size=batch_size) for indices in shape_indices], [])
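Outside the original data loader, the same idea (group sample indices by image shape, then cut each group into batches with chunked) can be shown with a toy list of shapes; batches_by_shape below is a hypothetical stand-in, not the project's class.

import random
from boltons.iterutils import chunked

def batches_by_shape(shapes, batch_size=4, shuffle=True):
    # Group sample indices by shape so every batch contains same-shaped items.
    unique_shapes = sorted(set(shapes))
    shape_indices = [[] for _ in unique_shapes]
    for k, shape in enumerate(shapes):
        shape_indices[unique_shapes.index(shape)].append(k)
    if shuffle:
        for indices in shape_indices:
            random.shuffle(indices)
    # chunked() returns a list of batches per shape; sum(..., []) concatenates them.
    return sum([chunked(indices, size=batch_size) for indices in shape_indices], [])

shapes = [(480, 640)] * 6 + [(600, 800)] * 5
print(batches_by_shape(shapes, batch_size=4, shuffle=False))
# [[0, 1, 2, 3], [4, 5], [6, 7, 8, 9], [10]]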
Example 3: campaign_visits_to_geojson
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def campaign_visits_to_geojson(rpc, campaign_id, geojson_file):
    """
    Export the geo location information for all the visits of a campaign into
    the `GeoJSON <http://geojson.org/>`_ format.

    :param rpc: The connected RPC instance to load the information with.
    :type rpc: :py:class:`.KingPhisherRPCClient`
    :param campaign_id: The ID of the campaign to load the information for.
    :param str geojson_file: The destination file for the GeoJSON data.
    """
    ips_for_georesolution = {}
    ip_counter = collections.Counter()
    for visit_node in _get_graphql_campaign_visits(rpc, campaign_id):
        visit = visit_node['node']
        ip_counter.update((visit['ip'],))
        visitor_ip = ipaddress.ip_address(visit['ip'])
        if not isinstance(visitor_ip, ipaddress.IPv4Address):
            continue
        if visitor_ip.is_loopback or visitor_ip.is_private:
            continue
        if visitor_ip not in ips_for_georesolution:
            ips_for_georesolution[visitor_ip] = visit['firstSeen']
        elif ips_for_georesolution[visitor_ip] > visit['firstSeen']:
            ips_for_georesolution[visitor_ip] = visit['firstSeen']
    ips_for_georesolution = [ip for (ip, _) in sorted(ips_for_georesolution.items(), key=lambda x: x[1])]
    locations = {}
    for ip_addresses in iterutils.chunked(ips_for_georesolution, 50):
        locations.update(rpc.geoip_lookup_multi(ip_addresses))
    points = []
    for ip, location in locations.items():
        if not (location.coordinates and location.coordinates[0] and location.coordinates[1]):
            continue
        points.append(geojson.Feature(geometry=location, properties={'count': ip_counter[ip], 'ip-address': ip}))
    feature_collection = geojson.FeatureCollection(points)
    with open(geojson_file, 'w') as file_h:
        serializers.JSON.dump(feature_collection, file_h, pretty=True)
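The King Phisher RPC client is not available here, but the batching pattern is generic: resolve many keys through an API that accepts at most N items per call. In the sketch below, fake_lookup_multi is a hypothetical stand-in for rpc.geoip_lookup_multi.

from boltons import iterutils

def fake_lookup_multi(keys):
    # Hypothetical stand-in for a batch API such as rpc.geoip_lookup_multi (illustration only).
    return {key: {'resolved': True} for key in keys}

keys = ['198.51.100.{}'.format(i) for i in range(1, 121)]
results = {}
for batch in iterutils.chunked(keys, 50):   # never send more than 50 keys per request
    results.update(fake_lookup_multi(batch))
print(len(results))  # 120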
Example 4: sorted_bounds
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def sorted_bounds(disjoint=False,
                  max_value=50,
                  max_len=10,
                  remove_duplicates=False):
    if disjoint:
        # Since we accumulate later:
        max_value /= max_len
    s = strategies.lists(strategies.integers(min_value=0,
                                             max_value=max_value),
                         min_size=0, max_size=20)
    if disjoint:
        s = s.map(accumulate).map(list)
    # Select only cases with even-length lists
    s = s.filter(lambda x: len(x) % 2 == 0)
    # Convert to list of 2-tuples
    s = s.map(lambda x: [tuple(q)
                         for q in iterutils.chunked(sorted(x), size=2)])
    # Remove cases with zero-length intervals
    s = s.filter(lambda x: all([a[0] != a[1] for a in x]))
    if remove_duplicates:
        # (this will always succeed if disjoint=True)
        s = s.filter(lambda x: x == list(set(x)))
    # Sort intervals and result
    return s.map(sorted)

##
# Fake intervals
##
# TODO: isn't this duplicated with bounds_to_records??
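The core trick in this Hypothesis strategy, pairing a sorted, even-length list of numbers into (low, high) tuples with chunked(..., size=2), also works on its own:

from boltons import iterutils

values = [3, 17, 5, 9, 20, 41]
bounds = [tuple(q) for q in iterutils.chunked(sorted(values), size=2)]
print(bounds)  # [(3, 5), (9, 17), (20, 41)]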
Example 5: run
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def run(argv):
    def should_process(pheno):
        return PerPhenoParallelizer().should_process_pheno(
            pheno,
            get_input_filepaths = lambda pheno: pheno['assoc_files'],
            get_output_filepaths = lambda pheno: common_filepaths['parsed'](pheno['phenocode']),
        )
    idxs = [i for i, pheno in enumerate(get_phenolist()) if should_process(pheno)]
    if not idxs:
        print('All phenos are up-to-date!')
        exit(0)
    jobs = chunked(idxs, N_AT_A_TIME)
    sbatch_filepath = get_dated_tmp_path('slurm-parse') + '.sh'
    tmp_path = get_tmp_path('')
    with open(sbatch_filepath, 'w') as f:
        f.write('''\
#!/bin/bash
#SBATCH --cpus-per-task=4
#SBATCH --mem=1G
#SBATCH --time=5-0:0
#SBATCH --array=0-{n_jobs}
#SBATCH --output={tmp_path}/slurm-%j.out
#SBATCH --error={tmp_path}/slurm-%j.out
jobs=(
'''.format(n_jobs = len(jobs)-1, tmp_path=tmp_path))
        for job in jobs:
            f.write(','.join(map(str, job)) + '\n')
        f.write(')\n\n')
        f.write('export PHEWEB_DATADIR={!r}\n'.format(conf.data_dir))
        f.write(sys.argv[0] + ' conf num_procs=4 parse --phenos=${jobs[$SLURM_ARRAY_TASK_ID]}\n')
    print('Run:\nsbatch {}\n'.format(sbatch_filepath))
    print('Monitor with `squeue --long --array --job <jobid>`\n')
    print('output will be in {}/slurm-*.out'.format(tmp_path))
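Stripped of the PheWeb helpers, the scheduling idea is simply: split the indices of pending work items into fixed-size groups and emit one SLURM array-task line per group. A minimal sketch, assuming a made-up N_AT_A_TIME of 3:

from boltons.iterutils import chunked

N_AT_A_TIME = 3        # hypothetical group size
idxs = list(range(8))  # work items still needing processing

jobs = chunked(idxs, N_AT_A_TIME)   # [[0, 1, 2], [3, 4, 5], [6, 7]]
for task_id, job in enumerate(jobs):
    # one line per SLURM array task, consumed via ${jobs[$SLURM_ARRAY_TASK_ID]}
    print('task {}: --phenos={}'.format(task_id, ','.join(map(str, job))))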
Example 6: aucell4r
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def aucell4r(df_rnk: pd.DataFrame, signatures: Sequence[Type[GeneSignature]],
             auc_threshold: float = 0.05, noweights: bool = False, normalize: bool = False,
             num_workers: int = cpu_count()) -> pd.DataFrame:
    """
    Calculate enrichment of gene signatures for single cells.

    :param df_rnk: The rank matrix (n_cells x n_genes).
    :param signatures: The gene signatures or regulons.
    :param auc_threshold: The fraction of the ranked genome to take into account for the calculation of the
        Area Under the recovery Curve.
    :param noweights: Should the weights of the genes part of a signature be used in calculation of enrichment?
    :param normalize: Normalize the AUC values to a maximum of 1.0 per regulon.
    :param num_workers: The number of cores to use.
    :return: A dataframe with the AUCs (n_cells x n_modules).
    """
    if num_workers == 1:
        # Show progress bar ...
        aucs = pd.concat([enrichment4cells(df_rnk,
                                           module.noweights() if noweights else module,
                                           auc_threshold=auc_threshold) for module in tqdm(signatures)]).unstack("Regulon")
        aucs.columns = aucs.columns.droplevel(0)
    else:
        # Decompose the rankings dataframe: the index and columns are shared with the child processes via pickling.
        genes = df_rnk.columns.values
        cells = df_rnk.index.values
        # The actual rankings are shared directly. This is possible because during a fork from a parent process the
        # child process inherits the memory of the parent process. A RawArray is used instead of a synchronized Array
        # because these rankings are read-only.
        shared_ro_memory_array = RawArray(DTYPE_C, mul(*df_rnk.shape))
        array = np.frombuffer(shared_ro_memory_array, dtype=DTYPE)
        # Copy the contents of df_rank into this shared memory block using row-major ordering.
        array[:] = df_rnk.values.flatten(order='C')
        # The resulting AUCs are returned via a synchronized array.
        auc_mtx = Array('d', len(cells) * len(signatures))  # Double precision floats.
        # Convert the modules to modules with uniform weights if necessary.
        if noweights:
            signatures = list(map(lambda m: m.noweights(), signatures))
        # Do the analysis in separate child processes.
        chunk_size = ceil(float(len(signatures)) / num_workers)
        processes = [Process(target=_enrichment, args=(shared_ro_memory_array, chunk,
                                                       genes, cells, auc_threshold,
                                                       auc_mtx, (chunk_size * len(cells)) * idx))
                     for idx, chunk in enumerate(chunked(signatures, chunk_size))]
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        # Reconstitute the results array. Using C or row-major ordering.
        aucs = pd.DataFrame(data=np.ctypeslib.as_array(auc_mtx.get_obj()).reshape(len(signatures), len(cells)),
                            columns=pd.Index(data=cells, name='Cell'),
                            index=pd.Index(data=list(map(attrgetter("name"), signatures)), name='Regulon')).T
    return aucs/aucs.max(axis=0) if normalize else aucs
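The role of chunked here is to split the signature list into roughly num_workers equal slices, one per child process. Ignoring the shared-memory machinery, the partitioning alone looks like this (the work items are placeholders):

from math import ceil
from boltons.iterutils import chunked

work = ['sig{}'.format(i) for i in range(10)]  # placeholder signatures
num_workers = 3
chunk_size = ceil(len(work) / num_workers)     # 4

for idx, chunk in enumerate(chunked(work, chunk_size)):
    # each chunk would be handed to one worker process; idx gives its output offset
    print(idx, chunk)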