This article collects typical usage examples of the Python method boltons.iterutils.chunked: what iterutils.chunked does, how to call it, and how it is used in real projects. For more background, see the documentation of the boltons.iterutils module it belongs to.
Below are 6 code examples of iterutils.chunked drawn from open-source projects, ordered roughly by popularity.
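Before diving into the project code, here is a minimal, self-contained sketch of what chunked does: it slices an iterable into fixed-size lists, with any remainder kept in the final chunk.

from boltons.iterutils import chunked

nums = list(range(10))
print(chunked(nums, 3))        # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
print(chunked(nums, size=4))   # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]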
Example 1: _get_crossval_split
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def _get_crossval_split(stimuli, fixations, split_count, included_splits, random=True, stratified_attributes=None):
    if stratified_attributes is not None:
        return _get_stratified_crossval_split(stimuli, fixations, split_count, included_splits, random=random, stratified_attributes=stratified_attributes)
    inds = list(range(len(stimuli)))
    if random:
        print("Using random shuffles for crossvalidation")
        rst = np.random.RandomState(seed=42)
        rst.shuffle(inds)
    inds = list(inds)
    size = int(np.ceil(len(inds) / split_count))
    chunks = chunked(inds, size=size)
    inds = []
    for split_nr in included_splits:
        inds.extend(chunks[split_nr])
    stimuli, fixations = create_subset(stimuli, fixations, inds)
    return stimuli, fixations
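The pysaliency-specific objects (stimuli, fixations, create_subset) are not available outside that project, but the fold-building logic can be illustrated on plain indices. The sketch below shows the same pattern, not pysaliency's API; crossval_fold_indices is a hypothetical helper name.

import numpy as np
from boltons.iterutils import chunked

def crossval_fold_indices(n_items, split_count, included_splits, shuffle=True, seed=42):
    # Hypothetical helper illustrating the fold construction above (not pysaliency's API).
    inds = list(range(n_items))
    if shuffle:
        np.random.RandomState(seed=seed).shuffle(inds)
    size = int(np.ceil(len(inds) / split_count))  # ceil so every index lands in some fold
    chunks = chunked(inds, size=size)             # at most split_count folds
    selected = []
    for split_nr in included_splits:
        selected.extend(chunks[split_nr])
    return selected

# indices belonging to folds 0 and 1 of a 5-fold split over 103 items
print(len(crossval_fold_indices(103, split_count=5, included_splits=[0, 1])))  # 42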
Example 2: __init__
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def __init__(self, data_source, batch_size=1, ratio_used=1.0, shuffle=True):
    self.ratio_used = ratio_used
    self.shuffle = shuffle
    shapes = data_source.get_shapes()
    unique_shapes = sorted(set(shapes))
    shape_indices = [[] for shape in unique_shapes]
    for k, shape in enumerate(shapes):
        shape_indices[unique_shapes.index(shape)].append(k)
    if self.shuffle:
        for indices in shape_indices:
            random.shuffle(indices)
    self.batches = sum([chunked(indices, size=batch_size) for indices in shape_indices], [])
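Outside the original data loader, the same idea (group sample indices by image shape, then cut each group into batches with chunked) can be shown with a toy list of shapes; batches_by_shape below is a hypothetical stand-in, not the project's class.

import random
from boltons.iterutils import chunked

def batches_by_shape(shapes, batch_size=4, shuffle=True):
    # Group sample indices by shape so every batch contains same-shaped items.
    unique_shapes = sorted(set(shapes))
    shape_indices = [[] for _ in unique_shapes]
    for k, shape in enumerate(shapes):
        shape_indices[unique_shapes.index(shape)].append(k)
    if shuffle:
        for indices in shape_indices:
            random.shuffle(indices)
    # chunked() returns a list of batches per shape; sum(..., []) concatenates them.
    return sum([chunked(indices, size=batch_size) for indices in shape_indices], [])

shapes = [(480, 640)] * 6 + [(600, 800)] * 5
print(batches_by_shape(shapes, batch_size=4, shuffle=False))
# [[0, 1, 2, 3], [4, 5], [6, 7, 8, 9], [10]]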
Example 3: campaign_visits_to_geojson
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def campaign_visits_to_geojson(rpc, campaign_id, geojson_file):
    """
    Export the geo location information for all the visits of a campaign into
    the `GeoJSON <http://geojson.org/>`_ format.

    :param rpc: The connected RPC instance to load the information with.
    :type rpc: :py:class:`.KingPhisherRPCClient`
    :param campaign_id: The ID of the campaign to load the information for.
    :param str geojson_file: The destination file for the GeoJSON data.
    """
    ips_for_georesolution = {}
    ip_counter = collections.Counter()
    for visit_node in _get_graphql_campaign_visits(rpc, campaign_id):
        visit = visit_node['node']
        ip_counter.update((visit['ip'],))
        visitor_ip = ipaddress.ip_address(visit['ip'])
        if not isinstance(visitor_ip, ipaddress.IPv4Address):
            continue
        if visitor_ip.is_loopback or visitor_ip.is_private:
            continue
        if visitor_ip not in ips_for_georesolution:
            ips_for_georesolution[visitor_ip] = visit['firstSeen']
        elif ips_for_georesolution[visitor_ip] > visit['firstSeen']:
            ips_for_georesolution[visitor_ip] = visit['firstSeen']
    ips_for_georesolution = [ip for (ip, _) in sorted(ips_for_georesolution.items(), key=lambda x: x[1])]
    locations = {}
    for ip_addresses in iterutils.chunked(ips_for_georesolution, 50):
        locations.update(rpc.geoip_lookup_multi(ip_addresses))
    points = []
    for ip, location in locations.items():
        if not (location.coordinates and location.coordinates[0] and location.coordinates[1]):
            continue
        points.append(geojson.Feature(geometry=location, properties={'count': ip_counter[ip], 'ip-address': ip}))
    feature_collection = geojson.FeatureCollection(points)
    with open(geojson_file, 'w') as file_h:
        serializers.JSON.dump(feature_collection, file_h, pretty=True)
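The King Phisher RPC client is not available here, but the batching pattern is generic: resolve many keys through an API that accepts at most N items per call. In the sketch below, fake_lookup_multi is a hypothetical stand-in for rpc.geoip_lookup_multi.

from boltons import iterutils

def fake_lookup_multi(keys):
    # Hypothetical stand-in for a batch API such as rpc.geoip_lookup_multi (illustration only).
    return {key: {'resolved': True} for key in keys}

keys = ['198.51.100.{}'.format(i) for i in range(1, 121)]
results = {}
for batch in iterutils.chunked(keys, 50):   # never send more than 50 keys per request
    results.update(fake_lookup_multi(batch))
print(len(results))  # 120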
Example 4: sorted_bounds
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def sorted_bounds(disjoint=False,
                  max_value=50,
                  max_len=10,
                  remove_duplicates=False):
    if disjoint:
        # Since we accumulate later:
        max_value /= max_len
    s = strategies.lists(strategies.integers(min_value=0,
                                             max_value=max_value),
                         min_size=0, max_size=20)
    if disjoint:
        s = s.map(accumulate).map(list)
    # Select only cases with even-length lists
    s = s.filter(lambda x: len(x) % 2 == 0)
    # Convert to list of 2-tuples
    s = s.map(lambda x: [tuple(q)
                         for q in iterutils.chunked(sorted(x), size=2)])
    # Remove cases with zero-length intervals
    s = s.filter(lambda x: all([a[0] != a[1] for a in x]))
    if remove_duplicates:
        # (this will always succeed if disjoint=True)
        s = s.filter(lambda x: x == list(set(x)))
    # Sort intervals and result
    return s.map(sorted)

##
# Fake intervals
##
# TODO: isn't this duplicated with bounds_to_records??
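The core trick in this Hypothesis strategy, pairing a sorted, even-length list of numbers into (low, high) tuples with chunked(..., size=2), also works on its own:

from boltons import iterutils

values = [3, 17, 5, 9, 20, 41]
bounds = [tuple(q) for q in iterutils.chunked(sorted(values), size=2)]
print(bounds)  # [(3, 5), (9, 17), (20, 41)]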
Example 5: run
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def run(argv):
    def should_process(pheno):
        return PerPhenoParallelizer().should_process_pheno(
            pheno,
            get_input_filepaths = lambda pheno: pheno['assoc_files'],
            get_output_filepaths = lambda pheno: common_filepaths['parsed'](pheno['phenocode']),
        )
    idxs = [i for i, pheno in enumerate(get_phenolist()) if should_process(pheno)]
    if not idxs:
        print('All phenos are up-to-date!')
        exit(0)
    jobs = chunked(idxs, N_AT_A_TIME)
    sbatch_filepath = get_dated_tmp_path('slurm-parse') + '.sh'
    tmp_path = get_tmp_path('')
    with open(sbatch_filepath, 'w') as f:
        f.write('''\
#!/bin/bash
#SBATCH --cpus-per-task=4
#SBATCH --mem=1G
#SBATCH --time=5-0:0
#SBATCH --array=0-{n_jobs}
#SBATCH --output={tmp_path}/slurm-%j.out
#SBATCH --error={tmp_path}/slurm-%j.out
jobs=(
'''.format(n_jobs = len(jobs)-1, tmp_path=tmp_path))
        for job in jobs:
            f.write(','.join(map(str, job)) + '\n')
        f.write(')\n\n')
        f.write('export PHEWEB_DATADIR={!r}\n'.format(conf.data_dir))
        f.write(sys.argv[0] + ' conf num_procs=4 parse --phenos=${jobs[$SLURM_ARRAY_TASK_ID]}\n')
    print('Run:\nsbatch {}\n'.format(sbatch_filepath))
    print('Monitor with `squeue --long --array --job <jobid>`\n')
    print('output will be in {}/slurm-*.out'.format(tmp_path))
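Stripped of the PheWeb helpers, the scheduling idea is simply: split the indices of pending work items into fixed-size groups and emit one SLURM array-task line per group. A minimal sketch, assuming a made-up N_AT_A_TIME of 3:

from boltons.iterutils import chunked

N_AT_A_TIME = 3        # hypothetical group size
idxs = list(range(8))  # work items still needing processing

jobs = chunked(idxs, N_AT_A_TIME)   # [[0, 1, 2], [3, 4, 5], [6, 7]]
for task_id, job in enumerate(jobs):
    # one line per SLURM array task, consumed via ${jobs[$SLURM_ARRAY_TASK_ID]}
    print('task {}: --phenos={}'.format(task_id, ','.join(map(str, job))))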
Example 6: aucell4r
# Required module import: from boltons import iterutils [as alias]
# Alternative import: from boltons.iterutils import chunked [as alias]
def aucell4r(df_rnk: pd.DataFrame, signatures: Sequence[Type[GeneSignature]],
             auc_threshold: float = 0.05, noweights: bool = False, normalize: bool = False,
             num_workers: int = cpu_count()) -> pd.DataFrame:
    """
    Calculate enrichment of gene signatures for single cells.

    :param df_rnk: The rank matrix (n_cells x n_genes).
    :param signatures: The gene signatures or regulons.
    :param auc_threshold: The fraction of the ranked genome to take into account for the calculation of the
        Area Under the recovery Curve.
    :param noweights: Should the weights of the genes part of a signature be used in calculation of enrichment?
    :param normalize: Normalize the AUC values to a maximum of 1.0 per regulon.
    :param num_workers: The number of cores to use.
    :return: A dataframe with the AUCs (n_cells x n_modules).
    """
    if num_workers == 1:
        # Show progress bar ...
        aucs = pd.concat([enrichment4cells(df_rnk,
                                           module.noweights() if noweights else module,
                                           auc_threshold=auc_threshold) for module in tqdm(signatures)]).unstack("Regulon")
        aucs.columns = aucs.columns.droplevel(0)
    else:
        # Decompose the rankings dataframe: the index and columns are shared with the child processes via pickling.
        genes = df_rnk.columns.values
        cells = df_rnk.index.values
        # The actual rankings are shared directly. This is possible because during a fork from a parent process the
        # child process inherits the memory of the parent process. A RawArray is used instead of a synchronized Array
        # because these rankings are read-only.
        shared_ro_memory_array = RawArray(DTYPE_C, mul(*df_rnk.shape))
        array = np.frombuffer(shared_ro_memory_array, dtype=DTYPE)
        # Copy the contents of df_rank into this shared memory block using row-major ordering.
        array[:] = df_rnk.values.flatten(order='C')
        # The resulting AUCs are returned via a synchronized array.
        auc_mtx = Array('d', len(cells) * len(signatures))  # Double precision floats.
        # Convert the modules to modules with uniform weights if necessary.
        if noweights:
            signatures = list(map(lambda m: m.noweights(), signatures))
        # Do the analysis in separate child processes.
        chunk_size = ceil(float(len(signatures)) / num_workers)
        processes = [Process(target=_enrichment, args=(shared_ro_memory_array, chunk,
                                                       genes, cells, auc_threshold,
                                                       auc_mtx, (chunk_size * len(cells)) * idx))
                     for idx, chunk in enumerate(chunked(signatures, chunk_size))]
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        # Reconstitute the results array. Using C or row-major ordering.
        aucs = pd.DataFrame(data=np.ctypeslib.as_array(auc_mtx.get_obj()).reshape(len(signatures), len(cells)),
                            columns=pd.Index(data=cells, name='Cell'),
                            index=pd.Index(data=list(map(attrgetter("name"), signatures)), name='Regulon')).T
    return aucs/aucs.max(axis=0) if normalize else aucs
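The role of chunked here is to split the signature list into roughly num_workers equal slices, one per child process. Ignoring the shared-memory machinery, the partitioning alone looks like this (the work items are placeholders):

from math import ceil
from boltons.iterutils import chunked

work = ['sig{}'.format(i) for i in range(10)]  # placeholder signatures
num_workers = 3
chunk_size = ceil(len(work) / num_workers)     # 4

for idx, chunk in enumerate(chunked(work, chunk_size)):
    # each chunk would be handed to one worker process; idx gives its output offset
    print(idx, chunk)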