This page collects typical usage examples of the Python method distributed.Executor.map. If you have been wondering exactly what Executor.map does and how to use it, the curated code examples here may help. You can also explore further usage examples of the containing class, distributed.Executor.
Below are 11 code examples of Executor.map, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
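Before the numbered examples, here is a minimal sketch of the typical Executor.map workflow against a running scheduler. The address 127.0.0.1:8787 and the inc helper are illustrative assumptions, not taken from the examples below.

from distributed import Executor

def inc(x):
    return x + 1

# Assumes a dask scheduler is already listening here (address is illustrative).
e = Executor('127.0.0.1:8787')
futures = e.map(inc, range(10))   # one Future per input element
results = e.gather(futures)       # block until every result has arrived
assert results == list(range(1, 11))
e.shutdown()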
Example 1: test__futures_to_collection
# Required import: from distributed import Executor
def test__futures_to_collection(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    remote_dfs = e.map(identity, dfs)
    ddf = yield _futures_to_collection(remote_dfs, divisions=True)
    ddf2 = yield _futures_to_dask_dataframe(remote_dfs, divisions=True)
    assert isinstance(ddf, dd.DataFrame)
    assert ddf.dask == ddf2.dask

    remote_arrays = e.map(np.arange, range(3, 5))
    x = yield _futures_to_collection(remote_arrays)
    y = yield _futures_to_dask_array(remote_arrays)
    assert type(x) == type(y)
    assert x.dask == y.dask

    remote_lists = yield e._scatter([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    b = yield _futures_to_collection(remote_lists)
    c = yield _futures_to_dask_bag(remote_lists)
    assert type(b) == type(c)
    assert b.dask == c.dask

    yield e._shutdown()
Example 2: dont_test_dataframes
def dont_test_dataframes(s, a):  # slow
    pytest.importorskip('pandas')

    n = 3000000
    fn = '/tmp/test/file.csv'

    with make_hdfs() as hdfs:
        data = (b'name,amount,id\r\n' +
                b'Alice,100,1\r\nBob,200,2\r\n' * n)
        with hdfs.open(fn, 'w') as f:
            f.write(data)

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        futures = read_bytes(fn, hdfs=hdfs, delimiter=b'\r\n')
        assert len(futures) > 1

        def load(b, **kwargs):
            assert b
            from io import BytesIO
            import pandas as pd
            bio = BytesIO(b)
            return pd.read_csv(bio, **kwargs)

        dfs = e.map(load, futures, names=['name', 'amount', 'id'],
                    skiprows=1)
        dfs2 = yield e._gather(dfs)
        assert sum(map(len, dfs2)) == n * 2 - 1
Example 3: test_with_data
def test_with_data(s, a, b):
    ss = HTTPScheduler(s)
    ss.listen(0)

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    L = e.map(inc, [1, 2, 3])
    L2 = yield e._scatter(['Hello', 'world!'])
    yield _wait(L)

    client = AsyncHTTPClient()
    response = yield client.fetch('http://localhost:%s/memory-load.json' %
                                  ss.port)
    out = json.loads(response.body.decode())

    assert all(isinstance(v, int) for v in out.values())
    assert set(out) == {a.address_string, b.address_string}
    assert sum(out.values()) == sum(map(sys.getsizeof,
                                        [1, 2, 3, 'Hello', 'world!']))

    response = yield client.fetch('http://localhost:%s/memory-load-by-key.json'
                                  % ss.port)
    out = json.loads(response.body.decode())
    assert set(out) == {a.address_string, b.address_string}
    assert all(isinstance(v, dict) for v in out.values())
    assert all(k in {'inc', 'data'} for d in out.values() for k in d)
    assert all(isinstance(v, int) for d in out.values() for v in d.values())
    assert sum(v for d in out.values() for v in d.values()) == \
           sum(map(sys.getsizeof, [1, 2, 3, 'Hello', 'world!']))

    ss.stop()
    yield e._shutdown()
Example 4: test_framework_runs
def test_framework_runs(self):
    with MesosCluster() as cluster:
        time.sleep(2)
        driver = DistributedDriver().create_driver(DistributedScheduler)
        driver.start()
        time.sleep(5)

        expect(cluster).to(have_activated_slaves(1))
        expect(cluster).to(have_framework_name('distributed-framework'))

        # distributed test - this probably doesn't belong here
        executor = Executor('127.0.0.1:8787')
        A = executor.map(lambda x: x ** 2, range(10))
        B = executor.map(lambda x: -x, A)
        total = executor.submit(sum, B)
        expect(total.result()).to(equal(-285))

        driver.stop()
Example 5: test_no_divisions
def test_no_divisions(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    dfs = e.map(tm.makeTimeDataFrame, range(5, 10))
    df = yield _futures_to_dask_dataframe(dfs)

    assert not df.known_divisions
    assert list(df.columns) == list(tm.makeTimeDataFrame(5).columns)
Example 6: f
def f(c, a, b):
    e = Executor((c.ip, c.port), start=False)
    IOLoop.current().spawn_callback(e._go)

    remote_dfs = e.map(lambda x: x, dfs)
    ddf = yield _futures_to_dask_dataframe(e, remote_dfs, divisions=True)
    assert isinstance(ddf, dd.DataFrame)
    assert ddf.divisions == (0, 30, 60, 80)

    expr = ddf.x.sum()
    result = yield e._get(expr.dask, expr._keys())
    assert result == [sum([df.x.sum() for df in dfs])]

    yield e._shutdown()
Example 7: test__futures_to_dask_dataframe
def test__futures_to_dask_dataframe(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    remote_dfs = e.map(identity, dfs)
    ddf = yield _futures_to_dask_dataframe(remote_dfs, divisions=True,
                                           executor=e)
    assert isinstance(ddf, dd.DataFrame)
    assert ddf.divisions == (0, 30, 60, 80)

    expr = ddf.x.sum()
    result = yield e._get(expr.dask, expr._keys())
    assert result == [sum([df.x.sum() for df in dfs])]

    yield e._shutdown()
Example 8: f
def f(c, a, b):
    e = Executor((c.ip, c.port), start=False, loop=loop)
    yield e._start()

    arrays = e.map(np.ones, [(5, 5)] * 6)
    y = yield _stack(arrays, axis=0)
    assert y.shape == (6, 5, 5)
    assert y.chunks == ((1, 1, 1, 1, 1, 1), (5,), (5,))

    y_results = yield e._get(y.dask, y._keys())
    yy = da.Array._finalize(y, y_results)
    assert isinstance(yy, np.ndarray)
    assert yy.shape == y.shape
    assert (yy == 1).all()

    yield e._shutdown()
Example 9: test__stack
def test__stack(s, a, b):
    import dask.array as da
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    arrays = e.map(np.ones, [(5, 5)] * 6)
    y = yield _stack(arrays, axis=0)
    assert y.shape == (6, 5, 5)
    assert y.chunks == ((1, 1, 1, 1, 1, 1), (5,), (5,))

    y_result = e.compute(y)
    yy = yield y_result._result()
    assert isinstance(yy, np.ndarray)
    assert yy.shape == y.shape
    assert (yy == 1).all()

    yield e._shutdown()
Example 10: test_dataframes
def test_dataframes(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    dfs = [pd.DataFrame({'x': np.random.random(100),
                         'y': np.random.random(100)},
                        index=list(range(i, i + 100)))
           for i in range(0, 100 * 10, 100)]

    remote_dfs = e.map(lambda x: x, dfs)
    rdf = yield _futures_to_dask_dataframe(remote_dfs, divisions=True)

    name = 'foo'
    ldf = dd.DataFrame({(name, i): df for i, df in enumerate(dfs)},
                       name, dfs[0].columns,
                       list(range(0, 1000, 100)) + [999])

    assert rdf.divisions == ldf.divisions

    remote = e.compute(rdf)
    result = yield remote._result()

    tm.assert_frame_equal(result,
                          ldf.compute(get=dask.get))

    exprs = [lambda df: df.x.mean(),
             lambda df: df.y.std(),
             lambda df: df.assign(z=df.x + df.y).drop_duplicates(),
             lambda df: df.index,
             lambda df: df.x,
             lambda df: df.x.cumsum(),
             lambda df: df.loc[50:75]]
    for f in exprs:
        local = f(ldf).compute(get=dask.get)
        remote = e.compute(f(rdf))
        remote = yield gen.with_timeout(timedelta(seconds=5),
                                        remote._result())
        assert_equal(local, remote)

    yield e._shutdown()
Example 11: DistributedContext
class DistributedContext(object):
    io_loop = None
    io_thread = None

    def __init__(self,
                 ip="127.0.0.1",
                 port=8787,
                 spawn_workers=0,
                 write_partial_results=None,
                 track_progress=False,
                 time_limit=None,
                 job_observer=None):
        """
        :type ip: string
        :type port: int
        :type spawn_workers: int
        :type write_partial_results: int
        :type track_progress: bool
        :type time_limit: int
        :type job_observer: JobObserver
        """
        self.worker_count = spawn_workers
        self.ip = ip
        self.port = port
        self.active = False
        self.write_partial_results = write_partial_results
        self.track_progress = track_progress
        self.execution_count = 0
        self.timeout = TimeoutManager(time_limit) if time_limit else None
        self.job_observer = job_observer

        if not DistributedContext.io_loop:
            DistributedContext.io_loop = IOLoop()
            DistributedContext.io_thread = Thread(
                target=DistributedContext.io_loop.start)
            DistributedContext.io_thread.daemon = True
            DistributedContext.io_thread.start()

        if spawn_workers > 0:
            self.scheduler = self._create_scheduler()
            self.workers = [self._create_worker()
                            for i in xrange(spawn_workers)]
            time.sleep(0.5)  # wait for workers to spawn

        self.executor = Executor((ip, port))

    def run(self, domain,
            worker_reduce_fn, worker_reduce_init,
            global_reduce_fn, global_reduce_init):
        size = domain.steps
        assert size is not None  # TODO: Iterators without size

        workers = 0
        for name, value in self.executor.ncores().items():
            workers += value

        if workers == 0:
            raise Exception("There are no workers")

        batch_count = workers * 4
        batch_size = max(int(round(size / float(batch_count))), 1)
        batches = self._create_batches(batch_size, size, domain,
                                       worker_reduce_fn, worker_reduce_init)

        logging.info("Qit: starting {} batches with size {}".format(
            batch_count, batch_size))

        if self.job_observer:
            self.job_observer.on_computation_start(batch_count, batch_size)

        futures = self.executor.map(process_batch, batches)

        if self.track_progress:
            distributed.diagnostics.progress(futures)

        if self.write_partial_results is not None:
            result_saver = ResultSaver(self.execution_count,
                                       self.write_partial_results)
        else:
            result_saver = None

        timeouted = False
        results = []
        for future in as_completed(futures):
            job = future.result()
            if result_saver:
                result_saver.handle_result(job.result)
            if self.job_observer:
                self.job_observer.on_job_completed(job)
            results.append(job.result)

            if self.timeout and self.timeout.is_finished():
                logging.info("Qit: timed out after {} seconds".format(
                    self.timeout.timeout))
                timeouted = True
                break

#......... the rest of this code is omitted .........
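To make the class's intent concrete, here is a hypothetical usage sketch based only on the __init__ and run signatures visible above. The my_domain object, the sum reduce callbacks, and the assumption that run returns the accumulated results are all illustrative stand-ins, since the rest of the class is omitted.

# Hypothetical usage sketch (not from the original project):
# assumes my_domain exposes .steps and is compatible with the
# _create_batches helper, which is not shown above.
ctx = DistributedContext(ip="127.0.0.1", port=8787,
                         spawn_workers=4,     # also start local workers
                         track_progress=True,
                         time_limit=60)       # stop collecting after 60 s
results = ctx.run(my_domain,
                  worker_reduce_fn=sum, worker_reduce_init=0,
                  global_reduce_fn=sum, global_reduce_init=0)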