本文整理匯總了Python中mpi4py.MPI.IN_PLACE屬性的典型用法代碼示例。如果您正苦於以下問題:Python MPI.IN_PLACE屬性的具體用法?Python MPI.IN_PLACE怎麽用?Python MPI.IN_PLACE使用的例子?那麽, 這裏精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類mpi4py.MPI
的用法示例。
在下文中一共展示了MPI.IN_PLACE屬性的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: _get_minmax_coordinates_mesh
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def _get_minmax_coordinates_mesh(self, axis=0):
""" Return the minimum and maximum coordinates along axis
parameter:
----------
axis:
axis
returns:
-------
tuple: minV, maxV
"""
maxVal = np.zeros((1))
minVal = np.zeros((1))
maxVal[0] = self.Model.mesh.data[:, axis].max()
minVal[0] = self.Model.mesh.data[:, axis].min()
comm.Barrier()
comm.Allreduce(_MPI.IN_PLACE, maxVal, op=_MPI.MAX)
comm.Allreduce(_MPI.IN_PLACE, minVal, op=_MPI.MIN)
comm.Barrier()
return minVal, maxVal
示例2: reduce
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def reduce(self, input, root=0):
"""Reduce operation in-place.
Sums input across all nodes in root node.
Args:
input (array): input array.
root (int): root node rank.
"""
if self.size > 1:
cpu_input = to_device(input, cpu_device)
if self.rank == root:
self.mpi_comm.Reduce(MPI.IN_PLACE, cpu_input, root=root)
copyto(input, cpu_input)
else:
self.mpi_comm.Reduce(cpu_input, None, root=root)
示例3: safeAllreduceInPlace
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def safeAllreduceInPlace(comm, in_array):
shape = in_array.shape
length = len(shape)
# just use 16 for blocksize, size of complex(double)
chunk_size = get_max_blocksize_from_mem(list(shape),16.,MEM_SIZE,priority_list=numpy.arange(length)[::-1])
task_list = generate_task_list(chunk_size,shape)
for block in task_list:
which_slice = [slice(*x) for x in block]
tmp = in_array[tuple(which_slice)].copy()
comm.Allreduce(MPI.IN_PLACE, tmp, op=MPI.SUM)
in_array[tuple(which_slice)] = tmp
示例4: _get_minmax_velocity_wall
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def _get_minmax_velocity_wall(self, wall, axis=0):
""" Return the minimum and maximum velocity component on the wall
parameters:
-----------
wall: (indexSet)
The wall.
axis:
axis (velocity component).
"""
# Initialise value to max and min sys values
maxV = np.ones((1)) * sys.float_info.min
minV = np.ones((1)) * sys.float_info.max
# if local domain has wall, get velocities
if wall.data.size > 0:
velocities = self.Model.velocityField.data[wall.data, axis]
# get local min and max
maxV[0] = velocities.max()
minV[0] = velocities.min()
# reduce operation
comm.Barrier()
comm.Allreduce(_MPI.IN_PLACE, maxV, op=_MPI.MAX)
comm.Allreduce(_MPI.IN_PLACE, minV, op=_MPI.MIN)
comm.Barrier()
return minV, maxV
示例5: fof_find_peaks
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def fof_find_peaks(source, label, comm,
position='Position', column='Density'):
"""
Find position of the peak (maximum) from a given column for a fof result.
"""
Nhalo0 = max(comm.allgather(label.max())) + 1
N = numpy.bincount(label, minlength=Nhalo0)
comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM)
return hpos
示例6: count
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def count(label, comm=MPI.COMM_WORLD):
"""
Count the number of particles of the same label.
This is a collective operation, and after the call, all ranks
will have the particle count.
Parameters
----------
label : array_like (integers)
Halo label of particles, >=0
comm : :py:class:`MPI.Comm`
communicator for the collective operation.
Returns
-------
count : array_like
the count of number of particles in each halo
"""
Nhalo0 = max(comm.allgather(label.max())) + 1
N = numpy.bincount(label, minlength=Nhalo0)
comm.Allreduce(MPI.IN_PLACE, N, op=MPI.SUM)
return N
示例7: __reduce_like
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def __reduce_like(self, func, sendbuf, recvbuf, *args, **kwargs):
sbuf = None
rbuf = None
buf = None
# unpack the send buffer if it is a HeAT tensor
if isinstance(sendbuf, dndarray.DNDarray):
sendbuf = sendbuf._DNDarray__array
# unpack the receive buffer if it is a HeAT tensor
if isinstance(recvbuf, dndarray.DNDarray):
recvbuf = recvbuf._DNDarray__array
# harmonize the input and output buffers
# MPI requires send and receive buffers to be of same type and length. If the torch tensors are either not both
# contiguous or differently strided, they have to be made matching (if possible) first.
if isinstance(sendbuf, torch.Tensor):
# convert the send buffer to a pointer, number of elements and type are identical to the receive buffer
dummy = (
sendbuf.contiguous()
) # make a contiguous copy and reassign the storage, old will be collected
sendbuf.set_(
dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride()
)
sbuf = sendbuf if CUDA_AWARE_MPI else sendbuf.cpu()
sendbuf = self.as_buffer(sbuf)
if isinstance(recvbuf, torch.Tensor):
buf = recvbuf
# nothing matches, the buffers have to be made contiguous
dummy = recvbuf.contiguous()
recvbuf.set_(
dummy.storage(), dummy.storage_offset(), size=dummy.shape, stride=dummy.stride()
)
rbuf = recvbuf if CUDA_AWARE_MPI else recvbuf.cpu()
if sendbuf is MPI.IN_PLACE:
recvbuf = self.as_buffer(rbuf)
else:
recvbuf = (self.as_mpi_memory(rbuf), sendbuf[1], sendbuf[2])
# perform the actual reduction operation
return func(sendbuf, recvbuf, *args, **kwargs), sbuf, rbuf, buf
示例8: allreduce
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def allreduce(self, input):
"""All reduce operation in-place.
Sums input across all nodes and broadcast back to each node.
Args:
input (array): input array.
"""
if self.size > 1:
if config.nccl_enabled:
device = get_device(input)
devices = self.mpi_comm.allgather(device.id)
if all([d >= 0 for d in devices]):
nccl_comm = self._get_nccl_comm(device, devices)
nccl_dtype, nccl_size = self._get_nccl_dtype_size(input)
with device:
nccl_comm.allReduce(input.data.ptr,
input.data.ptr,
nccl_size, nccl_dtype,
nccl.NCCL_SUM,
cp.cuda.Stream.null.ptr)
return
cpu_input = to_device(input, cpu_device)
self.mpi_comm.Allreduce(MPI.IN_PLACE, cpu_input)
copyto(input, cpu_input)
示例9: Wooov
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def Wooov(cc,t1,t2,eris,fint=None):
nkpts, nocc, nvir = t1.shape
kconserv = cc.kconserv
if fint is None:
Wklid = np.zeros((nkpts,nkpts,nkpts,nocc,nocc,nocc,nvir),dtype=t2.dtype)
else:
Wklid = fint['Wooov']
# TODO can do much better than this... call recursive function
# Adaptive blocking begins here
mem = 0.5e9
pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
# Adaptive blocking ends here
ooov_tmp_size = BLKSIZE + (nocc,nocc,nocc,nvir)
ooov_tmp = np.empty(ooov_tmp_size,dtype=t2.dtype)
good2go = True
while(good2go):
good2go, data = loader.slave_set()
if good2go is False:
break
ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)
s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
eris_ooov_kli = _cp(eris.ooov[s0,s1,s2])
eris_oovv_kli = _cp(eris.oovv[s0,s1,s2])
for iterkk,kk in enumerate(ranges0):
for iterkl,kl in enumerate(ranges1):
for iterki,ki in enumerate(ranges2):
kd = kconserv[kk,ki,kl]
ooov_tmp[iterkk,iterkl,iterki] = eris_ooov_kli[iterkk,iterkl,iterki].copy()
ooov_tmp[iterkk,iterkl,iterki] += einsum('ic,klcd->klid',t1[ki],eris_oovv_kli[iterkk,iterkl,iterki])
Wklid[s0,s1,s2] = ooov_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
loader.slave_finished()
comm.Barrier()
if fint is None:
comm.Allreduce(MPI.IN_PLACE, Wklid, op=MPI.SUM)
return Wklid
示例10: Wvovv
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def Wvovv(cc,t1,t2,eris,fint=None):
nkpts, nocc, nvir = t1.shape
kconserv = cc.kconserv
if fint is None:
Walcd = np.zeros((nkpts,nkpts,nkpts,nvir,nocc,nvir,nvir),dtype=t2.dtype)
else:
Walcd = fint['Wvovv']
# TODO can do much better than this... call recursive function
# Adaptive blocking begins here
mem = 0.5e9
pre = 1.*nvir*nocc*nvir*nvir*nkpts*16
nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
# Adaptive blocking ends here
vovv_tmp_size = BLKSIZE + (nvir,nocc,nvir,nvir)
vovv_tmp = np.empty(vovv_tmp_size,dtype=t2.dtype)
good2go = True
while(good2go):
good2go, data = loader.slave_set()
if good2go is False:
break
ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)
s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
eris_vovv_alc = _cp(eris.vovv[s0,s1,s2])
eris_oovv_alc = _cp(eris.oovv[s0,s1,s2])
for iterka,ka in enumerate(ranges0):
for iterkl,kl in enumerate(ranges1):
for iterkc,kc in enumerate(ranges2):
kd = kconserv[ka,kc,kl]
# vovv[ka,kl,kc,kd] <= ovvv[kl,ka,kd,kc].transpose(1,0,3,2)
vovv_tmp[iterka,iterkl,iterkc] = eris_vovv_alc[iterka,iterkl,iterkc] #np.array(eris.ovvv[kl,ka,kd]).transpose(1,0,3,2)
vovv_tmp[iterka,iterkl,iterkc] += -einsum('ka,klcd->alcd',t1[ka],eris_oovv_alc[iterka,iterkl,iterkc])
Walcd[s0,s1,s2] = vovv_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
loader.slave_finished()
comm.Barrier()
if fint is None:
comm.Allreduce(MPI.IN_PLACE, Walcd, op=MPI.SUM)
return Walcd
示例11: W1ovvo
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def W1ovvo(cc,t1,t2,eris,fint=None):
nkpts, nocc, nvir = t1.shape
kconserv = cc.kconserv
if fint is None:
Wkaci = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t1.dtype)
else:
Wkaci = fint['W1ovvo']
# Adaptive blocking begins here
mem = 0.5e9
pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
# Adaptive blocking ends here
ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc)
ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype)
good2go = True
while(good2go):
good2go, data = loader.slave_set()
if good2go is False:
break
ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)
s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
eris_ovvo_kac = _cp(eris.ovvo[s0,s1,s2])
eris_oovv_kXc = _cp(eris.oovv[s0,:,s2])
eris_oovv_Xkc = _cp(eris.oovv[:,s0,s2])
for iterkk,kk in enumerate(ranges0):
for iterka,ka in enumerate(ranges1):
for iterkc,kc in enumerate(ranges2):
ki = kconserv[kk,kc,ka]
ovvo_tmp[iterkk,iterka,iterkc] = _cp(eris_ovvo_kac[iterkk,iterka,iterkc])
#St2 = 2.*t2[ki,:,ka]
St2 = 2.*unpack_tril(t2,nkpts,ki,range(nkpts),ka,kconserv[ki,ka,range(nkpts)])
#St2 -= t2[:,ki,ka].transpose(0,2,1,3,4)
St2 -= unpack_tril(t2,nkpts,range(nkpts),ki,ka,kconserv[range(nkpts),ka,ki]).transpose(0,2,1,3,4)
ovvo_tmp[iterkk,iterka,iterkc] += einsum('klcd,ilad->kaci',eris_oovv_kXc[iterkk,:,iterkc].transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir),
St2.transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir))
ovvo_tmp[iterkk,iterka,iterkc] += -einsum('lkcd,ilad->kaci',eris_oovv_Xkc[:,iterkk,iterkc].reshape(nocc*nkpts,nocc,nvir,nvir),
unpack_tril(t2,nkpts,ki,range(nkpts),ka,kconserv[ki,ka,range(nkpts)]).transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir))
# t2[ki,:,ka].transpose(1,0,2,3,4).reshape(nocc,nkpts*nocc,nvir,nvir))
Wkaci[s0,s1,s2] = ovvo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
loader.slave_finished()
comm.Barrier()
if fint is None:
comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)
return Wkaci
示例12: W2ovvo
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def W2ovvo(cc,t1,t2,eris,fint=None):
nkpts, nocc, nvir = t1.shape
kconserv = cc.kconserv
if fint is None:
Wkaci = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t1.dtype)
WWooov = Wooov(cc,t1,t2,eris)
else:
Wkaci = fint['W2ovvo']
WWooov = fint['Wooov']
# Adaptive blocking begins here
mem = 0.5e9
pre = 1.*nocc*nvir*nvir*nvir*nkpts*16
nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
# Adaptive blocking ends here
ovvo_tmp_size = BLKSIZE + (nocc,nvir,nvir,nocc)
ovvo_tmp = np.empty(ovvo_tmp_size,dtype=t2.dtype)
good2go = True
while(good2go):
good2go, data = loader.slave_set()
if good2go is False:
break
ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)
s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
Wooov_akX = _cp(WWooov[s1,s0])
eris_ovvv_kac = _cp(eris.ovvv[s0,s1,s2])
for iterkk,kk in enumerate(ranges0):
for iterka,ka in enumerate(ranges1):
for iterkc,kc in enumerate(ranges2):
ki = kconserv[kk,kc,ka]
ovvo_tmp[iterkk,iterka,iterkc] = einsum('la,lkic->kaci',-t1[ka],Wooov_akX[iterka,iterkk,ki])
ovvo_tmp[iterkk,iterka,iterkc] += einsum('akdc,id->kaci',eris_ovvv_kac[iterkk,iterka,iterkc].transpose(1,0,3,2),t1[ki])
Wkaci[s0,s1,s2] = ovvo_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
loader.slave_finished()
comm.Barrier()
if fint is None:
comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)
return Wkaci
示例13: Wovvo
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def Wovvo(cc,t1,t2,eris,fint=None):
nkpts, nocc, nvir = t1.shape
kconserv = cc.kconserv
if fint is None:
Wkaci = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nvir,nocc),dtype=t2.dtype)
W1kaci = W1ovvo(cc,t1,t2,eris,fint)
W2kaci = W2ovvo(cc,t1,t2,eris,fint)
else:
Wkaci = fint['Wovvo']
W1kaci = fint['W1ovvo']
W2kaci = fint['W2ovvo']
# TODO can do much better than this... call recursive function
# Adaptive blocking begins here
mem = 0.5e9
pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
# Adaptive blocking ends here
good2go = True
while(good2go):
good2go, data = loader.slave_set()
if good2go is False:
break
ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)
s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
Wkaci[s0,s1,s2] = _cp(W1kaci[s0,s1,s2]) + _cp(W2kaci[s0,s1,s2])
loader.slave_finished()
comm.Barrier()
if fint is None:
comm.Allreduce(MPI.IN_PLACE, Wkaci, op=MPI.SUM)
return Wkaci
示例14: W2ovov
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def W2ovov(cc,t1,t2,eris,fint=None):
nkpts, nocc, nvir = t1.shape
kconserv = cc.kconserv
if fint is None:
Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype)
WWooov = Wooov(cc,t1,t2,eris)
else:
Wkbid = fint['W2ovov']
WWooov = fint['Wooov']
# Adaptive blocking begins here
mem = 0.5e9
pre = 1.*nocc*nvir*nvir*nvir*nkpts*16
nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
# Adaptive blocking ends here
ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir)
ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype)
good2go = True
while(good2go):
good2go, data = loader.slave_set()
if good2go is False:
break
ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)
s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
eris_ovvv = _cp(eris.ovvv[s0,s1,s2])
WWooov_kbi = _cp(WWooov[s0,s1,s2])
for iterkk,kk in enumerate(ranges0):
for iterkb,kb in enumerate(ranges1):
for iterki,ki in enumerate(ranges2):
kd = kconserv[kk,ki,kb]
ovov_tmp[iterkk,iterkb,iterki] = einsum('klid,lb->kbid',WWooov_kbi[iterkk,iterkb,iterki],-t1[kb])
ovov_tmp[iterkk,iterkb,iterki] += einsum('kbcd,ic->kbid',eris_ovvv[iterkk,iterkb,iterki],t1[ki])
Wkbid[s0,s1,s2] = ovov_tmp[:len(ranges0),:len(ranges1),:len(ranges2)]
loader.slave_finished()
comm.Barrier()
if fint is None:
comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM)
return Wkbid
示例15: Wovov
# 需要導入模塊: from mpi4py import MPI [as 別名]
# 或者: from mpi4py.MPI import IN_PLACE [as 別名]
def Wovov(cc,t1,t2,eris,fint=None):
nkpts, nocc, nvir = t1.shape
kconserv = cc.kconserv
if fint is None:
Wkbid = np.zeros((nkpts,nkpts,nkpts,nocc,nvir,nocc,nvir),dtype=t2.dtype)
WW1ovov = W1ovov(cc,t1,t2,eris)
WW2ovov = W2ovov(cc,t1,t2,eris)
else:
Wkbid = fint['Wovov']
WW1ovov = fint['W1ovov']
WW2ovov = fint['W2ovov']
# Adaptive blocking begins here
mem = 0.5e9
pre = 1.*nocc*nocc*nvir*nvir*nkpts*16
nkpts_blksize = min(max(int(np.floor(mem/pre)),1),nkpts)
nkpts_blksize2 = min(max(int(np.floor(mem/(pre*nkpts_blksize))),1),nkpts)
BLKSIZE = (nkpts_blksize2,nkpts_blksize,nkpts,)
loader = mpi_load_balancer.load_balancer(BLKSIZE=BLKSIZE)
loader.set_ranges((range(nkpts),range(nkpts),range(nkpts),))
# Adaptive blocking ends here
ovov_tmp_size = BLKSIZE + (nocc,nvir,nocc,nvir)
ovov_tmp = np.empty(ovov_tmp_size,dtype=t2.dtype)
good2go = True
while(good2go):
good2go, data = loader.slave_set()
if good2go is False:
break
ranges0, ranges1, ranges2 = loader.get_blocks_from_data(data)
s0,s1,s2 = [slice(min(x),max(x)+1) for x in (ranges0,ranges1,ranges2)]
Wkbid[s0,s1,s2] = _cp(WW1ovov[s0,s1,s2]) + _cp(WW2ovov[s0,s1,s2])
loader.slave_finished()
comm.Barrier()
if fint is None:
comm.Allreduce(MPI.IN_PLACE, Wkbid, op=MPI.SUM)
return Wkbid