This article collects typical usage examples of the C++ class el::DistMatrix: what DistMatrix is for, how it is used in practice, and what working code built on it looks like.
Fifteen code examples of the DistMatrix class are shown below, ordered roughly by popularity.
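Before the extracted examples, here is a minimal, hedged sketch of basic El::DistMatrix usage. It is not taken from any of the examples below and only assumes a standard Elemental installation: build a process grid, fill a distributed matrix, and query its global and local dimensions.

#include <El.hpp>
#include <iostream>

int main(int argc, char* argv[]) {
    El::Environment env(argc, argv);        // initializes MPI and Elemental (RAII)
    El::Grid grid(El::mpi::COMM_WORLD);     // default (roughly square) process grid
    El::DistMatrix<double> A(grid);         // default [MC,MR] element-wise distribution
    El::Uniform(A, 8, 8);                   // fill an 8 x 8 matrix with uniform random entries
    if (grid.Rank() == 0)
        std::cout << "global: " << A.Height() << " x " << A.Width() << std::endl;
    std::cout << "rank " << grid.Rank() << " holds a "
              << A.LocalHeight() << " x " << A.LocalWidth()
              << " local block" << std::endl;
    return 0;
}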
Example 1: compare_result
void compare_result(size_t rank, DistMatrixType &expected_A,
El::DistMatrix<double, El::STAR, El::STAR> &result) {
col_t &data = expected_A.seq();
const size_t my_row_offset = skylark::utility::cb_my_row_offset(expected_A);
const size_t my_col_offset = skylark::utility::cb_my_col_offset(expected_A);
for(typename col_t::SpColIter col = data.begcol();
col != data.endcol(); col++) {
for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
nz != data.endnz(col); nz++) {
const size_t rowid = nz.rowid() + my_row_offset;
const size_t colid = col.colid() + my_col_offset;
const double value = nz.value();
if(value != result.GetLocal(rowid, colid)) {
std::ostringstream os;
os << rank << ": " << rowid << ", " << colid << ": "
<< value << " != "
<< result.GetLocal(rowid, colid) << std::endl;
std::cout << os.str() << std::flush;
BOOST_FAIL("Result application not as expected");
}
}
}
}
Example 2: SymmetricEuclideanDistanceMatrix
void SymmetricEuclideanDistanceMatrix(El::UpperOrLower uplo, direction_t dir,
T alpha, const El::ElementalMatrix<T> &A,
T beta, El::ElementalMatrix<T> &C) {
T *c = C.Buffer();
int ldC = C.LDim();
if (dir == base::COLUMNS) {
El::Herk(uplo, El::ADJOINT, -2.0 * alpha, A, beta, C);
//El::Gemm(El::ADJOINT, El::NORMAL, T(-2.0) * alpha, A, A, beta, C);
El::DistMatrix<El::Base<T>, El::STAR, El::STAR > N;
ColumnNrm2(A, N);
El::Base<T> *nn = N.Buffer();
El::Int n = C.LocalWidth();
El::Int m = C.LocalHeight();
for(El::Int j = 0; j < n; j++)
for(El::Int i =
((uplo == El::UPPER) ? 0 : C.LocalRowOffset(A.GlobalCol(j)));
i < ((uplo == El::UPPER) ? C.LocalRowOffset(A.GlobalCol(j) + 1) : m); i++) {
El::Base<T> a = nn[C.GlobalRow(i)];
El::Base<T> b = nn[C.GlobalCol(j)];
c[j * ldC + i] += alpha * (a * a + b * b);
}
}
// TODO the rest of the cases.
}
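The kernel above relies on the identity ||a_i - a_j||^2 = ||a_i||^2 + ||a_j||^2 - 2<a_i, a_j>: the Herk call contributes the -2<a_i, a_j> term and the nested loop adds the two squared norms. Below is a small self-contained check of that identity on made-up data; it is purely illustrative and independent of Elemental and skylark.

#include <cassert>
#include <cmath>
#include <vector>

int main() {
    // two hypothetical 3-dimensional columns a0 and a1
    std::vector<double> a0 = {1.0, 2.0, 3.0};
    std::vector<double> a1 = {4.0, 5.0, 6.0};
    double n0 = 0.0, n1 = 0.0, dot = 0.0, dist2 = 0.0;
    for (std::size_t k = 0; k < a0.size(); ++k) {
        n0    += a0[k] * a0[k];                     // ||a0||^2
        n1    += a1[k] * a1[k];                     // ||a1||^2
        dot   += a0[k] * a1[k];                     // <a0, a1>
        dist2 += (a0[k] - a1[k]) * (a0[k] - a1[k]); // ||a0 - a1||^2
    }
    // the Herk call supplies -2*dot; the norm loop adds n0 + n1
    assert(std::abs((n0 + n1 - 2.0 * dot) - dist2) < 1e-12);
    return 0;
}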
Example 3: mixed_gemm_local_part_nn
inline void mixed_gemm_local_part_nn (
const double alpha,
const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
const El::DistMatrix<value_type, El::STAR, El::STAR> &S,
const double beta,
std::vector<value_type> &local_matrix) {
typedef SpDCCols< index_type, value_type > col_t;
typedef SpParMat< index_type, value_type, col_t > matrix_type;
matrix_type &_A = const_cast<matrix_type&>(A);
col_t &data = _A.seq();
//FIXME
local_matrix.resize(S.Width() * data.getnrow(), 0);
size_t cb_col_offset = utility::cb_my_col_offset(A);
for(typename col_t::SpColIter col = data.begcol();
col != data.endcol(); col++) {
for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
nz != data.endnz(col); nz++) {
// we want local index here to fill local dense matrix
index_type rowid = nz.rowid();
// column needs to be global
index_type colid = col.colid() + cb_col_offset;
// compute application of S to yield a partial row in the result.
for(size_t bcol = 0; bcol < S.Width(); ++bcol) {
local_matrix[rowid * S.Width() + bcol] +=
alpha * S.Get(colid, bcol) * nz.value();
}
}
}
}
Example 4: Gemv
inline void Gemv(El::Orientation oA,
T alpha, const El::DistMatrix<T, El::VC, El::STAR>& A,
const El::DistMatrix<T, El::STAR, El::STAR>& x,
El::DistMatrix<T, El::VC, El::STAR>& y) {
int y_height = (oA == El::NORMAL ? A.Height() : A.Width());
El::Zeros(y, y_height, 1);
base::Gemv(oA, alpha, A, x, T(0), y);
}
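A hedged usage sketch for the wrapper above. The El:: calls are standard Elemental; the assumptions are that this Gemv overload (and the base::Gemv it forwards to) is visible in the calling scope and that Elemental has already been initialized.

void gemv_usage_sketch() {
    El::Grid grid(El::mpi::COMM_WORLD);
    El::DistMatrix<double, El::VC, El::STAR> A(grid), y(grid);
    El::DistMatrix<double, El::STAR, El::STAR> x(grid);
    El::Uniform(A, 1000, 50);          // 1000 x 50 distributed matrix
    El::Uniform(x, 50, 1);             // redundantly stored right-hand side
    Gemv(El::NORMAL, 1.0, A, x, y);    // y is resized/zeroed to 1000 x 1, then y := A * x
}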
Example 5: outer_panel_mixed_gemm_impl_nn
inline void outer_panel_mixed_gemm_impl_nn(
const double alpha,
const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
const El::DistMatrix<value_type, El::STAR, El::STAR> &S,
const double beta,
El::DistMatrix<value_type, col_d, El::STAR> &C) {
utility::combblas_slab_view_t<index_type, value_type> cbview(A, false);
//FIXME: factor
size_t slab_size = 2 * C.Grid().Height();
for(size_t cur_row_idx = 0; cur_row_idx < cbview.nrows();
cur_row_idx += slab_size) {
size_t cur_slab_size =
std::min(slab_size, cbview.nrows() - cur_row_idx);
// get the next slab of rows of A
El::DistMatrix<value_type, col_d, El::STAR>
A_row(cur_slab_size, S.Height());
cbview.extract_elemental_row_slab_view(A_row, cur_slab_size);
// fill the local entries of the distributed row slab
for(size_t l_col_idx = 0; l_col_idx < A_row.LocalWidth();
l_col_idx++) {
size_t g_col_idx = l_col_idx * A_row.RowStride()
+ A_row.RowShift();
for(size_t l_row_idx = 0; l_row_idx < A_row.LocalHeight();
++l_row_idx) {
size_t g_row_idx = l_row_idx * A_row.ColStride()
+ A_row.ColShift() + cur_row_idx;
A_row.SetLocal(l_row_idx, l_col_idx,
cbview(g_row_idx, g_col_idx));
}
}
El::DistMatrix<value_type, col_d, El::STAR>
C_slice(cur_slab_size, C.Width());
El::View(C_slice, C, cur_row_idx, 0, cur_slab_size, C.Width());
El::LocalGemm(El::NORMAL, El::NORMAL, alpha, A_row, S,
beta, C_slice);
}
}
Example 6: elstar2vec
/*
 * Convert an elemental STAR, STAR distributed vector to a std::vector
*/
int elstar2vec(const El::DistMatrix<El::Complex<double>,El::STAR,El::STAR> &Y, std::vector<double> &vec){
const El::Complex<double> *Y_ptr = Y.LockedBuffer();
int sz = vec.size()/2;
#pragma omp parallel for
for(int i=0;i<sz; i++){
vec[2*i] = El::RealPart(Y_ptr[i]);
vec[2*i+1] = El::ImagPart(Y_ptr[i]);
}
return 0;
}
Example 7: vec2elstar
/*
* Convert a std::vector to an elemental STAR, STAR distributed vector
*/
int vec2elstar(const std::vector<double> &vec, El::DistMatrix<El::Complex<double>,El::STAR,El::STAR > &Y){
El::Complex<double> *Y_ptr = Y.Buffer();
int sz = vec.size()/2;
#pragma omp parallel for
for(int i=0;i<sz; i++){
double real = vec[2*i];
double imag = vec[2*i+1];
El::SetRealPart(Y_ptr[i],real);
El::SetImagPart(Y_ptr[i],imag);
}
return 0;
}
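A hedged round-trip sketch for the two converters above (Examples 6 and 7). It assumes both functions are visible in the calling scope, Elemental is initialized, and the [STAR,STAR] matrix has been sized to hold vec.size()/2 complex entries.

void elstar_roundtrip_sketch() {
    El::Grid grid(El::mpi::COMM_WORLD);
    El::DistMatrix<El::Complex<double>, El::STAR, El::STAR> Y(grid);
    El::Zeros(Y, 4, 1);                                 // room for 4 complex entries, replicated everywhere
    std::vector<double> in  = {1, 2, 3, 4, 5, 6, 7, 8}; // interleaved (re, im) pairs
    std::vector<double> out(in.size());
    vec2elstar(in, Y);                                  // pack the flat vector into Y
    elstar2vec(Y, out);                                 // unpack it again; out matches in on every rank
}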
Example 8: elemental2vec
int elemental2vec(const El::DistMatrix<El::Complex<double>,El::VC,El::STAR> &Y, std::vector<double> &vec){
assert((Y.DistData().colDist == El::VC) and (Y.DistData().rowDist == El::STAR)); // [VC,STAR]: rows cyclically distributed, columns replicated
int data_dof=2;
int SCAL_EXP = 1;
//double *pt_array,*pt_perm_array;
int r,q,ll,rq; // el vec info
int nbigs; //Number of large recv (i.e. recv 1 extra data point)
int pstart; // p_id of nstart
int rank = El::mpi::WorldRank(); //p_id
int recv_size; // base recv size
bool print = (rank == -1);
// Get el vec info
ll = Y.Height();
const El::Grid* g = &(Y.Grid());
r = g->Height();
q = g->Width();
MPI_Comm comm = (g->Comm()).comm;
int cheb_deg = InvMedTree<FMM_Mat_t>::cheb_deg;
int omp_p=omp_get_max_threads();
size_t n_coeff3=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6;
// Get petsc vec params
//VecGetLocalSize(pt_vec,&nlocal);
int nlocal = (vec.size())/data_dof;
if(print) std::cout << "nlocal: " << nlocal << std::endl;
int nstart = 0;
//VecGetArray(pt_vec,&pt_array);
//VecGetOwnershipRange(pt_vec,&nstart,NULL);
MPI_Exscan(&nlocal,&nstart,1,MPI_INT,MPI_SUM,comm);
// Determine who owns the first element we want
rq = r * q;
pstart = nstart % rq;
nbigs = nlocal % rq;
recv_size = nlocal / rq;
if(print){
std::cout << "r: " << r << " q: " << q <<std::endl;
std::cout << "nstart: " << nstart << std::endl;
std::cout << "ps: " << pstart << std::endl;
std::cout << "nbigs: " << nbigs << std::endl;
std::cout << "recv_size: " << recv_size << std::endl;
}
// Make recv sizes
std::vector<int> recv_lengths(rq);
std::fill(recv_lengths.begin(),recv_lengths.end(),recv_size);
if(nbigs >0){
for(int i=0;i<nbigs;i++){
recv_lengths[(pstart + i) % rq] += 1;
}
}
// Make recv disps
std::vector<int> recv_disps = exscan(recv_lengths);
// All2all to get send sizes
std::vector<int> send_lengths(rq);
MPI_Alltoall(&recv_lengths[0], 1, MPI_INT, &send_lengths[0], 1, MPI_INT,comm);
// Scan to get send_disps
std::vector<int> send_disps = exscan(send_lengths);
// Do all2allv to get data on correct processor
std::vector<El::Complex<double>> recv_data(nlocal);
std::vector<El::Complex<double>> recv_data_ordered(nlocal);
//MPI_Alltoallv(el_vec.Buffer(),&send_lengths[0],&send_disps[0],MPI_DOUBLE,
//              &recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
El::mpi::AllToAll(Y.LockedBuffer(), &send_lengths[0], &send_disps[0], &recv_data[0],&recv_lengths[0],&recv_disps[0],comm);
if(print){
//std::cout << "Send data: " <<std::endl << *el_vec.Buffer() <<std::endl;
std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
}
// Reorder the data so that it is in the right order for the fmm tree
for(int p=0;p<rq;p++){
int base_idx = (p - pstart + rq) % rq;
int offset = recv_disps[p];
for(int i=0;i<recv_lengths[p];i++){
recv_data_ordered[base_idx + rq*i] = recv_data[offset + i];
}
}
// loop through and put the data into the vector
#pragma omp parallel for
for(int i=0;i<nlocal; i++){
vec[2*i] = El::RealPart(recv_data_ordered[i]);
vec[2*i+1] = El::ImagPart(recv_data_ordered[i]);
}
//......... (remainder of this example omitted) .........
Example 9: Width
int Width(const El::DistMatrix<T, U, V>& A) {
return A.Width();
}
Example 10: build_compatible
static type build_compatible(int m, int n, const El::DistMatrix<F, U, V>& A) {
return type(m, n, A.Grid(), A.Root());
}
Example 11: owner
index_type owner(const El::DistMatrix<value_type, El::VR, El::STAR> &A,
const index_type row_idx, const index_type col_idx) {
return (row_idx / A.Grid().Width()) % A.Grid().Height() +
(row_idx % A.Grid().Width()) * A.Grid().Height();
}
Example 12: vec2elemental
int vec2elemental(const std::vector<double> &vec, El::DistMatrix<El::Complex<double>,El::VC,El::STAR> &Y){
int data_dof=2;
int SCAL_EXP = 1;
int nlocal,gsize; //local elements, start p_id, global size
double *pt_array; // will hold local array
int r,q,rq; //Grid sizes
int nbigs; //Number of large sends (i.e. send 1 extra data point)
int pstart; // p_id of nstart
int rank = El::mpi::WorldRank(); //p_id
int send_size; // base send size
bool print = rank == -1;
// Get Grid and associated params
const El::Grid* g = &(Y.Grid());
r = g->Height();
q = g->Width();
MPI_Comm comm = (g->Comm()).comm;
// Get sizes, array in petsc
nlocal = vec.size()/data_dof;
int nstart = 0;
MPI_Exscan(&nlocal,&nstart,1,MPI_INT,MPI_SUM,comm);
//VecGetOwnershipRange(pt_vec,&nstart,NULL);
//Find processor that nstart belongs to, number of larger sends
rq = r * q;
pstart = nstart % rq; //int div
nbigs = nlocal % rq;
send_size = nlocal/rq;
if(print){
std::cout << "r: " << r << " q: " << q <<std::endl;
std::cout << "nstart: " << nstart << std::endl;
std::cout << "ps: " << pstart << std::endl;
std::cout << "nbigs: " << nbigs << std::endl;
std::cout << "send_size: " << send_size << std::endl;
}
// Make send_lengths
std::vector<int> send_lengths(rq);
std::fill(send_lengths.begin(),send_lengths.end(),send_size);
if(nbigs >0){
for(int j=0;j<nbigs;j++){
send_lengths[(pstart + j) % rq] += 1;
}
}
// Make send_disps
std::vector<int> send_disps = exscan(send_lengths);
std::vector<El::Complex<double>> indata(nlocal);
// copy the interleaved (real, imag) input data into a local vector of complex values for sending
#pragma omp parallel for
for(int i=0;i<nlocal;i++){
El::Complex<double> val;
El::SetRealPart(val,vec[2*i+0]);
El::SetImagPart(val,vec[2*i+1]);
indata[i] = val;
}
// Make send_data, i.e. reorder the data
std::vector<El::Complex<double>> send_data(nlocal);
for(int proc=0;proc<rq;proc++){
int offset = send_disps[proc];
int base_idx = (proc - pstart + rq) % rq;
for(int j=0; j<send_lengths[proc]; j++){
int idx = base_idx + (j * rq);
send_data[offset + j] = indata[idx];
}
}
// Do all2all to get recv_lengths
std::vector<int> recv_lengths(rq);
MPI_Alltoall(&send_lengths[0], 1, MPI_INT, &recv_lengths[0], 1, MPI_INT,comm);
// Scan to get recv_disps
std::vector<int> recv_disps = exscan(recv_lengths);
// Do all2allv to get data on correct processor
El::Complex<double> * recv_data = Y.Buffer();
//MPI_Alltoallv(&send_data[0],&send_lengths[0],&send_disps[0],MPI_DOUBLE,
//              &recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
El::mpi::AllToAll(&send_data[0], &send_lengths[0], &send_disps[0], recv_data,&recv_lengths[0],&recv_disps[0],comm);
if(print){
std::cout << "Send data: " <<std::endl << send_data <<std::endl;
std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
}
return 0;
}
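The send/recv reordering in the function above (and in Example 8) follows Elemental's [VC,STAR] convention: with P = r*q processes, global row g is stored on VC rank g % P at local slot g / P, so contiguous local PETSc-style data has to be dealt out round-robin before the all-to-all. Below is a tiny local sketch of that mapping with no MPI and hypothetical sizes.

#include <cstdio>

int main() {
    const int P = 4;    // r * q processes (hypothetical)
    const int n = 10;   // global vector length (hypothetical)
    for (int g = 0; g < n; ++g) {
        int owner = g % P;   // VC rank that stores global row g
        int slot  = g / P;   // local row index on that rank
        std::printf("global %d -> rank %d, local slot %d\n", g, owner, slot);
    }
    return 0;
}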
Example 13: tree2elemental
int tree2elemental(InvMedTree<FMM_Mat_t> *tree, El::DistMatrix<T,El::VC,El::STAR> &Y){
int data_dof=2;
int SCAL_EXP = 1;
int nlocal,gsize; //local elements, start p_id, global size
double *pt_array; // will hold local array
int r,q,rq; //Grid sizes
int nbigs; //Number of large sends (i.e. send 1 extra data point)
int pstart; // p_id of nstart
int rank = El::mpi::WorldRank(); //p_id
int send_size; // base send size
bool print = rank == -1;
// Get Grid and associated params
const El::Grid* g = &(Y.Grid());
r = g->Height();
q = g->Width();
MPI_Comm comm = (g->Comm()).comm;
std::vector<FMMNode_t*> nlist = tree->GetNGLNodes();
int cheb_deg = InvMedTree<FMM_Mat_t>::cheb_deg;
int omp_p=omp_get_max_threads();
size_t n_coeff3=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6;
// Get sizes, array in petsc
//VecGetSize(pt_vec,&gsize);
gsize = tree->M/data_dof;
nlocal = tree->m/data_dof;
//VecGetLocalSize(pt_vec,&nlocal);
//VecGetArray(pt_vec,&pt_array);
int nstart = 0;
MPI_Exscan(&nlocal,&nstart,1,MPI_INT,MPI_SUM,comm);
//VecGetOwnershipRange(pt_vec,&nstart,NULL);
//Find processor that nstart belongs to, number of larger sends
rq = r * q;
pstart = nstart % rq; //int div
nbigs = nlocal % rq;
send_size = nlocal/rq;
if(print){
std::cout << "r: " << r << " q: " << q <<std::endl;
std::cout << "nstart: " << nstart << std::endl;
std::cout << "ps: " << pstart << std::endl;
std::cout << "nbigs: " << nbigs << std::endl;
std::cout << "send_size: " << send_size << std::endl;
}
// Make send_lengths
std::vector<int> send_lengths(rq);
std::fill(send_lengths.begin(),send_lengths.end(),send_size);
if(nbigs >0){
for(int j=0;j<nbigs;j++){
send_lengths[(pstart + j) % rq] += 1;
}
}
// Make send_disps
std::vector<int> send_disps = exscan(send_lengths);
std::vector<El::Complex<double>> indata(nlocal);
// copy the data from the fmm tree into a local vector of complex Chebyshev coefficients for sending
#pragma omp parallel for
for(size_t tid=0;tid<omp_p;tid++){
size_t i_start=(nlist.size()* tid )/omp_p;
size_t i_end =(nlist.size()*(tid+1))/omp_p;
for(size_t i=i_start;i<i_end;i++){
pvfmm::Vector<double>& coeff_vec=nlist[i]->ChebData();
double s=std::pow(0.5,COORD_DIM*nlist[i]->Depth()*0.5*SCAL_EXP);
size_t offset=i*n_coeff3;
for(size_t j=0;j<n_coeff3;j++){
double real = coeff_vec[j]*s; // local indices as in the pvfmm trees
double imag = coeff_vec[j+n_coeff3]*s;
El::Complex<double> coeff;
El::SetRealPart(coeff,real);
El::SetImagPart(coeff,imag);
indata[offset+j] = coeff;
}
}
}
// Make send_data
std::vector<El::Complex<double>> send_data(nlocal);
for(int proc=0;proc<rq;proc++){
int offset = send_disps[proc];
int base_idx = (proc - pstart + rq) % rq;
for(int j=0; j<send_lengths[proc]; j++){
int idx = base_idx + (j * rq);
send_data[offset + j] = indata[idx];
}
}
// Do all2all to get recv_lengths
std::vector<int> recv_lengths(rq);
MPI_Alltoall(&send_lengths[0], 1, MPI_INT, &recv_lengths[0], 1, MPI_INT,comm);
//......... (remainder of this example omitted) .........
Example 14: El2Petsc_vec
void El2Petsc_vec(El::DistMatrix<double,VC,STAR>& el_vec,Vec& pt_vec){
PetscInt nlocal, nstart; // petsc vec info
PetscScalar *pt_array,*pt_perm_array;
int r,q,ll,rq; // el vec info
int nbigs; //Number of large recv (i.e. recv 1 extra data point)
int pstart; // p_id of nstart
int p = El::mpi::WorldRank(); //p_id
int recv_size; // base recv size
bool print = p == -1;
// Get el vec info
ll = el_vec.Height();
const El::Grid* g = &(el_vec.Grid());
r = g->Height();
q = g->Width();
MPI_Comm comm = (g->Comm()).comm;
// Get petsc vec params
VecGetLocalSize(pt_vec,&nlocal);
VecGetArray(pt_vec,&pt_array);
VecGetOwnershipRange(pt_vec,&nstart,NULL);
// Determine who owns the first element we want
rq = r * q;
pstart = nstart % rq;
nbigs = nlocal % rq;
recv_size = nlocal / rq;
if(print){
std::cout << "r: " << r << " q: " << q <<std::endl;
std::cout << "nstart: " << nstart << std::endl;
std::cout << "ps: " << pstart << std::endl;
std::cout << "nbigs: " << nbigs << std::endl;
std::cout << "recv_size: " << recv_size << std::endl;
}
// Make recv sizes
std::vector<int> recv_lengths(rq);
std::fill(recv_lengths.begin(),recv_lengths.end(),recv_size);
if(nbigs >0){
for(int i=0;i<nbigs;i++){
recv_lengths[(pstart + i) % rq] += 1;
}
}
// Make recv disps
std::vector<int> recv_disps = exscan(recv_lengths);
// All2all to get send sizes
std::vector<int> send_lengths(rq);
MPI_Alltoall(&recv_lengths[0], 1, MPI_INT, &send_lengths[0], 1, MPI_INT,comm);
// Scan to get send_disps
std::vector<int> send_disps = exscan(send_lengths);
// Do all2allv to get data on correct processor
std::vector<double> recv_data(nlocal);
MPI_Alltoallv(el_vec.Buffer(),&send_lengths[0],&send_disps[0],MPI_DOUBLE,
              &recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
if(print){
//std::cout << "Send data: " <<std::endl << *el_vec.Buffer() <<std::endl;
std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
}
// Reorder for petsc
for(int p=0;p<rq;p++){
int base_idx = (p - pstart + rq) % rq;
int offset = recv_disps[p];
for(int i=0;i<recv_lengths[p];i++){
pt_array[base_idx + rq*i] = recv_data[offset + i];
}
}
// Copy into array
VecRestoreArray(pt_vec,&pt_array);
}
Example 15: Petsc2El_vec
void Petsc2El_vec(Vec& pt_vec,El::DistMatrix<double,VC,STAR>& el_vec){
PetscInt nlocal,nstart,gsize; //local elements, start p_id, global size
PetscScalar *pt_array; // will hold local array
int r,q,rq; //Grid sizes
int nbigs; //Number of large sends (i.e. send 1 extra data point)
int pstart; // p_id of nstart
int p = El::mpi::WorldRank(); //p_id
int send_size; // base send size
bool print = p == -1;
// Get Grid and associated params
const El::Grid* g = &(el_vec.Grid());
r = g->Height();
q = g->Width();
MPI_Comm comm = (g->Comm()).comm;
// Get sizes, array in petsc
VecGetSize(pt_vec,&gsize);
VecGetLocalSize(pt_vec,&nlocal);
VecGetArray(pt_vec,&pt_array);
VecGetOwnershipRange(pt_vec,&nstart,NULL);
//Find processor that nstart belongs to, number of larger sends
rq = r * q;
pstart = nstart % rq; //int div
nbigs = nlocal % rq;
send_size = nlocal/rq;
if(print){
std::cout << "r: " << r << " q: " << q <<std::endl;
std::cout << "nstart: " << nstart << std::endl;
std::cout << "ps: " << pstart << std::endl;
std::cout << "nbigs: " << nbigs << std::endl;
std::cout << "send_size: " << send_size << std::endl;
}
// Make send_lengths
std::vector<int> send_lengths(rq);
std::fill(send_lengths.begin(),send_lengths.end(),send_size);
if(nbigs >0){
for(int j=0;j<nbigs;j++){
send_lengths[(pstart + j) % rq] += 1;
}
}
// Make send_disps
std::vector<int> send_disps = exscan(send_lengths);
// Make send_data
std::vector<double> send_data(nlocal);
for(int proc=0;proc<rq;proc++){
int offset = send_disps[proc];
int base_idx = (proc - pstart + rq) % rq;
for(int j=0; j<send_lengths[proc]; j++){
int idx = base_idx + (j * rq);
send_data[offset + j] = pt_array[idx];
}
}
// Do all2all to get recv_lengths
std::vector<int> recv_lengths(rq);
MPI_Alltoall(&send_lengths[0], 1, MPI_INT, &recv_lengths[0], 1, MPI_INT,comm);
// Scan to get recv_disps
std::vector<int> recv_disps = exscan(recv_lengths);
// Do all2allv to get data on correct processor
double * recv_data = el_vec.Buffer();
MPI_Alltoallv(&send_data[0],&send_lengths[0],&send_disps[0],MPI_DOUBLE,
              &recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
if(print){
std::cout << "Send data: " <<std::endl << send_data <<std::endl;
std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
}
}
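A hedged round-trip sketch for Examples 14 and 15. It assumes PetscInitialize has already run on the same communicator as the Elemental grid, that both converters are visible in the calling scope, and that the distributed matrix is pre-sized to the PETSc vector's global length.

void petsc_roundtrip_sketch() {
    Vec v;
    PetscInt N = 100;
    VecCreateMPI(PETSC_COMM_WORLD, PETSC_DECIDE, N, &v);   // distributed PETSc vector
    VecSet(v, 1.0);                                        // fill with ones
    El::Grid grid(El::mpi::COMM_WORLD);
    El::DistMatrix<double, El::VC, El::STAR> el_vec(N, 1, grid);
    Petsc2El_vec(v, el_vec);                               // PETSc -> Elemental [VC,STAR]
    El2Petsc_vec(el_vec, v);                               // and back again
    VecDestroy(&v);
}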