本文整理汇总了C++中MPI_Allgatherv函数的典型用法代码示例。如果您正苦于以下问题：C++ MPI_Allgatherv函数的具体用法？C++ MPI_Allgatherv怎么用？C++ MPI_Allgatherv使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了MPI_Allgatherv函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: grad
void l2r_rank_fun::grad(double *w, double *g)
{
int i;
int l=prob->l;
double *lg = new double[l];
double *tmp_vector = new double[l];
double *gtmp_vector = new double[global_l];
#pragma omp parallel for default(shared) private(i)
for (i=0;i<l;i++)
{
tmp_vector[i] = ATAQb[i] - ATe[i];
}
MPI_Allgatherv((void*)tmp_vector, l, MPI_DOUBLE, (void*)gtmp_vector, local_l, start_ptr, MPI_DOUBLE, MPI_COMM_WORLD);
Qv(gtmp_vector, lg);
MPI_Allgatherv((void*)lg, l, MPI_DOUBLE, (void*)g, local_l, start_ptr, MPI_DOUBLE, MPI_COMM_WORLD);
#pragma omp parallel for default(shared) private(i)
for(i=0;i<global_l;i++)
{
g[i] = gz[i] + 2*C*g[i];
}
delete[] tmp_vector;
delete[] gtmp_vector;
delete[] lg;
}
示例2: sharp_make_mpi_info
/*
 * Fills *minfo with the layout of the job across the tasks of comm:
 *   - ntasks / mytask from the communicator,
 *   - per-task m counts (nm), their running offsets (ofs_m), the
 *     concatenated m values (mval) and mmax derived from them,
 *   - per-task ring-pair counts (npair), their running offsets (ofs_pair),
 *     and the concatenated theta / ispair arrays,
 *   - count and displacement arrays for the alm and map exchanges.
 * All arrays stored in minfo are allocated here with RALLOC.
 */
static void sharp_make_mpi_info (MPI_Comm comm, const sharp_job *job,
  sharp_mpi_info *minfo)
  {
  minfo->comm = comm;
  MPI_Comm_size (comm, &minfo->ntasks);
  MPI_Comm_rank (comm, &minfo->mytask);

  /* m values: gather the counts, build offsets, then gather the values. */
  minfo->nm=RALLOC(int,minfo->ntasks);
  MPI_Allgather ((int *)(&job->ainfo->nm),1,MPI_INT,minfo->nm,1,MPI_INT,comm);
  minfo->ofs_m=RALLOC(int,minfo->ntasks+1);
  minfo->ofs_m[0]=0;
  for (int task=0; task<minfo->ntasks; ++task)
    minfo->ofs_m[task+1] = minfo->ofs_m[task]+minfo->nm[task];
  minfo->nmtotal=minfo->ofs_m[minfo->ntasks];
  minfo->mval=RALLOC(int,minfo->nmtotal);
  MPI_Allgatherv(job->ainfo->mval, job->ainfo->nm, MPI_INT, minfo->mval,
    minfo->nm, minfo->ofs_m, MPI_INT, comm);
  minfo->mmax=sharp_get_mmax(minfo->mval,minfo->nmtotal);

  /* Ring pairs: same pattern, counts first and offsets second. */
  minfo->npair=RALLOC(int,minfo->ntasks);
  MPI_Allgather ((int *)(&job->ginfo->npairs), 1, MPI_INT, minfo->npair, 1,
    MPI_INT, comm);
  minfo->ofs_pair=RALLOC(int,minfo->ntasks+1);
  minfo->ofs_pair[0]=0;
  for (int task=0; task<minfo->ntasks; ++task)
    minfo->ofs_pair[task+1] = minfo->ofs_pair[task]+minfo->npair[task];
  minfo->npairtotal=minfo->ofs_pair[minfo->ntasks];

  /* Destination arrays for the pair attributes of all tasks. */
  minfo->theta=RALLOC(double,minfo->npairtotal);
  minfo->ispair=RALLOC(int,minfo->npairtotal);

  /* Pack the local pair attributes into contiguous send buffers. */
  double *local_theta=RALLOC(double,job->ginfo->npairs);
  int *local_ispair=RALLOC(int,job->ginfo->npairs);
  for (int p=0; p<job->ginfo->npairs; ++p)
    {
    local_theta[p]=job->ginfo->pair[p].r1.theta;
    local_ispair[p]=(job->ginfo->pair[p].r2.nph>0) ? 1 : 0;
    }
  MPI_Allgatherv(local_theta, job->ginfo->npairs, MPI_DOUBLE, minfo->theta,
    minfo->npair, minfo->ofs_pair, MPI_DOUBLE, comm);
  MPI_Allgatherv(local_ispair, job->ginfo->npairs, MPI_INT, minfo->ispair,
    minfo->npair, minfo->ofs_pair, MPI_INT, comm);
  DEALLOC(local_theta);
  DEALLOC(local_ispair);

  minfo->nph=2*job->nmaps*job->ntrans;

  /* alm exchange: own m count combined with every task's pair count. */
  minfo->almcount=RALLOC(int,minfo->ntasks);
  minfo->almdisp=RALLOC(int,minfo->ntasks+1);
  minfo->almdisp[0]=0;
  for (int task=0; task<minfo->ntasks; ++task)
    {
    minfo->almcount[task] =
      2*minfo->nph*minfo->nm[minfo->mytask]*minfo->npair[task];
    minfo->almdisp[task+1] = minfo->almdisp[task]+minfo->almcount[task];
    }

  /* map exchange: every task's m count combined with the own pair count. */
  minfo->mapcount=RALLOC(int,minfo->ntasks);
  minfo->mapdisp=RALLOC(int,minfo->ntasks+1);
  minfo->mapdisp[0]=0;
  for (int task=0; task<minfo->ntasks; ++task)
    {
    minfo->mapcount[task] =
      2*minfo->nph*minfo->nm[task]*minfo->npair[minfo->mytask];
    minfo->mapdisp[task+1] = minfo->mapdisp[task]+minfo->mapcount[task];
    }
  }
示例3: invoke
/// Executes the gather described by `laz`, writing the result into `lhs`.
///
/// Every rank contributes the elements of `laz.ref`. With `laz.all` set the
/// result is delivered to all ranks (MPI_Allgather/MPI_Allgatherv); otherwise
/// only `laz.root` receives it (MPI_Gather/MPI_Gatherv). `lhs` must refer to
/// contiguous data, else a TRIQS runtime error is raised.
void invoke() {
  if (!has_contiguous_data(lhs)) TRIQS_RUNTIME_ERROR << "mpi gather of array into a non contiguous view";

  auto c = laz.c;
  const bool to_all = laz.all;

  std::vector<int> recvcounts(c.size());
  std::vector<int> displs(c.size() + 1, 0);

  int sendcount = laz.ref.domain().number_of_elements();
  auto D = mpi::mpi_datatype<typename A::value_type>();
  auto d = laz.domain();

  // Only ranks that actually receive data need a correctly sized target.
  if (to_all || (laz.c.rank() == laz.root)) resize_or_check_if_view(lhs, d.lengths());

  // Take the pointers after the potential resize above.
  void *recv_ptr = lhs.data_start();
  const void *send_ptr = laz.ref.data_start();

  // Step 1: exchange the per-rank element counts.
  auto int_ty = mpi::mpi_datatype<int>();
  if (to_all)
    MPI_Allgather(&sendcount, 1, int_ty, recvcounts.data(), 1, int_ty, c.get());
  else
    MPI_Gather(&sendcount, 1, int_ty, recvcounts.data(), 1, int_ty, laz.root, c.get());

  // Step 2: displacements are the running sum of the counts.
  for (std::size_t r = 0; r < recvcounts.size(); ++r) displs[r + 1] = displs[r] + recvcounts[r];

  // Step 3: exchange the payload.
  if (to_all)
    MPI_Allgatherv((void *)send_ptr, sendcount, D, recv_ptr, recvcounts.data(), displs.data(), D, c.get());
  else
    MPI_Gatherv((void *)send_ptr, sendcount, D, recv_ptr, recvcounts.data(), displs.data(), D, laz.root, c.get());
}
示例4: gatherLocalVectors
/**
 * Collects the local entries of all ranks of PETSC_COMM_WORLD into one
 * array that is available on every rank.
 *
 * @param local_array   the _size_loc entries owned by the calling rank.
 * @param global_array  receives the entries of all ranks, concatenated in
 *                      rank order; it must be large enough for the sum of
 *                      all local sizes.
 */
void PETScVector::gatherLocalVectors( PetscScalar local_array[],
                                      PetscScalar global_array[])
{
    // Collect vectors from processors.
    int size_rank;
    MPI_Comm_size(PETSC_COMM_WORLD, &size_rank);

    // MPI transfers counts and displacements as plain int. PetscInt can be a
    // 64-bit type, so everything handed to MPI with MPI_INT (or as a
    // count/displacement array) is kept in int storage.
    int local_count = static_cast<int>(_size_loc);

    // number of elements to be sent for each rank
    std::vector<int> i_cnt(size_rank);
    MPI_Allgather(&local_count, 1, MPI_INT, &i_cnt[0], 1, MPI_INT, PETSC_COMM_WORLD);

    // offset in the receive vector of the data from each rank
    std::vector<int> i_disp(size_rank);
    int offset = 0;
    for(int i=0; i<size_rank; i++)
    {
        i_disp[i] = offset;
        offset += i_cnt[i];
    }

    // collect local array
    // NOTE(review): MPI_DOUBLE assumes PetscScalar is a real double; confirm
    // against the PETSc configuration in use.
    MPI_Allgatherv(local_array, local_count, MPI_DOUBLE,
                   global_array, &i_cnt[0], &i_disp[0], MPI_DOUBLE, PETSC_COMM_WORLD);
}
示例5: all_gather
static void all_gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out)
{
std::vector<int> counts(comm.size());
Collectives<int,void*>::all_gather(comm, (int) in.size(), counts);
std::vector<int> offsets(comm.size(), 0);
for (unsigned i = 1; i < offsets.size(); ++i)
offsets[i] = offsets[i-1] + counts[i-1];
std::vector<T> buffer(offsets.back() + counts.back());
MPI_Allgatherv(Datatype::address(const_cast<T&>(in[0])),
in.size(),
Datatype::datatype(),
Datatype::address(buffer[0]),
&counts[0],
&offsets[0],
Datatype::datatype(),
comm);
out.resize(comm.size());
size_t cur = 0;
for (int i = 0; i < comm.size(); ++i)
{
out[i].reserve(counts[i]);
for (int j = 0; j < counts[i]; ++j)
out[i].push_back(buffer[cur++]);
}
}
示例6: idft
/*
 * Two-pass inverse transform of an N x M row-major array, with the row
 * range [start, end) handled by the calling rank.
 *
 * Pass 1 fills rows start..end-1 of tmp:
 *     tmp[n*M + l] = sum over m of src[n*M + m] / w1[l*m] / M
 * The rows computed by all ranks are then combined in tmp on every rank.
 * Pass 2 fills rows start..end-1 of dst:
 *     dst[k*M + l] = sum over n of tmp[n*M + l] / w2[n*k] / N
 *
 * cnt/disp are the per-rank element counts and displacements inside tmp.
 * NOTE(review): disp[rank]/cnt[rank] are assumed to describe exactly the
 * rows start..end-1 written in pass 1 -- confirm against the caller.
 *
 * Always returns 0.
 */
int idft(float complex *dst, float complex* src, float complex* w1, float complex* w2, float complex* tmp, int N, int M, int start, int end, int* cnt, int* disp, MPI_Datatype mpi_complexf, int rank) {
    int k, l, m, n;

    /* Pass 1: transform along each locally owned row. */
    for (n = start; n<end; n++) {
        int nMl = n*M;
        for (l=0; l<M; l++) {
            int lm = 0;      /* l*m, advanced incrementally */
            int nMm = n*M;
            tmp[nMl] = 0.0;
            for (m = 0; m<M; m++) {
                tmp[nMl] += src[nMm]/w1[lm]/M;
                nMm ++;
                lm += l;
            }
            nMl ++;
        }
    }

    /*
     * The local contribution already sits at its final position inside tmp.
     * MPI does not allow the send buffer to overlap the receive buffer, so
     * the exchange is done with MPI_IN_PLACE; the send count and send type
     * arguments are ignored in that mode.
     */
    MPI_Allgatherv(MPI_IN_PLACE, cnt[rank], mpi_complexf, tmp, cnt, disp, mpi_complexf, MPI_COMM_WORLD);

    /* Pass 2: transform along the columns for the locally owned output rows. */
    for (k=start; k<end; k++) {
        for (n = 0; n<N; n++) {
            int kMl = k*M;
            for (l=0; l<M; l++) {
                if (n == 0)
                    dst[kMl] = 0.0;   /* first contribution resets the accumulator */
                dst[kMl] += tmp[n*M+l]/w2[n*k]/N;
                kMl ++;
            }
        }
    }
    return 0;
}
示例7: ISGatherTotal_Private
/*
   Gathers the local indices of all ranks of the index set's communicator
   into is->total (allocated here) and records in is->local_offset where the
   calling rank's indices start inside that array.
*/
static PetscErrorCode ISGatherTotal_Private(IS is)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    rank,size,nlocal_mpi;
  PetscMPIInt    *counts = NULL,*displs = NULL;
  PetscInt       r,nlocal,ntotal;
  const PetscInt *local_idx;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(is,IS_CLASSID,1);

  ierr = PetscObjectGetComm((PetscObject)is,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = ISGetLocalSize(is,&nlocal);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,PetscMPIInt,&counts,size,PetscMPIInt,&displs);CHKERRQ(ierr);

  /* Exchange the local sizes as MPI-sized integers. */
  ierr = PetscMPIIntCast(nlocal,&nlocal_mpi);CHKERRQ(ierr);
  ierr = MPI_Allgather(&nlocal_mpi,1,MPI_INT,counts,1,MPI_INT,comm);CHKERRQ(ierr);

  /* Displacements are the running sum of the counts. */
  displs[0] = 0;
  for (r=0; r+1<size; ++r) displs[r+1] = displs[r] + counts[r];
  ntotal = displs[size-1] + counts[size-1];

  ierr = PetscMalloc(ntotal*sizeof(PetscInt),&(is->total));CHKERRQ(ierr);
  ierr = ISGetIndices(is,&local_idx);CHKERRQ(ierr);
  ierr = MPI_Allgatherv((void*)local_idx,nlocal_mpi,MPIU_INT,is->total,counts,displs,MPIU_INT,comm);CHKERRQ(ierr);
  ierr = ISRestoreIndices(is,&local_idx);CHKERRQ(ierr);

  is->local_offset = displs[rank];
  ierr = PetscFree2(counts,displs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
示例8: mpla_copy_distributed_vector_to_cpu
/**
 * Assembles the complete vector in x_cpu by gathering the pieces held by the
 * processes of the caller's process column.
 *
 * @param x_cpu     receive buffer for the assembled vector.
 * @param x         distributed vector; x->data with x->cur_proc_row_count
 *                  entries is this process's contribution, and
 *                  x->proc_row_count / x->proc_row_offset give the size and
 *                  position of every process row's piece.
 * @param instance  supplies the Cartesian communicator, the number of
 *                  process rows and the current process column.
 *
 * Ends with a barrier on instance->comm.
 */
void mpla_copy_distributed_vector_to_cpu(double* x_cpu, struct mpla_vector* x, struct mpla_instance* instance)
{
    // create sub-communicator for each process column
    int remain_dims[2];
    remain_dims[0]=1;
    remain_dims[1]=0;
    MPI_Comm column_comm;
    MPI_Cart_sub(instance->comm, remain_dims, &column_comm);

    // columnwise creation of the full vector
    double* full_vector = x_cpu;
    int* recvcounts = new int[instance->proc_rows];
    int* displs = new int[instance->proc_rows];
    for (int i=0; i<instance->proc_rows; i++)
    {
        recvcounts[i] = x->proc_row_count[i][instance->cur_proc_col];
        displs[i] = x->proc_row_offset[i][instance->cur_proc_col];
    }

    // cudaMalloc((void**)&full_vector, sizeof(double)*x->vec_row_count);
    // cudaThreadSynchronize();
    // checkCUDAError("cudaMalloc");

    MPI_Allgatherv(x->data, x->cur_proc_row_count, MPI_DOUBLE, full_vector, recvcounts, displs, MPI_DOUBLE, column_comm);

    // memory cleanup: the count/displacement arrays are only needed for the
    // gather above
    delete[] recvcounts;
    delete[] displs;
    MPI_Comm_free(&column_comm);

    MPI_Barrier(instance->comm);
}
示例9: ompi_allgatherv_f
/*
 * Fortran-callable wrapper around the C MPI_Allgatherv.
 *
 * All arguments arrive by reference as Fortran integers (MPI_Fint) or raw
 * buffer addresses. The communicator and datatype handles are converted with
 * MPI_Comm_f2c / MPI_Type_f2c, the C routine is called, and its return code
 * is stored in *ierr when ierr is not NULL.
 */
void ompi_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs,
MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)
{
MPI_Comm c_comm;
MPI_Datatype c_sendtype, c_recvtype;
int size, ierr_c;
/* NOTE(review): presumably declares the C-side temporaries used by the
   array conversion macros below -- confirm against the macro definitions. */
OMPI_ARRAY_NAME_DECL(recvcounts);
OMPI_ARRAY_NAME_DECL(displs);
/* Translate the Fortran handles into their C counterparts. */
c_comm = MPI_Comm_f2c(*comm);
c_sendtype = MPI_Type_f2c(*sendtype);
c_recvtype = MPI_Type_f2c(*recvtype);
/* recvcounts and displs are converted with one entry per rank of c_comm. */
MPI_Comm_size(c_comm, &size);
OMPI_ARRAY_FINT_2_INT(recvcounts, size);
OMPI_ARRAY_FINT_2_INT(displs, size);
/* NOTE(review): these macros presumably map the Fortran MPI_IN_PLACE /
   MPI_BOTTOM sentinel addresses to the C constants -- verify. */
sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);
ierr_c = MPI_Allgatherv(sendbuf,
OMPI_FINT_2_INT(*sendcount),
c_sendtype,
recvbuf,
OMPI_ARRAY_NAME_CONVERT(recvcounts),
OMPI_ARRAY_NAME_CONVERT(displs),
c_recvtype, c_comm);
/* Report the C return code back through the Fortran ierr argument. */
if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c);
OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts);
OMPI_ARRAY_FINT_2_INT_CLEANUP(displs);
}
示例10: PS_get_diag
/*
 * Collects the diagonal of the distributed sparse matrix A into d on every
 * rank of A->comm.
 *
 * Each rank sums its entries with i == j into a local array of A->nd values
 * (indexed by i - A->na), then the local arrays are gathered into d using
 * every rank's nd as count and na as displacement.
 */
void PS_get_diag(ParaSparse *A, double *d)
{
    int k;
    int *first_rows = (int *) malloc(A->size * sizeof(int));
    int *row_counts = (int *) malloc(A->size * sizeof(int));
    double *local_diag = (double *) malloc(A->nd * sizeof(double));

    /* Start from an all-zero local diagonal. */
    k = 0;
    while(k < A->nd)
    {
        local_diag[k] = 0; //TODO: memset?
        k++;
    }

    /* Accumulate every stored entry that lies on the diagonal. */
    for(k = 0; k < A->Ne; k++)
    {
        if(A->i[k] != A->j[k])
            continue;
        local_diag[A->i[k] - A->na] += A->Mij[k];
    }

    /* Exchange the displacement (na) and count (nd) of every rank. */
    MPI_Allgather(&(A->na), 1, MPI_INT, first_rows, 1, MPI_INT, A->comm);
    MPI_Allgather(&(A->nd), 1, MPI_INT, row_counts, 1, MPI_INT, A->comm);

    MPI_Allgatherv(local_diag, A->nd, MPI_DOUBLE,
                   d, row_counts, first_rows, MPI_DOUBLE, A->comm);

    free(local_diag);
    free(row_counts);
    free(first_rows);
}
示例11: PMR_comm_eigvals
/*
 * Distributes the eigenvalues so that afterwards every process holds all
 * computed eigenvalues (iu-il+1) in W. Intended to be called directly
 * after 'pmrrr'.
 *
 * On entry the first *nz entries of W are the local eigenvalues and
 * *myfirstp is the position in W where they belong. The exchange runs on a
 * duplicate of comm. Always returns 0.
 */
int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *myfirstp, double *W)
{
  MPI_Comm work_comm;
  int      nproc;
  int      *counts;
  int      *displs;
  double   *local_vals;

  MPI_Comm_dup(comm, &work_comm);
  MPI_Comm_size(work_comm, &nproc);

  counts = (int*)malloc(nproc*sizeof(int));
  assert(counts!=NULL);
  displs = (int*)malloc(nproc*sizeof(int));
  assert(displs!=NULL);

  /* One extra element keeps the request non-zero even when *nz is 0. */
  local_vals = (double*)malloc((*nz+1)*sizeof(double));
  assert(local_vals!=NULL);

  /* W is the receive buffer, so the local values are sent from a copy. */
  if (*nz > 0) {
    memcpy(local_vals, W, (*nz)*sizeof(double) );
  }

  /* Counts and displacements of all processes, then the values. */
  MPI_Allgather(nz, 1, MPI_INT, counts, 1, MPI_INT, work_comm);
  MPI_Allgather(myfirstp, 1, MPI_INT, displs, 1, MPI_INT, work_comm);
  MPI_Allgatherv(local_vals, *nz, MPI_DOUBLE, W, counts, displs,
                 MPI_DOUBLE, work_comm);

  MPI_Comm_free(&work_comm);
  free(local_vals);
  free(displs);
  free(counts);

  return 0;
}
示例12: spmm_csr_info_data_sep_BCBCG
/*
 * Sparse (CSR) times dense product where the dense operand's metadata and
 * data are passed separately.
 *
 * The local dense rows (taken from dataSrc + dataDisp, with
 * dense_mat_info.local_num_col * dense_mat_info.local_num_row values) are
 * gathered from all ranks of MPI_COMM_WORLD; every rank contributes
 * global_num_row / numprocs rows except the last one, which also takes the
 * remainder. The local CSR rows are then multiplied with the gathered matrix
 * and the result replaces res_mat->data.
 *
 * If res_mat->data is null on entry the process exits with status 0.
 * myid is not used.
 */
void spmm_csr_info_data_sep_BCBCG(csrType_local csr_mat, denseType dense_mat_info, double * dataSrc, int dataDisp
        , denseType *res_mat, int myid, int numprocs) {
    int ierr;
    int idx;
    // gather all data from all processes
    int recv_count[numprocs];
    int displs[numprocs];

    int local_num_row_normal = dense_mat_info.global_num_row / numprocs;
    int local_num_col_normal = dense_mat_info.local_num_col;
    int normal_num_elements = local_num_row_normal * local_num_col_normal;

    // recvBuf: the complete dense operand, zero-initialized by calloc()
    double * recvBuf = (double*)calloc( dense_mat_info.global_num_col * dense_mat_info.global_num_row, sizeof(double));
    // accumulator for the local result rows, zero-initialized by calloc()
    double *res_buffer = (double *) calloc(res_mat->local_num_col * res_mat->local_num_row, sizeof (double));

    for (idx = 0; idx < numprocs; idx++) {
        recv_count[idx] = normal_num_elements;
        displs[idx] = idx * normal_num_elements;

        // the last rank additionally owns the rows left over by the division
        if (idx == (numprocs - 1)) {
            recv_count[idx] = (dense_mat_info.global_num_row - local_num_row_normal * (numprocs - 1))
                    * local_num_col_normal;
        }
    }

    ierr = MPI_Allgatherv((void *) (dataSrc+dataDisp), dense_mat_info.local_num_col * dense_mat_info.local_num_row, MPI_DOUBLE
            , recvBuf, recv_count, displs
            , MPI_DOUBLE, MPI_COMM_WORLD);

    // spmv using csr format
    // NOTE(review): the gather counts are based on local_num_col while the
    // rows of recvBuf are addressed with global_num_col below; the two agree
    // only if both column counts are equal -- confirm with the callers.
    int idx_row;
    for (idx_row = 0; idx_row < csr_mat.num_rows; idx_row++) {
        int row_start_idx = csr_mat.row_start[idx_row];
        int row_end_idx = csr_mat.row_start[idx_row + 1];

        int idx_data;
        for (idx_data = row_start_idx; idx_data < row_end_idx; idx_data++) {
            int col_idx = csr_mat.col_idx[idx_data];
            double csr_data = csr_mat.csrdata[idx_data];
            int block_size = dense_mat_info.global_num_col;
            int block_idx;
            for (block_idx = 0; block_idx < block_size; block_idx++) {
                res_buffer[idx_row * res_mat->local_num_col + block_idx] +=
                        csr_data * recvBuf[col_idx * dense_mat_info.global_num_col + block_idx];
            }
        }
    }

    // the gathered operand is only needed for the multiplication above
    free(recvBuf);

    // Data zone changes
    if (res_mat->data != 0) {
        free(res_mat->data);
    } else {
        exit(0);
    }
    res_mat->data = res_buffer;
}
示例13: spmm_csr_v2
/*
 * Sparse (CSR) times dense product, res_mat = csr_mat * dense_mat.
 *
 * The local rows of dense_mat are gathered from all ranks of MPI_COMM_WORLD
 * into one buffer; every rank contributes global_num_row / numprocs rows
 * except the last one, which also takes the remainder. The local CSR rows
 * are multiplied with the gathered matrix and the product replaces
 * res_mat->data.
 *
 * If res_mat->data is null on entry the process exits with status 0.
 */
void spmm_csr_v2(csrType_local csr_mat, denseType dense_mat, denseType *res_mat, int myid, int numprocs) {
    int ierr;
    int proc;
    // per-rank element counts and offsets for the gather
    int recv_count[numprocs];
    int displs[numprocs];

    const int rows_per_proc = dense_mat.global_num_row / numprocs;
    const int cols_per_row = dense_mat.global_num_col;
    const int elems_per_proc = rows_per_proc * cols_per_row;
    // the last rank also owns the rows left over by the integer division
    const int elems_last_proc = (dense_mat.global_num_row - rows_per_proc * (numprocs - 1))
            * cols_per_row;

    // both buffers start out zeroed by calloc()
    double *recv_buffer = (double*)calloc(dense_mat.global_num_col * dense_mat.global_num_row, sizeof(double));
    double *res_buffer = (double *) calloc(res_mat->local_num_col * res_mat->local_num_row, sizeof (double));

    for (proc = 0; proc < numprocs; proc++) {
        displs[proc] = proc * elems_per_proc;
        recv_count[proc] = (proc == (numprocs - 1)) ? elems_last_proc : elems_per_proc;
    }

    // ierr = MPI_Barrier(MPI_COMM_WORLD);
    ierr = MPI_Allgatherv((void *) dense_mat.data, dense_mat.local_num_col * dense_mat.local_num_row, MPI_DOUBLE
            , recv_buffer, recv_count, displs
            , MPI_DOUBLE, MPI_COMM_WORLD);

    // spmm using csr format
    int row;
#ifdef SPMM_CAL_DEBUG_2
    printf("in BLAS3.c, myid=%d,number of row: %d\n", myid, csr_mat.num_rows);
#endif
    for (row = 0; row < csr_mat.num_rows; row++) {
        const int nz_begin = csr_mat.row_start[row];
        const int nz_end = csr_mat.row_start[row + 1];
        const int out_base = row * res_mat->local_num_col;

        int nz;
        for (nz = nz_begin; nz < nz_end; nz++) {
            const int src_row = csr_mat.col_idx[nz];
            const double weight = csr_mat.csrdata[nz];
            const int width = dense_mat.local_num_col;
            const int in_base = src_row * dense_mat.local_num_col;

            // add weight * (gathered row src_row) to the output row
            int col;
            for (col = 0; col < width; col++) {
                res_buffer[out_base + col] += weight * recv_buffer[in_base + col];
            }
        }
    }

    // swap the freshly computed buffer in for the previous result
    if (res_mat->data == 0) {
        exit(0);
    }
    free(res_mat->data);
    res_mat->data = res_buffer;

    free(recv_buffer);
}
示例14: hypre_thread_MPI_Allgatherv
/*
 * MPI_Allgatherv wrapper for the hypre threading layer.
 *
 * Only one caller performs the MPI call: the initial thread, or otherwise
 * the thread stored in hypre_thread[0]. All callers pass through a
 * hypre_barrier before and after the call. Callers that do not perform the
 * MPI call return 0; the calling thread returns MPI_Allgatherv's result.
 */
int
hypre_thread_MPI_Allgatherv( void        *sendbuf,
                             int          sendcount,
                             MPI_Datatype sendtype,
                             void        *recvbuf,
                             int         *recvcounts,
                             int         *displs,
                             MPI_Datatype recvtype,
                             MPI_Comm     comm )
{
   int status = 0;
   int on_initial_thread = pthread_equal(initial_thread,pthread_self());
   int should_call_mpi = on_initial_thread || pthread_equal(hypre_thread[0],pthread_self());

   hypre_barrier(&mpi_mtx, on_initial_thread);

   if (should_call_mpi)
   {
      status = MPI_Allgatherv(sendbuf,sendcount,sendtype,recvbuf,recvcounts,
                              displs,recvtype,comm);
   }

   hypre_barrier(&mpi_mtx, on_initial_thread);

   return status;
}
示例15: time_allgatherv
double time_allgatherv(struct collParams* p)
{
int i, size2;
int disp = 0;
for ( i = 0; i < p->nranks; i++) {
int size2 = i % (p->size+1);
recvcounts[i] = size2;
rdispls[i] = disp;
disp += size2;
}
MPI_Barrier(MPI_COMM_WORLD);
size2 = p->myrank % (p->size+1);
__TIME_START__;
for (i = 0; i < p->iter; i++) {
MPI_Allgatherv(sbuffer, size2, p->type, rbuffer, recvcounts, rdispls, p->type, p->comm);
__BAR__(p->comm);
}
__TIME_END__;
if (check_buffers) {
check_sbuffer(p->myrank);
for (i = 0; i < p->nranks; i++) {
check_rbuffer(rbuffer, rdispls[i], i, 0, recvcounts[i]);
}
}
return __TIME_USECS__ / (double)p->iter;
}