本文整理汇总了C++中MPI_Allgather函数的典型用法代码示例。如果您正苦于以下问题：C++ MPI_Allgather函数的具体用法？C++ MPI_Allgather怎么用？C++ MPI_Allgather使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了MPI_Allgather函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: run_rma_test
/*
 * run_rma_test - exercise MPI_Get through a dynamically attached RMA window.
 *
 * Every rank allocates a buffer of COUNT ints, attaches it to a dynamic
 * window and publishes its address with MPI_Allgather.  Rank 0 fills its
 * buffer with the pattern idx * COUNT + 1; rank mem_rank then fetches those
 * COUNT ints with MPI_Get and verifies the pattern.
 *
 * nprocs_per_node  selects the reading rank
 *                  (nprocs_per_node + nprocs_per_node / 2).  At least
 *                  2 * nprocs_per_node processes are required; with fewer,
 *                  MPI is finalized and the process exits with EXIT_FAILURE.
 *
 * MPI calls are issued outside assert() so that they are still executed when
 * the program is built with NDEBUG.
 */
void run_rma_test(int nprocs_per_node)
{
    int myrank, nprocs;
    int mem_rank;
    int rc;
    MPI_Win win;
    int *baseptr;

    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs < nprocs_per_node * 2)
    {
        if (!myrank) printf("should start program with at least %d processes\n", nprocs_per_node * 2);
        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* rank that reads rank 0's buffer */
    mem_rank = nprocs_per_node + nprocs_per_node / 2;

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_lock_all(0, win);

    int type_size;
    MPI_Type_size(MPI_INT, &type_size);
    size_t nbytes = (size_t)COUNT * (size_t)type_size;

    rc = MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &baseptr);
    assert(rc == MPI_SUCCESS);
    rc = MPI_Win_attach(win, baseptr, nbytes);
    assert(rc == MPI_SUCCESS);

    /* With a dynamic window the target displacement is the absolute address
     * of the attached memory, so every rank must publish its own. */
    MPI_Aint ldisp;
    MPI_Aint *disps = malloc((size_t)nprocs * sizeof *disps);
    if (disps == NULL)
    {
        fprintf(stderr, "run_rma_test: out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    rc = MPI_Get_address(baseptr, &ldisp);
    assert(rc == MPI_SUCCESS);

    /* recvcount is the number of elements received from EACH rank, not the
     * total; disps only has room for one MPI_Aint per rank. */
    rc = MPI_Allgather(&ldisp, 1, MPI_AINT, disps, 1, MPI_AINT, MPI_COMM_WORLD);
    assert(rc == MPI_SUCCESS);

    if (myrank == 0)
    {
        for (size_t idx = 0; idx < COUNT; ++idx) {
            baseptr[idx] = (int)(idx * COUNT + 1);
        }
    }

    /* make sure rank 0 has written its data before anyone reads it */
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == mem_rank) {
        /* fetch all COUNT elements so that every element verified below has
         * actually been transferred */
        rc = MPI_Get(baseptr, COUNT, MPI_INT, 0, disps[0], COUNT, MPI_INT, win);
        assert(rc == MPI_SUCCESS);
        rc = MPI_Win_flush(0, win);
        assert(rc == MPI_SUCCESS);

        /* must match the pattern written by rank 0 above */
        for (size_t idx = 0; idx < COUNT; ++idx) {
            assert(baseptr[idx] == (int)(idx * COUNT + 1));
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Win_unlock_all(win);
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&win);
    MPI_Free_mem(baseptr);
    free(disps);
    (void)rc; /* only read by assert(); avoids an unused warning with NDEBUG */

    printf("Test finished\n");
}
示例2: declareBindings
//.........这里部分代码省略.........
MPI_Type_size (datatype, &size);
MPI_Aint displacement;
MPI_Type_lb (datatype, &displacement); // L105
MPI_Type_ub (datatype, &displacement);
MPI_Type_commit (&datatype);
MPI_Type_free (&datatype);
MPI_Get_elements (&status, datatype, &count);
void* inbuf;
void* outbuf;
int outsize;
int position;
MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114
int insize;
MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype,
comm); // L116--117
MPI_Pack_size (incount, datatype, comm, &size);
/* === Collectives === */
MPI_Barrier (comm); // L121
int root;
MPI_Bcast (buffer, count, datatype, root, comm); // L123
MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
root, comm); // L124--125
int* recvcounts;
int* displs;
MPI_Gatherv (sendbuf, sendcount, sendtype,
recvbuf, recvcounts, displs, recvtype,
root, comm); // L128--130
MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
root, comm); // L131--132
int* sendcounts;
MPI_Scatterv (sendbuf, sendcounts, displs, sendtype,
recvbuf, recvcount, recvtype, root, comm); // L134--135
MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
comm); // L136--137
MPI_Allgatherv (sendbuf, sendcount, sendtype,
recvbuf, recvcounts, displs, recvtype,
comm); // L138--140
MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
comm); // L141--142
int* sdispls;
int* rdispls;
MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype,
recvbuf, recvcounts, rdispls, recvtype,
comm); // L145--147
MPI_Op op;
MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149
#if 0
MPI_User_function function;
int commute;
MPI_Op_create (function, commute, &op); // L153
#endif
MPI_Op_free (&op); // L155
MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm);
MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm);
MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm);
/* === Groups, contexts, and communicators === */
MPI_Group group;
MPI_Group_size (group, &size); // L162
int rank;
MPI_Group_rank (group, &rank); // L164
MPI_Group group1;
int n;
int* ranks1;
MPI_Group group2;
示例3: ADIOI_LUSTRE_WriteStridedColl
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype,
int file_ptr_type, ADIO_Offset offset,
ADIO_Status *status, int *error_code)
{
/* Uses a generalized version of the extended two-phase method described
* in "An Extended Two-Phase Method for Accessing Sections of
* Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary,
* Scientific Programming, (5)4:301--317, Winter 1996.
* http://www.mcs.anl.gov/home/thakur/ext2ph.ps
*/
ADIOI_Access *my_req;
/* array of nprocs access structures, one for each other process has
this process's request */
ADIOI_Access *others_req;
/* array of nprocs access structures, one for each other process
whose request is written by this process. */
int i, filetype_is_contig, nprocs, myrank, do_collect = 0;
int contig_access_count = 0, buftype_is_contig, interleave_count = 0;
int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs;
ADIO_Offset orig_fp, start_offset, end_offset, off;
ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *end_offsets = NULL;
ADIO_Offset *len_list = NULL;
int **buf_idx = NULL, *striping_info = NULL;
int old_error, tmp_error;
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
orig_fp = fd->fp_ind;
/* IO pattern identification if cb_write isn't disabled */
if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
/* For this process's request, calculate the list of offsets and
lengths in the file and determine the start and end offsets. */
/* Note: end_offset points to the last byte-offset that will be accessed.
* e.g., if start_offset=0 and 100 bytes to be read, end_offset=99
*/
ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
&offset_list, &len_list, &start_offset,
&end_offset, &contig_access_count);
/* each process communicates its start and end offsets to other
* processes. The result is an array each of start and end offsets
* stored in order of process rank.
*/
st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
MPI_Allgather(&start_offset, 1, ADIO_OFFSET, st_offsets, 1,
ADIO_OFFSET, fd->comm);
MPI_Allgather(&end_offset, 1, ADIO_OFFSET, end_offsets, 1,
ADIO_OFFSET, fd->comm);
/* are the accesses of different processes interleaved? */
for (i = 1; i < nprocs; i++)
if ((st_offsets[i] < end_offsets[i-1]) &&
(st_offsets[i] <= end_offsets[i]))
interleave_count++;
/* This is a rudimentary check for interleaving, but should suffice
for the moment. */
/* Two typical access patterns can benefit from collective write.
* 1) the processes are interleaved, and
* 2) the req size is small.
*/
if (interleave_count > 0) {
do_collect = 1;
} else {
do_collect = ADIOI_LUSTRE_Docollect(fd, contig_access_count,
len_list, nprocs);
}
}
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
/* Decide if collective I/O should be done */
if ((!do_collect && fd->hints->cb_write == ADIOI_HINT_AUTO) ||
fd->hints->cb_write == ADIOI_HINT_DISABLE) {
/* use independent accesses */
if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
ADIOI_Free(offset_list);
ADIOI_Free(len_list);
ADIOI_Free(st_offsets);
ADIOI_Free(end_offsets);
}
fd->fp_ind = orig_fp;
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
if (buftype_is_contig && filetype_is_contig) {
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
ADIO_WriteContig(fd, buf, count, datatype,
ADIO_EXPLICIT_OFFSET,
off, status, error_code);
} else
ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
//.........这里部分代码省略.........
示例4: all_gather
void all_gather(const T& elem, std::vector<T>& results) {
#ifdef HAS_MPI
  // One result slot per rank.
  const size_t num_ranks = size();
  if (results.size() != num_ranks) {
    results.resize(num_ranks);
  }

  // Serialize the local element into an in-memory character stream.
  graphlab::charstream local_stream(128);
  graphlab::oarchive local_archive(local_stream);
  local_archive << elem;
  local_stream.flush();
  char* payload = local_stream->c_str();
  int payload_len = (int)local_stream->size();
  assert(payload_len >= 0);

  // Step 1: every rank learns the serialized length of every other rank.
  std::vector<int> byte_counts(num_ranks, -1);
  int status = MPI_Allgather(&payload_len,       // send buffer
                             1,                  // send count
                             MPI_INT,            // send type
                             &(byte_counts[0]),  // recv buffer
                             1,                  // recv count (per rank)
                             MPI_INT,            // recv type
                             MPI_COMM_WORLD);
  assert(status == MPI_SUCCESS);
  for (size_t r = 0; r < byte_counts.size(); ++r) {
    assert(byte_counts[r] >= 0);
  }

  // Step 2: exclusive prefix sum of the lengths gives each rank's offset
  // into the receive buffer; the running total is the buffer size.
  std::vector<int> byte_offsets(byte_counts.size());
  int total_bytes = 0;
  for (size_t r = 0; r < byte_counts.size(); ++r) {
    byte_offsets[r] = total_bytes;
    total_bytes += byte_counts[r];
  }

  // Step 3: gather the variable-length serialized payloads.
  std::vector<char> gathered(total_bytes);
  status = MPI_Allgatherv(payload,             // send buffer
                          payload_len,         // how much to send
                          MPI_BYTE,            // send type
                          &(gathered[0]),      // recv buffer
                          &(byte_counts[0]),   // amount to recv from each rank
                          &(byte_offsets[0]),  // where to place each rank's data
                          MPI_BYTE,
                          MPI_COMM_WORLD);
  assert(status == MPI_SUCCESS);

  // Step 4: deserialize the concatenated payloads, one element per rank,
  // reading them back in the order they were laid out.
  namespace bio = boost::iostreams;
  typedef bio::stream<bio::array_source> icharstream;
  icharstream gathered_stream(&(gathered[0]), gathered.size());
  graphlab::iarchive gathered_archive(gathered_stream);
  for (size_t r = 0; r < results.size(); ++r) {
    gathered_archive >> results[r];
  }
#else
  logstream(LOG_FATAL) << "MPI not installed!" << std::endl;
#endif
} // end of mpi all gather
示例5: uctSort
int uctSort(NimGameState rootState, int maximumIterations, bool useRanks)
{
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
// Get the number of processes
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MctsNode root(0, NULL, &rootState);
for(int i=0; i<maximumIterations; i++)
{
MctsNode* node = &root;
NimGameState state = rootState.clone();
// std::cout<< "Copy Repr " << node->representation() << std::endl;
while(node->actionsNotTaken.empty() && !node->childNodes.empty())
{
// std::cout<< "Entering Selection Step" << std::endl;
node = node->selectNextChildNode();
state.performAction(node->previousAction);
}
if(!node->actionsNotTaken.empty())
{
// std::cout<< "Entering Expansion Step" << std::endl;
std::random_shuffle(node->actionsNotTaken.begin(), node->actionsNotTaken.end());
int action = node->actionsNotTaken.back();
// std::cout<< "Action: " << action << std::endl;
state.performAction(action);
// std::cout<< state.representation() << std::endl;
node = node->addChildNode(action, &state);
// std::cout<< node->representation() << std::endl;
}
while(!state.getAvailableActions().empty())
{
// std::cout<< "Entering Simulation Step" << std::endl;
std::random_shuffle(node->actionsNotTaken.begin(), node->actionsNotTaken.end());
int action = node->actionsNotTaken.back();
state.performAction(action);
}
while(node->parentNode != NULL)
{
// std::cout<< "Entering Backpropagation Step" << std::endl;
node->update(state.getValue(node->lastActivePlayer));
node = node->parentNode;
}
}
if(useRanks && world_size > 1)
{
// Root synchronization
std::string serialized = MctsNodeSerializer::Serialize(root);
serialized += "#";
if (world_rank == 0)
{
//std::cout << "Sending:" << serialized.length() << "/" << DEFAULT_MESSAGE_SIZE << std::endl;
//std::cout << "Rank 0, root looks before merge: " << serialized << std::endl;
}
if (serialized.length() > DEFAULT_MESSAGE_SIZE)
{
std::cout << "Error:" << serialized.length() << "/" << DEFAULT_MESSAGE_SIZE << std::endl;
std::cout << "Seralized tree is too big!" << std::endl;
return -1;
}
if (serialized.length() < DEFAULT_MESSAGE_SIZE)
{
serialized.resize(DEFAULT_MESSAGE_SIZE, '@');
}
char *rcv_buffer = new char[DEFAULT_MESSAGE_SIZE * world_size];
MPI_Allgather((void*)serialized.c_str(), DEFAULT_MESSAGE_SIZE, MPI::CHAR,
rcv_buffer, DEFAULT_MESSAGE_SIZE, MPI::CHAR,
MPI_COMM_WORLD);
std::string received(rcv_buffer, (unsigned long) (DEFAULT_MESSAGE_SIZE * world_size));
delete rcv_buffer;
//long ct = std::count(received.begin(), received.end(), '@');
received.erase(std::remove(received.begin(), received.end(), '@'), received.end());
std::stringstream receivedDataStream(received);
//std::cout << "All data: " << received << std::endl;
std::string serializedTree;
while (std::getline(receivedDataStream, serializedTree, '#'))
{
serializedTree = serializedTree.substr(0, serializedTree.length() - 1);
if (world_rank == 0)
{
//std::cout << "Rank 0, deserialized new tree before unboxing: " << serializedTree << std::endl;
//std::cout << serialized.length() << std::endl;
//.........这里部分代码省略.........
示例6: data
//.........这里部分代码省略.........
/*---------------------------------------------------*/
/* allocate spaces */
ierr = PetscMalloc1(is_max,&n);
CHKERRQ(ierr);
len = 0;
for (i=0; i<is_max; i++) {
ierr = ISGetLocalSize(is[i],&n[i]);
CHKERRQ(ierr);
len += n[i];
}
if (!len) {
is_max = 0;
} else {
len += 1 + is_max; /* max length of data1 for one processor */
}
ierr = PetscMalloc1(size*len+1,&data1);
CHKERRQ(ierr);
ierr = PetscMalloc1(size,&data1_start);
CHKERRQ(ierr);
for (i=0; i<size; i++) data1_start[i] = data1 + i*len;
ierr = PetscMalloc4(size,&len_s,size,&btable,size,&iwork,size+1,&Bowners);
CHKERRQ(ierr);
/* gather c->garray from all processors */
ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local);
CHKERRQ(ierr);
ierr = ISAllGather(garray_local, &garray_gl);
CHKERRQ(ierr);
ierr = ISDestroy(&garray_local);
CHKERRQ(ierr);
ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);
CHKERRQ(ierr);
Bowners[0] = 0;
for (i=0; i<size; i++) Bowners[i+1] += Bowners[i];
if (is_max) {
/* hash table ctable which maps c->row to proc_id) */
ierr = PetscMalloc1(Mbs,&ctable);
CHKERRQ(ierr);
for (proc_id=0,j=0; proc_id<size; proc_id++) {
for (; j<C->rmap->range[proc_id+1]/bs; j++) ctable[j] = proc_id;
}
/* hash tables marking c->garray */
ierr = ISGetIndices(garray_gl,&idx_i);
CHKERRQ(ierr);
for (i=0; i<size; i++) {
table_i = table[i];
ierr = PetscBTMemzero(Mbs,table_i);
CHKERRQ(ierr);
for (j = Bowners[i]; j<Bowners[i+1]; j++) { /* go through B cols of proc[i]*/
ierr = PetscBTSet(table_i,idx_i[j]);
CHKERRQ(ierr);
}
}
ierr = ISRestoreIndices(garray_gl,&idx_i);
CHKERRQ(ierr);
} /* if (is_max) */
ierr = ISDestroy(&garray_gl);
CHKERRQ(ierr);
/* evaluate communication - mesg to who, length, and buffer space */
示例7: Parallel_daypx
/*-----------------------------------------------------------------------------
 * Function:  Parallel_daypx
 * Purpose:   Run daypx on this process's block (per the original note:
 *            scalar a * y + x = y), then gather the loc_y block of every
 *            process so that each process ends up with the complete vector.
 * In args:   a      scalar, passed through to daypx
 *            loc_x  this process's block of x, passed through to daypx
 *            n      number of elements in the local block; also used as
 *                   both the send count and the per-process receive count
 *                   of the gather
 *            comm   communicator over which the blocks are gathered
 * In/out:    loc_y  this process's block of y; handed to daypx and then
 *                   used as the send buffer of the gather
 *                   (presumably updated in place by daypx -- confirm)
 * Out args:  y      receive buffer of the gather: gets n doubles from every
 *                   process in comm, so it needs room for
 *                   n * (number of processes in comm) doubles
 */
void Parallel_daypx(double a, double loc_x[], double loc_y[], double y[],
int n, MPI_Comm comm) {
daypx(a, loc_x, loc_y, n);
MPI_Allgather(loc_y, n, MPI_DOUBLE, y, n, MPI_DOUBLE, comm);
} /* Parallel_daypx */
示例8: COM
/* communicate integers and doubles using point to point communication */
int COM (MPI_Comm comm, int tag,
COMDATA *send, int nsend,
COMDATA **recv, int *nrecv) /* recv is contiguous => free (*recv) releases all memory */
{
COMDATA *cd;
int rank,
ncpu,
send_size,
(*send_sizes) [3],
*send_position,
*send_rank,
send_count,
*send_rank_all,
*send_count_all,
*send_rank_disp,
*recv_rank,
(*recv_sizes) [3],
recv_count,
i, j, k, l;
char **send_data,
**recv_data;
MPI_Request *req;
MPI_Status *sta;
void *p;
MPI_Comm_rank (comm, &rank);
MPI_Comm_size (comm, &ncpu);
ERRMEM (send_sizes = MEM_CALLOC (ncpu * sizeof (int [3])));
ERRMEM (send_position = MEM_CALLOC (ncpu * sizeof (int)));
ERRMEM (send_rank = malloc (ncpu * sizeof (int)));
ERRMEM (send_data = malloc (ncpu * sizeof (char*)));
/* compute send sizes */
for (i = 0, cd = send; i < nsend; i ++, cd ++)
{
send_sizes [cd->rank][0] += cd->ints;
send_sizes [cd->rank][1] += cd->doubles;
MPI_Pack_size (cd->ints, MPI_INT, comm, &j);
MPI_Pack_size (cd->doubles, MPI_DOUBLE, comm, &k);
send_sizes [cd->rank][2] += (j + k);
}
/* allocate send buffers */
for (send_size = i = 0; i < ncpu; i ++)
{
if (send_sizes [i][2])
{
ERRMEM (send_data [i] = malloc (send_sizes [i][2]));
send_position [i] = 0;
send_size += send_sizes [i][2];
}
}
/* pack ints */
for (i = 0, cd = send; i < nsend; i ++, cd ++)
{
if (cd->ints)
{
MPI_Pack (cd->i, cd->ints, MPI_INT, send_data [cd->rank], send_sizes [cd->rank][2], &send_position [cd->rank], comm);
}
}
/* pack doubles */
for (i = 0, cd = send; i < nsend; i ++, cd ++)
{
if (cd->doubles)
{
MPI_Pack (cd->d, cd->doubles, MPI_DOUBLE, send_data [cd->rank], send_sizes [cd->rank][2], &send_position [cd->rank], comm);
}
}
#if DEBUG
for (i = 0; i < ncpu; i ++)
{
ASSERT_DEBUG (send_position [i] <= send_sizes [i][2], "Incorrect packing");
}
#endif
/* compute send ranks and move data */
for (send_count = i = 0; i < ncpu; i ++)
{
if (send_sizes [i][2])
{
send_rank [send_count] = i;
send_data [send_count] = send_data [i];
send_sizes [send_count][0] = send_sizes [i][0];
send_sizes [send_count][1] = send_sizes [i][1];
send_sizes [send_count][2] = send_sizes [i][2];
send_count ++;
}
}
ERRMEM (send_count_all = malloc (ncpu * sizeof (int)));
ERRMEM (recv_rank = malloc (ncpu * sizeof (int)));
/* gather all send ranks */
MPI_Allgather (&send_count, 1, MPI_INT, send_count_all, 1, MPI_INT, comm);
ERRMEM (send_rank_disp = malloc (ncpu * sizeof (int)));
//.........这里部分代码省略.........
示例9: main
//.........这里部分代码省略.........
// start time, total time should include distributing the data
// to other processes as part of the parallelization
start_time = MPI_Wtime();
// make every proc has myrows rows of the mat
if(my_rank == 0){
int dest = 0;
for(i = myrows; i < n; i++){
dest = i / myrows;
MPI_Send(&mat[i][0], n + 1, MPI_DOUBLE, dest, i, MPI_COMM_WORLD);
}
for(i = 0; i < myrows; i++){
for(j = 0; j < n + 1; j++)
mymat[i][j] = mat[i][j];
}
}
else{
for(i = 0; i < myrows; i++){
MPI_Recv(&mymat[i][0], n + 1, MPI_DOUBLE, 0, my_rank * myrows + i, MPI_COMM_WORLD, &status);
}
}
iter=0;
double allbb;
double compute_time = MPI_Wtime();
do {
bb=0;
// all proc get all x
MPI_Allgather(myx, myrows, MPI_DOUBLE, x, myrows, MPI_DOUBLE, MPI_COMM_WORLD);
for(i=0;i<myrows;i++){
sum=0;
for(j=0;j<n;j++){
if(j!=i+myrows*my_rank){
sum=sum+mymat[i][j]*x[j];
}
}
temp=(mymat[i][n]-sum) / mymat[i][i+myrows*my_rank];
diff=fabs(x[i]-temp);
if(diff>bb){
bb=diff;
}
myx[i]=temp;
}
// each process get same bb value so all can go out of loop
MPI_Allreduce( &bb, &allbb, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD );
iter++;
}
while(allbb>=e);
// gather final x for print
MPI_Allgather(myx, myrows, MPI_DOUBLE, x, myrows, MPI_DOUBLE, MPI_COMM_WORLD);
if(my_rank ==0 ){
// record end time of computation
end_time = MPI_Wtime();
total_time = end_time - start_time;
printf("Total time:%lf; Computation time is:%lf\n", total_time, end_time - compute_time);
示例10: ADIOI_PE_gen_agg_ranklist
//.........这里部分代码省略.........
fd->hints->ranklist[aggIndex++] = newAggRank;
else
FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%d' is specified multiple times in MP_IOTASKLIST - duplicates are ignored.\n",newAggRank);
}
}
else {
FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not a valid integer task id - it will be ignored.\n",tmpBuf);
}
/* At the end check whether the list is shorter than specified. */
if (ioTaskListPos == ioTaskListLen) {
if (aggIndex == 0) {
FPRINTF(stderr,"ERROR: ATTENTION: No aggregators were correctly specified in MP_IOTASKLIST - default aggregator selection will be used.\n");
ADIOI_Free(fd->hints->ranklist);
}
else if (aggIndex < numAggs)
FPRINTF(stderr,"ERROR: ATTENTION: %d aggregators were specified in MP_IOTASKLIST but only %d were correctly specified - setting the number of aggregators to %d.\n",numAggs, aggIndex,aggIndex);
numAggs = aggIndex;
}
}
}
if (numAggs == 0) {
MPID_Comm *mpidCommData;
MPID_Comm_get_ptr(fd->comm,mpidCommData);
int localSize = mpidCommData->local_size;
// get my node rank
int myNodeRank = mpidCommData->intranode_table[mpidCommData->rank];
int *allNodeRanks = (int *) ADIOI_Malloc (localSize * sizeof(int));
allNodeRanks[myRank] = myNodeRank;
MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, allNodeRanks, 1, MPI_INT, fd->comm);
#ifdef AGG_DEBUG
printf("MPID_Comm data: local_size is %d\nintranode_table entries:\n",mpidCommData->local_size);
for (i=0;i<localSize;i++) {
printf("%d ",mpidCommData->intranode_table[i]);
}
printf("\ninternode_table entries:\n");
for (i=0;i<localSize;i++) {
printf("%d ",mpidCommData->internode_table[i]);
}
printf("\n");
printf("\nallNodeRanks entries:\n");
for (i=0;i<localSize;i++) {
printf("%d ",allNodeRanks[i]);
}
printf("\n");
#endif
if (ioAggrCount) {
int cntType = -1;
if ( strcasecmp(ioAggrCount, "ALL") ) {
if ( (cntType = atoi(ioAggrCount)) <= 0 ) {
/* If the input is non-numeric or less than 1, then assume */
/* 1 aggregator per node. Note: atoi("-1") returns -1. */
/* No warning message given here -- done earlier. */
cntType = -1;
}
}
else {
示例11: COM_Pattern
/* create a repetitive point to point communication pattern;
* ranks and sizes must not change during the communication;
* pointers to send and receive buffers data must not change */
void* COM_Pattern (MPI_Comm comm, int tag,
COMDATA *send, int nsend,
COMDATA **recv, int *nrecv) /* recv is contiguous => free (*recv) releases all memory */
{
COMPATTERN *pattern;
COMDATA *cd;
int rank,
ncpu,
*send_rank_all,
*send_count_all,
*send_rank_disp,
i, j, k, l;
void *p;
MPI_Comm_rank (comm, &rank);
MPI_Comm_size (comm, &ncpu);
ERRMEM (pattern = malloc (sizeof (COMPATTERN)));
ERRMEM (pattern->rankmap = MEM_CALLOC (ncpu * sizeof (int)));
ERRMEM (pattern->send_sizes = MEM_CALLOC (ncpu * sizeof (int [3])));
ERRMEM (pattern->send_position = MEM_CALLOC (ncpu * sizeof (int)));
ERRMEM (pattern->send_rank = malloc (ncpu * sizeof (int)));
ERRMEM (pattern->send_data = malloc (ncpu * sizeof (char*)));
pattern->nsend = nsend;
pattern->send = send;
pattern->comm = comm;
pattern->tag = tag;
/* compute send sizes */
for (i = 0, cd = send; i < nsend; i ++, cd ++)
{
pattern->send_sizes [cd->rank][0] += cd->ints;
pattern->send_sizes [cd->rank][1] += cd->doubles;
MPI_Pack_size (cd->ints, MPI_INT, comm, &j);
MPI_Pack_size (cd->doubles, MPI_DOUBLE, comm, &k);
pattern->send_sizes [cd->rank][2] += (j + k);
}
/* allocate send buffers and prepare rank map */
for (pattern->send_size = i = j = 0; i < ncpu; i ++)
{
if (pattern->send_sizes [i][2])
{
ERRMEM (pattern->send_data [i] = malloc (pattern->send_sizes [i][2]));
pattern->rankmap [i] = j;
pattern->send_size += pattern->send_sizes [i][2];
j ++;
}
}
/* compute send ranks and move data */
for (pattern->send_count = i = 0; i < ncpu; i ++)
{
if (pattern->send_sizes [i][2])
{
pattern->send_rank [pattern->send_count] = i;
pattern->send_data [pattern->send_count] = pattern->send_data [i];
pattern->send_sizes [pattern->send_count][0] = pattern->send_sizes [i][0];
pattern->send_sizes [pattern->send_count][1] = pattern->send_sizes [i][1];
pattern->send_sizes [pattern->send_count][2] = pattern->send_sizes [i][2];
pattern->send_count ++;
}
}
ERRMEM (send_count_all = malloc (ncpu * sizeof (int)));
ERRMEM (pattern->recv_rank = malloc (ncpu * sizeof (int)));
/* gather all send ranks */
MPI_Allgather (&pattern->send_count, 1, MPI_INT, send_count_all, 1, MPI_INT, comm);
ERRMEM (send_rank_disp = malloc (ncpu * sizeof (int)));
for (send_rank_disp [0] = l = i = 0; i < ncpu; i ++)
{ l += send_count_all [i]; if (i < ncpu-1) send_rank_disp [i+1] = l; }
ERRMEM (send_rank_all = malloc (l * sizeof (int)));
MPI_Allgatherv (pattern->send_rank, pattern->send_count, MPI_INT, send_rank_all, send_count_all, send_rank_disp, MPI_INT, comm);
/* compute receive ranks */
for (pattern->recv_count = k = i = 0; i < l; i += send_count_all [k], k ++)
{
for (j = 0; j < send_count_all [k]; j ++)
{
if (send_rank_all [i+j] == rank) /* 'k'th rank is sending here */
{
pattern->recv_rank [pattern->recv_count] = k;
pattern->recv_count ++;
break;
}
}
}
ERRMEM (pattern->recv_sizes = malloc (pattern->recv_count * sizeof (int [3])));
ERRMEM (pattern->recv_req = malloc (pattern->recv_count * sizeof (MPI_Request)));
ERRMEM (pattern->recv_sta = malloc (pattern->recv_count * sizeof (MPI_Status)));
ERRMEM (pattern->send_req = malloc (pattern->send_count * sizeof (MPI_Request)));
ERRMEM (pattern->send_sta = malloc (pattern->send_count * sizeof (MPI_Status)));
/* communicate receive sizes */
for (i = 0; i < pattern->recv_count; i ++)
//.........这里部分代码省略.........
示例12: splitSources
void splitSources(std::vector<double>& expandSources, std::vector<double>& directSources,
std::vector<ot::TreeNode>& fgtList, std::vector<double>& sources, const unsigned int minPtsInFgt,
const unsigned int FgtLev, MPI_Comm comm) {
PetscLogEventBegin(splitSourcesEvent, 0, 0, 0, 0);
int numPts = ((sources.size())/4);
#ifdef DEBUG
assert(!(sources.empty()));
assert(fgtList.empty());
#endif
{
unsigned int px = static_cast<unsigned int>(sources[0]*(__DTPMD__));
unsigned int py = static_cast<unsigned int>(sources[1]*(__DTPMD__));
unsigned int pz = static_cast<unsigned int>(sources[2]*(__DTPMD__));
ot::TreeNode ptOct(px, py, pz, __MAX_DEPTH__, __DIM__, __MAX_DEPTH__);
ot::TreeNode newFgt = ptOct.getAncestor(FgtLev);
fgtList.push_back(newFgt);
}
for(int i = 1; i < numPts; ++i) {
unsigned int px = static_cast<unsigned int>(sources[4*i]*(__DTPMD__));
unsigned int py = static_cast<unsigned int>(sources[(4*i)+1]*(__DTPMD__));
unsigned int pz = static_cast<unsigned int>(sources[(4*i)+2]*(__DTPMD__));
ot::TreeNode ptOct(px, py, pz, __MAX_DEPTH__, __DIM__, __MAX_DEPTH__);
ot::TreeNode newFgt = ptOct.getAncestor(FgtLev);
if(fgtList[fgtList.size() - 1] == newFgt) {
fgtList[fgtList.size() - 1].addWeight(1);
} else {
fgtList.push_back(newFgt);
}
}//end for i
#ifdef DEBUG
assert(!(fgtList.empty()));
#endif
int rank;
int npes;
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &npes);
int localFlag = 0;
if( (rank > 0) && (rank < (npes - 1)) && ((fgtList.size()) == 1) ) {
localFlag = 1;
}
int globalFlag;
MPI_Allreduce(&localFlag, &globalFlag, 1, MPI_INT, MPI_SUM, comm);
int prevRank = rank - 1;
int nextRank = rank + 1;
if(globalFlag > 0) {
int gatherSendBuf = 0;
if( (rank > 0) && (rank < (npes - 1)) && (fgtList.size() == 1) ) {
gatherSendBuf = sources.size();
}
int* gatherList = new int[npes];
MPI_Allgather((&gatherSendBuf), 1, MPI_INT, gatherList, 1, MPI_INT, comm);
if(rank > 0) {
while(gatherList[prevRank] > 0) {
--prevRank;
}//end while
}
if(rank < (npes - 1)) {
while(gatherList[nextRank] > 0) {
++nextRank;
}//end while
}
int* sendFgtCnts = new int[npes];
int* recvFgtCnts = new int[npes];
int* sendSourceCnts = new int[npes];
int* recvSourceCnts = new int[npes];
for(int i = 0; i < npes; ++i) {
sendFgtCnts[i] = 0;
recvFgtCnts[i] = 0;
sendSourceCnts[i] = 0;
recvSourceCnts[i] = 0;
}//end i
if(gatherSendBuf > 0) {
sendFgtCnts[prevRank] = 1;
sendSourceCnts[prevRank] = gatherSendBuf;
}
for(int i = rank + 1; i < nextRank; ++i) {
recvFgtCnts[i] = 1;
recvSourceCnts[i] = gatherList[i];
}//end i
delete [] gatherList;
int* sendFgtDisps = new int[npes];
//.........这里部分代码省略.........
示例13: mpi_init
// Construct a distributed matrix from a serial CrsMatrix.
//
// Exactly one rank of ncomm is expected to pass a non-empty matrix `s`
// (s.rows() > 0); that rank becomes the sender.  The sender's dimensions and
// options are broadcast, then each rank receives its slice of the row
// pointers, column indices and values by point-to-point messages.
// Raises ERROR if zero or more than one rank qualifies as sender.
//
// NOTE(review): mycomm, mysize, myrank and nroots are not set in this
// function; presumably mpi_init()/resize() establish them -- confirm.
template <class T> PCrsMatrix<U>::PCrsMatrix(const CrsMatrix<T>& s, const MPI_Comm ncomm)
{
    mpi_init(ncomm);

    int iamsender=(s.rows()>0?1:0);  //! we rely on the fact that only one node has this cond. fulfilled.

    // now we let everyone know who is the sender
    // (MPI_INT is the MPI datatype matching C/C++ int; MPI_INTEGER is the
    // Fortran INTEGER handle and is not guaranteed to be usable here)
    std::valarray<int> slist(mysize);
    MPI_Allgather(&iamsender, 1, MPI_INT, &slist[0], 1, MPI_INT, mycomm);

    int sender=-1;
    for (int i=0; i<mysize; ++i) {
        if (slist[i]==1) {
            if (sender==-1) {
                sender=i;
            } else {
                ERROR("More than one process qualified as sender!");
            }
        }
    }
    if (sender==-1) {
        ERROR("No process qualified as sender!");
    }

    // now we get the matrix size and resize it.
    typename PCrsMatrix<U>::index_type dim[2];
    if (iamsender) { dim[0]=s.rows(); dim[1]=s.cols(); }
    MPI_Bcast(dim,2,mpi_index,sender,mycomm);
    resize(dim[0],dim[1]);

    // now we copy the options, as if it were an array of char...
    // (the broadcast writes into s.pops on the non-sender ranks, hence the
    // const_cast on the const argument)
    MPI_Bcast(&(const_cast<CrsMatrix<T> &>(s).pops),sizeof(pops),MPI_CHAR,sender,mycomm);
    setops(s.pops);

    // now we can send out the row indices to the nodes.
    // The receive is posted before the sender's blocking sends so that the
    // sender's message to itself can be matched.
    unsigned long nmyrows=nroots[myrank+1]-nroots[myrank];
    MPI_Request rreq;
    MPI_Irecv(&pmat.rpoints[0],nmyrows+1,mpi_index,sender,101,mycomm,&rreq);
    if (iamsender) {
        for (int i=0; i<mysize; ++i)
            MPI_Send(&(const_cast<CrsMatrix<T> &>(s).rpoints[nroots[i]]),nroots[i+1]-nroots[i]+1,mpi_index,i,101,mycomm);
    }

    //wait for receive
    MPI_Status rstatus;
    MPI_Wait(&rreq, &rstatus);

    //then shift the indices as necessary, since we are getting chunks of data
    for (typename PCrsMatrix<U>::index_type i=1;i<=nmyrows; ++i)
        pmat.rpoints[i]-=pmat.rpoints[0];
    pmat.rpoints[0]=0;
    pmat.presize(pmat.rpoints[nmyrows]);

    //very well. now we can share the column indices and the data!
    MPI_Request rreq_i, rreq_d;
    MPI_Irecv(&pmat.indices[0],pmat.rpoints[nmyrows],mpi_index,sender,202,mycomm,&rreq_i);
    if (iamsender) {
        for (int i=0; i<mysize; ++i)
            MPI_Send(&(const_cast<CrsMatrix<T> &>(s).indices[s.rpoints[nroots[i]]]),
                     s.rpoints[nroots[i+1]]-s.rpoints[nroots[i]],mpi_index,i,202,mycomm);
    }

    MPI_Irecv(&pmat.values[0],pmat.rpoints[nmyrows],mpi_data,sender,303,mycomm,&rreq_d);
    if (iamsender) {
        for (int i=0; i<mysize; ++i)
            MPI_Send(&(const_cast<CrsMatrix<T> &>(s).values[s.rpoints[nroots[i]]]),
                     s.rpoints[nroots[i+1]]-s.rpoints[nroots[i]],mpi_data,i,303,mycomm);
    }

    MPI_Wait(&rreq_i, &rstatus);
    MPI_Wait(&rreq_d, &rstatus);
}
示例14: main
/*------------------------------------------------*/
int main (int argc, char **argv)
{
int cols, rows, iter, particles, x, y;
int *pic;
PartStr *p, *changes, *totalChanges;
int rank, num, i, numChanges, numTotalChanges;
int *changesPerNode, *buffDispl;
MPI_Init (&argc, &argv);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
MPI_Comm_size (MPI_COMM_WORLD, &num);
if (argc < 2) // use default values if user does not specify anything
{
cols = PIC_SIZE + 2;
rows = PIC_SIZE + 2;
iter = MAX_ITER;
particles = PARTICLES;
}
else
{
cols = atoi (argv[1]) + 2;
rows = atoi (argv[2]) + 2;
particles = atoi (argv[3]);
iter = atoi (argv[4]);
}
// initialize the random number generator
srand(rank);
// srand(time(0)); // this should be used instead if the program runs on multiple hosts
int particlesPerNode = particles / num;
if (rank == num - 1)
particlesPerNode = particles - particlesPerNode * (num - 1); // in case particles cannot be split evenly
// printf("%i has %i\n", rank, particlesPerNode);
pic = (int *) malloc (sizeof (int) * cols * rows);
p = (PartStr *) malloc (sizeof (PartStr) * particlesPerNode);
changes = (PartStr *) malloc (sizeof (PartStr) * particlesPerNode);
totalChanges = (PartStr *) malloc (sizeof (PartStr) * particlesPerNode);
changesPerNode = (int *) malloc (sizeof (int) * num);
buffDispl = (int *) malloc (sizeof (int) * num);
assert (pic != 0 && p != 0 && changes != 0 && totalChanges != 0
&& changesPerNode != 0);
// MPI user type declaration
int lengths[2] = { 1, 1 };
MPI_Datatype types[2] = { MPI_INT, MPI_INT };
MPI_Aint add1, add2;
MPI_Aint displ[2];
MPI_Datatype Point;
MPI_Address (p, &add1);
MPI_Address (&(p[0].y), &add2);
displ[0] = 0;
displ[1] = add2 - add1;
MPI_Type_struct (2, lengths, displ, types, &Point);
MPI_Type_commit (&Point);
dla_init_plist (pic, rows, cols, p, particlesPerNode, 1);
while (--iter)
{
dla_evolve_plist (pic, rows, cols, p, &particlesPerNode, changes, &numChanges);
// printf("%i changed %i on iter %i : ",rank, numChanges, iter);
// for(i=0;i<numChanges;i++) printf("(%i, %i) ", changes[i].x, changes[i].y);
// printf("\n");
//exchange information with other nodes
MPI_Allgather (&numChanges, 1, MPI_INT, changesPerNode, 1, MPI_INT, MPI_COMM_WORLD);
//calculate offsets
numTotalChanges = 0;
for (i = 0; i < num; i++)
{
buffDispl[i] = numTotalChanges;
numTotalChanges += changesPerNode[i];
}
// if(rank==0)
// {
// for(i=0;i<num;i++)
// printf("%i tries to send %i\n",i,changesPerNode[i]);
// printf("-----------\n");
// }
if(numTotalChanges >0)
{
MPI_Allgatherv (changes, numChanges, Point,
totalChanges, changesPerNode, buffDispl, Point,
MPI_COMM_WORLD);
apply_changes (pic, rows, cols, totalChanges, numTotalChanges);
// if(rank==0)
// {
// printf("Total changes %i : ", numTotalChanges);
// for(i=0;i<numTotalChanges;i++) printf("(%i, %i) ", totalChanges[i].x, totalChanges[i].y);
//
// printf("\n");
//.........这里部分代码省略.........
示例15: M_vertexIter
//.........这里部分代码省略.........
}
EIter_delete(eit);
// search for flagged faces (on boundary only)
// -------------------------------------------------------
if (theMesh->getDim()==3){
// External boundary condition defined
// -------------------------------------------------------
if (pSimPar->SimulationHas_BC_ExternalDefinition()){
double coords[3], x, y, z;
FIter fit = M_faceIter( theMesh );
while ( (face = FIter_next(fit)) ){
if (!theMesh->getRefinementDepth(face)){
int flag = EN_getFlag(face);
if ( !pSimPar->isNodeFree(flag) ){
for (i=0; i<3; i++){
ID = EN_id(face->get(0,i));
pVertex v = (pVertex)theMesh->getVertex(ID);
ID = get_AppToPETSc_Ordering(ID);
V_coord(v,coords);
x = coords[0];
y = coords[1];
z = coords[2];
dirichlet[ID] = pSimPar->exact_solution(x,y,z);
}
}
}
}
FIter_delete(fit);
}
else{
// conventional (Dirichlet) boundary condition: specified in numeric.dat
// --------------------------------------------------------------------
FIter fit = M_faceIter( theMesh );
while ( (face = FIter_next(fit)) ){
if (!theMesh->getRefinementDepth(face)){
int flag = EN_getFlag(face);
if ( !pSimPar->isNodeFree(flag) ){
for (i=0; i<3; i++){
ID = get_AppToPETSc_Ordering(EN_id(face->get(0,i)));
dirichlet[ID] = pSimPar->getBC_Value(flag);
}
}
}
}
FIter_delete(fit);
}
}
if (!P_getSumInt(dirichlet.size()) ){
throw Exception(__LINE__,__FILE__,"Prescribed (dirichlet) nodes were not found. Simulation cannot proceed.\n");
}
// go ahead only if parallel
if (P_size()==1) return 0;
// now, all partitions must know all prescribed nodes and their flags
// first of all, let partitions aware of how many prescribed nodes exist on each one
// if processor p does not have any prescribed node let nLPN=1 because p cannot send 0 element
int nLPN = dirichlet.size();
int *recvLP = new int[P_size()];
MPI_Allgather ( &nLPN, 1, MPI_INT, recvLP, 1, MPI_INT, MPI_COMM_WORLD );
// number of global prescribed nodes
// Note that nGPN value is not necessary the real global prescribed nodes
// Nodes on partition boundary can be counted twice or more
int nGPN=0;
for (i=0; i<P_size(); i++) nGPN += recvLP[i];
// sPIDs = send prescribed IDs sPFlags = send prescribed flags
i=0;
int *sPIDs = new int[nLPN];
double *sPFlags = new double[nLPN];
for(MIter mit = dirichlet.begin(); mit != dirichlet.end(); mit++){
sPIDs[i] = mit->first;
sPFlags[i] = mit->second;
i++;
}
// rcount says how many values each rank will send
int *rcounts = recvLP;
// displs says where to start to read in recv buffer
int *displs = new int[P_size()];
displs[0] = 0;
for (i=1; i<P_size(); i++) displs[i] = displs[i-1]+recvLP[i-1];
int *rPIDs = new int[nGPN];
// get all prescribed nodes
MPI_Allgatherv(sPIDs,nLPN,MPI_INT,rPIDs,rcounts,displs,MPI_INT,MPI_COMM_WORLD);
double *rPFlags = new double[nGPN];
// get flags from all prescribed nodes
MPI_Allgatherv(sPFlags,nLPN,MPI_DOUBLE,rPFlags,rcounts,displs,MPI_DOUBLE,MPI_COMM_WORLD);
for (i=0; i<nGPN; i++) dirichlet.insert( pair<int,double>(rPIDs[i],rPFlags[i]) );
delete[] sPIDs; sPIDs=0;
delete[] sPFlags; sPFlags=0;
delete[] rPIDs; rPIDs=0;
delete[] rcounts; rcounts=0;
delete[] displs; displs=0;
return 0;
}