当前位置: 首页>>代码示例>>C++>>正文


C++ MPI_Gather函数代码示例

本文整理汇总了C++中MPI_Gather函数的典型用法代码示例。如果您正苦于以下问题：C++ MPI_Gather函数的具体用法？C++ MPI_Gather怎么用？C++ MPI_Gather使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了MPI_Gather函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

int main ( int argc, char *argv[] ) {

  // Auxiliary variables
  int rank;
  int npcs;
  int step;
  dmn domain;
  double wtime;

  // Solution arrays
  double *g_u; /* will be allocated in ROOT only */ 
  double *t_u;
  double *t_un;

  // Initialize MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &npcs);

  // Manage Domain sizes
  domain = Manage_Domain(rank,npcs); 

  // Allocate Memory
  Manage_Memory(0,domain,&g_u,&t_u,&t_un);

  // Root mode: Build Initial Condition and scatter it to the rest of processors
  if (domain.rank==ROOT) Call_IC(2,g_u);
  MPI_Scatter(g_u, domain.size, MPI_DOUBLE, t_u+NX*NY, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);

  // Exchage Halo regions
  Manage_Comms(domain,&t_u); MPI_Barrier(MPI_COMM_WORLD);

  // ROOT mode: Record the starting time.
  if (rank==ROOT) wtime=MPI_Wtime();

  // Asynchronous MPI Solver
  for (step = 0; step < NO_STEPS; step+=2) {
    // print iteration in ROOT mode
    if (rank==ROOT && step%10000==0) printf("  Step %d of %d\n",step,(int)NO_STEPS);
    
    // Exchange Boundaries and compute stencil
    Call_Laplace(domain,&t_u,&t_un); Manage_Comms(domain,&t_un); // 1st iter
    Call_Laplace(domain,&t_un,&t_u); Manage_Comms(domain,&t_u ); // 2nd iter
  }
  MPI_Barrier(MPI_COMM_WORLD);

  // ROOT mode: Record the final time.
  if (rank==ROOT) {
    wtime = MPI_Wtime()-wtime;
    printf ("\n Wall clock elapsed seconds = %f\n\n", wtime );
  }
  
  // Gather solutions to ROOT and write solution in ROOT mode
  MPI_Gather(t_u+NX*NY, domain.size, MPI_DOUBLE, g_u, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);
  if (rank==ROOT) Save_Results(g_u);

  // Free Memory
  Manage_Memory(1,domain,&g_u,&t_u,&t_un); MPI_Barrier(MPI_COMM_WORLD);

  // Terminate MPI.
  MPI_Finalize();

  // ROOT mode: Terminate.
  if (rank==ROOT) {
    printf ("HEAT_MPI:\n" );
    printf ("  Normal end of execution.\n\n" );
  }

  return 0;
}
开发者ID:Haider-BA,项目名称:Matlab2CPP,代码行数:70,代码来源:main.cpp

示例2: main

int main (int argc, char *argv[])
{
    int err;
    double time, time_limit, time_maxMsg;

    int iter, iter_limit;
    size_t size, messStart, messStop, mem_limit;
    int testFlags, ndims, partsize;
    int k;

    char  hostname[256];
    char* hostnames;

    int root = 0;

    struct argList args;
    /* process the command-line arguments, printing usage info on error */
    if (!processArgs(argc, argv, &args)) { usage(); }
    iter       = args.iters;
    messStart  = args.messStart;
    messStop   = args.messStop;
    mem_limit  = args.memLimit;
    time_limit = args.timeLimit;
    testFlags  = args.testFlags;
    check_buffers = args.checkBuffers;
    ndims      = args.ndims;
    partsize   = args.partSize; 

    /* initialize MPI */
    err = MPI_Init(&argc, &argv);
    if (err) { printf("Error in MPI_Init\n"); exit(1); }

    /* determine who we are in the MPI world */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_local);
    MPI_Comm_size(MPI_COMM_WORLD, &rank_count);

#ifdef PRINT_ENV
   /* Print environment as part of Sequoia SOW MPI requirements */
   extern void printEnv(void);
   if (rank_local == 0) { printEnv(); }
#endif 

    /* mark start of mpiBench output */
    if (rank_local == 0) { printf("START mpiBench_Bcast v%s\n", VERS); }

    /* collect hostnames of all the processes and print rank layout */
    gethostname(hostname, sizeof(hostname));
    hostnames = (char*) _ALLOC_MAIN_(sizeof(hostname)*rank_count, "Hostname array");
    MPI_Gather(hostname, sizeof(hostname), MPI_CHAR, hostnames, sizeof(hostname), MPI_CHAR, 0, MPI_COMM_WORLD);
    if (rank_local == 0) {
        for(k=0; k<rank_count; k++) {
            printf("%d : %s\n", k, &hostnames[k*sizeof(hostname)]);
        }
    }

    /* allocate message buffers and initailize timing functions */
    while(messStop*((size_t)rank_count)*2 > mem_limit && messStop > 0) messStop /= 2;
    buffer_size = messStop * rank_count;
    sbuffer   = (char*) _ALLOC_MAIN_(messStop    * rank_count, "Send Buffer");
    rbuffer   = (char*) _ALLOC_MAIN_(messStop    * rank_count, "Receive Buffer");
    sendcounts = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Send Counts");
    sdispls    = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Send Displacements");
    recvcounts = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Recv Counts");
    rdispls    = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Recv Displacements");

    /*time_maxMsg = 2*time_limit; */
    time_maxMsg = 0.0;

    /* if partsize was specified, calculate the number of partions we need */
    int partitions = 0;
    if (partsize > 0) {
        /* keep dividing comm in half until we get to partsize */
        int currentsize = rank_count;
        while (currentsize >= partsize) {
            partitions++;
            currentsize >>= 1;
        }
    }
开发者ID:8l,项目名称:insieme,代码行数:78,代码来源:bCast.c

示例3: main


//.........这里部分代码省略.........
		{
			for (i=0; i<ARRAY_SIZE; i++)
			{
				for (j=ARRAY_SIZE; j>=0; j--)
				{
					p[0] = player[i];	
					p[1] = player[j];		
				
					for(q=0; q<NUM_GAMES; q++)
					{
						b2d = ((p[0].history[0]*8) + (p[0].history[1]*4) + (p[0].history[2]*2) + p[0].history[3]);
						b2d = ((p[1].history[0]*8) + (p[1].history[1]*4) + (p[1].history[2]*2) + p[1].history[3]);
									
						Strategy(p[0], b2d);
						Strategy(p[1], b2d);
						Fitness(p);

						for (s=4; s>0; s--)
						{
							p[0].history[s] = p[0].history[s-1];
							p[1].history[s] = p[1].history[s-1];
						} 	
						p[0].history[0] = p[0].move;
						p[1].history[0] = p[1].move;
					}

					player[i] = p[0];
					player[j] = p[1];
				}
			}
		}

		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Gather(sub_arrays, ARRAY_SIZE, mpi_pop, player, ARRAY_SIZE, mpi_pop, 0, MPI_COMM_WORLD);	

/*-----------------------Perform Selection-----------------------------*/		
	
		if (world_rank == 0)
		{
			for(count=0; count<2; count++)
			{
				int sumFitness = 0;
				for (i=0; i<POPSIZE; i++)
				{
					sumFitness += p[i].fitness;
					int RANDOM = lrand48() % sumFitness;
					if (sumFitness >= RANDOM)
					{
						p[count] =  player[i];
					}
				}
			}
/*------------------------Crossover-------------------------------------*/

			if (RANDOM2 < CROSSOVER)
			{
				temp [0] = p[0].history[2];
				temp [1] = p[0].history[3];
				temp2[0] = p[1].history[2];
				temp2[1] = p[1].history[3];
				p[0].history[2] = temp2[0];
				p[0].history[3] = temp2[1];
				p[1].history[2] = temp[ 0];
				p[1].history[3] = temp[ 1];
			}
开发者ID:CaillaRose,项目名称:GeneticAlg,代码行数:66,代码来源:dilemma.c

示例4: main

/*
 * For every problem size i = 20, 22, ..., 98: rank 0 generates an i*i
 * compatibility matrix and broadcasts it, every rank generates its own
 * starting solution and runs alg2 on it, and rank 0 gathers the per-rank
 * results and prints the minimum next to the problem size.
 *
 * Returns 0 on success; aborts the whole MPI job if an allocation fails.
 */
int main(int argc, char * argv[])
{
    int rank, np;
    int * D;
    int * a;
    int i;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    int res=-1;
    /* Receive buffer for the gather. Only rank 0 allocates it; it stays NULL
       elsewhere so no indeterminate pointer is ever passed to MPI_Gather. */
    int * results = NULL;

    /* different seed per rank so the ranks explore different solutions */
    srand(rank + time(0));

    for(i = 20; i<100; i+=2)
    {

        // the matrix that contains the compatibilities
        D = malloc( sizeof *D * i * i );
        // the array that contains a solution
        a = malloc( sizeof *a * i );
        if(D == NULL || a == NULL)
        {
            fprintf(stderr, "rank %d: out of memory for size %d\n", rank, i);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        initArray(a, -1, i);

        if(rank==0)
        {
            //initialize the matrix
            genMatrix(D, i);

            // allocate the array to receive one result per rank
            results = malloc( sizeof *results * np );
            if(results == NULL)
            {
                fprintf(stderr, "rank 0: out of memory for results\n");
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }

        // generate a solution (each rank keeps its own; it is not broadcast)
        genSolution(a, i);

        // send the compatibility matrix to the other processes as typed
        // integers rather than raw bytes
        MPI_Bcast(D, i*i, MPI_INT, 0, MPI_COMM_WORLD);

        res = alg2(i, D, a, rank);

        MPI_Gather(&res, 1, MPI_INT, results, 1, MPI_INT, 0, MPI_COMM_WORLD);

        if(rank==0)
        {
            printf("%d\t%d\n", i, getMin(results, np) );

            // clean
            free(results);
            results = NULL;
        }

        free(D);
        free(a);

    }

    MPI_Finalize();

    return 0;
}
开发者ID:Onumis,项目名称:Simulated-Annealing,代码行数:65,代码来源:main.c

示例5: main

/*
 * Parallel sort of the floats stored in the binary file argv[1].
 *
 * Rank 0 reads the element count n followed by the data, keeps the first
 * n/size elements and sends one equally sized chunk to every other rank.
 * Each rank sorts its chunk with qsort, then performs `length` pairwise
 * exchange stages: in stage s the partner is the rank whose binary digit s
 * (as produced by intToBinary) is flipped; the side holding '0' keeps the
 * result of swapMin, the side holding '1' the result of swapMax. Finally
 * rank 0 gathers all chunks and prints them.
 *
 * Returns 0 on success, 1 (after MPI_Abort) on usage, file or memory errors.
 */
int main(int argc, char* argv[]){
	int rank, size, n = 0, i, j, elementiXproc, stage, length, next;
	orderedAfterSwap *m = NULL;	/* stays NULL when no stage runs (size == 1) */
	char *binary;
	FILE *file;
	float *elementi, *mieiElementi, *result = NULL;	/* result is allocated on rank 0 only */

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if(argc<2) {
		printf("Numero argomenti non sufficiente: %d richiesto %d", argc-1, 1);
		/* non-zero code so the launcher reports the failure */
		MPI_Abort(MPI_COMM_WORLD, 1);
		return 1;
	}

	if(rank==0) {
		writeFile();
		file = fopen(argv[1],"rb");

		if(file==NULL) {
			printf("Non è stato possibile aprire il file: %s", argv[1]);
			MPI_Abort(MPI_COMM_WORLD, 1);
			return 1;
		}

		if(fread(&n, sizeof(int), 1, file) != 1) {
			printf("Errore di lettura dal file: %s", argv[1]);
			fclose(file);
			MPI_Abort(MPI_COMM_WORLD, 1);
			return 1;
		}
		/* any remainder n % size is not distributed */
		elementiXproc = n/size;
		mieiElementi = malloc(sizeof(float)*elementiXproc);
		elementi = malloc(sizeof(float)*elementiXproc);
		result = malloc(sizeof(float)*n);
		if(mieiElementi==NULL || elementi==NULL || result==NULL) {
			printf("Memoria insufficiente");
			fclose(file);
			MPI_Abort(MPI_COMM_WORLD, 1);
			return 1;
		}

		if(fread(mieiElementi, sizeof(float), elementiXproc, file) != (size_t)elementiXproc) {
			printf("Errore di lettura dal file: %s", argv[1]);
			fclose(file);
			MPI_Abort(MPI_COMM_WORLD, 1);
			return 1;
		}

		/* stream the remaining chunks to the other ranks, reusing elementi as staging buffer */
		for(i=1; i<size; i++){
			MPI_Send (&elementiXproc, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
			if(fread(elementi, sizeof(float), elementiXproc, file) != (size_t)elementiXproc) {
				printf("Errore di lettura dal file: %s", argv[1]);
				fclose(file);
				MPI_Abort(MPI_COMM_WORLD, 1);
				return 1;
			}
			MPI_Send (elementi, elementiXproc, MPI_FLOAT, i, 0, MPI_COMM_WORLD);
		}
		fclose(file);
	}

	else {
		MPI_Recv (&elementiXproc, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		mieiElementi = malloc(sizeof(float)*elementiXproc);
		elementi = malloc(sizeof(float)*elementiXproc);
		if(mieiElementi==NULL || elementi==NULL) {
			printf("Memoria insufficiente");
			MPI_Abort(MPI_COMM_WORLD, 1);
			return 1;
		}
		MPI_Recv (mieiElementi, elementiXproc, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	}

	qsort(mieiElementi, elementiXproc, sizeof(float), floatcomp);

	/* Number of stages = floor(log2(size)), computed with integers:
	   log(size)/log(2) can come out as e.g. 2.999... and truncate to one too few. */
	length = 0;
	while((1 << (length+1)) <= size) {
		length++;
	}

	binary = intToBinary(rank,length);
	for(stage=0; stage<length; stage++) {
		if(binary[stage]=='0'){
			/* partner = same digits with this one set to '1'; this side sends first */
			binary[stage] = '1';
			next = binaryToInt(binary, length);
			binary[stage] = '0';
			MPI_Send (mieiElementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD);
			MPI_Recv (elementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			m = swapMin(mieiElementi,elementi,elementiXproc);
			mieiElementi = m->mieiElementi;
		}
		else {
			/* partner = same digits with this one set to '0'; this side receives first */
			binary[stage] = '0';
			next = binaryToInt(binary, length);
			binary[stage] = '1';
			MPI_Recv (elementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			MPI_Send (mieiElementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD);
			m = swapMax(mieiElementi,elementi,elementiXproc);
			mieiElementi = m->mieiElementi;
		}
		/* NOTE(review): only the last m is freed below; whether earlier
		   results of swapMin/swapMax leak depends on their ownership rules -- confirm. */
	}

	MPI_Gather(mieiElementi, elementiXproc, MPI_FLOAT, result, elementiXproc, MPI_FLOAT, 0, MPI_COMM_WORLD);

	if(rank==0){
		/* only elementiXproc*size slots were filled by the gather */
		int gathered = elementiXproc*size;
		printf("[ ");
		for(j=0; j<gathered; j++) {
			printf("%f ", result[j]);
		}
		printf("] \n");
		free(result);
	}

	free(m);	/* free(NULL) is a no-op when no stage ran */
	free(binary);
	free(mieiElementi);
	free(elementi);

	MPI_Finalize();
	return 0;
}
开发者ID:Dani7B,项目名称:CPD,代码行数:92,代码来源:Bitonic.c

示例6: DisplayGoL

/*
 * Gather the interior columns of every rank's local matrix on rank 0, print
 * the assembled board there, and return the time spent in the gather.
 *
 * N                    number of rows of the local matrix
 * effective_cols_size  number of local columns including the first and last
 *                      column, which are skipped when packing
 * matrix               local N x effective_cols_size matrix
 * rank                 caller's rank; rank 0 is the gather root and the only
 *                      rank that prints
 *
 * Returns the gather duration in milliseconds.
 *
 * The receive side is sized N*N, so this assumes the interior columns of all
 * ranks add up to N columns -- TODO confirm against the caller.
 */
int DisplayGoL(int N, int effective_cols_size, int matrix[N][effective_cols_size], int rank)
{
    int realColumnSize = effective_cols_size-2;
    int arraySize = N * realColumnSize;
    int tempArray[arraySize];
    int tempTempArray[N*N];
    int currentGatherTime = 0;
    struct timeval send1s, send1e;
    int count = 0;
    int r, c;

    /* Pack the interior columns column by column (column-major order). */
    for (c = 1; c < effective_cols_size-1; c++) {
        for (r = 0; r < N; r++) {
            tempArray[count] = matrix[r][c];
            count++;
        }
    }

    /* One call serves every rank: MPI ignores the receive arguments on
       non-root ranks, so no root/non-root branch is needed. */
    gettimeofday(&send1s, NULL);
    MPI_Gather(tempArray, arraySize, MPI_INT,
               tempTempArray, arraySize, MPI_INT, 0, MPI_COMM_WORLD);
    gettimeofday(&send1e, NULL);
    currentGatherTime += (send1e.tv_sec-send1s.tv_sec)*1000 + (send1e.tv_usec-send1s.tv_usec)/1000;

    if (rank == 0) {
        /* Only the root needs the display matrix, so it lives in this scope. */
        int displaymatrix[N][N];

        /* Gathered data is column-major: element c belongs to row c%N, column c/N. */
        for (c = 0; c < N*N; c++) {
            displaymatrix[c%N][c/N] = tempTempArray[c];
        }

        for (r = 0; r < N; r++) {
            for (c = 0; c < N; c++)
                printf("V_G-%d-%d = %d  ",r,c, displaymatrix[r][c]);
            printf("\n");
        }
    }

    return currentGatherTime;
}
开发者ID:purohitsumit,项目名称:GameOfLife,代码行数:65,代码来源:Game_Of_Life.c

示例7: main


//.........这里部分代码省略.........
  col_matrix   = (int*)malloc((lngth*lngth) * sizeof(int));
  res_matrix   = (int*)malloc((lngth*lngth) * sizeof(int));
  
  if(q>1)
    chnkd_MPI_Recv(local_matrix, lngth*lngth, MPI_INT, 0);
  else
    local_matrix = d_graph;
    
  p_row = ( rank / q );
  p_col = ( rank % q );
    
  //CREATE COMMUNICATORS 
  MPI_Group MPI_GROUP_WORLD;
  MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
  MPI_Group row_group, col_group;
  MPI_Comm row_comm, col_comm, grid_comm;
  int tmp_row, tmp_col, proc;
  int row_process_ranks[q], col_process_ranks[q];
    
  for(proc = 0; proc < q; proc++){   
    row_process_ranks[proc] = (p_row * q) + proc;
    col_process_ranks[proc] = ((p_col + proc*q) %(q*q));
  }    
  radixsort(col_process_ranks, q);
  radixsort(row_process_ranks, q);

  MPI_Group_incl(MPI_GROUP_WORLD, q, row_process_ranks, &row_group);  
  MPI_Group_incl(MPI_GROUP_WORLD, q, col_process_ranks, &col_group);  
     
  MPI_Comm_create(MPI_COMM_WORLD, row_group, &row_comm);  
  MPI_Comm_create(MPI_COMM_WORLD, col_group, &col_comm);  

  if ((rank / q) == (rank % q)) {
      memcpy(row_matrix, local_matrix, (lngth*lngth) * sizeof(int));
  }
  int ln,d,flag;
  int step, rotation_src, rotation_dest, src;
  int count = 0;
  memcpy(res_matrix, local_matrix, (lngth*lngth) * sizeof(int));
  rotation_src = (p_row + 1) % q;
  rotation_dest = ((p_row - 1) + q) % q;
  ln = (lngth*q) << 1;
  start = MPI_Wtime();  

  for (d = 2; d < ln; d = d << 1) {
    memcpy(col_matrix, local_matrix, (lngth*lngth) * sizeof(int));
    for ( step = 0;  step < q;  step++) {
      src = (p_row +  step) % q;
      count++;
      if (src == p_col) {
	MPI_Bcast(local_matrix, lngth*lngth, MPI_INT, src, row_comm);
	floyd_warshall( local_matrix, col_matrix, res_matrix, lngth);
      } else {
	MPI_Bcast(row_matrix, lngth*lngth, MPI_INT, src, row_comm);
	floyd_warshall( row_matrix, col_matrix, res_matrix, lngth);
      }  
      if( step < q-1) 
        MPI_Sendrecv_replace(col_matrix, lngth*lngth, MPI_INT, rotation_dest, STD_TAG,rotation_src, STD_TAG, col_comm, MPI_STATUS_IGNORE);
  	
    }
    memcpy(local_matrix, res_matrix, (lngth*lngth) * sizeof(int));
  }
  
  
  int *sol;
  sol = malloc(N*N*sizeof(int));  
  
  MPI_Gather(res_matrix, lngth*lngth, MPI_INT, sol,  lngth*lngth, MPI_INT, 0, MPI_COMM_WORLD);
  
  if (rank == 0) {
    finish = MPI_Wtime();
    printf("Tempo de execução %f\n",finish - start);
  }
 
  if (rank == 0) {
    int row, col, pos_x, pos_y, pos, tmp_y, tmp_x;

    for (i = 0; i < P; i++) {
      pos_x = i / q;
      pos_y = i % q;
      pos = i * lngth*lngth;

      for (row = 0; row < lngth; row++) {
	for (col = 0; col < lngth; col++) {
          tmp_x = GET_MTRX_POS(pos_x,row,lngth);
          tmp_y = GET_MTRX_POS(pos_y,col,lngth);
          
	  if (sol[GET_MTRX_POS(row,col,lngth) + pos] == INF)
	    d_graph[GET_MTRX_POS(tmp_x,tmp_y,N)] = 0;
	  else
	    d_graph[GET_MTRX_POS(tmp_x,tmp_y,N)] = sol[GET_MTRX_POS(row,col,lngth) + pos];
	}
      }
    }
    prints_matrix(d_graph,N);
  }
  
  MPI_Finalize();
  return 0;
}
开发者ID:LopesManuel,项目名称:MPI-Floyd-Warshall-C,代码行数:101,代码来源:floyd.c

示例8: AllgatherDomains

void AllgatherDomains(std::set<int> &setOfDomain){
	int i = 0;
	int numLDomains = (int)setOfDomain.size();
	int domainsarray[numLDomains];
	std::set<int>::iterator iter = setOfDomain.begin();
	for (;iter != setOfDomain.end(); iter++) domainsarray[i++] = *iter;

	int numGDomains[P_size()];
	MPI_Gather(&numLDomains,1,MPI_INT,numGDomains,1,MPI_INT,0,MPI_COMM_WORLD);

	//	if (!P_pid()){
	//		for(i=0; i<P_size(); i++) printf("rank %d receives %d domains from rank %d\n",P_pid(),numGDomains[i],i);
	//	}

	// allocate enough space to receive nodes from all processors
	int *recv_buffer2, *displacements;
	int totalDoms = 0;
	if ( !P_pid() ){
		for(i=0; i<P_size(); i++) totalDoms += numGDomains[i];
		recv_buffer2 = new int[totalDoms]; // only root processor allocates memory
		displacements = new int[P_size()];
		displacements[0] = 0;
		for (int i=1; i<P_size(); i++) displacements[i] = displacements[i-1] + numGDomains[i-1];
	}


	// now it's time to send nodes to root processor
	MPI_Gatherv(domainsarray,numLDomains,MPI_INT,
			recv_buffer2,numGDomains,displacements,MPI_INT,
			0,MPI_COMM_WORLD);
	//	if (!P_pid()){
	//		for(i=0; i<totalDoms; i++) printf("rank %d domains %d\n",P_pid(),recv_buffer2[i]);
	//	}

	// let's filter domains flags to avoid repeated values
	setOfDomain.clear();
	if (!P_pid()){
		for(i=0; i<totalDoms; i++) setOfDomain.insert( recv_buffer2[i] );
	}
	//	printf("rank %d setOfDomain.size() = %d\n",P_pid(),setOfDomain.size());

	//	if (!P_pid()){
	//		for (iter = setOfDomain.begin(); iter != setOfDomain.end(); iter++) printf("rank %d domains %d\n",P_pid(),*iter);
	//	}

	// Send these domains flags to all processes

	i = 0;
	int numGDomains2 = (int)setOfDomain.size();
	numGDomains2 = P_getSumInt(numGDomains2);
	int domainsGarray[numGDomains2];

	//	if (!P_pid()){
	for (iter = setOfDomain.begin(); iter != setOfDomain.end(); iter++) domainsGarray[i++] = *iter;
	//	}

	MPI_Bcast(domainsGarray,numGDomains2,MPI_INT,0,MPI_COMM_WORLD);

	for(i=0; i<numGDomains2; i++) setOfDomain.insert( domainsGarray[i] );
	//printf("rank %d numGDomains2 %d\n",P_pid(),numGDomains2);

	//	for(i=0; i<numGDomains2; i++) printf("rank %d domains %d\n",P_pid(),domainsGarray[i]);
}
开发者ID:andreabduque,项目名称:padmec-amr,代码行数:63,代码来源:EBFV1__pre-processors.cpp

示例9: trainOneEpochDenseCPU

/**
 * Run one training epoch over this rank's dense data on the CPU.
 *
 * For each local vector the best matching unit (BMU) is looked up with
 * get_bmu_coord; then, for every map node, the neighbourhood-weighted sum of
 * the data vectors (numerator) and the sum of the weights (denominator) are
 * accumulated. With HAVE_MPI the per-rank sums are reduced onto rank 0 and the
 * BMUs are gathered there; without it they are copied to the outputs.
 *
 * @param itask            index of this rank, used to skip padding vectors
 * @param data             local data, nDimensions floats per vector
 * @param numerator        out: nSomY*nSomX*nDimensions weighted sums
 * @param denominator      out: nSomY*nSomX weight sums
 * @param codebook         current codebook, passed through to get_bmu_coord
 * @param nSomX,nSomY      map dimensions
 * @param nDimensions      length of one data vector
 * @param nVectors         total number of vectors over all ranks
 * @param nVectorsPerRank  number of vector slots handled by each rank
 * @param radius,scale     passed through to getWeight
 * @param mapType          "planar" or "toroid"; any other value leaves the
 *                         distance at 0
 * @param globalBmus       out: two BMU coordinates per vector slot
 */
void trainOneEpochDenseCPU(int itask, float *data, float *numerator,
                           float *denominator, float *codebook,
                           unsigned int nSomX, unsigned int nSomY,
                           unsigned int nDimensions, unsigned int nVectors,
                           unsigned int nVectorsPerRank, float radius,
                           float scale, string mapType, int *globalBmus)
{
    unsigned int p1[2] = {0, 0};
    // Value-initialised: slots of padding vectors (global index >= nVectors)
    // are never written below but are still gathered/copied, so they must not
    // hold indeterminate values.
    unsigned int *bmus = new unsigned int[nVectorsPerRank*2]();

    #pragma omp parallel default(shared) private(p1)
    {
        #pragma omp for
        for (unsigned int n = 0; n < nVectorsPerRank; n++) {
            // skip padding slots past the end of the global data set
            if (itask*nVectorsPerRank+n<nVectors) {
                /// get the best matching unit
                get_bmu_coord(codebook, data, nSomY, nSomX,
                              nDimensions, p1, n);
                bmus[2*n] = p1[0]; bmus[2*n+1] = p1[1];
              }
        }
    }

    float *localNumerator = new float[nSomY*nSomX*nDimensions];
    float *localDenominator = new float[nSomY*nSomX];

    #pragma omp parallel default(shared)
    {
        /// Clear the local accumulators
        #pragma omp for
        for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
            for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
                localDenominator[som_y*nSomX + som_x] = 0.0;
                for (unsigned int d = 0; d < nDimensions; d++)
                    localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] = 0.0;
            }
        }
        /// Accumulate denoms and numers
        #pragma omp for
        for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
            for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
                for (unsigned int n = 0; n < nVectorsPerRank; n++) {
                    if (itask*nVectorsPerRank+n<nVectors) {
                        // map distance between this node and the vector's BMU
                        float dist = 0.0f;
                        if (mapType == "planar") {
                            dist = euclideanDistanceOnPlanarMap(som_x, som_y, bmus[2*n], bmus[2*n+1]);
                        } else if (mapType == "toroid") {
                            dist = euclideanDistanceOnToroidMap(som_x, som_y, bmus[2*n], bmus[2*n+1], nSomX, nSomY);
                        }
                        float neighbor_fuct = getWeight(dist, radius, scale);
                        
                        for (unsigned int d = 0; d < nDimensions; d++) {
                            localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] +=
                                1.0f * neighbor_fuct
                                * (*(data + n*nDimensions + d));
                        }
                        localDenominator[som_y*nSomX + som_x] += neighbor_fuct;
                    }
                }
            }
        }
    }
#ifdef HAVE_MPI
    // Sum the per-rank accumulators onto rank 0 and collect all BMUs there.
    // NOTE(review): bmus is unsigned int but is transferred as MPI_INT into an
    // int buffer -- fine while coordinates stay below INT_MAX; confirm.
    MPI_Reduce(localNumerator, numerator,
               nSomY*nSomX*nDimensions, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Reduce(localDenominator, denominator,
               nSomY*nSomX, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Gather(bmus, nVectorsPerRank*2, MPI_INT, globalBmus, nVectorsPerRank*2, MPI_INT, 0, MPI_COMM_WORLD);

#else
    // Single process: the local results are the global results.
    for (unsigned int i=0; i < nSomY*nSomX*nDimensions; ++i) {
        numerator[i] = localNumerator[i];
    }
    for (unsigned int i=0; i < nSomY*nSomX; ++i) {
        denominator[i] = localDenominator[i];
    }
    for (unsigned int i=0; i < 2*nVectorsPerRank; ++i) {
      globalBmus[i]=bmus[i];
    }
#endif
    delete [] bmus;
    delete [] localNumerator;
    delete [] localDenominator;
}
开发者ID:xgdgsc,项目名称:somoclu,代码行数:83,代码来源:denseCpuKernels.cpp

示例10: main

/*
 * Every rank formats "<hostname>: <local time with microseconds>", rank 0
 * gathers and prints all of these strings, then the minimum microsecond
 * fraction over all ranks is reduced onto rank 0 and printed. After a final
 * barrier every rank announces that it is terminating.
 *
 * Returns 0 on success; on any error the whole MPI job is aborted with code 1.
 */
int main(int argc, char** argv) {
    // Initialize the MPI environment
    MPI_Init(&argc, &argv);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Get the rank of the process
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    // Receive buffer of LEN chars per rank; only the root needs it
    char string_buffer[LEN];
    char* rbuf = NULL;
    if(world_rank == 0) {
        rbuf = malloc(world_size * LEN * sizeof(char));
        if(rbuf == NULL) {
            printf("Error allocating receive buffer");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    // Get the name of the processor
    char processor_name[50];
    if(gethostname(processor_name, sizeof processor_name) != 0) {
        printf("Error with hostname");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    // gethostname need not NUL-terminate a truncated name
    processor_name[sizeof processor_name - 1] = '\0';

    // Get current time on host
    struct timeval time;
    if(gettimeofday(&time, NULL) != 0) {
        printf("Error with time");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Generate output; snprintf bounds the write to the LEN-byte buffer
    time_t curtime = time.tv_sec;
    char time_buffer[30];
    strftime(time_buffer, sizeof time_buffer, "%Y-%m-%d %T.", localtime(&curtime));
    snprintf(string_buffer, sizeof string_buffer, "%s: %s%li",
             processor_name, time_buffer, (long)time.tv_usec);

    // Gather output
    int rc = MPI_Gather(string_buffer, LEN, MPI_CHAR, rbuf, LEN, MPI_CHAR, 0, MPI_COMM_WORLD);
    if(rc != MPI_SUCCESS) {
        printf("Error while gathering, rc is: %d", rc);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Print output
    if(world_rank == 0) {
        for(int i = 0; i < world_size; ++i)
            printf("%.*s\n", LEN, rbuf + LEN * i);
    }
    free(rbuf);   // NULL on non-root ranks, where free is a no-op

    // Get microseconds
    int usec = (int)time.tv_usec;
    int min_usec = 0;   // receives the reduction result on the root

    // Reduce microseconds: minimum over all ranks, delivered to rank 0
    if(MPI_Reduce(&usec, &min_usec, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD) != MPI_SUCCESS){
        printf("Error in MPI_Reduce\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Print the reduced minimum (previously the root's own value was printed
    // and the reduction result was discarded)
    if(world_rank == 0)
        printf("%d\n", min_usec);

    if(MPI_Barrier(MPI_COMM_WORLD) != MPI_SUCCESS){
        printf("Error with barrier");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    printf("Rang %d beendet jetzt!\n", world_rank);
	
    // Finalize the MPI environment.
    MPI_Finalize();

    return 0;
}
开发者ID:sKeLeTr0n,项目名称:uni,代码行数:78,代码来源:timempi2.c

示例11: fprintf

//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
void Image_Exchanger::sync_fragment_info(OverLap_FootPrint* ofp, 
                                         ImageFragment_Tile* ift,
                                         int nviewer)
{

#ifdef _DEBUG7
    fprintf(stderr, "**** %s:%s() ****\n", __FILE__, __func__);
#endif


    std::vector<int> infobuf;
    int count = ofp->save_overlap_info(infobuf);

#ifdef _DEBUG6
    fprintf(stderr, "%d: %s: olcount=%d, olbuffer size=%ld\n", 
            m_rank, __func__, count, infobuf.size());
#endif    

    int c = infobuf.size();

//     fprintf(stderr, "%d: nviewer=%d, gather MPI_INT %d\n", 
//             m_rank, nviewer, c);


    memset(m_rcounts, 0, m_runsize*sizeof(unsigned int));

    if(nviewer == 1) 
    {
        MPI_Gather(&c, 1, MPI_INT, 
               m_rcounts, 1, MPI_INT, 
               0, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Allgather(&c, 1, MPI_INT, 
                      m_rcounts, 1, MPI_INT, 
                      MPI_COMM_WORLD);
    }


    // vector throws a length_error if resized above max_size
    //terminate called after throwing an instance of 'std::length_error'
    //what():  vector::_M_fill_insert

    std::vector<int> ainfobuf(1, 0);
    memset(m_rdispls, 0, m_runsize*sizeof(unsigned int));



    if( (nviewer == 1 && m_rank==0) || (nviewer > 1) )
    {
        int total = 0;
        for(int i=0; i<m_runsize; i++) total += m_rcounts[i];

//         fprintf(stderr, "std::vector max size=%ld, resize to %d\n", 
//                 ainfobuf.max_size(), total);
        assert(total > 0);

        ainfobuf.resize(total, 0);
    }



    for(int i=0; i<m_runsize-1; i++) 
        m_rdispls[i+1] = m_rdispls[i] + m_rcounts[i];

    //to make &infobuf[0] a legal call
    if(c == 0) infobuf.resize(1);

    if(nviewer == 1) 
    {
        MPI_Gatherv(&infobuf[0], c, MPI_INT,
                    &ainfobuf[0], m_rcounts, m_rdispls, 
                    MPI_INT,
                    0, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Allgatherv(&infobuf[0], c, MPI_INT,
                       &ainfobuf[0], (int*)m_rcounts, (int*)m_rdispls, 
                       MPI_INT,
                       MPI_COMM_WORLD);
    }

    //fprintf(stderr, "MPI_SUCCESS on sync frag info\n");

    //only viewer need to have all fragments and count for recv
    //non-viewer only need count send for its own fragments
    if(m_rank < nviewer)
    {
        ift->retrieve_fragments(ainfobuf);
    }
    else if(c > 0)
    {
        ift->retrieve_fragments(infobuf);
    }

//.........这里部分代码省略.........
开发者ID:jinghuage,项目名称:pcaster,代码行数:101,代码来源:image_exchanger.cpp

示例12: main


//.........这里部分代码省略.........
        /* Scatter data to local ranks */
        MPI_Scatter(data, chunkSize*N, MPI_FLOAT,
                    local_data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        /* Compute time for distributing data */
        if(task_id == 0){
            time[2] = MPI_Wtime();
            printf("Group 1: Scattering 1_im1(row) to each processor takes %f s.\n", time[2] - time[1]);
        }

        /* Do 1_im1 2d FFT */
        /* Row FFT */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* FFT each row for im1 */
                temp_data[j].r = local_data[i][j];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, -1);

            for(j = 0; j < N; j++)
                local_data[i][j] = temp_data[j].r;
        }

        /* Gather all the data and distribute in columns */
        if(task_id == 0){
            time[3] = MPI_Wtime();
            printf("Group 1: FFT each row for 1_im1 takes %f s.\n", time[3] - time[2]);
        }

        /* Gather all the data of 1_im1 */
        MPI_Gather(local_data, chunkSize*N, MPI_FLOAT,
                    data, chunkSize*N, MPI_FLOAT,
                    0, taskcomm);

        if(task_id == 0){
            time[4] = MPI_Wtime();
            printf("Group 1: Gathering all the data of 1_im1(row) takes %f s.\n", time[4] - time[3]);
        }

        /* Scatter all the data to column local data */
        MPI_Scatter(data, chunkSize, column,
                    local_data, chunkSize, column,
                    0, taskcomm);

        if(task_id == 0){
            time[5] = MPI_Wtime();
            printf("Group 1: Scattering 1_im1(column) to each processor takes %f s.\n", time[5] - time[4]);
        }

        /* Column FFT */
        for(i = 0; i < chunkSize; i++){
            for(j = 0; j < N; j++){
                /* FFT each column for im1 */
                temp_data[j].r = local_data[j][i];
                temp_data[j].i = 0;
            }

            c_fft1d(temp_data, N, -1);

            for(j = 0; j < N; j++)
                local_data[j][i] = temp_data[j].r;
        }
开发者ID:ttang10,项目名称:FFT_2D_CONVOLUTION,代码行数:66,代码来源:task_para.c

示例13: online_measurement


//.........这里部分代码省略.........

    /* now we bring it to normal format */
    /* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
    convert_eo_to_lexic(g_spinor_field[DUM_MATRIX], g_spinor_field[2], g_spinor_field[3]);

    /* now we sum only over local space for every t */
    for(t = 0; t < T; t++) {
        j = g_ipt[t][0][0][0];
        res = 0.;
        respa = 0.;
        resp4 = 0.;
        for(i = j; i < j+LX*LY*LZ; i++) {
            res += _spinor_prod_re(g_spinor_field[DUM_MATRIX][j], g_spinor_field[DUM_MATRIX][j]);
            _gamma0(phi, g_spinor_field[DUM_MATRIX][j]);
            respa += _spinor_prod_re(g_spinor_field[DUM_MATRIX][j], phi);
            _gamma5(phi, phi);
            resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][j], phi);
        }

#if defined MPI
        MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
        res = mpi_res;
        MPI_Reduce(&respa, &mpi_respa, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
        respa = mpi_respa;
        MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
        resp4 = mpi_resp4;
        sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
#else
        Cpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        Cpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
        Cp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
#endif
    }

#ifdef MPI
    /* some gymnastics needed in case of parallelisation */
    if(g_mpi_time_rank == 0) {
        MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
        MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
        MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
    }
#endif

    /* and write everything into a file */
    if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
        ofs = fopen(filename, "w");
        fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0], 0.);
        for(t = 1; t < g_nproc_t*T/2; t++) {
            tt = (t0+t)%(g_nproc_t*T);
            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt]);
            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
            fprintf( ofs, "%e\n", Cpp[tt]);
        }
        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
        fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt], 0.);

        fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0], 0.);
        for(t = 1; t < g_nproc_t*T/2; t++) {
            tt = (t0+t)%(g_nproc_t*T);
            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt]);
            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
            fprintf( ofs, "%e\n", Cpa[tt]);
        }
        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
        fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt], 0.);

        fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0], 0.);
        for(t = 1; t < g_nproc_t*T/2; t++) {
            tt = (t0+t)%(g_nproc_t*T);
            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt]);
            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
            fprintf( ofs, "%e\n", Cp4[tt]);
        }
        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
        fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt], 0.);
        fclose(ofs);
    }
#ifdef MPI
    if(g_mpi_time_rank == 0) {
        free(Cpp);
        free(Cpa);
        free(Cp4);
    }
    free(sCpp);
    free(sCpa);
    free(sCp4);
#else
    free(Cpp);
    free(Cpa);
    free(Cp4);
#endif
    etime = gettime();

    if(g_proc_id == 0 && g_debug_level > 0) {
        printf("ONLINE: measurement done int t/s = %1.4e\n", etime - atime);
    }
    return;
}
开发者ID:ggscorzato,项目名称:tmLQCD,代码行数:101,代码来源:online_measurement.c

示例14: cuda_mpi_send_forces

  void cuda_mpi_send_forces(float *host_forces,
                            float *host_torques,
                            CUDA_fluid_composition * host_composition){
  int n_part;
  int g, pnode;
  Cell *cell;
  int c;
  int i;  
  int *sizes;
  sizes = (int *) Utils::malloc(sizeof(int)*n_nodes);
  n_part = cells_get_n_particles();
  /* first collect number of particles on each node */
  MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);

  /* call slave functions to provide the slave data */
  if(this_node > 0) {
    cuda_mpi_send_forces_slave();
  }
  else{
    /* fetch particle informations into 'result' */
    g = 0;
    for (pnode = 0; pnode < n_nodes; pnode++) {
      if (sizes[pnode] > 0) {
        if (pnode == 0) {
          for (c = 0; c < local_cells.n; c++) {
            int npart;  
            cell = local_cells.cell[c];
            npart = cell->n;
            for (i=0;i<npart;i++) { 
              cell->part[i].f.f[0] += (double)host_forces[(i+g)*3+0];
              cell->part[i].f.f[1] += (double)host_forces[(i+g)*3+1];
              cell->part[i].f.f[2] += (double)host_forces[(i+g)*3+2];
#ifdef ROTATION
              cell->part[i].f.torque[0] += (double)host_torques[(i+g)*3+0];
              cell->part[i].f.torque[1] += (double)host_torques[(i+g)*3+1];
              cell->part[i].f.torque[2] += (double)host_torques[(i+g)*3+2];
#endif

#ifdef SHANCHEN
              for (int ii=0;ii<LB_COMPONENTS;ii++) {
                cell->part[i].r.composition[ii] = (double)host_composition[i+g].weight[ii];
              }
#endif
            }
            g += npart;
          }
        }
        else {
          /* and send it back to the slave node */

          MPI_Send(&host_forces[3*g], 3*sizes[pnode]*sizeof(float), MPI_BYTE, pnode, REQ_CUDAGETFORCES, comm_cart);
#ifdef ROTATION          
          MPI_Send(&host_torques[3*g], 3*sizes[pnode]*sizeof(float), MPI_BYTE, pnode, REQ_CUDAGETFORCES, comm_cart);
#endif
#ifdef SHANCHEN
          MPI_Send(&host_composition[g], sizes[pnode]*sizeof(CUDA_fluid_composition), MPI_BYTE, pnode, REQ_CUDAGETPARTS, comm_cart);      
#endif
          g += sizes[pnode];
        }
      }
    }
  }
  COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node));

  free(sizes);
}
开发者ID:Haider-BA,项目名称:espresso,代码行数:66,代码来源:cuda_interface.cpp

示例15: main

/*
 * Compare two integers digit by digit using exactly 10 MPI processes.
 *
 * Rank 0 reads the two numbers and sends one to rank 1 and one to rank 2.
 * Those ranks convert their number with dec_to_bin() into 10 digits and
 * broadcast the result. Every rank then compares the digit at index equal to
 * its own rank, the per-digit results are gathered on rank 0, and rank 0
 * reports which number is bigger based on the first differing digit.
 *
 * NOTE(review): dec_to_bin() is defined elsewhere; this code assumes it
 * returns a pointer to at least 10 digits, most significant first - confirm.
 * If it allocates a fresh buffer on every call, the per-digit calls below
 * leak; verify against its definition.
 */
int main(int argc, char **argv)
{
	int num1 = 0, num2 = 0, proc_num, proc_rank, comp_result, i;
	int buf1[10], buf2[10], buf_result[10];
	MPI_Status status;

	MPI_Init( &argc, &argv );
	MPI_Comm_size( MPI_COMM_WORLD, &proc_num );
	MPI_Comm_rank( MPI_COMM_WORLD, &proc_rank );

	if( 10 != proc_num ) // the algorithm needs exactly 10 processes, one per digit
	{
		if( 0 == proc_rank ) printf("Wrong number of processes!\n");
		MPI_Finalize();
		return 0;
	}

	if( 0 == proc_rank ) // the decimal numbers are read on rank 0 and known only to it
	{
		// Without valid input the other ranks would block forever in
		// MPI_Recv, so terminate the whole job instead of sending garbage.
		if( 2 != scanf("%d%d", &num1, &num2 ) )
		{
			fprintf( stderr, "Failed to read two integers!\n" );
			MPI_Abort( MPI_COMM_WORLD, 1 );
		}

		if( num1>1000 ) // clamp to the supported range
			num1 = MAX_NUM;

		if( num2>1000 )
			num2 = MAX_NUM;

		MPI_Send( &num1, 1, MPI_INT, 1, 0, MPI_COMM_WORLD ); // TODO: use a dedicated communicator for ranks 1 and 2?
		MPI_Send( &num2, 1, MPI_INT, 2, 0, MPI_COMM_WORLD ); // the numbers go to ranks 1 and 2
	}

	if( 1 == proc_rank ) // ranks 1 and 2 convert their number to binary form
	{
		MPI_Recv( &num1, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status );
		for( i=0;i<10;i++)
			buf1[i] = *( dec_to_bin( num1 ) + i );

		printf("The first number is: ");

		for( i=0;i<10;i++)
			printf("%d ", buf1[i]);

		printf("\n");
	}

	if( 2 == proc_rank)
	{
		MPI_Recv( &num2, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status );
		// Index with i (not the constant 1) so each digit is copied,
		// mirroring the conversion done on rank 1.
		for(i=0;i<10;i++)
			buf2[i] = *( dec_to_bin( num2 ) + i );

		printf("The second number is: ");

		for(i=0;i<10;i++)
			printf("%d ", buf2[i] );

		printf("\n");
	}

	MPI_Bcast( buf1, 10, MPI_INT, 1, MPI_COMM_WORLD ); // now every rank has both binary forms
	MPI_Bcast( buf2, 10, MPI_INT, 2, MPI_COMM_WORLD );

	comp_result = ( buf1[proc_rank] == buf2[proc_rank] ) ? 0 : 1; 	// each rank compares the digit matching its rank

	MPI_Barrier( MPI_COMM_WORLD );

	MPI_Gather( &comp_result, 1, MPI_INT, buf_result, 1, MPI_INT, 0, MPI_COMM_WORLD );	// comparison results end up on rank 0

	if ( 0 == proc_rank )
	{
		for(i=0;i<10;i++)
			printf("%d ", buf_result[i] );

		// Find the first differing digit; the bound check keeps the scan
		// inside buf_result when all 10 digits are equal.
		i = 0;
		while( i < 10 && 0 == buf_result[i] )
			i++;

		if( 10 == i )
		{
			printf("Numbers are equal!\n");
		}
		else
		{
			if( buf1[i] > buf2[i] )
				printf("\nFirst number is bigger!\n");
			else printf("\nSecond number is bigger!\n");
		}
	}

	MPI_Finalize();
	return 0;
}
开发者ID:SkySpecial,项目名称:MPI-studying,代码行数:98,代码来源:MPI_Bin_comparator2.c


注:本文中的MPI_Gather函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。