当前位置: 首页>>代码示例>>C++>>正文


C++ MPI_Allreduce函数代码示例

本文整理汇总了C++中MPI_Allreduce函数的典型用法代码示例。如果您正苦于以下问题:C++ MPI_Allreduce函数的具体用法?C++ MPI_Allreduce怎么用?C++ MPI_Allreduce使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了MPI_Allreduce函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main


//.........这里部分代码省略.........
  MPI_Bcast( &nprows, 1, MPI_INT, 0, MPI_COMM_WORLD );
  MPI_Bcast( &npcols, 1, MPI_INT, 0, MPI_COMM_WORLD );

  if ( nprows * npcols != nprocs ){
    printf( "mesh not of right size\n" );
    exit( 0 );
  }
  
  /* Figure out what my index is */
  mycol = me / nprows;
  myrow = me % nprows;

  /* create a communicator for the row of which I am part */
  MPI_Comm_split( MPI_COMM_WORLD, myrow, mycol, &comm_row );

  /* create a communicator for the column of which I am part */
  MPI_Comm_split( MPI_COMM_WORLD, mycol, myrow, &comm_col );

  /* create buffers into which to hold the global A, B, C (everyone will have a copy) */
  global_A = ( double * ) malloc ( sizeof( double ) * n * n );
  global_B = ( double * ) malloc ( sizeof( double ) * n * n );
  global_C = ( double * ) malloc ( sizeof( double ) * n * n );

  /* create random matrices on node zero and share with all nodes */
  if ( me == 0 ){
    random_matrix( n, n, global_A, n );
    random_matrix( n, n, global_B, n );
    random_matrix( n, n, global_C, n );
  }
  MPI_Bcast( global_A, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );
  MPI_Bcast( global_B, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );
  MPI_Bcast( global_C, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );

  /* compute local matrix sizes */
  local_m   = n / nprows + ( myrow < n % nprows ? 1 : 0 );
  local_n   = n / npcols + ( mycol < n % npcols ? 1 : 0 );

  /* create buffer into which to hold the ocal A, B, C */
  local_A = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
  local_B = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
  local_C = ( double * ) malloc ( sizeof( double ) * local_m * local_n );

  /* copy the local parts */
  CopyMatrixGlobalToLocal( n, n, 
			   global_A, n, 
			   local_A, local_m, 
			   comm_row, comm_col );
  CopyMatrixGlobalToLocal( n, n, 
			   global_B, n, 
			   local_B, local_m, 
			   comm_row, comm_col );
  CopyMatrixGlobalToLocal( n, n, 
			   global_C, n, 
			   local_C, local_m, 
			   comm_row, comm_col );

  /* Compute parallel matrix-matrix multiply */
  ParallelMMult( n, n, n, 
		 local_A, local_m, 
		 local_B, local_m, 
		 local_C, local_m, 
		 comm_row, comm_col );

  /* Compute sequential matrix-matrix multiply on all nodes */
  dgemm_( "N", "N", &n, &n, &n,
  	  &d_one, global_A, &n, global_B, &n,
  	  &d_one, global_C, &n );

  local_C_ref = ( double * ) malloc ( sizeof( double ) * local_m * local_n );

  CopyMatrixGlobalToLocal( n, n, 
			   global_C, n, 
			   local_C_ref, local_m, 
			   comm_row, comm_col );

  local_diff = compare_matrices( local_m, local_n, 
				 local_C, local_m, 
				 local_C_ref, local_m );

  MPI_Allreduce( &local_diff, &diff, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );

  if ( me == 0 )
    printf("\ndiff = %le\n", diff );

  free( global_A );
  free( global_B );
  free( global_C );
  free( local_A );
  free( local_B );
  free( local_C );
  free( local_C_ref);

  MPI_Comm_free( &comm_row );
  MPI_Comm_free( &comm_col );

  /* Cleanup up the MPI environment */
  MPI_Finalize();

  exit( 0 );
}
开发者ID:scottenriquez,项目名称:parallel-matrix-multiply,代码行数:101,代码来源:driver.c

示例2: triangulateAndFormProl

static PetscErrorCode triangulateAndFormProl(IS selected_2,PetscInt data_stride,PetscReal coords[],PetscInt nselected_1,const PetscInt clid_lid_1[],const PetscCoarsenData *agg_lists_1,
                                             const PetscInt crsGID[],PetscInt bs,Mat a_Prol,PetscReal *a_worst_best)
{
#if defined(PETSC_HAVE_TRIANGLE)
  PetscErrorCode       ierr;
  PetscInt             jj,tid,tt,idx,nselected_2;
  struct triangulateio in,mid;
  const PetscInt       *selected_idx_2;
  PetscMPIInt          rank;
  PetscInt             Istart,Iend,nFineLoc,myFine0;
  int                  kk,nPlotPts,sid;
  MPI_Comm             comm;
  PetscReal            tm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)a_Prol,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = ISGetSize(selected_2, &nselected_2);CHKERRQ(ierr);
  if (nselected_2 == 1 || nselected_2 == 2) { /* 0 happens on idle processors */
    *a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */
  } else *a_worst_best = 0.0;
  ierr = MPI_Allreduce(a_worst_best, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr);
  if (tm > 0.0) {
    *a_worst_best = 100.0;
    PetscFunctionReturn(0);
  }
  ierr     = MatGetOwnershipRange(a_Prol, &Istart, &Iend);CHKERRQ(ierr);
  nFineLoc = (Iend-Istart)/bs; myFine0 = Istart/bs;
  nPlotPts = nFineLoc; /* locals */
  /* traingle */
  /* Define input points - in*/
  in.numberofpoints          = nselected_2;
  in.numberofpointattributes = 0;
  /* get nselected points */
  ierr = PetscMalloc1(2*nselected_2, &in.pointlist);CHKERRQ(ierr);
  ierr = ISGetIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);

  for (kk=0,sid=0; kk<nselected_2; kk++,sid += 2) {
    PetscInt lid = selected_idx_2[kk];
    in.pointlist[sid]   = coords[lid];
    in.pointlist[sid+1] = coords[data_stride + lid];
    if (lid>=nFineLoc) nPlotPts++;
  }
  if (sid != 2*nselected_2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != 2*nselected_2 %D",sid,nselected_2);

  in.numberofsegments      = 0;
  in.numberofedges         = 0;
  in.numberofholes         = 0;
  in.numberofregions       = 0;
  in.trianglelist          = 0;
  in.segmentmarkerlist     = 0;
  in.pointattributelist    = 0;
  in.pointmarkerlist       = 0;
  in.triangleattributelist = 0;
  in.trianglearealist      = 0;
  in.segmentlist           = 0;
  in.holelist              = 0;
  in.regionlist            = 0;
  in.edgelist              = 0;
  in.edgemarkerlist        = 0;
  in.normlist              = 0;

  /* triangulate */
  mid.pointlist = 0;            /* Not needed if -N switch used. */
  /* Not needed if -N switch used or number of point attributes is zero: */
  mid.pointattributelist = 0;
  mid.pointmarkerlist    = 0; /* Not needed if -N or -B switch used. */
  mid.trianglelist       = 0;    /* Not needed if -E switch used. */
  /* Not needed if -E switch used or number of triangle attributes is zero: */
  mid.triangleattributelist = 0;
  mid.neighborlist          = 0; /* Needed only if -n switch used. */
  /* Needed only if segments are output (-p or -c) and -P not used: */
  mid.segmentlist = 0;
  /* Needed only if segments are output (-p or -c) and -P and -B not used: */
  mid.segmentmarkerlist = 0;
  mid.edgelist          = 0;    /* Needed only if -e switch used. */
  mid.edgemarkerlist    = 0; /* Needed if -e used and -B not used. */
  mid.numberoftriangles = 0;

  /* Triangulate the points.  Switches are chosen to read and write a  */
  /*   PSLG (p), preserve the convex hull (c), number everything from  */
  /*   zero (z), assign a regional attribute to each element (A), and  */
  /*   produce an edge list (e), a Voronoi diagram (v), and a triangle */
  /*   neighbor list (n).                                            */
  if (nselected_2 != 0) { /* inactive processor */
    char args[] = "npczQ"; /* c is needed ? */
    triangulate(args, &in, &mid, (struct triangulateio*) NULL);
    /* output .poly files for 'showme' */
    if (!PETSC_TRUE) {
      static int level = 1;
      FILE       *file; char fname[32];

      sprintf(fname,"C%d_%d.poly",level,rank); file = fopen(fname, "w");
      /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
      fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid += 2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }
      /*One line: <# of segments> <# of boundary markers (0 or 1)> */
//.........这里部分代码省略.........
开发者ID:pombredanne,项目名称:petsc,代码行数:101,代码来源:geo.c

示例3: isFinished

		bool isFinished() {
			int ret;
			MPI_Allreduce(&active, &ret, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
			return ret == 0;
		}
开发者ID:qinglan233,项目名称:PDC-2016-project,代码行数:5,代码来源:Communicator.hpp

示例4: poisson_main


//.........这里部分代码省略.........

  double solve_time = fei::utils::cpu_time() - start_solve_time;

  if (localProc == 0) {
    FEI_COUT << "FEI cpu-times:" << FEI_ENDL
	 << "    init. phase: " << iTime << FEI_ENDL
	 << "    load  phase: " << lTime << FEI_ENDL
	 << "    solve  time: " << sTime << FEI_ENDL;
  }

  double norm = 0.0;
  FEI_COUT.setf(IOS_SCIENTIFIC, IOS_FLOATFIELD);
  CHK_ERR( fei->residualNorm(1, 1, &fieldIDs[0], &norm) );
  if (localProc == 0) {
    FEI_COUT << "returned residual norm: " << norm << FEI_ENDL;
  }

  int itersTaken = 0;
  CHK_ERR( fei->iterations(itersTaken) );

  //
  //We oughtta make sure the solution we just computed is correct...
  //

  int numNodes = 0;
  CHK_ERR( fei->getNumLocalNodes(numNodes) );

  double maxErr = 0.0;
  if (numNodes > 0) {
    int lenNodeIDs = numNodes;
    GlobalID* nodeIDs = new GlobalID[lenNodeIDs];
    double* soln = new double[lenNodeIDs];
    if (nodeIDs != NULL && soln != NULL) {
      CHK_ERR( fei->getLocalNodeIDList(numNodes, nodeIDs, lenNodeIDs) );

      int fieldID = 1;
      CHK_ERR( fei->getNodalFieldSolution(fieldID, numNodes, nodeIDs, soln));

      for(int i=0; i<numNodes; i++) {
	int nID = (int)nodeIDs[i];
	double x = (1.0* ((nID-1)%(L+1)))/L;
	double y = (1.0* ((nID-1)/(L+1)))/L;

	double exactSoln = x*x + y*y;
	double error = std::abs(exactSoln - soln[i]);
	if (maxErr < error) maxErr = error;
      }

      delete [] nodeIDs;
      delete [] soln;
    }
    else {
      fei::console_out() << "allocation of nodeIDs or soln failed." << FEI_ENDL; 
    }

  }

#ifndef FEI_SER
  double globalMaxErr = 0.0;
  MPI_Allreduce(&maxErr, &globalMaxErr, 1, MPI_DOUBLE, MPI_MAX, comm);
  maxErr = globalMaxErr;
#endif
  bool testPassed = true;
  if (maxErr > 1.e-6) testPassed = false;

  if (testPassed && localProc == 0) {
    FEI_COUT << "poisson: TEST PASSED, maxErr = " << maxErr << ", iterations: "
	 << itersTaken << FEI_ENDL;
    //This is something the SIERRA runtest tool looks for in test output...
    FEI_COUT << "SIERRA execution successful" << FEI_ENDL;
  }
  if (testPassed == false && localProc == 0) {
    FEI_COUT << "maxErr = " << maxErr << ", TEST FAILED" << FEI_ENDL;
    FEI_COUT << "(Test is deemed to have passed if the maximum difference"
	 << " between the exact and computed solutions is 1.e-6 or less.)"
	 << FEI_ENDL;
  }

  double elapsed_cpu_time = fei::utils::cpu_time() - start_time;

  //The following IOS_... macros are defined in base/fei_macros.h
  FEI_COUT.setf(IOS_FIXED, IOS_FLOATFIELD);
  if (localProc==0) {
    FEI_COUT << "Proc0 cpu times (seconds):" << FEI_ENDL
	 << "   FEI initialize:  " << fei_init_time << FEI_ENDL
         << "   FEI load:        " << fei_load_time << FEI_ENDL
         << "      solve:        " << solve_time << FEI_ENDL
         << "Total program time: " << elapsed_cpu_time << FEI_ENDL;
  }

  wrapper.reset();
  fei.reset();
  factory.reset();
  //If Prometheus is being used, we need to make sure that the
  //LibraryWrapper inside factory is deleted before MPI_Finalize() is called.
  //(That's why we call the 'reset' methods on these shared-pointers rather
  //than just letting them destroy themselves when they go out of scope.)

  return(0);
}
开发者ID:cakeisalie,项目名称:oomphlib_003,代码行数:101,代码来源:poisson_main.cpp

示例5: cell_mindt

double cell_mindt( struct Cell *** theCells, struct Sim * theSim, struct GravMass * theGravMasses ){
    int i_m,j_m,k_m;
    double dt_m = 1.e100;//HUGE_VAL;
    double a_m,r_m,dx_m;
    double mag_vel_m;
    int i,j,k;
    for( k=sim_Nghost_min(theSim,Z_DIR) ; k<sim_N(theSim,Z_DIR)-sim_Nghost_max(theSim,Z_DIR) ; ++k ){
        double zm = sim_FacePos(theSim,k-1,Z_DIR);
        double zp = sim_FacePos(theSim,k,Z_DIR);
        double dz = zp-zm;
        for( i=sim_Nghost_min(theSim,R_DIR) ; i<sim_N(theSim,R_DIR)-sim_Nghost_max(theSim,R_DIR) ; ++i ){
            double rm = sim_FacePos(theSim,i-1,R_DIR);
            double rp = sim_FacePos(theSim,i,R_DIR);
            double dr = rp-rm;
            double r = .5*(rp+rm);
            for( j=0 ; j<sim_N_p(theSim,i) ; ++j ){
                int jm = j-1;
                if( j==0 ) jm = sim_N_p(theSim,i)-1;
                double w = .5*(theCells[k][i][j].wiph+theCells[k][i][jm].wiph);
                double dx = dr;
                double rdphi = .5*(rp+rm)*theCells[k][i][j].dphi;
                if( rdphi<dr ) {
                    dx = rdphi;
                }
                if( dx>dz ) {
                    dx = dz;
                }
                double a  = maxvel( theCells[k][i][j].prim , w , r ,theSim);
                double rho = theCells[k][i][j].prim[RHO]; 
                double Pp  = theCells[k][i][j].prim[PPP]; 


                double dt = sim_CFL(theSim)*dx/a;
                if( sim_EXPLICIT_VISCOSITY(theSim)>0.0 ){
                    double nu;
                    if (sim_VISC_CONST(theSim)==1){
                        nu = sim_EXPLICIT_VISCOSITY(theSim);
                    } else{
                        double tiph = theCells[k][i][j].tiph - 0.5*theCells[k][i][j].dphi;
                        if (sim_InitialDataType(theSim)==SHEAR){
                            double HoR = 0.1;
                            //nu = sim_EXPLICIT_VISCOSITY(theSim)*HoR*HoR*pow(fabs((r*cos(tiph))),1.5);
                            nu = sim_EXPLICIT_VISCOSITY(theSim)*sim_GAMMALAW(theSim)*Pp/rho*pow(fabs(r*cos(tiph)),2.0);
                            if (r*cos(tiph)>20.) nu=0.000000001;
                        } else{
                            double M0 = gravMass_M(theGravMasses,0);
                            double M1 = gravMass_M(theGravMasses,1);
                            double dist_bh0 = gravMass_dist(theGravMasses,0,r,tiph,0.);
                            double dist_bh1 = gravMass_dist(theGravMasses,1,r,tiph,0.);
                            double alpha = sim_EXPLICIT_VISCOSITY(theSim);
                            double eps = sim_G_EPS(theSim);
                            //nu = sim_EXPLICIT_VISCOSITY(theSim)*sim_GAMMALAW(theSim)*Pp/rho*pow(r,1.5);
                            nu = alpha*Pp/rho/sqrt(pow(dist_bh0*dist_bh0+eps*eps,-1.5)*M0+pow(dist_bh1*dist_bh1+eps*eps,-1.5)*M1);
                        }
                    }
                    double dt_visc = .25*dx*dx/nu;
                    dt = dt/( 1. + dt/dt_visc );
                }
                if( dt_m > dt ) {
                    dt_m = dt;
                }
            } 
        }
    }
    double dt2;
    MPI_Allreduce( &dt_m , &dt2 , 1 , MPI_DOUBLE , MPI_MIN , sim_comm );
    return( dt2 );

}
开发者ID:brianfarris,项目名称:Disco2,代码行数:69,代码来源:cell_mindt.c

示例6: gmx_check_thread_affinity_set

/* Check the process affinity mask and if it is found to be non-zero,
 * will honor it and disable mdrun internal affinity setting.
 * Note that this will only work on Linux as we use a GNU feature.
 */
void
gmx_check_thread_affinity_set(FILE            *fplog,
                              const t_commrec *cr,
                              gmx_hw_opt_t    *hw_opt,
                              int  gmx_unused  nthreads_hw_avail,
                              gmx_bool         bAfterOpenmpInit)
{
    GMX_RELEASE_ASSERT(hw_opt, "hw_opt must be a non-NULL pointer");

    if (!bAfterOpenmpInit)
    {
        /* Check for externally set OpenMP affinity and turn off internal
         * pinning if any is found. We need to do this check early to tell
         * thread-MPI whether it should do pinning when spawning threads.
         * TODO: the above no longer holds, we should move these checks later
         */
        if (hw_opt->thread_affinity != threadaffOFF)
        {
            char *message;
            if (!gmx_omp_check_thread_affinity(&message))
            {
                /* TODO: with -pin auto we should only warn when using all cores */
                md_print_warn(cr, fplog, "%s", message);
                sfree(message);
                hw_opt->thread_affinity = threadaffOFF;
            }
        }

        /* With thread-MPI this is needed as pinning might get turned off,
         * which needs to be known before starting thread-MPI.
         * With thread-MPI hw_opt is processed here on the master rank
         * and passed to the other ranks later, so we only do this on master.
         */
        if (!SIMMASTER(cr))
        {
            return;
        }
#ifndef GMX_THREAD_MPI
        return;
#endif
    }

#ifdef HAVE_SCHED_AFFINITY
    int       ret;
    cpu_set_t mask_current;

    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* internal affinity setting is off, don't bother checking process affinity */
        return;
    }

    CPU_ZERO(&mask_current);
    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
    {
        /* failed to query affinity mask, will just return */
        if (debug)
        {
            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
        }
        return;
    }

    /* Before proceeding with the actual check, make sure that the number of
     * detected CPUs is >= the CPUs in the current set.
     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
#ifdef CPU_COUNT
    if (nthreads_hw_avail < CPU_COUNT(&mask_current))
    {
        if (debug)
        {
            fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
                    nthreads_hw_avail, CPU_COUNT(&mask_current));
        }
        return;
    }
#endif /* CPU_COUNT */

    gmx_bool bAllSet = TRUE;
    for (int i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
    {
        bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
    }

#ifdef GMX_LIB_MPI
    gmx_bool  bAllSet_All;

    MPI_Allreduce(&bAllSet, &bAllSet_All, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
    bAllSet = bAllSet_All;
#endif

    if (!bAllSet)
    {
        if (hw_opt->thread_affinity == threadaffAUTO)
        {
            if (!bAfterOpenmpInit)
//.........这里部分代码省略.........
开发者ID:pjohansson,项目名称:gromacs,代码行数:101,代码来源:thread_affinity.cpp

示例7: hunt_problem


//.........这里部分代码省略.........
#endif
	}

	/*
	 * PRINT OUT VALUES OF EXTRA UNKNOWNS 
	 * FROM AUGMENTING CONDITIONS 
	 */

	if (nAC > 0) 
          {
	    
	    DPRINTF(stderr, "\n------------------------------\n");
	    DPRINTF(stderr, "Augmenting Conditions:    %4d\n", nAC);
	    DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC);

            for (iAC = 0; iAC < nAC; iAC++)
             {
              if (augc[iAC].Type == AC_USERBC)
               {
                DPRINTF(stderr, "\tAC[%4d] DF[%4d] = %10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
              else if (augc[iAC].Type == AC_USERMAT  ||
                       augc[iAC].Type == AC_FLUX_MAT )
               {
                DPRINTF(stderr, "\n MT[%4d] MP[%4d] = %10.6e\n",
                        augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]);
               }
              else if(augc[iAC].Type == AC_VOLUME)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
	      else if(augc[iAC].Type == AC_POSITION)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
               else if(augc[iAC].Type == AC_FLUX)
               {
                DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
             }
	  }

      /* Check element quality */
      good_mesh = element_quality(exo, x, ams[0]->proc_config);
开发者ID:gomatestbot,项目名称:goma,代码行数:67,代码来源:ac_hunt.c

示例8: main

int main( int argc, char **argv )
{

    MPI_Comm comm;
    int      *sbuf, *rbuf;
    int      rank, size;
    int      *sendcounts, *recvcounts, *rdispls, *sdispls;
    int      i, j, *p, err, toterr;
    
    MPI_Init( &argc, &argv );
    err = 0;
    
    comm = MPI_COMM_WORLD;

    /* Create the buffer */
    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    sbuf = (int *)malloc( size * size * sizeof(int) );
    rbuf = (int *)malloc( size * size * sizeof(int) );
    if (!sbuf || !rbuf) {
	fprintf( stderr, "Could not allocated buffers!\n" );
	MPI_Abort( comm, 1 );
    }
    
    /* Load up the buffers */
    for (i=0; i<size*size; i++) {
	sbuf[i] = i + 100*rank;
	rbuf[i] = -i;
    }

    /* Create and load the arguments to alltoallv */
    sendcounts = (int *)malloc( size * sizeof(int) );
    recvcounts = (int *)malloc( size * sizeof(int) );
    rdispls    = (int *)malloc( size * sizeof(int) );
    sdispls    = (int *)malloc( size * sizeof(int) );
    if (!sendcounts || !recvcounts || !rdispls || !sdispls) {
	fprintf( stderr, "Could not allocate arg items!\n" );
	MPI_Abort( comm, 1 );
    }
    for (i=0; i<size; i++) {
	sendcounts[i] = i;
	recvcounts[i] = rank;
	rdispls[i]    = i * rank;
	sdispls[i]    = (i * (i+1))/2;
    }
    MPI_Alltoallv( sbuf, sendcounts, sdispls, MPI_INT,
		   rbuf, recvcounts, rdispls, MPI_INT, comm );

    /* Check rbuf */
    for (i=0; i<size; i++) {
	p = rbuf + rdispls[i];
	for (j=0; j<rank; j++) {
	    if (p[j] != i * 100 + (rank*(rank+1))/2 + j) {
		fprintf( stderr, "[%d] got %d expected %d for %dth\n",
			 rank, p[j],(i*(i+1))/2 + j, j );
		err++;
	    }
	}
    }

    free( sdispls );
    free( rdispls );
    free( recvcounts );
    free( sendcounts );
    free( rbuf );
    free( sbuf );

    MPI_Allreduce( &err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    if (rank == 0) {
	if (toterr > 0) 
	    fprintf( stderr, "Test FAILED with %d errors\n", toterr );
	else
	    fprintf( stderr, "Test passed\n" );
    }
	
    MPI_Finalize();
    return 0;
}
开发者ID:MartinLidh,项目名称:tddc78,代码行数:79,代码来源:alltoallv.c

示例9: LU_decomp

void LU_decomp(struct problem *info, struct fmatrix *X, int *reorder, MPI_Datatype pivot_type, MPI_Op best_pivot_op)
{
	MPI_Request req_spiv, req_sa, req_sm;
	MPI_Status status;
	
	number_type *m = malloc(info->blksz * sizeof(*m));
	int diag;
	for (diag = 0; diag < info->n; diag++) {
		/* we do partial pivoting, so the proc with the pivot is on this column: */
		int pivot_h = diag / info->blksz;
		int r, c, i;
		
		double start_time = MPI_Wtime();
		double start_time2;

		struct pivot pivot = { -1, 0. };
		/* choose pivot across the column */
		if (info->coords[HDIM] == pivot_h) {
			/* column with pivot in block */
			int pivot_c = diag % info->blksz;
			/* Argo doesn't want aliasing in allreduce */
			struct pivot pivot_cand = { -1, 0. };
			for (i = 0; i < info->blksz; i++) {
				if (reorder[i] > diag && fabs(CELL(X, i, pivot_c)) > fabs(pivot_cand.value)) {
					pivot_cand.row = info->blksz*info->coords[VDIM] + i;
					pivot_cand.value = CELL(X, i, pivot_c);
				}
			}
			start_time2 = MPI_Wtime();
			MPI_Allreduce(&pivot_cand, &pivot, 1, pivot_type, best_pivot_op, info->vcomm);
			pivot_allr_time += MPI_Wtime() - start_time2;
		}
		/* broadcast pivot choice across row towards the right */
		start_time2 = MPI_Wtime();
		pipeline_right(info, pivot_h, &pivot, 1, pivot_type, 45, &req_spiv);
		pivot_bcast_time += MPI_Wtime() - start_time2;
		
		pivot_time += MPI_Wtime() - start_time;
		
		/* find rank of proc with pivot on the vertical communicator */
		int pivot_v = pivot.row / info->blksz;
		
		/* fill in reorder */
		if (info->coords[VDIM] == pivot_v) {
			reorder[pivot.row % info->blksz] = diag;
		}
		
		/* calculate and distribute the ms */
		for (r = 0; r < info->blksz; r++) {
			if (reorder[r] > diag) {
				if (info->coords[HDIM] == pivot_h) {
					int pivot_c = diag % info->blksz;
					m[r] = CELL(X, r, pivot_c) / pivot.value;
					CELL(X, r, pivot_c) = m[r];
				}
				/* broadcast m towards right */
				start_time = MPI_Wtime();
				pipeline_right(info, pivot_h, &m[r], 1, MPI_number_type, 64, &req_sm);
				m_bcast_time += MPI_Wtime() - start_time;
			}
		}
		
		/* distribute the pivot row and eliminate */
		int startc = 0;
		if (info->coords[HDIM] == pivot_h) startc = (diag+1) % info->blksz;
		if (info->coords[HDIM] < pivot_h) startc = info->blksz;
		/* elimination */
		for (c = startc; c < info->blksz; c++) {
			number_type a;
			if (info->coords[VDIM] == pivot_v) {
				a = CELL(X, pivot.row % info->blksz, c);
			}
			
			start_time = MPI_Wtime();
			
			int up = (info->coords[VDIM]+info->sqp-1)%info->sqp;
			int down = (info->coords[VDIM]+1)%info->sqp;
			if (info->coords[VDIM] != pivot_v) {
				MPI_Recv(&a, 1, MPI_number_type, up, 78, info->vcomm, &status);
			}
			if (down != pivot_v) {
				MPI_Isend(&a, 1, MPI_number_type, down, 78, info->vcomm, &req_sa);
			}
 			
			a_bcast_time += MPI_Wtime() - start_time;
			
			for (r = 0; r < info->blksz; r++) {
				if (reorder[r] > diag) {
					CELL(X, r,c) -= m[r]*a;
				}
			}
			if (down != pivot_v) MPI_Wait(&req_sa, &status);
		}
	}
}
开发者ID:cstroe,项目名称:PP-MM-A03,代码行数:95,代码来源:lu2d.c

示例10: DMPlexPreallocateOperator

PetscErrorCode DMPlexPreallocateOperator(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix)
{
  MPI_Comm           comm;
  MatType            mtype;
  PetscSF            sf, sfDof, sfAdj;
  PetscSection       leafSectionAdj, rootSectionAdj, sectionAdj, anchorSectionAdj;
  PetscInt           nroots, nleaves, l, p;
  const PetscInt    *leaves;
  const PetscSFNode *remotes;
  PetscInt           dim, pStart, pEnd, numDof, globalOffStart, globalOffEnd, numCols;
  PetscInt          *tmpAdj = NULL, *adj, *rootAdj, *anchorAdj = NULL, *cols, *remoteOffsets;
  PetscInt           adjSize;
  PetscLayout        rLayout;
  PetscInt           locRows, rStart, rEnd, r;
  PetscMPIInt        size;
  PetscBool          doCommLocal, doComm, debug = PETSC_FALSE, isSymBlock, isSymSeqBlock, isSymMPIBlock;
  PetscBool          useAnchors;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
  PetscValidHeaderSpecific(section, PETSC_SECTION_CLASSID, 3);
  PetscValidHeaderSpecific(sectionGlobal, PETSC_SECTION_CLASSID, 4);
  PetscValidHeaderSpecific(A, MAT_CLASSID, 9);
  if (dnz)  PetscValidPointer(dnz,5);
  if (onz)  PetscValidPointer(onz,6);
  if (dnzu) PetscValidPointer(dnzu,7);
  if (onzu) PetscValidPointer(onzu,8);
  ierr = PetscLogEventBegin(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL, "-dm_view_preallocation", &debug, NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  doCommLocal = (size > 1) && (nroots >= 0) ? PETSC_TRUE : PETSC_FALSE;
  ierr = MPI_Allreduce(&doCommLocal, &doComm, 1, MPIU_BOOL, MPI_LAND, comm);CHKERRQ(ierr);
  /* Create dof SF based on point SF */
  if (debug) {
    ierr = PetscPrintf(comm, "Input Section for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(section, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscPrintf(comm, "Input Global Section for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(sectionGlobal, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscPrintf(comm, "Input SF for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSFView(sf, NULL);CHKERRQ(ierr);
  }
  ierr = PetscSFCreateRemoteOffsets(sf, section, section, &remoteOffsets);CHKERRQ(ierr);
  ierr = PetscSFCreateSectionSF(sf, section, remoteOffsets, section, &sfDof);CHKERRQ(ierr);
  if (debug) {
    ierr = PetscPrintf(comm, "Dof SF for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSFView(sfDof, NULL);CHKERRQ(ierr);
  }
  /* Create section for dof adjacency (dof ==> # adj dof) */
  ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr);
  ierr = PetscSectionGetStorageSize(section, &numDof);CHKERRQ(ierr);
  ierr = PetscSectionCreate(comm, &leafSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(leafSectionAdj, 0, numDof);CHKERRQ(ierr);
  ierr = PetscSectionCreate(comm, &rootSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(rootSectionAdj, 0, numDof);CHKERRQ(ierr);
  /*   Fill in the ghost dofs on the interface */
  ierr = PetscSFGetGraph(sf, NULL, &nleaves, &leaves, &remotes);CHKERRQ(ierr);
  /* use constraints in finding adjacency in this routine */
  ierr = DMPlexGetAdjacencyUseAnchors(dm,&useAnchors);CHKERRQ(ierr);
  ierr = DMPlexSetAdjacencyUseAnchors(dm,PETSC_TRUE);CHKERRQ(ierr);

  /*
   section        - maps points to (# dofs, local dofs)
   sectionGlobal  - maps points to (# dofs, global dofs)
   leafSectionAdj - maps unowned local dofs to # adj dofs
   rootSectionAdj - maps   owned local dofs to # adj dofs
   adj            - adj global dofs indexed by leafSectionAdj
   rootAdj        - adj global dofs indexed by rootSectionAdj
   sf    - describes shared points across procs
   sfDof - describes shared dofs across procs
   sfAdj - describes shared adjacent dofs across procs
   ** The bootstrapping process involves six rounds with similar structure of visiting neighbors of each point.
  (0). If there are point-to-point constraints, add the adjacencies of constrained points to anchors in anchorAdj
       (This is done in DMPlexComputeAnchorAdjacencies())
    1. Visit unowned points on interface, count adjacencies placing in leafSectionAdj
       Reduce those counts to rootSectionAdj (now redundantly counting some interface points)
    2. Visit owned points on interface, count adjacencies placing in rootSectionAdj
       Create sfAdj connecting rootSectionAdj and leafSectionAdj
    3. Visit unowned points on interface, write adjacencies to adj
       Gather adj to rootAdj (note that there is redundancy in rootAdj when multiple procs find the same adjacencies)
    4. Visit owned points on interface, write adjacencies to rootAdj
       Remove redundancy in rootAdj
   ** The last two traversals use transitive closure
    5. Visit all owned points in the subdomain, count dofs for each point (sectionAdj)
       Allocate memory addressed by sectionAdj (cols)
    6. Visit all owned points in the subdomain, insert dof adjacencies into cols
   ** Knowing all the column adjacencies, check ownership and sum into dnz and onz
  */

  ierr = DMPlexComputeAnchorAdjacencies(dm,section,sectionGlobal,&anchorSectionAdj,&anchorAdj);CHKERRQ(ierr);

  for (l = 0; l < nleaves; ++l) {
    PetscInt dof, off, d, q, anDof;
    PetscInt p = leaves[l], numAdj = PETSC_DETERMINE;

    if ((p < pStart) || (p >= pEnd)) continue;
//.........这里部分代码省略.........
开发者ID:PeiLiu90,项目名称:petsc,代码行数:101,代码来源:plexpreallocate.c

示例11: MPI_Comm_rank

struct fft_plan_3d *fft_3d_create_plan(
       MPI_Comm comm, int nfast, int nmid, int nslow,
       int in_ilo, int in_ihi, int in_jlo, int in_jhi,
       int in_klo, int in_khi,
       int out_ilo, int out_ihi, int out_jlo, int out_jhi,
       int out_klo, int out_khi,
       int scaled, int permute, int *nbuf)

{
  struct fft_plan_3d *plan;
  int me,nprocs;
  int /*i,num,*/flag,remapflag;//,fftflag;
  int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
  int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
  int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
  int out_size,first_size,second_size,third_size,copy_size,scratch_size;
  int np1,np2,ip1,ip2;
  //  int list[50];

/* system specific variables */

#ifdef FFT_INTEL
  FFT_DATA dummy;
#endif
#ifdef FFT_T3E
  FFT_DATA dummy[5];
  int isign,isys;
  double scalef;
#endif

/* query MPI info */

  MPI_Comm_rank(comm,&me);
  MPI_Comm_size(comm,&nprocs);

/* compute division of procs in 2 dimensions not on-processor */

  bifactor(nprocs,&np1,&np2);
  ip1 = me % np1;
  ip2 = me/np1;

/* allocate memory for plan data struct */

  plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d));
  if (plan == NULL) return NULL;

/* remap from initial distribution to layout needed for 1st set of 1d FFTs
   not needed if all procs own entire fast axis initially
   first indices = distribution after 1st set of FFTs */

  if (in_ilo == 0 && in_ihi == nfast-1)
    flag = 0;
  else
    flag = 1;

  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);

  if (remapflag == 0) {
    first_ilo = in_ilo;
    first_ihi = in_ihi;
    first_jlo = in_jlo;
    first_jhi = in_jhi;
    first_klo = in_klo;
    first_khi = in_khi;
    plan->pre_plan = NULL;
  }
  else {
    first_ilo = 0;
    first_ihi = nfast - 1;
    first_jlo = ip1*nmid/np1;
    first_jhi = (ip1+1)*nmid/np1 - 1;
    first_klo = ip2*nslow/np2;
    first_khi = (ip2+1)*nslow/np2 - 1;
    plan->pre_plan =
      remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
			   first_ilo,first_ihi,first_jlo,first_jhi,
			   first_klo,first_khi,
			   FFT_PRECISION,0,0,2);
    if (plan->pre_plan == NULL) return NULL;
  }

/* 1d FFTs along fast axis */

  plan->length1 = nfast;
  plan->total1 = nfast * (first_jhi-first_jlo+1) * (first_khi-first_klo+1);

/* remap from 1st to 2nd FFT
   choose which axis is split over np1 vs np2 to minimize communication
   second indices = distribution after 2nd set of FFTs */

  second_ilo = ip1*nfast/np1;
  second_ihi = (ip1+1)*nfast/np1 - 1;
  second_jlo = 0;
  second_jhi = nmid - 1;
  second_klo = ip2*nslow/np2;
  second_khi = (ip2+1)*nslow/np2 - 1;
  plan->mid1_plan =
      remap_3d_create_plan(comm,
			   first_ilo,first_ihi,first_jlo,first_jhi,
			   first_klo,first_khi,
//.........这里部分代码省略.........
开发者ID:darien0,项目名称:Mara,代码行数:101,代码来源:fft_3d.c

示例12: run_bfs


//.........这里部分代码省略.........
  int64_t global_queue_summary_size = g_global_queue_summary_size;
  int64_t global_queue_size = g_global_queue_size;

#define SWIZZLE_VERTEX(c) (((int64_t)(VERTEX_OWNER(c)) << lg_local_queue_size) | (int64_t)(VERTEX_LOCAL(c)))
#if 0
  int64_t* restrict column_swizzled = (int64_t*)xmalloc(nlocaledges * sizeof(int64_t));
  {
    size_t i;
    for (i = 0; i < nlocaledges; ++i) {
      int64_t c = column[i];
      column_swizzled[i] = SWIZZLE_VERTEX(c);
    }
  }
#endif

  unsigned long* restrict in_queue = g_in_queue;
  memset(in_queue, 0, global_queue_size * sizeof(unsigned long));
  unsigned long* restrict in_queue_summary = g_in_queue_summary;
  memset(in_queue_summary, 0, global_queue_summary_size * sizeof(unsigned long));
  unsigned long* restrict out_queue = g_out_queue;
  unsigned long* restrict out_queue_summary = g_out_queue_summary;
  unsigned long* restrict visited = g_visited;
  memset(visited, 0, local_queue_size * sizeof(unsigned long));

#define SET_IN(v) do {int64_t vs = SWIZZLE_VERTEX(v); size_t word_idx = vs / ulong_bits; int bit_idx = vs % ulong_bits; unsigned long mask = (1UL << bit_idx); in_queue_summary[word_idx / ulong_bits] |= (1UL << (word_idx % ulong_bits)); in_queue[word_idx] |= mask;} while (0)
#define TEST_IN(vs) (((in_queue_summary[vs / ulong_bits / ulong_bits] & (1UL << ((vs / ulong_bits) % ulong_bits))) != 0) && ((in_queue[vs / ulong_bits] & (1UL << (vs % ulong_bits))) != 0))
#define TEST_VISITED_LOCAL(v) ((visited[(v) / ulong_bits] & (1UL << ((v) % ulong_bits))) != 0)
// #define SET_VISITED_LOCAL(v) do {size_t word_idx = (v) / ulong_bits; int bit_idx = (v) % ulong_bits; unsigned long mask = (1UL << bit_idx); __sync_fetch_and_or(&visited[word_idx], mask); __sync_fetch_and_or(&out_queue[word_idx], mask);} while (0)
#define SET_VISITED_LOCAL(v) do {size_t word_idx = (v) / ulong_bits; int bit_idx = (v) % ulong_bits; unsigned long mask = (1UL << bit_idx); visited[word_idx] |= mask; out_queue[word_idx] |= mask;} while (0)

  SET_IN(root);
  {ptrdiff_t i; _Pragma("omp parallel for schedule(static)") for (i = 0; i < nlocalverts; ++i) pred[i] = -1;}
  if (VERTEX_OWNER(root) == rank) {
    pred[VERTEX_LOCAL(root)] = root;
    SET_VISITED_LOCAL(VERTEX_LOCAL(root));
  }
  uint16_t cur_level = 0;
  while (1) {
    ++cur_level;
#if 0
    if (rank == 0) fprintf(stderr, "BFS level %" PRIu16 "\n", cur_level);
#endif
    memset(out_queue, 0, (nlocalverts + ulong_bits - 1) / ulong_bits * sizeof(unsigned long));
    // memset(out_queue_summary, 0, (nlocalverts + ulong_bits_squared - 1) / ulong_bits_squared * sizeof(unsigned long));
    ptrdiff_t i, ii;
#if 0
#pragma omp parallel for schedule(static)
    for (i = 0; i < global_queue_summary_size; ++i) {
      unsigned long val = 0UL;
      int j;
      unsigned long mask = 1UL;
      for (j = 0; j < ulong_bits; ++j, mask <<= 1) {
        if (in_queue[i * ulong_bits + j]) val |= mask;
      }
      in_queue_summary[i] = val;
    }
#endif
    unsigned long not_done = 0;
#pragma omp parallel for schedule(static) reduction(|:not_done)
    for (ii = 0; ii < nlocalverts; ii += ulong_bits) {
      size_t i, i_end = ii + ulong_bits;
      if (i_end > nlocalverts) i_end = nlocalverts;
      for (i = ii; i < i_end; ++i) {
        if (!TEST_VISITED_LOCAL(i)) {
          size_t j, j_end = rowstarts[i + 1];
          for (j = rowstarts[i]; j < j_end; ++j) {
            int64_t v1 = column[j];
            int64_t v1_swizzled = SWIZZLE_VERTEX(v1);
            if (TEST_IN(v1_swizzled)) {
              pred[i] = (v1 & INT64_C(0xFFFFFFFFFFFF)) | ((int64_t)cur_level << 48);
              not_done |= 1;
              SET_VISITED_LOCAL(i);
              break;
            }
          }
        }
      }
    }
#if 1
#pragma omp parallel for schedule(static)
    for (i = 0; i < local_queue_summary_size; ++i) {
      unsigned long val = 0UL;
      int j;
      unsigned long mask = 1UL;
      for (j = 0; j < ulong_bits; ++j, mask <<= 1) {
        unsigned long full_val = out_queue[i * ulong_bits + j];
        visited[i * ulong_bits + j] |= full_val;
        if (full_val) val |= mask;
      }
      out_queue_summary[i] = val;
      // not_done |= val;
    }
#endif
    MPI_Allreduce(MPI_IN_PLACE, &not_done, 1, MPI_UNSIGNED_LONG, MPI_BOR, MPI_COMM_WORLD);
    if (not_done == 0) break;
    MPI_Allgather(out_queue, local_queue_size, MPI_UNSIGNED_LONG, in_queue, local_queue_size, MPI_UNSIGNED_LONG, MPI_COMM_WORLD);
    MPI_Allgather(out_queue_summary, local_queue_summary_size, MPI_UNSIGNED_LONG, in_queue_summary, local_queue_summary_size, MPI_UNSIGNED_LONG, MPI_COMM_WORLD);
  }
  deallocate_memory();
}
开发者ID:ShiZhan,项目名称:graph500-win32,代码行数:101,代码来源:bfs_replicated.c

示例13: star_density


//.........这里部分代码省略.........
	}

      /* do local particles and prepare export list */
      for(nexport = 0; i >= 0; i = NextActiveParticle[i])
	{
	  if(P[i].Type == 4)
	    {
	      if(star_density_evaluate(i, 0, &nexport, Send_count) < 0)
		break;
	    }
	}

#ifdef MYSORT
      mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare);
#else
      qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare);
#endif

      MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD);

      for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
	{
	  Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask];
	  nimport += Recv_count[j];

	  if(j > 0)
	    {
	      Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
	      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
	    }
	}

      StarDataGet = (struct stardata_in *) mymalloc(nimport * sizeof(struct stardata_in));
      StarDataIn = (struct stardata_in *) mymalloc(nexport * sizeof(struct stardata_in));

      /* prepare particle data for export */
      for(j = 0; j < nexport; j++)
	{
	  place = DataIndexTable[j].Index;

	  StarDataIn[j].Pos[0] = P[place].Pos[0];
	  StarDataIn[j].Pos[1] = P[place].Pos[1];
	  StarDataIn[j].Pos[2] = P[place].Pos[2];
	  StarDataIn[j].Hsml = PPP[place].Hsml;
	  StarDataIn[j].Density = P[place].DensAroundStar;
	  StarDataIn[j].Mass = P[place].Mass;

	  memcpy(StarDataIn[j].NodeList,
		 DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int));

	}

      /* exchange particle data */
      for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
	{
	  sendTask = ThisTask;
	  recvTask = ThisTask ^ ngrp;

	  if(recvTask < NTask)
	    {
	      if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
		{
		  /* get the particles */
		  MPI_Sendrecv(&StarDataIn[Send_offset[recvTask]],
			       Send_count[recvTask] * sizeof(struct stardata_in), MPI_BYTE,
			       recvTask, TAG_DENS_A,
			       &StarDataGet[Recv_offset[recvTask]],
			       Recv_count[recvTask] * sizeof(struct stardata_in), MPI_BYTE,
			       recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		}
	    }
	}

      myfree(StarDataIn);


      /* now do the particles that were sent to us */

      for(j = 0; j < nimport; j++)
	star_density_evaluate(j, 1, &dummy, &dummy);

      /* check whether this is the last iteration */
      if(i < 0)
	ndone_flag = 1;
      else
	ndone_flag = 0;

      MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

      myfree(StarDataGet);
    }
  while(ndone < NTask);

  myfree(DataNodeList);
  myfree(DataIndexTable);
  myfree(Ngblist);

#endif //for EDDINGTON_TENSOR_STARS

}
开发者ID:surftour,项目名称:astrotools,代码行数:101,代码来源:get_Je.c

示例14: heavyEdgeMatchAgg


//.........这里部分代码省略.........
            n /= 2;
            ierr = PetscMalloc((2 + 2*n + n*CHUNCK_SIZE)*sizeof(PetscInt) + 2*sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
            /* PetscMalloc4(2+2*n,PetscInt,sbuffs1[nSend1],n*CHUNCK_SIZE,PetscInt,rbuffs1[nSend1],1,MPI_Request,rreqs2[nSend1],1,MPI_Request,sreqs2[nSend1]); */
            /* save requests */
            sbuffs1[nSend1] = sbuff;
            request = (MPI_Request*)sbuff;
            sbuff = pt = (PetscInt*)(request+1);
            *pt++ = n; *pt++ = rank;

            ierr = PetscCDGetHeadPos(deleted_list,proc,&pos);CHKERRQ(ierr);
            while(pos){
              PetscInt lid0, cpid, gid;
              ierr = PetscLLNGetID(pos, &cpid);CHKERRQ(ierr);
              gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              ierr = PetscLLNGetID(pos, &lid0);CHKERRQ(ierr);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              *pt++ = gid; *pt++ = lid0;
            }
            /* send request tag1 [n, proc, n*[gid1,lid0] ] */
            ierr = MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, wcomm, request);CHKERRQ(ierr);
            /* post recieve */
            request = (MPI_Request*)pt;
            rreqs2[nSend1] = request; /* cache recv request */
            pt = (PetscInt*)(request+1);
            ierr = MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr);
            /* clear list */
            ierr = PetscCDRemoveAll(deleted_list, proc);CHKERRQ(ierr);
            nSend1++;
          }
        }
        /* recieve requests, send response, clear lists */
        kk = nactive_edges;
        ierr = MPI_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,wcomm);CHKERRQ(ierr); /* not correct syncronization and global */
        nSend2 = 0;
        while(1){
#define BF_SZ 10000
          PetscMPIInt flag,count;
          PetscInt    rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3;
          MPI_Request *request;
          ierr = MPI_Iprobe(MPI_ANY_SOURCE, tag1, wcomm, &flag, &status);CHKERRQ(ierr);
          if (!flag) break;
          ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr);
          if (count > BF_SZ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for recieve: %d",count);
          proc = status.MPI_SOURCE;
          /* recieve request tag1 [n, proc, n*[gid1,lid0] ] */
          ierr = MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, wcomm, &status);CHKERRQ(ierr);
          /* count sends */
          pt = rbuff; count3 = count2 = 0;
          n = *pt++; kk = *pt++;           assert(kk==proc);
          while(n--){
            PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++;  assert(lid1>=0 && lid1<nloc);
            if (lid_matched[lid1]){
              PetscPrintf(PETSC_COMM_SELF,"\t *** [%d]%s %d) ERROR recieved deleted gid %d, deleted by (lid) %d from proc %d\n",rank,__FUNCT__,sub_it,gid1,kk);
              PetscSleep(1);
            }
            assert(!lid_matched[lid1]);
            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            ierr = PetscCDSizeAt(agg_llists, lid1, &kk);CHKERRQ(ierr);
            count2 += kk + 2;
            count3++; /* number of verts requested (n) */
          }
          assert(pt-rbuff==count);
          if (count2 > count3*CHUNCK_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Irecv will be too small: %d",count2);
          /* send tag2 *[lid0, n, n*[gid] ] */
          ierr = PetscMalloc(count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
开发者ID:erdc-cm,项目名称:petsc-dev,代码行数:67,代码来源:hem.c

示例15: read_exoII_file


//.........这里部分代码省略.........

  /* allocate temporary storage for items needing global reduction.   */
  /* nemesis does not store most element block info about blocks for  */
  /* which the processor owns no elements.                            */
  /* we, however, use this information in migration, so we need to    */
  /* accumulate it for all element blocks.    kdd 2/2001              */

  if (mesh->num_el_blks > 0) {
    nnodes = (int *) malloc(2 * mesh->num_el_blks * sizeof(int));
    if (!nnodes) {
      Gen_Error(0, "fatal: insufficient memory");
      return 0;
    }
    etypes = nnodes + mesh->num_el_blks;
  }

  /* get the element block information */
  for (i = 0; i < mesh->num_el_blks; i++) {

    /* allocate space for name */
    mesh->eb_names[i] = (char *) malloc((MAX_STR_LENGTH+1) * sizeof(char));
    if (!mesh->eb_names[i]) {
      Gen_Error(0, "fatal: insufficient memory");
      return 0;
    }

    if (ex_get_elem_block(pexoid, mesh->eb_ids[i], mesh->eb_names[i],
                          &(mesh->eb_cnts[i]), &(nnodes[i]),
                          &(mesh->eb_nattrs[i])) < 0) {
      Gen_Error(0, "fatal: Error returned from ex_get_elem_block");
      return 0;
    }

    if (mesh->eb_cnts[i] > 0) {
      if ((etypes[i] =  (int) get_elem_type(mesh->eb_names[i],
                                            nnodes[i],
                                            mesh->num_dims)) == E_TYPE_ERROR) {
        Gen_Error(0, "fatal: could not get element type");
        return 0;
      }
    }
    else etypes[i] = (int) NULL_EL;
  }

  /* Perform reduction on necessary fields of element blocks.  kdd 2/2001 */
  MPI_Allreduce(nnodes, mesh->eb_nnodes, mesh->num_el_blks, MPI_INT, MPI_MAX, 
                MPI_COMM_WORLD);
  MPI_Allreduce(etypes, mesh->eb_etypes, mesh->num_el_blks, MPI_INT, MPI_MIN, 
                MPI_COMM_WORLD);
  for (i = 0; i < mesh->num_el_blks; i++) {
    strcpy(mesh->eb_names[i], get_elem_name(mesh->eb_etypes[i]));
  }
  free(nnodes);

  /*
   * allocate memory for the elements
   * allocate a little extra for element migration latter
   */
  mesh->elem_array_len = mesh->num_elems + 5;
  mesh->elements = (ELEM_INFO_PTR) malloc (mesh->elem_array_len 
                                         * sizeof(ELEM_INFO));
  if (!(mesh->elements)) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }

  /*
   * intialize all of the element structs as unused by
   * setting the globalID to -1
   */
  for (i = 0; i < mesh->elem_array_len; i++) 
    initialize_element(&(mesh->elements[i]));

  /* read the information for the individual elements */
  if (!read_elem_info(pexoid, Proc, prob, mesh)) {
    Gen_Error(0, "fatal: Error returned from read_elem_info");
    return 0;
  }

  /* read the communication information */
  if (!read_comm_map_info(pexoid, Proc, prob, mesh)) {
    Gen_Error(0, "fatal: Error returned from read_comm_map_info");
    return 0;
  }

  /* Close the parallel file */
  if(ex_close (pexoid) < 0) {
    Gen_Error(0, "fatal: Error returned from ex_close");
    return 0;
  }

  /* print out the distributed mesh */
  if (Debug_Driver > 3)
    print_distributed_mesh(Proc, Num_Proc, mesh);

  DEBUG_TRACE_END(Proc, yo);
  return 1;

#endif /* ZOLTAN_NEMESIS */
}
开发者ID:xunzhang,项目名称:ESMF_Regridding,代码行数:101,代码来源:dr_exoII_io.c


注:本文中的MPI_Allreduce函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。