当前位置: 首页>>代码示例>>C++>>正文


C++ MPI_Wtime函数代码示例

本文整理汇总了C++中MPI_Wtime函数的典型用法代码示例。如果您正苦于以下问题:C++ MPI_Wtime函数的具体用法?C++ MPI_Wtime怎么用?C++ MPI_Wtime使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了MPI_Wtime函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors, total_errors;
    double **buf_bvec, **src_bvec, *src_buf;
    int count[2], src_stride, trg_stride, stride_level;
    double scaling, time;

    MPI_Init(&argc, &argv);
    ARMCI_Init();

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    buf_bvec = (double **) malloc(sizeof(double *) * nranks);
    src_bvec = (double **) malloc(sizeof(double *) * nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    ARMCI_Malloc((void **) buf_bvec, bufsize);
    ARMCI_Malloc((void **) src_bvec, bufsize);
    src_buf = src_bvec[rank];

    if (rank == 0)
        printf("ARMCI Strided DLA Accumulate Test:\n");

    ARMCI_Access_begin(buf_bvec[rank]);
    ARMCI_Access_begin(src_buf);

    for (i = 0; i < XDIM*YDIM; i++) {
        *(buf_bvec[rank] + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    ARMCI_Access_end(src_buf);
    ARMCI_Access_end(buf_bvec[rank]);

    scaling = 2.0;

    src_stride = XDIM * sizeof(double);
    trg_stride = XDIM * sizeof(double);
    stride_level = 1;

    count[1] = YDIM;
    count[0] = XDIM * sizeof(double);

    ARMCI_Barrier();
    time = MPI_Wtime();

    peer = (rank+1) % nranks;

    for (i = 0; i < ITERATIONS; i++) {

      ARMCI_AccS(ARMCI_ACC_DBL,
          (void *) &scaling,
          src_buf,
          &src_stride,
          (void *) buf_bvec[peer],
          &trg_stride,
          count,
          stride_level,
          peer);
    }

    ARMCI_Barrier();
    time = MPI_Wtime() - time;

    if (rank == 0) printf("Time: %f sec\n", time);

    ARMCI_Access_begin(buf_bvec[rank]);
    for (i = errors = 0; i < XDIM; i++) {
      for (j = 0; j < YDIM; j++) {
        const double actual   = *(buf_bvec[rank] + i + j*XDIM);
        const double expected = (1.0 + rank) + scaling * (1.0 + ((rank+nranks-1)%nranks)) * (ITERATIONS);
        if (actual - expected > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    ARMCI_Access_end(buf_bvec[rank]);

    MPI_Allreduce(&errors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    ARMCI_Free((void *) buf_bvec[rank]);
    ARMCI_Free((void *) src_bvec[rank]);

    free(buf_bvec);
    free(src_bvec);

    ARMCI_Finalize();
    MPI_Finalize();

    if (total_errors == 0) {
      if (rank == 0) printf("Success.\n");
      return 0;
    } else {
      if (rank == 0) printf("Fail.\n");
      return 1;
    }
}
开发者ID:OngOngoing,项目名称:219351_homework,代码行数:100,代码来源:test_accs_dla.c

示例2: master

unsigned int master(unsigned int base_dim, unsigned int max_fact, 
		    unsigned int** exponents, mpz_t * As,
		    int comm_size, unsigned int print_fact) {

  unsigned int fact_count = 0;

  MPI_Status status;

  int count;
  int source;
  
  /* Buffer per ricevere gli esponenti */
  unsigned int* buffer_exp;
  /* Buffer per ricevere (A + s) */
  unsigned char buffer_As[BUFFER_DIM];
  init_vector(& buffer_exp, base_dim);

  double t1 = MPI_Wtime();
  double t2;

  int fact_per_rank[comm_size];
  for(int i = 0; i < comm_size; ++i)
    fact_per_rank[i] = 0;

  while(fact_count < max_fact + base_dim) {
    /* Ricevo il vettore di esponenti */
    MPI_Recv(buffer_exp, base_dim, MPI_UNSIGNED,
	     MPI_ANY_SOURCE, ROW_TAG, 
	     MPI_COMM_WORLD, &status);
    source = status.MPI_SOURCE;
    
    for(unsigned int i = 0; i < base_dim; ++i) 
      set_matrix(exponents, fact_count, i, buffer_exp[i]);
    
    /* Ricevo l'mpz contenente (A + s) */
    MPI_Recv(buffer_As, BUFFER_DIM, MPI_UNSIGNED_CHAR, source, 
	     AS_TAG, MPI_COMM_WORLD, &status);
    MPI_Get_count(&status, MPI_UNSIGNED_CHAR, &count);
    mpz_import(As[fact_count], count, 1, 1, 1, 0, buffer_As);
    
    ++fact_count;
    ++fact_per_rank[source];

    if(fact_count % print_fact == 0) {
      t2 = MPI_Wtime() - t1;
      
      printf("#%d/%d in %.6f seconds\n", fact_count, max_fact + base_dim, t2);
    }
  }
  
  /* Spedisco '1' agli slave per indicare la terminazione */
  char stop_signal = '1';
  for(unsigned int i = 1; i < comm_size; ++i)
    MPI_Send(&stop_signal, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD);
  
  printf("#Sending stop_signal\n");

  printf("#Fattorizzazioni per ranks:\n#");
  for(int i = 1; i < comm_size; ++i)
    printf("%d \t", i);
  printf("\n#");
  for(int i = 1; i < comm_size; ++i)
    printf("%d \t", fact_per_rank[i]);
  printf("\n");
 
  return fact_count;
}
开发者ID:UnProgrammatore,项目名称:CCQ,代码行数:67,代码来源:quadratic_sieve.c

示例3: estimate_reciprocal

/* Estimate the reciprocal space part error of the SPME Ewald sum. */
static real estimate_reciprocal(
        t_inputinfo       *info,
        rvec               x[], /* array of particles */
        real               q[], /* array of charges */
        int                nr,  /* number of charges = size of the charge array */
        FILE  gmx_unused  *fp_out,
        gmx_bool           bVerbose,
        unsigned int       seed,     /* The seed for the random number generator */
        int               *nsamples, /* Return the number of samples used if Monte Carlo
                                      * algorithm is used for self energy error estimate */
        t_commrec         *cr)
{
    real     e_rec   = 0; /* reciprocal error estimate */
    real     e_rec1  = 0; /* Error estimate term 1*/
    real     e_rec2  = 0; /* Error estimate term 2*/
    real     e_rec3  = 0; /* Error estimate term 3 */
    real     e_rec3x = 0; /* part of Error estimate term 3 in x */
    real     e_rec3y = 0; /* part of Error estimate term 3 in y */
    real     e_rec3z = 0; /* part of Error estimate term 3 in z */
    int      i, ci;
    int      nx, ny, nz;  /* grid coordinates */
    real     q2_all = 0;  /* sum of squared charges */
    rvec     gridpx;      /* reciprocal grid point in x direction*/
    rvec     gridpxy;     /* reciprocal grid point in x and y direction*/
    rvec     gridp;       /* complete reciprocal grid point in 3 directions*/
    rvec     tmpvec;      /* template to create points from basis vectors */
    rvec     tmpvec2;     /* template to create points from basis vectors */
    real     coeff  = 0;  /* variable to compute coefficients of the error estimate */
    real     coeff2 = 0;  /* variable to compute coefficients of the error estimate */
    real     tmp    = 0;  /* variables to compute different factors from vectors */
    real     tmp1   = 0;
    real     tmp2   = 0;
    gmx_bool bFraction;

    /* Random number generator */
    gmx_rng_t rng     = NULL;
    int      *numbers = NULL;

    /* Index variables for parallel work distribution */
    int startglobal, stopglobal;
    int startlocal, stoplocal;
    int x_per_core;
    int xtot;

#ifdef TAKETIME
    double t0 = 0.0;
    double t1 = 0.0;
#endif

    rng = gmx_rng_init(seed);

    clear_rvec(gridpx);
    clear_rvec(gridpxy);
    clear_rvec(gridp);
    clear_rvec(tmpvec);
    clear_rvec(tmpvec2);

    for (i = 0; i < nr; i++)
    {
        q2_all += q[i]*q[i];
    }

    /* Calculate indices for work distribution */
    startglobal = -info->nkx[0]/2;
    stopglobal  = info->nkx[0]/2;
    xtot        = stopglobal*2+1;
    if (PAR(cr))
    {
        x_per_core = static_cast<int>(ceil(static_cast<real>(xtot) / cr->nnodes));
        startlocal = startglobal + x_per_core*cr->nodeid;
        stoplocal  = startlocal + x_per_core -1;
        if (stoplocal > stopglobal)
        {
            stoplocal = stopglobal;
        }
    }
    else
    {
        startlocal = startglobal;
        stoplocal  = stopglobal;
        x_per_core = xtot;
    }
/*
   #ifdef GMX_LIB_MPI
    MPI_Barrier(MPI_COMM_WORLD);
   #endif
 */

#ifdef GMX_LIB_MPI
#ifdef TAKETIME
    if (MASTER(cr))
    {
        t0 = MPI_Wtime();
    }
#endif
#endif

    if (MASTER(cr))
    {
//.........这里部分代码省略.........
开发者ID:Marcello-Sega,项目名称:gromacs,代码行数:101,代码来源:gmx_pme_error.cpp

示例4: main

int main( int argc, char *argv[] )
{
    int errs = 0;
    int *ranks;
    int *ranksout;
    MPI_Group gworld, grev, gself;
    MPI_Comm  comm;
    MPI_Comm  commrev;
    int rank, size, i;
    double start, end, time1, time2;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

    ranks    = malloc(size*sizeof(int));
    ranksout = malloc(size*sizeof(int));
    if (!ranks || !ranksout) {
        fprintf(stderr, "out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* generate a comm with the rank order reversed */
    MPI_Comm_split(comm, 0, (size-rank-1), &commrev);
    MPI_Comm_group(commrev, &grev);
    MPI_Comm_group(MPI_COMM_SELF, &gself);
    MPI_Comm_group(comm, &gworld);

    /* sanity check correctness first */
    for (i=0; i < size; i++) {
        ranks[i] = i;
        ranksout[i] = -1;
    }
    MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
    for (i=0; i < size; i++) {
        if (ranksout[i] != (size-i-1)) {
            if (rank == 0)
                printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i, (size-rank-1), ranksout[i]);
            ++errs;
        }
    }
    MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
    for (i=0; i < size; i++) {
        int expected = (i == (size-rank-1) ? 0 : MPI_UNDEFINED);
        if (ranksout[i] != expected) {
            if (rank == 0)
                printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected, ranksout[i]);
            ++errs;
        }
    }

    /* now compare relative performance */

    /* we needs lots of procs to get a group large enough to have meaningful
     * numbers.  On most testing machines this means that we're oversubscribing
     * cores in a big way, which might perturb the timing results.  So we make
     * sure everyone started up and then everyone but rank 0 goes to sleep to
     * let rank 0 do all the timings. */
    MPI_Barrier(comm);

    if (rank != 0) {
        sleep(10);
    }
    else /* rank==0 */ {
        sleep(1); /* try to avoid timing while everyone else is making syscalls */

        MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /*throwaway iter*/
        start = MPI_Wtime();
        for (i = 0; i < NUM_LOOPS; ++i) {
            MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
        }
        end = MPI_Wtime();
        time1 = end - start;

        MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /*throwaway iter*/
        start = MPI_Wtime();
        for (i = 0; i < NUM_LOOPS; ++i) {
            MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
        }
        end = MPI_Wtime();
        time2 = end - start;

        /* complain if the "gworld" time exceeds 2x the "gself" time */
        if (fabs(time1 - time2) > (2.00 * time2)) {
            printf("too much difference in MPI_Group_translate_ranks performance:\n");
            printf("time1=%f time2=%f\n", time1, time2);
            printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1-time2)/time2));
            if (time1 < time2) {
                printf("also, (time1<time2) is surprising...\n");
            }
            ++errs;
        }
    }

    free(ranks);
    free(ranksout);

//.........这里部分代码省略.........
开发者ID:OngOngoing,项目名称:219351_homework,代码行数:101,代码来源:gtranksperf.c

示例5: index_jd

void index_jd(int * nr_of_eigenvalues_ov, 
	      const int max_iterations, const double precision_ov, char *conf_filename, 
	      const int nstore, const int method){
  
  complex *eval;
  spinor  *eigenvectors_ov, *eigenvectors_ov_;
  spinor  *lowvectors, *lowvectors_;
  int i=0 , k=0, returncode=0, index = 0, determined = 0, signed_index = 0;
  char filename[120];
  FILE * ifs = NULL;
  matrix_mult Operator[2];
  double absdifference;
  const int N2 = VOLUMEPLUSRAND;

#ifdef MPI
  double atime, etime;
#endif
  double lowestmodes[20];
  int intsign, max_iter, first_blocksize = 1;
  int * idx = NULL;

  /**********************
   * For Jacobi-Davidson 
   **********************/
  int verbosity = 3, converged = 0, blocksize = 1, blockwise = 0;
  int solver_it_max = 50, j_max, j_min, v0dim = 0;
  double * eigenvalues_ov = NULL;
  double decay_min = 1.7, threshold_min = 1.e-3, prec;

  WRITER *writer=NULL;
  spinor *s;
  double sqnorm;
  paramsPropagatorFormat *propagatorFormat = NULL;
  
  double ap_eps_sq;
  int switch_on_adaptive_precision = 0;
  double ov_s = 0;

  /**********************                                                 
   * General variables                                                    
   **********************/

  eval= calloc((*nr_of_eigenvalues_ov),sizeof(complex));
  shift = 0.0;

  //  ov_s = 0.5*(1./g_kappa - 8.) - 1.;
  ap_eps_sq = precision_ov*precision_ov; 

#if (defined SSE || defined SSE2 )
  eigenvectors_ov_= calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov)+1, sizeof(spinor)); 
  eigenvectors_ov = (spinor *)(((unsigned long int)(eigenvectors_ov_)+ALIGN_BASE)&~ALIGN_BASE);
  lowvectors_ = calloc(2*first_blocksize*VOLUMEPLUSRAND+1, sizeof(spinor));
  lowvectors = (spinor *)(((unsigned long int)(lowvectors_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  //  eigenvectors_ov_ = calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov), sizeof(spinor));
  eigenvectors_ov_ = calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov), sizeof(spinor));
  lowvectors_ = calloc(2*first_blocksize*VOLUMEPLUSRAND, sizeof(spinor));
  eigenvectors_ov = eigenvectors_ov_;
  lowvectors = lowvectors_;
#endif

  //  idx = malloc((*nr_of_eigenvalues_ov)*sizeof(int));
  idx = malloc((*nr_of_eigenvalues_ov)*sizeof(int));
  Operator[0]=&Dov_proj_plus;
  Operator[1]=&Dov_proj_minus;
  
  if(g_proc_id == g_stdio_proc){
    printf("Computing first the two lowest modes in the positive and negative chirality sector, respectively\n");
    if(switch_on_adaptive_precision == 1) {
      printf("We have switched on adaptive precision with ap_eps_sq = %e!\n", ap_eps_sq);
    }
    printf("We have set the mass to zero within this computation!\n");
    fflush(stdout);
  }

  prec = precision_ov; 
  j_min = 8; j_max = 16;
  max_iter = 70;

#ifdef MPI
  atime = MPI_Wtime();
#endif

  v0dim = first_blocksize;
  blocksize = v0dim;
  for(intsign = 0; intsign < 2; intsign++){
    converged = 0;
    if(g_proc_id == g_stdio_proc){
      printf("%s chirality sector: \n", intsign ? "negative" : "positive");
      fflush(stdout);
    }
    if(max_iter == 70){
      /********************************************************************
       *
       * We need random start spinor fields, but they must be half zero,
       * that's why we apply the Projektor once
       *
       ********************************************************************/
      for(i = 0; i < first_blocksize; i++) {
	random_spinor_field(&lowvectors[(first_blocksize*intsign+i)*VOLUMEPLUSRAND],N2,0);
//.........这里部分代码省略.........
开发者ID:palao,项目名称:tmLQCD,代码行数:101,代码来源:index_jd.c

示例6: main

int main(int argc, char** argv) {
  const int PING_PONG_LIMIT = 10;
  double t_start, t_end, t_total, tLoop, t_tick;
  double MPI_Wtime(void);
  int tests, maxTest = 10, i;
  int k[9] = {1,4,16,64,256,1024,4096,16384,65536};

  // Initialize the MPI environment
  MPI_Init(NULL, NULL);
  // Find out rank, size
  int world_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  int world_size;
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);

  // We are assuming at least 2 processes for this task
  if (world_size != 2) {
    fprintf(stderr, "World size must be two for %s\n", argv[0]);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  // Get the name of the processor
  char processor_name[MPI_MAX_PROCESSOR_NAME];
  int name_len;
  MPI_Get_processor_name(processor_name, &name_len);

  // Initialize the outer loop for 2^k where k = 2,4,6,8,10,12,14,16,18
  for (int p = 0; p < sizeof(k)/sizeof(k[0]); p++) {
  int A[k[p]]; // Vector of integers

  // Populate A with ints (4 bytes each)
  for (i = 0; i < k[p]; i++) {
    A[i] = i;
  }

  // This is for loop timing
  tLoop = 1.0e10;
  for (tests = 0; tests < maxTest; tests++) { // begin timing
  t_start = MPI_Wtime();
  // t_tick = MPI_Wtick();
  int ping_pong_count = 0;
  int partner_rank = (world_rank + 1) % 2;
  while (ping_pong_count < PING_PONG_LIMIT) {
    if (world_rank == ping_pong_count % 2) {
      // Increment the ping pong count before you send it
      ping_pong_count++;
      MPI_Send(&ping_pong_count, k[p], MPI_INT, partner_rank, 0, MPI_COMM_WORLD);
      printf("World rank %d sent and incremented ping_pong_count %d to partner rank %d\n",
             world_rank, ping_pong_count, partner_rank);
             printf("The size of A is: %lu\n", sizeof(A));
             printf("P is: %lu\n", sizeof(k));
    } else {
      MPI_Recv(&ping_pong_count, k[p], MPI_INT, partner_rank, 0, MPI_COMM_WORLD,
               MPI_STATUS_IGNORE);
      printf("World rank %d received ping_pong_count %d from partner rank %d\n",
             world_rank, ping_pong_count, partner_rank);
    }
  }
}
t_end = MPI_Wtime();
t_total = t_end - t_start;
if (t_total < tLoop) tLoop = t_total;


  printf("That took %f seconds\n", tLoop);
  printf("Number of processes in MPI_COMM_WORLD: %d\n", world_size);
  printf("Name of processor %s\n", processor_name);
  printf("The size of A is: %lu\n", sizeof(A));
  printf("P is: %lu\n", sizeof(k));
  } // ends the outer loop for m

    MPI_Finalize();


  // return 0;
}
开发者ID:harterj,项目名称:mth699,代码行数:76,代码来源:pingPong.c

示例7: main

int32_t main(int32_t argc, char *argv[])
{
	int32_t rankID = 0, nRanks = 1;
	char rankName[MAX_STRING_LENGTH];
	gethostname(rankName, MAX_STRING_LENGTH);
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rankID);
	MPI_Comm_size(MPI_COMM_WORLD, &nRanks);

	char *target_path = argv[1], *target = NULL;
	int64_t target_length = 0;
	target_length = get_filesize(target_path);
	if(target_length < 0)
	{
		printf("\nError: Cannot read target file [ %s ]\n", target_path);
		MPI_Finalize();
		exit(-1);
	}
	if(rankID == 0)
	{
		printf("--------------------------------------------------\n");
		printf("- Read target: [ %s ]\n", target_path);
		target = (char*)malloc(sizeof(char)*target_length);
		double read_time = 0;
		read_time -= MPI_Wtime();
		read_targetfile(target, target_length, target_path);
		read_time += MPI_Wtime();
		printf("- Target length: %ld (read time: %lf secs)\n", target_length, read_time);
		printf("--------------------------------------------------\n");
	}

	char *pattern = argv[2];
	int64_t pattern_length = 0;
	if(pattern == NULL)
	{
		printf("\nError: Cannot read pattern [ %s ]\n", pattern);
		free(target);
		MPI_Finalize();
		exit(-1);
	}
	pattern_length = strlen(pattern);
	if(rankID == 0)
	{
		printf("- Pattern: [ %s ]\n", pattern);
		printf("- Pattern length: %ld\n", pattern_length);
		printf("--------------------------------------------------\n");
	}
	int32_t* BCS = (int32_t*)malloc(ALPHABET_LEN * sizeof(int32_t));
	int32_t* GSS = (int32_t*)malloc(pattern_length * sizeof(int32_t));;
	make_BCS(BCS, pattern, pattern_length);
	make_GSS(GSS, pattern, pattern_length);

	int64_t found_count = 0;
	double search_time = 0;
	if(rankID == 0)
	{
		search_time -= MPI_Wtime();
	}
// DO NOT EDIT UPPER CODE //
//==============================================================================================================//

	int64_t mpi_found_count = 0;
	char* chunk = NULL;
	if(argv[3] == NULL)
	{
		printf("\nError: Check chunk size [ %s ]\n", argv[3]);
		free(target);
		free(BCS);
		free(GSS);
		MPI_Finalize();
		exit(-1);
	}

	if(rankID == 0) printf("\ttarget_length = %ld\n", target_length);
	int64_t nChunksPerRank = atoi(argv[3]);
	//각 rank에 몇개의 문자열 덩어리를 줄것인가 결정
	int64_t nTotalChunks = (nRanks-1) * nChunksPerRank; 
	//rank 0은 검사하지않으므로 nRanks - 1
	//문자열은 총 nTotalChunks개로 쪼개진다.
	if(rankID == 0) printf("\tnTotalChunks = %ld\n", nTotalChunks);
	
	int64_t overlap_length = (pattern_length - 1) * (nTotalChunks - 1);
	//쪼개진 덩어리 중 마지막 1개는 겹치는 부분이 없으므로 nTotalChunks - 1
	//문자열은 최악의 경우 pattern의 첫글자가 하나의 코어 
	//나머지 글자가 하나의 코어에 분배되는 경우이므로 pattern_length - 1

	if(rankID == 0) printf("\toverlap_length = %ld\n", overlap_length);
	int64_t quotient = (target_length + overlap_length) / nTotalChunks; 
	//각 코어당 최악의 경우를 방지하기 위해 덩어리마다 pattern_length - 1을 추가
	//즉 target_length + overlap_length가 되고 이를 정해진 nChunksPerRank씩
	//각 코어에 분배하기 위하여 nTotalChunks로 나누어 준다.

	if(rankID == 0) printf("\tquotient = %ld\n", quotient);
	int64_t remainder = (target_length + overlap_length) - (quotient * nTotalChunks);
	//나누는 경우에 나누어 떨어지지 않는 경우가 있으므로 나머지를 따로 처리해준다.
	
	if(rankID == 0) printf("\tremainder = %ld\n\n", remainder);

	int64_t chunkID = 0;
	int64_t* chunk_length = (int64_t*)malloc((nTotalChunks+1)*sizeof(int64_t)); 
//.........这里部分代码省略.........
开发者ID:hakhyunkim123,项目名称:SuperComputingProblem,代码行数:101,代码来源:Main_mpi.c

示例8: main

int main(int argc, char* argv[]) {
	/****************************************************/
	/*   Parameter declaration                          */
	/****************************************************/
	// Index params
	int i,j,k = 0;
	
	// Directory path params
	char input_path[1024] = "/projects/isgs/lidar/champaign/las";
	char scratch_path[1024] = "/gpfs_scratch/ncasler/data/tmp";
	char out_path[1024] = "";
	char tmp_path[1024] = "";

	// MPI Params
	int world_size, world_rank, mpi_err;
	MPI_Comm world_comm = MPI_COMM_WORLD;
	MPI_Info info = MPI_INFO_NULL;
	MPI_Status status;
	MPI_Request request;
	MPI_Init(&argc, &argv);
	MPI_Comm_size(world_comm, &world_size);
	MPI_Comm_rank(world_comm, &world_rank);
	MPI_Errhandler_set(world_comm, MPI_ERRORS_RETURN);
	double starttime, endtime;
	// Set memory limit for grid allocations
	long buffer_lim = 8000000000;
	size_t max_size = buffer_lim / sizeof(Pixel);
	// File specific parameters
	int n_files, file_off, file_blk, file_end = 0;
	char ext[5] = ".las";

	// Metadata
	double g_mins[3] = {DBL_MAX,DBL_MAX,DBL_MAX}; // Global min coord
	double g_maxs[3] = {-DBL_MAX,-DBL_MAX,-DBL_MAX}; // Global max coord
	double l_mins[3] = {DBL_MAX,DBL_MAX,DBL_MAX}; // Local min coord
	double l_maxs[3] = {-DBL_MAX,-DBL_MAX,-DBL_MAX}; // Local max coord

	// File specific params
	FileCollection *g_files = NULL; // Global file list
	int g_n_files = 0; // Global file count
	int l_n_files = 0; // Local file count
	BBox* g_file_bbox = NULL; // Global array of file bboxes
	BBox* l_file_bbox = NULL; // Local array of file bboxes
	// Grid specific params
	struct Point* origin = new Point();
	DType datatype = DT_Float32;
	struct Grid* g_grid = NULL; // Global grid
	int g_cols = 0; // Global grid column count
	int g_rows = 0; // Global grid row count
	
	//Specify resolution > should be parameterized in prod
	double res = 5.0f;
	// Block scheme params
	struct BlockScheme* blk_scheme = NULL;
	int *l_blks = NULL; // Local block array
	int *g_blks = NULL; // Global block array
	int l_n_blks = 0; // Local block count
	int g_n_blks = 0; // Global block count
	int *blk_n_files = NULL; // Array holding block specific file counts
	// Las specific params
	las_file las;
	long long g_n_pts = 0; // Global point count
	long long l_n_pts = 0; // Local point count

	/**************************************************/
	/*       Begin first file scan                    */
	/**************************************************/
	starttime = MPI_Wtime();
	g_files = new FileCollection(&input_path[0], &ext[0]);
	// Check the number of available LAS files
	g_n_files = g_files->countFiles();
	g_file_bbox = new BBox[g_n_files];
	l_file_bbox = new BBox[l_n_files];
	// Create tif output dir
	sprintf(&tmp_path[0], "%s/blocks", scratch_path);
	printf("[%i] Using tmp dir: %s\n", tmp_path);
	struct stat st = {0};
	if (stat(tmp_path, &st) == -1) 
		mkdir(tmp_path, 0700);
	// Set file Block size
	file_blk = ceil((float)g_n_files /(float)world_size);
	file_off = file_blk * world_rank;
	
	
	
	if (file_off + file_blk > g_n_files)
		file_end = g_n_files - file_off;
	else
		file_end = file_off + file_blk;

	// Read subset of file paths from dir
	l_n_files = g_files->getMetadata(file_off, file_end);

	// Scan metadata from files
	for (i = file_off; i < file_end; i++) {
		las.open(g_files->fileList[i]);
		l_n_pts = l_n_pts + (long long) las.points_count();
		compareMin(l_mins, las.minimums());
		compareMax(l_maxs, las.maximums());
		double *tmp_min = las.minimums();
//.........这里部分代码省略.........
开发者ID:jwend,项目名称:p_points2grid,代码行数:101,代码来源:blockLas.cpp

示例9: main

int main (int argc,char **argv)
{
   double time,time_seq, time_par;
   int rank, size;

   char *tracefile;

   MPI_Init(&argc,&argv);
   
   tracefile = getenv("TVTRACE");
   if( tracefile != NULL ){
      printf( "tv tracefile=%s\n", tracefile );
      MPI_Pcontrol(TRACEFILES, NULL, tracefile, 0);      
   }
   else{
      MPI_Pcontrol(TRACEFILES, NULL, "trace", 0);
   }
   MPI_Pcontrol(TRACELEVEL, 1, 1, 1);
   MPI_Pcontrol(TRACENODE, 1000000, 1, 1);


   MPI_Comm_rank (MPI_COMM_WORLD,&rank);
   MPI_Comm_size (MPI_COMM_WORLD,&size);


   if( !rank ){
      double *a,*b,*c, *c0;
      int i,i1,j,k;
      int ann;
      MPI_Status *st;
      MPI_Request *rq,rq1;
      rq = (MPI_Request*) malloc( (size-1)*sizeof(MPI_Request) );
      st = (MPI_Status*) malloc( (size-1)*sizeof(MPI_Status) );


      ann=an/size+((an%size)?1:0);
//      printf("[%d]ann=%d\n", rank, ann );

      a=(double*) malloc(am*an*sizeof(double));
      b=(double*) malloc(am*bm*sizeof(double));
      c=(double*) malloc(an*bm*sizeof(double));
      for(i=0;i<am*an;i++)
        a[i]=rand()%301;
      for(i=0;i<am*bm;i++)
        b[i]=rand()%251;
      printf( "Data ready [%d]\n", rank );
     
      c0 = (double*)malloc(an*bm*sizeof(double));

     
      time = MPI_Wtime();  
      for (i=0; i<an; i++)
         for (j=0; j<bm; j++)
         {
            double s = 0.0;
            for (k=0; k<am; k++)
              s+= a[i*am+k]*b[k*bm+j];
            c0[i*bm+j] = s;
      } 
      time = MPI_Wtime() - time;
      printf("Time seq[%d] = %lf\n", rank, time );
      time_seq = time;

      MPI_Barrier( MPI_COMM_WORLD );
      time=MPI_Wtime();

      MPI_Bcast( b, am*bm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      printf( "Data Bcast [%d]\n", rank );

      for( i1=0, j=1; j<size; j++, i1+=ann*am ){
         printf( "Data to Send [%d] %016x[%4d] =>> %d\n", rank, a+i1, i1, j );
         MPI_Isend( a+i1, ann*am, MPI_DOUBLE, j, 101, MPI_COMM_WORLD, &rq1 );
         MPI_Request_free( &rq1 ); 
         printf( "Data Send [%d] =>> %d\n", rank, j );
      }
      printf( "Data Send [%d]\n", rank );
      
      MPI_Isend( a+i1, 1, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &rq1 );
      MPI_Request_free( &rq1 ); 
      
      printf( "Data Send [%d] =>> %d\n", rank, j );


      for(i=(i1/am);i<an;i++)
        for(j=0;j<bm;j++){
          double s=0.0;
          for(k=0;k<am;k++)
             s+=a[i*am+k]*b[k*bm+j];
          c[i*bm+j]=s;
        }

      printf( "Job done  [%d]\n", rank );
      for( i1=0, j=1; j<size; j++, i1+=(ann*bm) ){
         printf( "Data to Recv [%d] %016x[%4d] =>> %d\n", rank, c+i1, i1/bm, j );
         MPI_Irecv( c+i1, ann*am, MPI_DOUBLE, j, 102, MPI_COMM_WORLD, rq+(j-1) );
      }         
      MPI_Waitall( size-1, rq, st );
      
      time=MPI_Wtime()-time;
      printf("time [%d]=%12.8lf\n",rank,time);
//.........这里部分代码省略.........
开发者ID:arkuzmin,项目名称:ppp,代码行数:101,代码来源:10.c

示例10: main

int main(int argc, char *argv[])
{

const int PNUM = 2; //number of processes
const int MSIZE = 4; //matrix size

int rank,value,size;
int namelen;

double time1,time2;

srand(time(NULL));

MPI_Init(&argc, &argv);

time1 = MPI_Wtime();



char processor_name[MPI_MAX_PROCESSOR_NAME];


MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Get_processor_name(processor_name,&namelen);
MPI_Status status;

int A[MSIZE][MSIZE];
int B[MSIZE];
int C[MSIZE];

if(rank==0){

		int a=0;
        for(int i=0;i<MSIZE;i++)
		{
            for(int j=0;j<MSIZE;j++)
			{
				A[i][j]=++a;
				B[i]=2;
				C[i]=0;
			}
		}
}


	MPI_Bcast(&A,MSIZE*MSIZE,MPI_INT,0,MPI_COMM_WORLD);
	MPI_Bcast(&B,MSIZE,MPI_INT,0,MPI_COMM_WORLD);
	
	int F=0;
    for(int i=rank*MSIZE/PNUM;i<(rank*MSIZE/PNUM)+2;i++)
	{
		 for(int j=0;j<MSIZE;j++)
		{
			 F+=A[i][j]*B[i];
			
        }
		C[i]=F;
		F=0;
	}

	if(rank==1){
	time2 = MPI_Wtime();
	std::cout<<"\nElasped Time: "<<time2-time1;
	MPI_Send(&C[rank*MSIZE/PNUM],MSIZE/2,MPI_INT,0,0,MPI_COMM_WORLD);

	}
	if(rank==0)
	{
    for(int i=1;i<PNUM;i++)  MPI_Recv(&C[i*MSIZE/PNUM],MSIZE/2,MPI_INT,i,0,MPI_COMM_WORLD,&status);
	

        for(int i=0;i<MSIZE;i++)
		{
        
		std::cout<<C[i]<<" ";
		std::cout<<std::endl;
		}


	}

MPI_Finalize();

return 0;
}
开发者ID:iqnev,项目名称:MPI,代码行数:86,代码来源:mpisum-v2.cpp

示例11: save_to_file

void node_server::start_run(bool scf) {
	integer output_cnt;

	if (!hydro_on) {
		save_to_file("X.chk");
		diagnostics();
		return;
	}
	printf("%e %e\n", grid::get_A(), grid::get_B());
	if (scf) {
		run_scf();
		set_pivot();
		printf("Adjusting velocities:\n");
		auto diag = diagnostics();
		space_vector dv;
		dv[XDIM] = -diag.grid_sum[sx_i] / diag.grid_sum[rho_i];
		dv[YDIM] = -diag.grid_sum[sy_i] / diag.grid_sum[rho_i];
		dv[ZDIM] = -diag.grid_sum[sz_i] / diag.grid_sum[rho_i];
		this->velocity_inc(dv);
		save_to_file("scf.chk");
	}

	printf("Starting...\n");
	solve_gravity(false);
	int ngrids = regrid(me.get_gid(), false);

	real output_dt = opts.output_dt;

	printf("OMEGA = %e, output_dt = %e\n", grid::get_omega(), output_dt);
	real& t = current_time;
	integer step_num = 0;

	auto fut_ptr = me.get_ptr();
	node_server* root_ptr = fut_ptr.get();

	output_cnt = root_ptr->get_rotation_count() / output_dt;
	hpx::future<void> diag_fut = hpx::make_ready_future();
	hpx::future<void> step_fut = hpx::make_ready_future();
	profiler_output (stdout);
	real bench_start, bench_stop;
	while (current_time < opts.stop_time) {
		auto time_start = std::chrono::high_resolution_clock::now();
		if (root_ptr->get_rotation_count() / output_dt >= output_cnt) {
			//	if (step_num != 0) {

			char* fname;

			if (asprintf(&fname, "X.%i.chk", int(output_cnt))) {
			}
			save_to_file(fname);
			free(fname);
			if (asprintf(&fname, "X.%i.silo", int(output_cnt))) {
			}
			output(fname, output_cnt, false);
			free(fname);
			//	SYSTEM(std::string("cp *.dat ./dat_back/\n"));
			//	}
			++output_cnt;

		}
		if (step_num == 0) {
			bench_start = MPI_Wtime();
		}

		//	break;
		auto ts_fut = hpx::async([=]() {return timestep_driver();});
		step();
		real dt = ts_fut.get();
		real omega_dot = 0.0, omega = 0.0, theta = 0.0, theta_dot = 0.0;
		if (opts.problem == DWD) {
			auto diags = diagnostics();

			const real dx = diags.secondary_com[XDIM] - diags.primary_com[XDIM];
			const real dy = diags.secondary_com[YDIM] - diags.primary_com[YDIM];
			const real dx_dot = diags.secondary_com_dot[XDIM] - diags.primary_com_dot[XDIM];
			const real dy_dot = diags.secondary_com_dot[YDIM] - diags.primary_com_dot[YDIM];
			theta = atan2(dy, dx);
			omega = grid::get_omega();
			theta_dot = (dy_dot * dx - dx_dot * dy) / (dx * dx + dy * dy) - omega;
			const real w0 = grid::get_omega() * 100.0;
			const real theta_dot_dot = (2.0 * w0 * theta_dot + w0 * w0 * theta);
			omega_dot = theta_dot_dot;
			omega += omega_dot * dt;
//		omega_dot += theta_dot_dot*dt;
			grid::set_omega(omega);
		}
		double time_elapsed = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - time_start).count();
		step_fut.get();
		step_fut =
			hpx::async(
				[=]() {
					FILE* fp = fopen( "step.dat", "at");
					fprintf(fp, "%i %e %e %e %e %e %e %e %e %i\n", int(step_num), double(t), double(dt), time_elapsed, rotational_time, theta, theta_dot, omega, omega_dot, int(ngrids));
					fclose(fp);
				});
		printf("%i %e %e %e %e %e %e %e %e\n", int(step_num), double(t), double(dt), time_elapsed, rotational_time, theta, theta_dot, omega, omega_dot);

//		t += dt;
		++step_num;

//.........这里部分代码省略.........
开发者ID:dmarce1,项目名称:octotiger,代码行数:101,代码来源:node_server_actions_3.cpp

示例12: main

int main(int argc, char* argv[])
{
	int i,n, length;
	int *inmsg,*outmsg;
	int mypid,mysize;
	int rc;
	int sint;
	double start, finish,time;
	double bw;
	MPI_Status status;

	sint=sizeof(int);

	rc=MPI_Init(&argc,&argv);
	rc|=MPI_Comm_size(MPI_COMM_WORLD,&mysize);
	rc|=MPI_Comm_rank(MPI_COMM_WORLD,&mypid);

	if(mysize!=2)
	{
		fprintf(stderr,"now we only test message passing time between 2 processes\n");
		exit(1);
	}

	length=1;

	inmsg=(int *) malloc(MAXLENGTH*sizeof(int));
	outmsg=(int *) malloc(MAXLENGTH*sizeof(int));

	//synchronize the process, so the MPI_Barrier will return only if all the processes all called it
    
    rc=MPI_Barrier(MPI_COMM_WORLD);

    if(mypid==0)
    {
        for(i=1;i<=4;i++)
        {
        	time=0.00000000000000;
        	printf("\n\nDoing time test for:\n");
        	printf("Message length=%d int value\n",length);
        	printf("Message size =%d Bytes\n",sint*length);
        	printf("Number of Reps=%d\n",REPS);

        	start=MPI_Wtime();
        	for(n=1;n<=REPS;n++)
        	{
        		rc=MPI_Send(&outmsg[0],length,MPI_INT,1,0,MPI_COMM_WORLD);
        		rc= MPI_Recv(&inmsg[0],length,MPI_INT,1,0,MPI_COMM_WORLD,&status);

        	}
        	finish=MPI_Wtime();

        	//calculate the average time and bandwidth

        	time=finish-start;
        	printf("***delivering message avg= %f Sec for lengthSize=%d\n",time/REPS,length);
            bw=2.0*REPS*sint*length/time;

            printf("*** bandwidth=%f Byte/Sec\n",bw);

            //increase the length 
            length=100*length;
        }    
    }
    //task 1 processing now
    if(mypid==1)
    {
      for(i=1;i<=4;i++)
      {
      	for(n=1;n<=REPS;n++)
      	{
      		rc=MPI_Recv(&inmsg[0],length,MPI_INT,0,0,MPI_COMM_WORLD,&status);
      		rc = MPI_Send(&outmsg[0],length,MPI_INT,0,0,MPI_COMM_WORLD);

      	}
      	length=100*length;
      }
    }


  MPI_Finalize();
  exit(0);
}
开发者ID:YanLiang1102,项目名称:Parallel-Programming-Yan,代码行数:82,代码来源:messageTransferTimeTestingMPI.c

示例13: main

int main(int argc, char **argv) 
{
  int i, j, n, N=50, b=1, p;
  double *A, *v, *w;

  /* Iniciar MPI */
  MPI_Init(&argc,&argv);
  MPI_Comm_size(MPI_COMM_WORLD,&np);
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
	
  /* Extracción de argumentos */
  if (argc > 1) { /* El usuario ha indicado el valor de n */
     if ((N = atoi(argv[1])) < 0) N = 50;
  }
  if (argc > 2) { /* El usuario ha indicado el valor de b */
     if ((b = atoi(argv[2])) < 0) b = 1;
  }
  if (b>=N/np) { /* Valor de b incorrecto */
    printf("Error: ancho de banda excesivo, N/np=%d, b=%d\n", N/np, b);
    MPI_Finalize();
    exit(1);
  }

  p = 0;
  for (i=0; i<np; i++) {
    n = N/np;
    if (i<N%np) n++;
    if (i==me) break;
    p += n;
  }
  printf("[Proc %d] tamaño local: n=%d, fila inicial: p=%d\n", me, n, p);

  /* Reserva de memoria */
  A = (double*)calloc(N*n,sizeof(double));
  v = (double*)calloc(n+b,sizeof(double)); //tamaño solo aumenta por b
  w = (double*)calloc(n+b,sizeof(double)); //tamaño solo aumenta por b

  /* Inicializar datos */
  for(i=0; i<n; i++) A[i*N+(i+p)] = 2*b;
  for(i=0; i<n; i++) {
    for(j=0; j<N; j++) {
      if (i+p<j && abs(i+p-j)<=b) A[i*N+j] = -1.0;
    }
  }
  for(i=0; i<n; i++) v[i] = 1.0;

  /* Multiplicación de matrices */
  
  double t1,t2;
  
  MPI_Barrier(MPI_COMM_WORLD);
  t1 = MPI_Wtime();
  parmatvec(N,n,p,b,A,v,w);
  
  MPI_Barrier(MPI_COMM_WORLD);
  t2 = MPI_Wtime();  
  
  if (me==0) printf("Tiempo transcurrido: %f s.\n", t2-t1);
  
  /* Imprimir solución */
  for(i=0; i<n; i++) printf("w[%d] = %g\n", i+p, w[i]); //Solo imprimimos w sin la parte de arriba.

  free(A);
  free(v);
  free(w);

  MPI_Finalize();
  return 0;
}
开发者ID:MihaiLupoiu,项目名称:TPP,代码行数:69,代码来源:matvecmpi.c

示例14: main

int main(int argc, char *argv[]) {

	int i, n, nlocal;
	int numprocs, myrank;
	MPI_File f; char* filename = "input/8";
	MPI_Status status;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

	if(MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
		fprintf(stderr, "Cannot open file %s\n", filename);
		MPI_Abort(MPI_COMM_WORLD, FILE_NOT_FOUND);
		MPI_Finalize();
		return 1;
	}
	MPI_File_seek(f, 0, MPI_SEEK_SET);
	MPI_File_read(f, &n, 1, MPI_INT, &status);
	nlocal = n/numprocs; if(myrank == numprocs - 1) nlocal = nlocal + n % numprocs;

	int *a = (int *)malloc(nlocal * n * sizeof(int));
	MPI_File_seek(f, (myrank * nlocal * n + 1) * sizeof(int), MPI_SEEK_SET);
	MPI_File_read(f, &a[0], nlocal * n, MPI_INT, &status);
	MPI_File_close(&f);

// 	int j;
//	if(myrank == 3) {
//		for(i = 0; i < nlocal; i++) {
//			for(j = 0; j < n; j++) {
//				printf("%d ", a[i * n +j]);
//			}
//			printf("\n");
//		}
//	}

	double start = MPI_Wtime();
	floyd_all_pairs_sp_1d(n, nlocal, a);
	double stop = MPI_Wtime();
	printf("[%d] Completed in %1.3f seconds\n", myrank, stop-start);

//	if(myrank == 3) {
//		for(i = 0; i < nlocal; i++) {
//			for(j = 0; j < n; j++) {
//				printf("%d ", a[i * n +j]);
//			}
//			printf("\n");
//		}
//	}
	if(MPI_File_open(MPI_COMM_WORLD, "output/8", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
			printf("Cannot open file %s\n", "out");
			MPI_Abort(MPI_COMM_WORLD, FILE_NOT_FOUND);
			MPI_Finalize();
			return 1;
	}
	if(myrank == 0) {
		MPI_File_seek(f, 0, MPI_SEEK_SET);
		MPI_File_write(f, &n, 1, MPI_INT, &status);
	}
	for(i = 0; i < nlocal; i++) {
		MPI_File_seek(f, (myrank * nlocal * n + 1) * sizeof(int), MPI_SEEK_SET);
		MPI_File_write(f, &a[0], nlocal * n, MPI_INT, &status);
	}

	MPI_File_close(&f);
	free(a);

	MPI_Finalize();
	return 0;
}
开发者ID:dungtn,项目名称:mpi-floyd,代码行数:70,代码来源:floyd1d.c

示例15: main


//.........这里部分代码省略.........

		gsl_matrix_memcpy(L, AtA);
		gsl_matrix_add(L, rhoI);
		gsl_linalg_cholesky_decomp(L);

		gsl_matrix_free(AtA);
		gsl_matrix_free(rhoI);
	} else {
		/* L = chol(I + 1/rho*AAt) */
		L = gsl_matrix_calloc(m,m);

		gsl_matrix *AAt = gsl_matrix_calloc(m,m);
		gsl_blas_dsyrk(CblasLower, CblasNoTrans, 1, A, 0, AAt);
		gsl_matrix_scale(AAt, 1/rho);

		gsl_matrix *eye = gsl_matrix_calloc(m,m);
		gsl_matrix_set_identity(eye);

		gsl_matrix_memcpy(L, AAt);
		gsl_matrix_add(L, eye);
		gsl_linalg_cholesky_decomp(L);

		gsl_matrix_free(AAt);
		gsl_matrix_free(eye);
	}

	/* Main ADMM solver loop */

	int iter = 0;
	if (rank == 0) {
		printf("%3s %10s %10s %10s %10s %10s\n", "#", "r norm", "eps_pri", "s norm", "eps_dual", "objective");		
	}
    double startAllTime, endAllTime;
	startAllTime = MPI_Wtime();
	while (iter < MAX_ITER) {

        /* u-update: u = u + x - z */
		gsl_vector_sub(x, z);
		gsl_vector_add(u, x);

		/* x-update: x = (A^T A + rho I) \ (A^T b + rho z - y) */
		gsl_vector_memcpy(q, z);
		gsl_vector_sub(q, u);
		gsl_vector_scale(q, rho);
		gsl_vector_add(q, Atb);   // q = A^T b + rho*(z - u)


        double tmp, tmpq;
		gsl_blas_ddot(x, x, &tmp);
		gsl_blas_ddot(q, q, &tmpq);

		if (skinny) {
			/* x = U \ (L \ q) */
			gsl_linalg_cholesky_solve(L, q, x);
		} else {
			/* x = q/rho - 1/rho^2 * A^T * (U \ (L \ (A*q))) */
			gsl_blas_dgemv(CblasNoTrans, 1, A, q, 0, Aq);
			gsl_linalg_cholesky_solve(L, Aq, p);
			gsl_blas_dgemv(CblasTrans, 1, A, p, 0, x); /* now x = A^T * (U \ (L \ (A*q)) */
			gsl_vector_scale(x, -1/(rho*rho));
			gsl_vector_scale(q, 1/rho);
			gsl_vector_add(x, q);
		}

		/*
		 * Message-passing: compute the global sum over all processors of the
开发者ID:uta-smile,项目名称:MPI_LASSO_Example,代码行数:67,代码来源:lasso.c


注:本文中的MPI_Wtime函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。