This article collects typical usage examples of the MPI_Wtime function in C++. If you have been wondering how exactly MPI_Wtime is used in C++, how to call it, or what real-world uses look like, the hand-picked code examples below should help.
The following shows 15 code examples of MPI_Wtime, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ code examples.
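Before the individual examples, the basic pattern is worth stating once: MPI_Wtime returns wall-clock time in seconds, so a code region is timed by taking the difference of two calls, usually bracketed by MPI_Barrier so that all ranks measure the same region. Below is a minimal sketch of that pattern; the variable names and the placeholder work section are illustrative and not taken from any of the examples that follow.
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Barrier(MPI_COMM_WORLD);          /* start all ranks together */
    double t0 = MPI_Wtime();              /* wall-clock time, in seconds */

    /* ... the code to be timed goes here ... */

    MPI_Barrier(MPI_COMM_WORLD);          /* wait until every rank is done */
    double elapsed = MPI_Wtime() - t0;

    if (rank == 0)
        printf("Elapsed: %f s (timer resolution: %g s)\n", elapsed, MPI_Wtick());

    MPI_Finalize();
    return 0;
}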
Example 1: main
int main(int argc, char **argv) {
int i, j, rank, nranks, peer, bufsize, errors, total_errors;
double **buf_bvec, **src_bvec, *src_buf;
int count[2], src_stride, trg_stride, stride_level;
double scaling, time;
MPI_Init(&argc, &argv);
ARMCI_Init();
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nranks);
buf_bvec = (double **) malloc(sizeof(double *) * nranks);
src_bvec = (double **) malloc(sizeof(double *) * nranks);
bufsize = XDIM * YDIM * sizeof(double);
ARMCI_Malloc((void **) buf_bvec, bufsize);
ARMCI_Malloc((void **) src_bvec, bufsize);
src_buf = src_bvec[rank];
if (rank == 0)
printf("ARMCI Strided DLA Accumulate Test:\n");
ARMCI_Access_begin(buf_bvec[rank]);
ARMCI_Access_begin(src_buf);
for (i = 0; i < XDIM*YDIM; i++) {
*(buf_bvec[rank] + i) = 1.0 + rank;
*(src_buf + i) = 1.0 + rank;
}
ARMCI_Access_end(src_buf);
ARMCI_Access_end(buf_bvec[rank]);
scaling = 2.0;
src_stride = XDIM * sizeof(double);
trg_stride = XDIM * sizeof(double);
stride_level = 1;
count[1] = YDIM;
count[0] = XDIM * sizeof(double);
ARMCI_Barrier();
time = MPI_Wtime();
peer = (rank+1) % nranks;
for (i = 0; i < ITERATIONS; i++) {
ARMCI_AccS(ARMCI_ACC_DBL,
(void *) &scaling,
src_buf,
&src_stride,
(void *) buf_bvec[peer],
&trg_stride,
count,
stride_level,
peer);
}
ARMCI_Barrier();
time = MPI_Wtime() - time;
if (rank == 0) printf("Time: %f sec\n", time);
ARMCI_Access_begin(buf_bvec[rank]);
for (i = errors = 0; i < XDIM; i++) {
for (j = 0; j < YDIM; j++) {
const double actual = *(buf_bvec[rank] + i + j*XDIM);
const double expected = (1.0 + rank) + scaling * (1.0 + ((rank+nranks-1)%nranks)) * (ITERATIONS);
if (fabs(actual - expected) > 1e-10) { /* compare magnitudes; a one-sided check would miss undershoot */
printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
rank, j, i, expected, actual);
errors++;
fflush(stdout);
}
}
}
ARMCI_Access_end(buf_bvec[rank]);
MPI_Allreduce(&errors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
ARMCI_Free((void *) buf_bvec[rank]);
ARMCI_Free((void *) src_bvec[rank]);
free(buf_bvec);
free(src_bvec);
ARMCI_Finalize();
MPI_Finalize();
if (total_errors == 0) {
if (rank == 0) printf("Success.\n");
return 0;
} else {
if (rank == 0) printf("Fail.\n");
return 1;
}
}
Example 2: master
unsigned int master(unsigned int base_dim, unsigned int max_fact,
unsigned int** exponents, mpz_t * As,
int comm_size, unsigned int print_fact) {
unsigned int fact_count = 0;
MPI_Status status;
int count;
int source;
/* Buffer to receive the exponents */
unsigned int* buffer_exp;
/* Buffer to receive (A + s) */
unsigned char buffer_As[BUFFER_DIM];
init_vector(& buffer_exp, base_dim);
double t1 = MPI_Wtime();
double t2;
int fact_per_rank[comm_size];
for(int i = 0; i < comm_size; ++i)
fact_per_rank[i] = 0;
while(fact_count < max_fact + base_dim) {
/* Receive the exponent vector */
MPI_Recv(buffer_exp, base_dim, MPI_UNSIGNED,
MPI_ANY_SOURCE, ROW_TAG,
MPI_COMM_WORLD, &status);
source = status.MPI_SOURCE;
for(unsigned int i = 0; i < base_dim; ++i)
set_matrix(exponents, fact_count, i, buffer_exp[i]);
/* Receive the mpz containing (A + s) */
MPI_Recv(buffer_As, BUFFER_DIM, MPI_UNSIGNED_CHAR, source,
AS_TAG, MPI_COMM_WORLD, &status);
MPI_Get_count(&status, MPI_UNSIGNED_CHAR, &count);
mpz_import(As[fact_count], count, 1, 1, 1, 0, buffer_As);
++fact_count;
++fact_per_rank[source];
if(fact_count % print_fact == 0) {
t2 = MPI_Wtime() - t1;
printf("#%d/%d in %.6f seconds\n", fact_count, max_fact + base_dim, t2);
}
}
/* Send '1' to the slaves to signal termination */
char stop_signal = '1';
for(unsigned int i = 1; i < comm_size; ++i)
MPI_Send(&stop_signal, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD);
printf("#Sending stop_signal\n");
printf("#Fattorizzazioni per ranks:\n#");
for(int i = 1; i < comm_size; ++i)
printf("%d \t", i);
printf("\n#");
for(int i = 1; i < comm_size; ++i)
printf("%d \t", fact_per_rank[i]);
printf("\n");
return fact_count;
}
Example 3: estimate_reciprocal
/* Estimate the reciprocal space part error of the SPME Ewald sum. */
static real estimate_reciprocal(
t_inputinfo *info,
rvec x[], /* array of particles */
real q[], /* array of charges */
int nr, /* number of charges = size of the charge array */
FILE gmx_unused *fp_out,
gmx_bool bVerbose,
unsigned int seed, /* The seed for the random number generator */
int *nsamples, /* Return the number of samples used if Monte Carlo
* algorithm is used for self energy error estimate */
t_commrec *cr)
{
real e_rec = 0; /* reciprocal error estimate */
real e_rec1 = 0; /* Error estimate term 1*/
real e_rec2 = 0; /* Error estimate term 2*/
real e_rec3 = 0; /* Error estimate term 3 */
real e_rec3x = 0; /* part of Error estimate term 3 in x */
real e_rec3y = 0; /* part of Error estimate term 3 in y */
real e_rec3z = 0; /* part of Error estimate term 3 in z */
int i, ci;
int nx, ny, nz; /* grid coordinates */
real q2_all = 0; /* sum of squared charges */
rvec gridpx; /* reciprocal grid point in x direction*/
rvec gridpxy; /* reciprocal grid point in x and y direction*/
rvec gridp; /* complete reciprocal grid point in 3 directions*/
rvec tmpvec; /* template to create points from basis vectors */
rvec tmpvec2; /* template to create points from basis vectors */
real coeff = 0; /* variable to compute coefficients of the error estimate */
real coeff2 = 0; /* variable to compute coefficients of the error estimate */
real tmp = 0; /* variables to compute different factors from vectors */
real tmp1 = 0;
real tmp2 = 0;
gmx_bool bFraction;
/* Random number generator */
gmx_rng_t rng = NULL;
int *numbers = NULL;
/* Index variables for parallel work distribution */
int startglobal, stopglobal;
int startlocal, stoplocal;
int x_per_core;
int xtot;
#ifdef TAKETIME
double t0 = 0.0;
double t1 = 0.0;
#endif
rng = gmx_rng_init(seed);
clear_rvec(gridpx);
clear_rvec(gridpxy);
clear_rvec(gridp);
clear_rvec(tmpvec);
clear_rvec(tmpvec2);
for (i = 0; i < nr; i++)
{
q2_all += q[i]*q[i];
}
/* Calculate indices for work distribution */
startglobal = -info->nkx[0]/2;
stopglobal = info->nkx[0]/2;
xtot = stopglobal*2+1;
if (PAR(cr))
{
x_per_core = static_cast<int>(ceil(static_cast<real>(xtot) / cr->nnodes));
startlocal = startglobal + x_per_core*cr->nodeid;
stoplocal = startlocal + x_per_core -1;
if (stoplocal > stopglobal)
{
stoplocal = stopglobal;
}
}
else
{
startlocal = startglobal;
stoplocal = stopglobal;
x_per_core = xtot;
}
/*
#ifdef GMX_LIB_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
*/
#ifdef GMX_LIB_MPI
#ifdef TAKETIME
if (MASTER(cr))
{
t0 = MPI_Wtime();
}
#endif
#endif
if (MASTER(cr))
{
//......... part of the code omitted here .........
Example 4: main
int main( int argc, char *argv[] )
{
int errs = 0;
int *ranks;
int *ranksout;
MPI_Group gworld, grev, gself;
MPI_Comm comm;
MPI_Comm commrev;
int rank, size, i;
double start, end, time1, time2;
MTest_Init( &argc, &argv );
comm = MPI_COMM_WORLD;
MPI_Comm_size( comm, &size );
MPI_Comm_rank( comm, &rank );
ranks = malloc(size*sizeof(int));
ranksout = malloc(size*sizeof(int));
if (!ranks || !ranksout) {
fprintf(stderr, "out of memory\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
/* generate a comm with the rank order reversed */
MPI_Comm_split(comm, 0, (size-rank-1), &commrev);
MPI_Comm_group(commrev, &grev);
MPI_Comm_group(MPI_COMM_SELF, &gself);
MPI_Comm_group(comm, &gworld);
/* sanity check correctness first */
for (i=0; i < size; i++) {
ranks[i] = i;
ranksout[i] = -1;
}
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
for (i=0; i < size; i++) {
if (ranksout[i] != (size-i-1)) {
if (rank == 0)
printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i, (size-rank-1), ranksout[i]);
++errs;
}
}
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
for (i=0; i < size; i++) {
int expected = (i == (size-rank-1) ? 0 : MPI_UNDEFINED);
if (ranksout[i] != expected) {
if (rank == 0)
printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected, ranksout[i]);
++errs;
}
}
/* now compare relative performance */
/* we need lots of procs to get a group large enough to have meaningful
* numbers. On most testing machines this means that we're oversubscribing
* cores in a big way, which might perturb the timing results. So we make
* sure everyone started up and then everyone but rank 0 goes to sleep to
* let rank 0 do all the timings. */
MPI_Barrier(comm);
if (rank != 0) {
sleep(10);
}
else /* rank==0 */ {
sleep(1); /* try to avoid timing while everyone else is making syscalls */
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /*throwaway iter*/
start = MPI_Wtime();
for (i = 0; i < NUM_LOOPS; ++i) {
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
}
end = MPI_Wtime();
time1 = end - start;
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /*throwaway iter*/
start = MPI_Wtime();
for (i = 0; i < NUM_LOOPS; ++i) {
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
}
end = MPI_Wtime();
time2 = end - start;
/* complain if the "gworld" time exceeds 2x the "gself" time */
if (fabs(time1 - time2) > (2.00 * time2)) {
printf("too much difference in MPI_Group_translate_ranks performance:\n");
printf("time1=%f time2=%f\n", time1, time2);
printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1-time2)/time2));
if (time1 < time2) {
printf("also, (time1<time2) is surprising...\n");
}
++errs;
}
}
free(ranks);
free(ranksout);
//......... part of the code omitted here .........
Example 5: index_jd
void index_jd(int * nr_of_eigenvalues_ov,
const int max_iterations, const double precision_ov, char *conf_filename,
const int nstore, const int method){
complex *eval;
spinor *eigenvectors_ov, *eigenvectors_ov_;
spinor *lowvectors, *lowvectors_;
int i=0 , k=0, returncode=0, index = 0, determined = 0, signed_index = 0;
char filename[120];
FILE * ifs = NULL;
matrix_mult Operator[2];
double absdifference;
const int N2 = VOLUMEPLUSRAND;
#ifdef MPI
double atime, etime;
#endif
double lowestmodes[20];
int intsign, max_iter, first_blocksize = 1;
int * idx = NULL;
/**********************
* For Jacobi-Davidson
**********************/
int verbosity = 3, converged = 0, blocksize = 1, blockwise = 0;
int solver_it_max = 50, j_max, j_min, v0dim = 0;
double * eigenvalues_ov = NULL;
double decay_min = 1.7, threshold_min = 1.e-3, prec;
WRITER *writer=NULL;
spinor *s;
double sqnorm;
paramsPropagatorFormat *propagatorFormat = NULL;
double ap_eps_sq;
int switch_on_adaptive_precision = 0;
double ov_s = 0;
/**********************
* General variables
**********************/
eval= calloc((*nr_of_eigenvalues_ov),sizeof(complex));
shift = 0.0;
// ov_s = 0.5*(1./g_kappa - 8.) - 1.;
ap_eps_sq = precision_ov*precision_ov;
#if (defined SSE || defined SSE2 )
eigenvectors_ov_= calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov)+1, sizeof(spinor));
eigenvectors_ov = (spinor *)(((unsigned long int)(eigenvectors_ov_)+ALIGN_BASE)&~ALIGN_BASE);
lowvectors_ = calloc(2*first_blocksize*VOLUMEPLUSRAND+1, sizeof(spinor));
lowvectors = (spinor *)(((unsigned long int)(lowvectors_)+ALIGN_BASE)&~ALIGN_BASE);
#else
// eigenvectors_ov_ = calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov), sizeof(spinor));
eigenvectors_ov_ = calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov), sizeof(spinor));
lowvectors_ = calloc(2*first_blocksize*VOLUMEPLUSRAND, sizeof(spinor));
eigenvectors_ov = eigenvectors_ov_;
lowvectors = lowvectors_;
#endif
// idx = malloc((*nr_of_eigenvalues_ov)*sizeof(int));
idx = malloc((*nr_of_eigenvalues_ov)*sizeof(int));
Operator[0]=&Dov_proj_plus;
Operator[1]=&Dov_proj_minus;
if(g_proc_id == g_stdio_proc){
printf("Computing first the two lowest modes in the positive and negative chirality sector, respectively\n");
if(switch_on_adaptive_precision == 1) {
printf("We have switched on adaptive precision with ap_eps_sq = %e!\n", ap_eps_sq);
}
printf("We have set the mass to zero within this computation!\n");
fflush(stdout);
}
prec = precision_ov;
j_min = 8; j_max = 16;
max_iter = 70;
#ifdef MPI
atime = MPI_Wtime();
#endif
v0dim = first_blocksize;
blocksize = v0dim;
for(intsign = 0; intsign < 2; intsign++){
converged = 0;
if(g_proc_id == g_stdio_proc){
printf("%s chirality sector: \n", intsign ? "negative" : "positive");
fflush(stdout);
}
if(max_iter == 70){
/********************************************************************
*
* We need random start spinor fields, but they must be half zero,
* that's why we apply the Projektor once
*
********************************************************************/
for(i = 0; i < first_blocksize; i++) {
random_spinor_field(&lowvectors[(first_blocksize*intsign+i)*VOLUMEPLUSRAND],N2,0);
//......... part of the code omitted here .........
Example 6: main
int main(int argc, char** argv) {
const int PING_PONG_LIMIT = 10;
double t_start, t_end, t_total, tLoop, t_tick;
double MPI_Wtime(void);
int tests, maxTest = 10, i;
int k[9] = {1,4,16,64,256,1024,4096,16384,65536};
// Initialize the MPI environment
MPI_Init(NULL, NULL);
// Find out rank, size
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// We require exactly 2 processes for this task
if (world_size != 2) {
fprintf(stderr, "World size must be two for %s\n", argv[0]);
MPI_Abort(MPI_COMM_WORLD, 1);
}
// Get the name of the processor
char processor_name[MPI_MAX_PROCESSOR_NAME];
int name_len;
MPI_Get_processor_name(processor_name, &name_len);
// Outer loop over message sizes: k[p] = 4^p ints for p = 0..8 (i.e. 2^k for k = 0,2,4,...,16)
for (int p = 0; p < sizeof(k)/sizeof(k[0]); p++) {
int A[k[p]]; // Vector of integers
// Populate A with ints (4 bytes each)
for (i = 0; i < k[p]; i++) {
A[i] = i;
}
// This is for loop timing
tLoop = 1.0e10;
for (tests = 0; tests < maxTest; tests++) { // begin timing
t_start = MPI_Wtime();
// t_tick = MPI_Wtick();
int ping_pong_count = 0;
int partner_rank = (world_rank + 1) % 2;
while (ping_pong_count < PING_PONG_LIMIT) {
if (world_rank == ping_pong_count % 2) {
// Increment the ping pong count before you send the payload
ping_pong_count++;
// Send the k[p]-int payload from A (sending k[p] ints from &ping_pong_count would overrun the buffer)
MPI_Send(A, k[p], MPI_INT, partner_rank, 0, MPI_COMM_WORLD);
printf("World rank %d sent and incremented ping_pong_count %d to partner rank %d\n",
world_rank, ping_pong_count, partner_rank);
printf("The size of A is: %lu\n", sizeof(A));
printf("P is: %lu\n", sizeof(k));
} else {
MPI_Recv(A, k[p], MPI_INT, partner_rank, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
// Mirror the sender's increment so both ranks agree on the count
ping_pong_count++;
printf("World rank %d received ping_pong_count %d from partner rank %d\n",
world_rank, ping_pong_count, partner_rank);
}
}
t_end = MPI_Wtime();
t_total = t_end - t_start;
if (t_total < tLoop) tLoop = t_total; // keep the fastest of the maxTest trials
} // end timing trials
printf("That took %f seconds\n", tLoop);
printf("Number of processes in MPI_COMM_WORLD: %d\n", world_size);
printf("Name of processor %s\n", processor_name);
printf("The size of A is: %lu\n", sizeof(A));
printf("P is: %lu\n", sizeof(k));
} // ends the outer loop for m
MPI_Finalize();
// return 0;
}
Example 7: main
int32_t main(int32_t argc, char *argv[])
{
int32_t rankID = 0, nRanks = 1;
char rankName[MAX_STRING_LENGTH];
gethostname(rankName, MAX_STRING_LENGTH);
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rankID);
MPI_Comm_size(MPI_COMM_WORLD, &nRanks);
char *target_path = argv[1], *target = NULL;
int64_t target_length = 0;
target_length = get_filesize(target_path);
if(target_length < 0)
{
printf("\nError: Cannot read target file [ %s ]\n", target_path);
MPI_Finalize();
exit(-1);
}
if(rankID == 0)
{
printf("--------------------------------------------------\n");
printf("- Read target: [ %s ]\n", target_path);
target = (char*)malloc(sizeof(char)*target_length);
double read_time = 0;
read_time -= MPI_Wtime();
read_targetfile(target, target_length, target_path);
read_time += MPI_Wtime();
printf("- Target length: %ld (read time: %lf secs)\n", target_length, read_time);
printf("--------------------------------------------------\n");
}
char *pattern = argv[2];
int64_t pattern_length = 0;
if(pattern == NULL)
{
printf("\nError: Cannot read pattern [ %s ]\n", pattern);
free(target);
MPI_Finalize();
exit(-1);
}
pattern_length = strlen(pattern);
if(rankID == 0)
{
printf("- Pattern: [ %s ]\n", pattern);
printf("- Pattern length: %ld\n", pattern_length);
printf("--------------------------------------------------\n");
}
int32_t* BCS = (int32_t*)malloc(ALPHABET_LEN * sizeof(int32_t));
int32_t* GSS = (int32_t*)malloc(pattern_length * sizeof(int32_t));
make_BCS(BCS, pattern, pattern_length);
make_GSS(GSS, pattern, pattern_length);
int64_t found_count = 0;
double search_time = 0;
if(rankID == 0)
{
search_time -= MPI_Wtime();
}
// DO NOT EDIT UPPER CODE //
//==============================================================================================================//
int64_t mpi_found_count = 0;
char* chunk = NULL;
if(argv[3] == NULL)
{
printf("\nError: Check chunk size [ %s ]\n", argv[3]);
free(target);
free(BCS);
free(GSS);
MPI_Finalize();
exit(-1);
}
if(rankID == 0) printf("\ttarget_length = %ld\n", target_length);
int64_t nChunksPerRank = atoi(argv[3]);
//decide how many string chunks each rank gets
int64_t nTotalChunks = (nRanks-1) * nChunksPerRank;
//rank 0 does not search, hence nRanks - 1
//the target string is split into nTotalChunks chunks in total
if(rankID == 0) printf("\tnTotalChunks = %ld\n", nTotalChunks);
int64_t overlap_length = (pattern_length - 1) * (nTotalChunks - 1);
//the last chunk has nothing following it to overlap with, hence nTotalChunks - 1
//in the worst case the first character of the pattern falls on one core
//and the remaining characters on the next one, hence pattern_length - 1
if(rankID == 0) printf("\toverlap_length = %ld\n", overlap_length);
int64_t quotient = (target_length + overlap_length) / nTotalChunks;
//to guard against that worst case, each chunk is extended by pattern_length - 1,
//giving target_length + overlap_length in total, which is divided by nTotalChunks
//so that each core receives its nChunksPerRank chunks
if(rankID == 0) printf("\tquotient = %ld\n", quotient);
int64_t remainder = (target_length + overlap_length) - (quotient * nTotalChunks);
//the division may not be exact, so the remainder is handled separately
if(rankID == 0) printf("\tremainder = %ld\n\n", remainder);
int64_t chunkID = 0;
int64_t* chunk_length = (int64_t*)malloc((nTotalChunks+1)*sizeof(int64_t));
//......... part of the code omitted here .........
Example 8: main
int main(int argc, char* argv[]) {
/****************************************************/
/* Parameter declaration */
/****************************************************/
// Index params
int i,j,k = 0;
// Directory path params
char input_path[1024] = "/projects/isgs/lidar/champaign/las";
char scratch_path[1024] = "/gpfs_scratch/ncasler/data/tmp";
char out_path[1024] = "";
char tmp_path[1024] = "";
// MPI Params
int world_size, world_rank, mpi_err;
MPI_Comm world_comm = MPI_COMM_WORLD;
MPI_Info info = MPI_INFO_NULL;
MPI_Status status;
MPI_Request request;
MPI_Init(&argc, &argv);
MPI_Comm_size(world_comm, &world_size);
MPI_Comm_rank(world_comm, &world_rank);
MPI_Errhandler_set(world_comm, MPI_ERRORS_RETURN);
double starttime, endtime;
// Set memory limit for grid allocations
long buffer_lim = 8000000000;
size_t max_size = buffer_lim / sizeof(Pixel);
// File specific parameters
int n_files, file_off, file_blk, file_end = 0;
char ext[5] = ".las";
// Metadata
double g_mins[3] = {DBL_MAX,DBL_MAX,DBL_MAX}; // Global min coord
double g_maxs[3] = {-DBL_MAX,-DBL_MAX,-DBL_MAX}; // Global max coord
double l_mins[3] = {DBL_MAX,DBL_MAX,DBL_MAX}; // Local min coord
double l_maxs[3] = {-DBL_MAX,-DBL_MAX,-DBL_MAX}; // Local max coord
// File specific params
FileCollection *g_files = NULL; // Global file list
int g_n_files = 0; // Global file count
int l_n_files = 0; // Local file count
BBox* g_file_bbox = NULL; // Global array of file bboxes
BBox* l_file_bbox = NULL; // Local array of file bboxes
// Grid specific params
struct Point* origin = new Point();
DType datatype = DT_Float32;
struct Grid* g_grid = NULL; // Global grid
int g_cols = 0; // Global grid column count
int g_rows = 0; // Global grid row count
//Specify resolution > should be parameterized in prod
double res = 5.0f;
// Block scheme params
struct BlockScheme* blk_scheme = NULL;
int *l_blks = NULL; // Local block array
int *g_blks = NULL; // Global block array
int l_n_blks = 0; // Local block count
int g_n_blks = 0; // Global block count
int *blk_n_files = NULL; // Array holding block specific file counts
// Las specific params
las_file las;
long long g_n_pts = 0; // Global point count
long long l_n_pts = 0; // Local point count
/**************************************************/
/* Begin first file scan */
/**************************************************/
starttime = MPI_Wtime();
g_files = new FileCollection(&input_path[0], &ext[0]);
// Check the number of available LAS files
g_n_files = g_files->countFiles();
g_file_bbox = new BBox[g_n_files];
l_file_bbox = new BBox[l_n_files];
// Create tif output dir
sprintf(&tmp_path[0], "%s/blocks", scratch_path);
printf("[%i] Using tmp dir: %s\n", tmp_path);
struct stat st = {0};
if (stat(tmp_path, &st) == -1)
mkdir(tmp_path, 0700);
// Set file Block size
file_blk = ceil((float)g_n_files /(float)world_size);
file_off = file_blk * world_rank;
if (file_off + file_blk > g_n_files)
file_end = g_n_files - file_off;
else
file_end = file_off + file_blk;
// Read subset of file paths from dir
l_n_files = g_files->getMetadata(file_off, file_end);
// Scan metadata from files
for (i = file_off; i < file_end; i++) {
las.open(g_files->fileList[i]);
l_n_pts = l_n_pts + (long long) las.points_count();
compareMin(l_mins, las.minimums());
compareMax(l_maxs, las.maximums());
double *tmp_min = las.minimums();
//......... part of the code omitted here .........
Example 9: main
int main (int argc,char **argv)
{
double time,time_seq, time_par;
int rank, size;
char *tracefile;
MPI_Init(&argc,&argv);
tracefile = getenv("TVTRACE");
if( tracefile != NULL ){
printf( "tv tracefile=%s\n", tracefile );
MPI_Pcontrol(TRACEFILES, NULL, tracefile, 0);
}
else{
MPI_Pcontrol(TRACEFILES, NULL, "trace", 0);
}
MPI_Pcontrol(TRACELEVEL, 1, 1, 1);
MPI_Pcontrol(TRACENODE, 1000000, 1, 1);
MPI_Comm_rank (MPI_COMM_WORLD,&rank);
MPI_Comm_size (MPI_COMM_WORLD,&size);
if( !rank ){
double *a,*b,*c, *c0;
int i,i1,j,k;
int ann;
MPI_Status *st;
MPI_Request *rq,rq1;
rq = (MPI_Request*) malloc( (size-1)*sizeof(MPI_Request) );
st = (MPI_Status*) malloc( (size-1)*sizeof(MPI_Status) );
ann=an/size+((an%size)?1:0);
// printf("[%d]ann=%d\n", rank, ann );
a=(double*) malloc(am*an*sizeof(double));
b=(double*) malloc(am*bm*sizeof(double));
c=(double*) malloc(an*bm*sizeof(double));
for(i=0;i<am*an;i++)
a[i]=rand()%301;
for(i=0;i<am*bm;i++)
b[i]=rand()%251;
printf( "Data ready [%d]\n", rank );
c0 = (double*)malloc(an*bm*sizeof(double));
time = MPI_Wtime();
for (i=0; i<an; i++)
for (j=0; j<bm; j++)
{
double s = 0.0;
for (k=0; k<am; k++)
s+= a[i*am+k]*b[k*bm+j];
c0[i*bm+j] = s;
}
time = MPI_Wtime() - time;
printf("Time seq[%d] = %lf\n", rank, time );
time_seq = time;
MPI_Barrier( MPI_COMM_WORLD );
time=MPI_Wtime();
MPI_Bcast( b, am*bm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
printf( "Data Bcast [%d]\n", rank );
for( i1=0, j=1; j<size; j++, i1+=ann*am ){
printf( "Data to Send [%d] %016x[%4d] =>> %d\n", rank, a+i1, i1, j );
MPI_Isend( a+i1, ann*am, MPI_DOUBLE, j, 101, MPI_COMM_WORLD, &rq1 );
MPI_Request_free( &rq1 );
printf( "Data Send [%d] =>> %d\n", rank, j );
}
printf( "Data Send [%d]\n", rank );
MPI_Isend( a+i1, 1, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &rq1 );
MPI_Request_free( &rq1 );
printf( "Data Send [%d] =>> %d\n", rank, j );
for(i=(i1/am);i<an;i++)
for(j=0;j<bm;j++){
double s=0.0;
for(k=0;k<am;k++)
s+=a[i*am+k]*b[k*bm+j];
c[i*bm+j]=s;
}
printf( "Job done [%d]\n", rank );
for( i1=0, j=1; j<size; j++, i1+=(ann*bm) ){
printf( "Data to Recv [%d] %016x[%4d] =>> %d\n", rank, c+i1, i1/bm, j );
MPI_Irecv( c+i1, ann*am, MPI_DOUBLE, j, 102, MPI_COMM_WORLD, rq+(j-1) );
}
MPI_Waitall( size-1, rq, st );
time=MPI_Wtime()-time;
printf("time [%d]=%12.8lf\n",rank,time);
//......... part of the code omitted here .........
Example 10: main
int main(int argc, char *argv[])
{
const int PNUM = 2; //number of processes
const int MSIZE = 4; //matrix size
int rank,value,size;
int namelen;
double time1,time2;
srand(time(NULL));
MPI_Init(&argc, &argv);
time1 = MPI_Wtime();
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Get_processor_name(processor_name,&namelen);
MPI_Status status;
int A[MSIZE][MSIZE];
int B[MSIZE];
int C[MSIZE];
if(rank==0){
int a=0;
for(int i=0;i<MSIZE;i++)
{
for(int j=0;j<MSIZE;j++)
{
A[i][j]=++a;
B[i]=2;
C[i]=0;
}
}
}
MPI_Bcast(&A,MSIZE*MSIZE,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(&B,MSIZE,MPI_INT,0,MPI_COMM_WORLD);
int F=0;
for(int i=rank*MSIZE/PNUM;i<(rank*MSIZE/PNUM)+2;i++)
{
for(int j=0;j<MSIZE;j++)
{
F+=A[i][j]*B[j]; // row i of A times vector B
}
C[i]=F;
F=0;
}
if(rank==1){
time2 = MPI_Wtime();
std::cout<<"\nElasped Time: "<<time2-time1;
MPI_Send(&C[rank*MSIZE/PNUM],MSIZE/2,MPI_INT,0,0,MPI_COMM_WORLD);
}
if(rank==0)
{
for(int i=1;i<PNUM;i++) MPI_Recv(&C[i*MSIZE/PNUM],MSIZE/2,MPI_INT,i,0,MPI_COMM_WORLD,&status);
for(int i=0;i<MSIZE;i++)
{
std::cout<<C[i]<<" ";
std::cout<<std::endl;
}
}
MPI_Finalize();
return 0;
}
Example 11: save_to_file
void node_server::start_run(bool scf) {
integer output_cnt;
if (!hydro_on) {
save_to_file("X.chk");
diagnostics();
return;
}
printf("%e %e\n", grid::get_A(), grid::get_B());
if (scf) {
run_scf();
set_pivot();
printf("Adjusting velocities:\n");
auto diag = diagnostics();
space_vector dv;
dv[XDIM] = -diag.grid_sum[sx_i] / diag.grid_sum[rho_i];
dv[YDIM] = -diag.grid_sum[sy_i] / diag.grid_sum[rho_i];
dv[ZDIM] = -diag.grid_sum[sz_i] / diag.grid_sum[rho_i];
this->velocity_inc(dv);
save_to_file("scf.chk");
}
printf("Starting...\n");
solve_gravity(false);
int ngrids = regrid(me.get_gid(), false);
real output_dt = opts.output_dt;
printf("OMEGA = %e, output_dt = %e\n", grid::get_omega(), output_dt);
real& t = current_time;
integer step_num = 0;
auto fut_ptr = me.get_ptr();
node_server* root_ptr = fut_ptr.get();
output_cnt = root_ptr->get_rotation_count() / output_dt;
hpx::future<void> diag_fut = hpx::make_ready_future();
hpx::future<void> step_fut = hpx::make_ready_future();
profiler_output (stdout);
real bench_start, bench_stop;
while (current_time < opts.stop_time) {
auto time_start = std::chrono::high_resolution_clock::now();
if (root_ptr->get_rotation_count() / output_dt >= output_cnt) {
// if (step_num != 0) {
char* fname;
if (asprintf(&fname, "X.%i.chk", int(output_cnt))) {
}
save_to_file(fname);
free(fname);
if (asprintf(&fname, "X.%i.silo", int(output_cnt))) {
}
output(fname, output_cnt, false);
free(fname);
// SYSTEM(std::string("cp *.dat ./dat_back/\n"));
// }
++output_cnt;
}
if (step_num == 0) {
bench_start = MPI_Wtime();
}
// break;
auto ts_fut = hpx::async([=]() {return timestep_driver();});
step();
real dt = ts_fut.get();
real omega_dot = 0.0, omega = 0.0, theta = 0.0, theta_dot = 0.0;
if (opts.problem == DWD) {
auto diags = diagnostics();
const real dx = diags.secondary_com[XDIM] - diags.primary_com[XDIM];
const real dy = diags.secondary_com[YDIM] - diags.primary_com[YDIM];
const real dx_dot = diags.secondary_com_dot[XDIM] - diags.primary_com_dot[XDIM];
const real dy_dot = diags.secondary_com_dot[YDIM] - diags.primary_com_dot[YDIM];
theta = atan2(dy, dx);
omega = grid::get_omega();
theta_dot = (dy_dot * dx - dx_dot * dy) / (dx * dx + dy * dy) - omega;
const real w0 = grid::get_omega() * 100.0;
const real theta_dot_dot = (2.0 * w0 * theta_dot + w0 * w0 * theta);
omega_dot = theta_dot_dot;
omega += omega_dot * dt;
// omega_dot += theta_dot_dot*dt;
grid::set_omega(omega);
}
double time_elapsed = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - time_start).count();
step_fut.get();
step_fut =
hpx::async(
[=]() {
FILE* fp = fopen( "step.dat", "at");
fprintf(fp, "%i %e %e %e %e %e %e %e %e %i\n", int(step_num), double(t), double(dt), time_elapsed, rotational_time, theta, theta_dot, omega, omega_dot, int(ngrids));
fclose(fp);
});
printf("%i %e %e %e %e %e %e %e %e\n", int(step_num), double(t), double(dt), time_elapsed, rotational_time, theta, theta_dot, omega, omega_dot);
// t += dt;
++step_num;
//......... part of the code omitted here .........
Example 12: main
int main(int argc, char* argv[])
{
int i,n, length;
int *inmsg,*outmsg;
int mypid,mysize;
int rc;
int sint;
double start, finish,time;
double bw;
MPI_Status status;
sint=sizeof(int);
rc=MPI_Init(&argc,&argv);
rc|=MPI_Comm_size(MPI_COMM_WORLD,&mysize);
rc|=MPI_Comm_rank(MPI_COMM_WORLD,&mypid);
if(mysize!=2)
{
fprintf(stderr,"now we only test message passing time between 2 processes\n");
exit(1);
}
length=1;
inmsg=(int *) malloc(MAXLENGTH*sizeof(int));
outmsg=(int *) malloc(MAXLENGTH*sizeof(int));
//synchronize the processes: MPI_Barrier returns only after all processes have called it
rc=MPI_Barrier(MPI_COMM_WORLD);
if(mypid==0)
{
for(i=1;i<=4;i++)
{
time=0.00000000000000;
printf("\n\nDoing time test for:\n");
printf("Message length=%d int value\n",length);
printf("Message size =%d Bytes\n",sint*length);
printf("Number of Reps=%d\n",REPS);
start=MPI_Wtime();
for(n=1;n<=REPS;n++)
{
rc=MPI_Send(&outmsg[0],length,MPI_INT,1,0,MPI_COMM_WORLD);
rc= MPI_Recv(&inmsg[0],length,MPI_INT,1,0,MPI_COMM_WORLD,&status);
}
finish=MPI_Wtime();
//calculate the average time and bandwidth
time=finish-start;
printf("***delivering message avg= %f Sec for lengthSize=%d\n",time/REPS,length);
bw=2.0*REPS*sint*length/time;
printf("*** bandwidth=%f Byte/Sec\n",bw);
//increase the length
length=100*length;
}
}
//task 1 processing now
if(mypid==1)
{
for(i=1;i<=4;i++)
{
for(n=1;n<=REPS;n++)
{
rc=MPI_Recv(&inmsg[0],length,MPI_INT,0,0,MPI_COMM_WORLD,&status);
rc = MPI_Send(&outmsg[0],length,MPI_INT,0,0,MPI_COMM_WORLD);
}
length=100*length;
}
}
MPI_Finalize();
exit(0);
}
Example 13: main
int main(int argc, char **argv)
{
int i, j, n, N=50, b=1, p;
double *A, *v, *w;
/* Initialize MPI */
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&np);
MPI_Comm_rank(MPI_COMM_WORLD,&me);
/* Parse command-line arguments */
if (argc > 1) { /* The user supplied the value of n */
if ((N = atoi(argv[1])) < 0) N = 50;
}
if (argc > 2) { /* The user supplied the value of b */
if ((b = atoi(argv[2])) < 0) b = 1;
}
if (b>=N/np) { /* Invalid value of b */
printf("Error: band width too large, N/np=%d, b=%d\n", N/np, b);
MPI_Finalize();
exit(1);
}
p = 0;
for (i=0; i<np; i++) {
n = N/np;
if (i<N%np) n++;
if (i==me) break;
p += n;
}
printf("[Proc %d] tamaño local: n=%d, fila inicial: p=%d\n", me, n, p);
/* Reserva de memoria */
A = (double*)calloc(N*n,sizeof(double));
v = (double*)calloc(n+b,sizeof(double)); // size only grows by b
w = (double*)calloc(n+b,sizeof(double)); // size only grows by b
/* Initialize data */
for(i=0; i<n; i++) A[i*N+(i+p)] = 2*b;
for(i=0; i<n; i++) {
for(j=0; j<N; j++) {
if (i+p<j && abs(i+p-j)<=b) A[i*N+j] = -1.0;
}
}
for(i=0; i<n; i++) v[i] = 1.0;
/* Matrix-vector multiplication */
double t1,t2;
MPI_Barrier(MPI_COMM_WORLD);
t1 = MPI_Wtime();
parmatvec(N,n,p,b,A,v,w);
MPI_Barrier(MPI_COMM_WORLD);
t2 = MPI_Wtime();
if (me==0) printf("Tiempo transcurrido: %f s.\n", t2-t1);
/* Imprimir solución */
for(i=0; i<n; i++) printf("w[%d] = %g\n", i+p, w[i]); //Solo imprimimos w sin la parte de arriba.
free(A);
free(v);
free(w);
MPI_Finalize();
return 0;
}
Example 14: main
int main(int argc, char *argv[]) {
int i, n, nlocal;
int numprocs, myrank;
MPI_File f; char* filename = "input/8";
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
if(MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
fprintf(stderr, "Cannot open file %s\n", filename);
MPI_Abort(MPI_COMM_WORLD, FILE_NOT_FOUND);
MPI_Finalize();
return 1;
}
MPI_File_seek(f, 0, MPI_SEEK_SET);
MPI_File_read(f, &n, 1, MPI_INT, &status);
nlocal = n/numprocs; if(myrank == numprocs - 1) nlocal = nlocal + n % numprocs;
int *a = (int *)malloc(nlocal * n * sizeof(int));
MPI_File_seek(f, (myrank * (n/numprocs) * n + 1) * sizeof(int), MPI_SEEK_SET); // offset uses the uniform block size so the last rank (whose nlocal includes the remainder) starts at the right row
MPI_File_read(f, &a[0], nlocal * n, MPI_INT, &status);
MPI_File_close(&f);
// int j;
// if(myrank == 3) {
// for(i = 0; i < nlocal; i++) {
// for(j = 0; j < n; j++) {
// printf("%d ", a[i * n +j]);
// }
// printf("\n");
// }
// }
double start = MPI_Wtime();
floyd_all_pairs_sp_1d(n, nlocal, a);
double stop = MPI_Wtime();
printf("[%d] Completed in %1.3f seconds\n", myrank, stop-start);
// if(myrank == 3) {
// for(i = 0; i < nlocal; i++) {
// for(j = 0; j < n; j++) {
// printf("%d ", a[i * n +j]);
// }
// printf("\n");
// }
// }
if(MPI_File_open(MPI_COMM_WORLD, "output/8", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
printf("Cannot open file %s\n", "out");
MPI_Abort(MPI_COMM_WORLD, FILE_NOT_FOUND);
MPI_Finalize();
return 1;
}
if(myrank == 0) {
MPI_File_seek(f, 0, MPI_SEEK_SET);
MPI_File_write(f, &n, 1, MPI_INT, &status);
}
// Write this rank's block once; as above, the offset is based on the uniform block size n/numprocs
MPI_File_seek(f, (myrank * (n/numprocs) * n + 1) * sizeof(int), MPI_SEEK_SET);
MPI_File_write(f, &a[0], nlocal * n, MPI_INT, &status);
MPI_File_close(&f);
free(a);
MPI_Finalize();
return 0;
}
Example 15: main
//......... part of the code omitted here .........
gsl_matrix_memcpy(L, AtA);
gsl_matrix_add(L, rhoI);
gsl_linalg_cholesky_decomp(L);
gsl_matrix_free(AtA);
gsl_matrix_free(rhoI);
} else {
/* L = chol(I + 1/rho*AAt) */
L = gsl_matrix_calloc(m,m);
gsl_matrix *AAt = gsl_matrix_calloc(m,m);
gsl_blas_dsyrk(CblasLower, CblasNoTrans, 1, A, 0, AAt);
gsl_matrix_scale(AAt, 1/rho);
gsl_matrix *eye = gsl_matrix_calloc(m,m);
gsl_matrix_set_identity(eye);
gsl_matrix_memcpy(L, AAt);
gsl_matrix_add(L, eye);
gsl_linalg_cholesky_decomp(L);
gsl_matrix_free(AAt);
gsl_matrix_free(eye);
}
/* Main ADMM solver loop */
int iter = 0;
if (rank == 0) {
printf("%3s %10s %10s %10s %10s %10s\n", "#", "r norm", "eps_pri", "s norm", "eps_dual", "objective");
}
double startAllTime, endAllTime;
startAllTime = MPI_Wtime();
while (iter < MAX_ITER) {
/* u-update: u = u + x - z */
gsl_vector_sub(x, z);
gsl_vector_add(u, x);
/* x-update: x = (A^T A + rho I) \ (A^T b + rho z - y) */
gsl_vector_memcpy(q, z);
gsl_vector_sub(q, u);
gsl_vector_scale(q, rho);
gsl_vector_add(q, Atb); // q = A^T b + rho*(z - u)
double tmp, tmpq;
gsl_blas_ddot(x, x, &tmp);
gsl_blas_ddot(q, q, &tmpq);
if (skinny) {
/* x = U \ (L \ q) */
gsl_linalg_cholesky_solve(L, q, x);
} else {
/* x = q/rho - 1/rho^2 * A^T * (U \ (L \ (A*q))) */
gsl_blas_dgemv(CblasNoTrans, 1, A, q, 0, Aq);
gsl_linalg_cholesky_solve(L, Aq, p);
gsl_blas_dgemv(CblasTrans, 1, A, p, 0, x); /* now x = A^T * (U \ (L \ (A*q)) */
gsl_vector_scale(x, -1/(rho*rho));
gsl_vector_scale(q, 1/rho);
gsl_vector_add(x, q);
}
/*
* Message-passing: compute the global sum over all processors of the