本文整理汇总了C++中rtclock函数的典型用法代码示例。如果您正苦于以下问题：C++ rtclock函数的具体用法是什么？C++ rtclock怎么用？有哪些C++ rtclock的使用示例？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
下文共展示了rtclock函数的15个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
/*
 * Runs T sweeps of a coupled update over the 1-D arrays e[] and h[],
 * optionally timing the sweeps through IF_TIME/rtclock(), and prints the
 * arrays when a file named ".test" can be opened in the working directory.
 *
 * init_array(), print_array(), rtclock(), IF_TIME, T, N, coeff1, coeff2,
 * e and h are all defined outside this function.
 *
 * Returns 0.
 */
int main()
{
    int t, i;
    double t_start, t_end;
    FILE *test_marker;

    init_array();

    IF_TIME(t_start = rtclock());

#pragma scop
    for (t=1; t<=T; t++){
        for (i=1; i<=N-1; i++)
            e[i] = e[i] - coeff1*(h[i]-h[i-1]);
        /* Reads e[i+1] for i up to N-1, i.e. e[N]. */
        for (i=0; i<=N-1; i++)
            h[i] = h[i] - coeff2*(e[i+1]-e[i]);
    }
#pragma endscop

    IF_TIME(t_end = rtclock());
    IF_TIME(fprintf(stderr, "%0.6lfs\n", t_end - t_start));

    /*
     * Only the existence of ".test" matters, so the stream is closed right
     * away instead of being leaked as it was when fopen() sat directly in
     * the if-condition.
     */
    test_marker = fopen(".test", "r");
    if (test_marker != NULL) {
        fclose(test_marker);
        print_array();
    }

    return 0;
}
示例2: main
/*
 * Applies f[i][j] = f[j][i] + f[i][j-1] over the interior indices 1..N-2,
 * optionally timing the loop nest through IF_TIME/rtclock(), and prints the
 * array when a file named ".test" can be opened in the working directory.
 *
 * t_start and t_end are not declared in this function; like init_array(),
 * print_array(), rtclock(), IF_TIME, PERF_INIT, N and f they must be provided
 * by the enclosing file.
 *
 * Returns 0.
 */
int main()
{
    int i, j;
    FILE *test_marker;

    init_array() ;

#ifdef PERFCTR
    PERF_INIT;
#endif

    IF_TIME(t_start = rtclock());

    /* pluto start (N) */
#pragma scop
    for (i=1; i<=N-2; i++) {
        for (j=1; j<=N-2; j++) {
            f[i][j] = f[j][i] + f[i][j-1];
        }
    }
#pragma endscop
    /* pluto end */

    IF_TIME(t_end = rtclock());
    IF_TIME(fprintf(stderr, "%0.6lfs\n", t_end - t_start));

    /*
     * Only the existence of ".test" matters, so the stream is closed right
     * away instead of being leaked as it was when fopen() sat directly in
     * the if-condition.
     */
    test_marker = fopen(".test", "r");
    if (test_marker != NULL) {
        fclose(test_marker);
        print_array();
    }

    return 0;
}
示例3: main
/*
 * Allocates three N*M buffers, runs syrkGPU() into D and syrk() into C, prints
 * the wall-clock time of the syrk() call as "CPU Runtime", and passes both
 * results to compareResults().
 *
 * DATA_TYPE, N, M, init_arrays(), syrkGPU(), syrk(), compareResults() and
 * rtclock() are defined outside this function.
 *
 * Returns 0 on success, 1 if any host buffer could not be allocated.
 */
int main() {
    double t_start, t_end;

    DATA_TYPE* A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
    DATA_TYPE* C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
    DATA_TYPE* D = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));

    /* Fail cleanly instead of handing NULL to the kernels. free(NULL) is a no-op. */
    if (A == NULL || C == NULL || D == NULL) {
        fprintf(stderr, "Error: failed to allocate host buffers\n");
        free(A);
        free(C);
        free(D);
        return 1;
    }

    fprintf(stdout, "<< Symmetric rank-k operations >>\n");

    init_arrays(A, C, D);

    /* syrkGPU() is called outside the timed interval measured below. */
    syrkGPU(A, D);

    t_start = rtclock();
    syrk(A, C);
    t_end = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(C, D);

    free(A);
    free(C);
    free(D);

    return 0;
}
示例4: syrkGPU
/*
 * Computes D = beta * D + alpha * A * A^T inside an OpenMP target region and
 * prints the elapsed wall-clock time as "GPU Runtime".
 *
 * A is read as a row-major buffer with row stride M; D is updated in place
 * with the same stride. Both are mapped with length N*M.
 *
 * N, M, alpha, beta, GPU_DEVICE, DATA_TYPE and rtclock() are defined outside
 * this function.
 */
void syrkGPU(DATA_TYPE* A, DATA_TYPE* D) {
    int i, j;
    double t_start, t_end;

    t_start = rtclock();

    /*
     * One target directive carries both the device and the map clauses.
     * Stacking two separate target directives, as before, nests one target
     * region inside another, which OpenMP does not define.
     */
#pragma omp target device (GPU_DEVICE) map(to: A[:N*M]) map(tofrom: D[:N*M])
    {
        /*
         * Without collapse only i is privatised automatically. j is declared
         * at function scope, so it must be listed as private or every thread
         * would share (and race on) the same inner counter.
         */
#pragma omp parallel for private(j)
        for (i = 0; i < N; i++) {
            for (j = 0; j < M; j++) {
                D[i * M + j] *= beta;
            }
        }

        /* collapse(2) makes both i and j loop-private; k is block-local. */
#pragma omp parallel for collapse(2)
        for (i = 0; i < N; i++) {
            for (j = 0; j < M; j++) {
                int k;
                for(k=0; k< M; k++) {
                    D[i * M + j] += alpha * A[i * M + k] * A[j * M + k];
                }
            }
        }
    }

    t_end = rtclock();
    fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);
}
示例5: cl_launch_kernel
/*
 * Binds the six arguments of clKernel1, enqueues it over a 2-D range rounded
 * up to whole work-groups, and prints the wall-clock time spent on argument
 * setup plus the enqueue call.
 *
 * clFinish() is not called here, so the reported interval ends when the
 * enqueue call returns.
 * NOTE(review): whether clEnqueueNDRangeKernel_fusion blocks until the kernel
 * completes is not visible from this function - confirm before trusting the
 * figure as a kernel run time.
 *
 * errcode, clKernel1, clCommandQue, a_mem_obj, c_mem_obj, alpha, beta,
 * cpu_offset and the DIM_LOCAL_WORK_GROUP_* / M / N macros come from the
 * enclosing file.
 */
void cl_launch_kernel()
{
    double launch_begin, launch_end;
    int m_arg = M;
    int n_arg = N;
    size_t local_size[2], global_size[2];
    size_t groups_x, groups_y;
    unsigned int arg_index;

    /* Kernel arguments in positional order: size in bytes, then address. */
    const struct {
        size_t size;
        void *value;
    } kernel_args[] = {
        { sizeof(cl_mem),    (void *)&a_mem_obj },
        { sizeof(cl_mem),    (void *)&c_mem_obj },
        { sizeof(DATA_TYPE), (void *)&alpha },
        { sizeof(DATA_TYPE), (void *)&beta },
        { sizeof(int),       (void *)&m_arg },
        { sizeof(int),       (void *)&n_arg }
    };
    const unsigned int arg_count = sizeof(kernel_args) / sizeof(kernel_args[0]);

    local_size[0] = DIM_LOCAL_WORK_GROUP_X;
    local_size[1] = DIM_LOCAL_WORK_GROUP_Y;

    /* Round each global dimension up to a multiple of the local size. */
    groups_x = (size_t)ceil(((float)N) / ((float)DIM_LOCAL_WORK_GROUP_X));
    groups_y = (size_t)ceil(((float)M) / ((float)DIM_LOCAL_WORK_GROUP_Y));
    global_size[0] = groups_x * DIM_LOCAL_WORK_GROUP_X;
    global_size[1] = groups_y * DIM_LOCAL_WORK_GROUP_Y;

    launch_begin = rtclock();

    /* OR the status codes together so one check covers every argument. */
    errcode = clSetKernelArg(clKernel1, 0, kernel_args[0].size, kernel_args[0].value);
    for (arg_index = 1; arg_index < arg_count; arg_index++) {
        errcode |= clSetKernelArg(clKernel1, arg_index,
                                  kernel_args[arg_index].size,
                                  kernel_args[arg_index].value);
    }
    if (errcode != CL_SUCCESS) {
        printf("Error in seting arguments1\n");
    }

    /* Launch through the fusion wrapper rather than plain clEnqueueNDRangeKernel. */
    errcode = clEnqueueNDRangeKernel_fusion(clCommandQue, clKernel1, 2, NULL,
                                            global_size, local_size, 0, NULL, NULL);
    if (errcode != CL_SUCCESS) {
        printf("Error in launching kernel1\n");
    }

    launch_end = rtclock();

    fprintf(stdout, "GPU Runtime: %0.6lfs\n", launch_end - launch_begin);
    fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, launch_end - launch_begin);
}
示例6: main
/*
 * Accumulates two matrix-vector products, x1 += a * y_1 and x2 += a^T * y_2,
 * optionally timing both loop nests together through IF_TIME/rtclock().
 * The arrays are printed only when TEST is defined at compile time.
 *
 * init_array(), print_array(), rtclock(), IF_TIME, N, a, x1, x2, y_1 and y_2
 * are defined outside this function.
 *
 * Returns 0.
 */
int main()
{
    int r, c;
    double t_start, t_end;

    init_array() ;

    IF_TIME(t_start = rtclock());

#pragma scop
    /* x1[r] gathers row r of a against y_1. */
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            x1[r] += a[r][c] * y_1[c];
        }
    }
    /* x2[r] gathers column r of a against y_2. */
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            x2[r] += a[c][r] * y_2[c];
        }
    }
#pragma endscop

    IF_TIME(t_end = rtclock());
    IF_TIME(printf("%0.6lfs\n", t_end - t_start));

#ifdef TEST
    print_array();
#endif

    return 0;
}
示例7: main
/*
 * Runs the contraction R[v1][v2][o1][o2] += T[v1][ox][o1][o2] * A2[v2][ox]
 * REPS times, timing each repetition with rtclock(), and prints the mean
 * time per repetition in seconds.
 *
 * init_arrays(), rtclock(), REPS, V, O, R, T and A2 are defined outside this
 * function.
 *
 * Returns 0. The previous version returned 1, which reports failure to the
 * calling shell even though the run completed normally.
 */
int main()
{
    double annot_t_start=0, annot_t_end=0, annot_t_total=0;
    int annot_i;
    int v1,v2,o1,o2,ox;

    init_arrays();

    for (annot_i=0; annot_i<REPS; annot_i++)
    {
        annot_t_start = rtclock();

        for (v1=0; v1<=V-1; v1=v1+1)
            for (v2=0; v2<=V-1; v2=v2+1)
                for (o1=0; o1<=O-1; o1=o1+1)
                    for (o2=0; o2<=O-1; o2=o2+1)
                        for (ox=0; ox<=O-1; ox=ox+1)
                            R[v1][v2][o1][o2]=R[v1][v2][o1][o2]+T[v1][ox][o1][o2]*A2[v2][ox];

        annot_t_end = rtclock();
        annot_t_total += annot_t_end - annot_t_start;
    }

    /* Report the average over all repetitions. */
    annot_t_total = annot_t_total / REPS;
    printf("%f\n", annot_t_total);

    return 0;
}
示例8: main
/*
 * Runs runSyrk() twice on the same input A: once at an HMPP "syrk" callsite
 * writing C_outputFromGpu, once as a plain call writing C. Each run is timed
 * with rtclock() and reported on stderr, then both outputs are handed to
 * compareResults().
 *
 * argc and argv are not used. Returns 0.
 *
 * NOTE(review): the hmpp pragmas are positional; their exact effect
 * (allocation, transfers, offload) depends on the HMPP toolchain and on the
 * codelet declaration, neither of which is visible here. Do not reorder.
 */
int main(int argc, char** argv)
{
double t_start, t_end;
/* Input and output matrices, all with automatic (stack) storage. */
DATA_TYPE A[N][M];
DATA_TYPE C[N][N];
DATA_TYPE C_outputFromGpu[N][N];
/* Fill the input and both output arrays before any timing starts. */
init_arrays(A, C, C_outputFromGpu);
/* These two pragmas precede the first timestamp, so they sit outside the timed interval. */
#pragma hmpp syrk allocate
#pragma hmpp syrk advancedload, args[a,c]
/* Timed interval 1: asynchronous callsite up to the matching synchronize. */
t_start = rtclock();
#pragma hmpp syrk callsite, args[a,c].advancedload=true, asynchronous
runSyrk(A, C_outputFromGpu);
#pragma hmpp syrk synchronize
t_end = rtclock();
fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
/* These two pragmas follow the second timestamp, so they are also untimed. */
#pragma hmpp syrk delegatedstore, args[c]
#pragma hmpp syrk release
/* Timed interval 2: the same function called without a callsite pragma. */
t_start = rtclock();
runSyrk(A, C);
t_end = rtclock();
fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
/* Hand the plain-call result and the callsite result to the comparison helper. */
compareResults(C, C_outputFromGpu);
return 0;
}
示例9: foo
/*
 * Runs a fused two-stage blur ten times and reports the mean time per run.
 *
 * Stage 1 writes blurx[y][x] as the sum of in[x][y], in[x+1][y] and
 * in[x+2][y]. Stage 2 runs in the same iteration once y >= 2 and writes
 * out[x][y-2] as the sum of three consecutive blurx rows.
 *
 * When VERIFY is defined, stage 1 is recomputed into A and every out[] entry
 * is compared against the sum of three A values. Mismatches are printed; the
 * function does not abort on them.
 *
 * t_start, t_end, IF_TIME, rtclock(), M, N, in, blurx, out and A are defined
 * outside this function.
 */
void foo(){
    int y, x, rep;

    IF_TIME(t_start = rtclock());

    for (rep = 0; rep < 10; ++rep) {
#pragma scop
        for (y = 0; y <= M-1; ++y) {
            for (x = 0; x <= N-1; ++x) {
                blurx[y][x] = in[x][y] + in[x+1][y] + in[x+2][y];
                if (y >= 2) {
                    out[x][y-2] = blurx[y-2][x] + blurx[y-1][x] + blurx[y][x];
                }
            }
        }
#pragma endscop
    }

    IF_TIME(t_end = rtclock());
    /* rep equals the repetition count (10) once the loop has finished. */
    IF_TIME(fprintf(stdout, "%s\t\t(M=%d,N=%d) \t %0.6lfs\n", __FILE__, M, N, (t_end - t_start)/rep));

#ifdef VERIFY
    /* Stage 1 reference: horizontal blur, stored un-transposed in A. */
    for (x = 0; x <= N-1; ++x) {
        for (y = 0; y <= M-1; ++y) {
            A[x][y] = in[x][y] + in[x+1][y] + in[x+2][y];
        }
    }

    /* Stage 2 reference: vertical blur, compared element by element. */
    for (x = 0; x <= N-1; ++x) {
        for (y = 2; y <= M-1; ++y) {
            if (out[x][y-2] != A[x][y]+A[x][y-1]+A[x][y-2]) {
                printf("blur-smo.c: Difference at (%d, %d) : %f versus %f\n", x, y, out[x][y-2], A[x][y]+A[x][y-1]+A[x][y-2]);
            }
        }
    }
#endif
}
示例10: main
int main(int argc, char* argv[])
//int main(void)
{
double t_start, t_end;
DATA_TYPE* A;
DATA_TYPE* B;
DATA_TYPE* C;
DATA_TYPE* D;
DATA_TYPE* E;
DATA_TYPE* F;
DATA_TYPE* G;
DATA_TYPE* G_outputFromGpu;
if(argc==2){
printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
cpu_offset = atoi(argv[1]);
}
A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE));
B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE));
C = (DATA_TYPE*)malloc(NJ*NM*sizeof(DATA_TYPE));
D = (DATA_TYPE*)malloc(NM*NL*sizeof(DATA_TYPE));
E = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE));
F = (DATA_TYPE*)malloc(NJ*NL*sizeof(DATA_TYPE));
G = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE));
G_outputFromGpu = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE));
int i;
init_array(A, B, C, D);
read_cl_file();
cl_initialization_fusion();
//cl_initialization();
cl_mem_init(A, B, C, D, E, F, G);
cl_load_prog();
cl_launch_kernel();
errcode = clEnqueueReadBuffer(clCommandQue[0], g_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, G_outputFromGpu, 0, NULL, NULL);
if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");
t_start = rtclock();
mm3_cpu(A, B, C, D, E, F, G);
t_end = rtclock();
fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);
compareResults(G, G_outputFromGpu);
cl_clean_up();
free(A);
free(B);
free(C);
free(D);
free(E);
free(F);
free(G);
free(G_outputFromGpu);
return 0;
}
示例11: main
/*
 * Runs the three-stage chain threeMMloopa -> threeMMloopb -> threeMMloopc
 * twice: once at HMPP <group1> callsites writing E_gpu, F_gpu and
 * G_outputFromGpu, once as plain calls writing E, F and G. Each chain is
 * timed with rtclock() and reported on stderr, then G and G_outputFromGpu
 * are handed to compareResults().
 *
 * argc and argv are not used. Returns 0.
 *
 * NOTE(review): the hmpp pragmas are positional; their exact effect depends
 * on the HMPP toolchain and on the codelet declarations, neither of which is
 * visible here. Do not reorder.
 */
int main(int argc, char** argv)
{
double t_start, t_end;
/* Operands (A-D), intermediates (E, F) and result (G), with a second copy of
   E, F and G for the callsite path. All have automatic (stack) storage. */
DATA_TYPE A[NI][NK];
DATA_TYPE B[NK][NJ];
DATA_TYPE C[NJ][NM];
DATA_TYPE D[NM][NL];
DATA_TYPE E[NI][NJ];
DATA_TYPE E_gpu[NI][NJ];
DATA_TYPE F[NJ][NL];
DATA_TYPE F_gpu[NJ][NL];
DATA_TYPE G[NI][NL];
DATA_TYPE G_outputFromGpu[NI][NL];
/* Initialize the four operand arrays. The helper really is spelled
   iNIt_array here; it is defined outside this function. */
iNIt_array(A, B, C, D);
/* These four pragmas precede the first timestamp, so they sit outside the timed interval. */
#pragma hmpp <group1> allocate
#pragma hmpp <group1> loopa advancedload, args[a;b;e]
#pragma hmpp <group1> loopb advancedload, args[f;c;d]
#pragma hmpp <group1> loopc advancedload, args[g]
/* Timed interval 1: three asynchronous callsites, each followed by its own
   synchronize before the next stage starts. */
t_start = rtclock();
#pragma hmpp <group1> loopa callsite, args[a;b;e].advancedload=true, asynchronous
threeMMloopa(A, B, E_gpu);
#pragma hmpp <group1> loopa synchronize
#pragma hmpp <group1> loopb callsite, args[f;c;d].advancedload=true, asynchronous
threeMMloopb(C, D, F_gpu);
#pragma hmpp <group1> loopb synchronize
#pragma hmpp <group1> loopc callsite, args[g;e;f].advancedload=true, asynchronous
threeMMloopc(E_gpu, F_gpu, G_outputFromGpu);
#pragma hmpp <group1> loopc synchronize
t_end = rtclock();
fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
/* These four pragmas follow the second timestamp, so they are also untimed. */
#pragma hmpp <group1> loopa delegatedstore, args[a;b]
#pragma hmpp <group1> loopb delegatedstore, args[c;d]
#pragma hmpp <group1> loopc delegatedstore, args[g;e;f]
#pragma hmpp <group1> release
/* Timed interval 2: the same three functions called without callsite pragmas. */
t_start = rtclock();
threeMMloopa(A, B, E);
threeMMloopb(C, D, F);
threeMMloopc(E, F, G);
t_end = rtclock();
fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
/* Hand the plain-call result and the callsite result to the comparison helper. */
compareResults(G, G_outputFromGpu);
return 0;
}
示例12: main
/*
 * Runs the three-stage chain covarLoopa -> covarLoopb -> covarLoopc twice:
 * once at HMPP <group1> callsites on data_Gpu / mean_Gpu writing
 * symmat_outputFromGpu, once as plain calls on data / mean writing symmat.
 * Each chain is timed with rtclock() and reported on stderr, then both
 * symmat arrays are handed to compareResults().
 *
 * argc and argv are not used. Returns 0.
 *
 * NOTE(review): the hmpp pragmas are positional; their exact effect depends
 * on the HMPP toolchain and on the codelet declarations, neither of which is
 * visible here. Do not reorder.
 */
int main(int argc, char** argv)
{
/* m and n are not referenced anywhere else in this function. */
int m = M;
int n = N;
double t_start, t_end;
/* Scalar passed as the third argument of covarLoopa in both paths. */
DATA_TYPE float_n = 321414134.01;
/* Working arrays, each dimension padded by one element, with automatic
   (stack) storage. The *_Gpu / *_outputFromGpu copies serve the callsite path. */
DATA_TYPE data[M + 1][N + 1];
DATA_TYPE data_Gpu[M + 1][N + 1];
DATA_TYPE symmat[M + 1][M + 1];
DATA_TYPE symmat_outputFromGpu[M + 1][M + 1];
DATA_TYPE mean[M + 1];
DATA_TYPE mean_Gpu[M + 1];
/* Initialize both copies of the input data. */
init_arrays(data, data_Gpu);
/* These three pragmas precede the first timestamp, so they sit outside the timed interval. */
#pragma hmpp <group1> allocate
#pragma hmpp <group1> loopa advancedload, args[pmean;pdata;pfloat_n]
#pragma hmpp <group1> loopc advancedload, args[psymmat]
/* Timed interval 1: three asynchronous callsites, each followed by its own
   synchronize before the next stage starts. */
t_start = rtclock();
#pragma hmpp <group1> loopa callsite, args[pmean;pdata;pfloat_n].advancedload=true, asynchronous
covarLoopa(mean_Gpu, data_Gpu, float_n);
#pragma hmpp <group1> loopa synchronize
#pragma hmpp <group1> loopb callsite, args[pdata;pmean].advancedload=true, asynchronous
covarLoopb(data_Gpu, mean_Gpu);
#pragma hmpp <group1> loopb synchronize
#pragma hmpp <group1> loopc callsite, args[psymmat;pdata].advancedload=true, asynchronous
covarLoopc(symmat_outputFromGpu, data_Gpu);
#pragma hmpp <group1> loopc synchronize
t_end = rtclock();
fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
/* These three pragmas follow the second timestamp, so they are also untimed. */
#pragma hmpp <group1> loopb delegatedstore, args[pmean]
#pragma hmpp <group1> loopc delegatedstore, args[psymmat;pdata]
#pragma hmpp <group1> release
/* Timed interval 2: the same three functions called without callsite pragmas. */
t_start = rtclock();
covarLoopa(mean, data, float_n);
covarLoopb(data, mean);
covarLoopc(symmat, data);
t_end = rtclock();
fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
/* Hand the plain-call result and the callsite result to the comparison helper. */
compareResults(symmat, symmat_outputFromGpu);
return 0;
}
示例13: main
/*
 * Host driver: allocates the data, mean, stddev and symmat buffers, runs and
 * times the OpenCL path through the cl_* helpers, reads the device result
 * back into symmat_outputFromGpu, times correlation() as the reference, and
 * compares the two.
 *
 * When exactly one command-line argument is given it is parsed with atoi()
 * into the file-level cpu_offset.
 *
 * DATA_TYPE, M, N, errcode, cpu_offset, clCommandQue, symmat_mem_obj,
 * rtclock() and all cl_* / init_arrays / correlation / compareResults helpers
 * are defined outside this function.
 *
 * Returns 0 on success, 1 if any host buffer could not be allocated.
 */
int main(int argc, char* argv[])
{
    double t_start, t_end;

    DATA_TYPE* data;
    DATA_TYPE* mean;
    DATA_TYPE* stddev;
    DATA_TYPE* symmat;
    DATA_TYPE* symmat_outputFromGpu;

    if(argc==2){
        printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
        cpu_offset = atoi(argv[1]);
    }

    data = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));
    mean = (DATA_TYPE*)malloc((M + 1)*sizeof(DATA_TYPE));
    stddev = (DATA_TYPE*)malloc((M + 1)*sizeof(DATA_TYPE));
    symmat = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));
    symmat_outputFromGpu = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));

    /* Fail cleanly instead of handing NULL to the kernels. free(NULL) is a no-op. */
    if (data == NULL || mean == NULL || stddev == NULL ||
        symmat == NULL || symmat_outputFromGpu == NULL) {
        fprintf(stderr, "Error: failed to allocate host buffers\n");
        free(data);
        free(mean);
        free(stddev);
        free(symmat);
        free(symmat_outputFromGpu);
        return 1;
    }

    init_arrays(data);
    read_cl_file();
    cl_initialization_fusion();
    //cl_initialization();
    cl_mem_init(data, mean, stddev, symmat);
    cl_load_prog();

    /* Time the whole cl_launch_kernel() call from the host side. */
    double start = rtclock();
    cl_launch_kernel();
    double end = rtclock();
    fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, (end - start));
    //fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, 1000*(end - start));

    /* CL_TRUE makes this a blocking read of the device result. */
    errcode = clEnqueueReadBuffer(clCommandQue[0], symmat_mem_obj, CL_TRUE, 0, (M+1) * (N+1) * sizeof(DATA_TYPE), symmat_outputFromGpu, 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

    /* Time only the host reference implementation. */
    t_start = rtclock();
    correlation(data, mean, stddev, symmat);
    t_end = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(symmat, symmat_outputFromGpu);
    cl_clean_up();

    free(data);
    free(mean);
    free(stddev);
    free(symmat);
    free(symmat_outputFromGpu);

    return 0;
}
示例14: main
/*
 * Initialises the arrays, runs syrkGPU(), then times a single syrk() call and
 * prints it as "CPU Runtime" before calling compareResults().
 *
 * init_arrays(), syrkGPU(), syrk(), compareResults() and rtclock() take no
 * arguments here and are defined outside this function.
 *
 * Returns 0.
 */
int main() {
    double cpu_begin;
    double cpu_elapsed;

    init_arrays();

    /* syrkGPU() is called outside the timed interval measured below. */
    syrkGPU();

    cpu_begin = rtclock();
    syrk();
    cpu_elapsed = rtclock() - cpu_begin;

    fprintf(stdout, "CPU Runtime: %0.6lfs\n", cpu_elapsed);

    compareResults();

    return 0;
}
示例15: SpMM
// Multiplies the CSR matrices m1 and m2 with 512-bit vector intrinsics and
// accumulates each output row into its own FastHash, created with num_buckets.
//
// For every stored element of m1 (row i, column cola, value a), row cola of m2
// is walked 16 entries at a time. Two 8-lane products a * vals are formed and
// each lane whose column index is not -1 is passed to Reduce() on row i's
// table. Every reduced value is also added to the running float `res`, which
// is printed with the elapsed time. The per-row tables are deleted before
// returning, so nothing is handed back to the caller.
//
// NOTE(review): the lanes of a __m512d are read back through a ValueType*,
// which presumably expects ValueType to be double - confirm. The vector loads
// dereference __m512d* directly, so they rely on m2->vals[k] being suitably
// aligned and on each row holding a multiple of 16 entries - confirm against
// the Csr builder.
void SpMM(Csr<ValueType>* m1, Csr<ValueType>* m2, int num_buckets) {
  vector<FastHash<int, ValueType>* > result_map(m1->num_rows);
  for (auto& row_table : result_map) {
    row_table = new FastHash<int, ValueType>(num_buckets);
  }

  cout << "Starting SpMM..." << endl;
  float res = 0;

  double before = rtclock();
  for (int i = 0; i < m1->num_rows; ++i) {
    // Feeds the eight lanes of one product vector into row i's table,
    // skipping lanes whose column index is -1.
    auto reduce_lanes = [&](const __m512d& products, const int* lane_cols) {
      for (int lane = 0; lane < 8; ++lane) {
        int col = lane_cols[lane];
        if (col != -1) {
          ValueType val = ((ValueType *)(&products))[lane];
          result_map[i]->Reduce(col, val);
          res += val;
        }
      }
    };

    for (int j = m1->rows[i]; j < m1->rows[i+1]; ++j) {
      const int cola = m1->cols[j];
      const __m512d a = _mm512_set1_pd(m1->vals[j]);

      for (int k = m2->rows[cola]; k < m2->rows[cola] + m2->row_lens[cola]; k += 16) {
        // Sixteen values and their sixteen column indices start at offset k.
        __m512d *lower_vals = (__m512d *)(&(m2->vals[k]));
        __m512d *upper_vals = (__m512d *)(&(m2->vals[k]) + 8);
        const int *chunk_cols = (const int *)(&(m2->cols[k]));

        __m512d lower_prod = _mm512_mul_pd(a, *lower_vals);
        __m512d upper_prod = _mm512_mul_pd(a, *upper_vals);

        // Lanes 0-7 first, then lanes 8-15, in the same order as before.
        reduce_lanes(lower_prod, chunk_cols);
        reduce_lanes(upper_prod, chunk_cols + 8);
      }
    }
  }
  double after = rtclock();

  cout << "res: " << res << endl;
  cout << RED << "[****Result****] ========> *SIMD Naive* time: " << after - before << " secs." << RESET << endl;

  for (auto& row_table : result_map) {
    delete row_table;
  }
}