本文整理汇总了C++中dim3函数的典型用法代码示例。如果您正苦于以下问题：C++ dim3函数的具体用法？C++ dim3怎么用？C++ dim3使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dim3函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: run_add
bool run_add() {
constexpr size_t N = 64;
std::vector<T> host_input(N);
std::vector<T> host_expected(N);
for (int i = 0; i < N; ++i) {
host_input[i] = (T)i;
host_expected[i] = host_input[i] + host_input[i];
}
T* input1;
hipMalloc(&input1, N * sizeof(T));
hipMemcpy(input1, host_input.data(), host_input.size()*sizeof(T), hipMemcpyHostToDevice);
T* input2;
hipMalloc(&input2, N * sizeof(T));
hipMemcpy(input2, host_input.data(), host_input.size()*sizeof(T), hipMemcpyHostToDevice);
constexpr unsigned int blocks = 1;
constexpr unsigned int threads_per_block = 1;
hipLaunchKernelGGL(add<T>, dim3(blocks), dim3(threads_per_block), 0, 0, input1, input2, N);
hipMemcpy(host_input.data(), input1, host_input.size()*sizeof(T), hipMemcpyDeviceToHost);
bool equal = true;
for (int i = 0; i < N; i++) {
equal &= (host_input[i] == host_expected[i]);
}
return equal;
}
示例2: test_gl2
// Runs one full-size vectorADD2 launch over N ints and validates the result.
//
// Buffers come from HipTest::initArrays; the *_h pointers are the host side of
// the copies below and the *_d pointers the device side. The grid size comes
// from HipTest::setNumBlocks using blocksPerCU and threadsPerBlock, which are
// defined outside this block.
//
// Always returns 0; the outcome is reported by HipTest::checkVectorADD and
// the HIPCHECK macro.
int test_gl2(size_t N) {
    int *A_d, *B_d, *C_d;
    int *A_h, *B_h, *C_h;
    HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

    const size_t Nbytes = N * sizeof(int);
    const unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

    // Upload both operands in a single chunk each.
    HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
    HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

    // One launch covers the whole vector.
    hipLaunchKernel(vectorADD2, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N);

    // Fetch the sum, then wait for the device before checking on the host.
    HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
    HIPCHECK(hipDeviceSynchronize());

    HipTest::checkVectorADD(A_h, B_h, C_h, N);
    return 0;
}
示例3: run_rint
// Host-side check of the test_rint kernel.
//
// Uploads N copies of 1.345, launches test_rint with one block of N threads
// and compares every output element with the host reference.
//
// N and SIZE are defined outside this block; SIZE is used as the byte size of
// an N-element double buffer.
//
// NOTE(review): the host reference is round(), not rint(); the two agree for
// the 1.345 input used here, but confirm which one is intended.
//
// Returns true when all N results match. On a mismatch the assert fires in
// builds with assertions enabled; otherwise false is returned.
bool run_rint() {
    double* A = new double[N];
    // Value-initialised so a failed copy-back is compared against zeros rather
    // than indeterminate memory.
    double* B = new double[N]();
    for (int i = 0; i < N; i++) {
        A[i] = 1.345;
    }

    double* Ad = nullptr;
    double* Bd = nullptr;
    hipMalloc((void**)&Ad, SIZE);
    hipMalloc((void**)&Bd, SIZE);
    hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
    hipLaunchKernelGGL(test_rint, dim3(1), dim3(N), 0, 0, Ad, Bd);
    hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);

    // Every element must match, and the loop covers N elements instead of a
    // hard-coded 512.
    bool passed = true;
    for (int i = 0; i < N; i++) {
        if (B[i] != round(A[i])) {
            passed = false;
            break;
        }
    }

    delete[] A;
    delete[] B;
    hipFree(Ad);
    hipFree(Bd);

    assert(passed);
    return passed;
}
示例4: main
// Entry point: round-trips one int through device memory around a
// single-thread launch of the Iter kernel.
//
// NOTE(review): SIZE is defined outside this block, and both copies transfer
// SIZE bytes to/from the address of a single int - confirm that SIZE equals
// sizeof(int).
int main() {
    int hostValue = 0;
    int* deviceValue;

    hipMalloc((void**)&deviceValue, SIZE);
    hipMemcpy(deviceValue, &hostValue, SIZE, hipMemcpyHostToDevice);

    // One block containing one thread.
    hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, 0, deviceValue);

    hipMemcpy(&hostValue, deviceValue, SIZE, hipMemcpyDeviceToHost);
}
示例5: main
int main(int argc, char *argv[])
{ int warpSize, pshift;
hipDeviceProp_t devProp;
hipDeviceGetProperties(&devProp, 0);
if(strncmp(devProp.name,"Fiji",1)==0) {warpSize =64; pshift =6;}
else {warpSize =32; pshift =5;}
unsigned int Num_Threads_per_Block = 512;
unsigned int Num_Blocks_per_Grid = 1;
unsigned int Num_Warps_per_Block = Num_Threads_per_Block/warpSize;
unsigned int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/warpSize;
unsigned int* host_ballot = (unsigned int*)malloc(Num_Warps_per_Grid*sizeof(unsigned int));
unsigned int* device_ballot;
HIP_ASSERT(hipMalloc((void**)&device_ballot, Num_Warps_per_Grid*sizeof(unsigned int)));
int divergent_count =0;
for (int i=0; i<Num_Warps_per_Grid; i++) host_ballot[i] = 0;
HIP_ASSERT(hipMemcpy(device_ballot, host_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyHostToDevice));
hipLaunchKernel(gpu_ballot, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_ballot,Num_Warps_per_Block,pshift);
HIP_ASSERT(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyDeviceToHost));
for (int i=0; i<Num_Warps_per_Grid; i++) {
if ((host_ballot[i] == 0)||(host_ballot[i]/warpSize == warpSize)) std::cout << "Warp " << i << " IS convergent- Predicate true for " << host_ballot[i]/warpSize << " threads\n";
else {std::cout << "Warp " << i << " IS divergent - Predicate true for " << host_ballot[i]/warpSize<< " threads\n";
divergent_count++;}
}
if (divergent_count==1) printf("PASSED\n"); else printf("FAILED\n");
return EXIT_SUCCESS;
}
示例6: run_lround
bool run_lround(){
double *A, *Ad;
long int *B, *Bd;
A = new double[N];
B = new long int[N];
for(int i=0;i<N;i++){
A[i] = 1.345;
}
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, N*sizeof(long int));
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_lround, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, N*sizeof(long int), hipMemcpyDeviceToHost);
int passed = 0;
for(int i=0;i<512;i++){
long int x = round(A[i]);
if(B[i] == x){
passed = 1;
}
}
free(A);
if(passed == 1){
return true;
}
assert(passed == 1);
return false;
}
示例7: run_rnorm
bool run_rnorm(){
double *A, *Ad, *B, *Bd;
A = new double[N];
B = new double[N];
double val = 0.0;
for(int i=0;i<N;i++){
A[i] = 1.0;
B[i] = 0.0;
val += 1.0;
}
val = 1/sqrt(val);
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_rnorm, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for(int i=0;i<512;i++){
if(B[0] - val < 0.000001){
passed = 1;
}
}
free(A);
if(passed == 1){
return true;
}
assert(passed == 1);
return false;
}
示例8: run_rnorm3d
bool run_rnorm3d(){
double *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd;
A = new double[N];
B = new double[N];
C = new double[N];
D = new double[N];
double val = 0.0;
for(int i=0;i<N;i++){
A[i] = 1.0;
B[i] = 2.0;
C[i] = 3.0;
}
val = 1/sqrt(1.0 + 4.0 + 9.0);
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Cd, SIZE);
hipMalloc((void**)&Dd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_rnorm3d, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd);
hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for(int i=0;i<512;i++){
if(D[i] - val < 0.000001){
passed = 1;
}
}
free(A);
if(passed == 1){
return true;
}
assert(passed == 1);
return false;
}
示例9: run_erfinv
bool run_erfinv(){
double *A, *Ad, *B, *Bd;
A = new double[N];
B = new double[N];
for(int i=0;i<N;i++){
A[i] = -0.6;
B[i] = 0.0;
}
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_erfinv, dim3(1), dim3(N), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for(int i=0;i<512;i++){
if(B[i] - A[i] < 0.000001){
passed = 1;
}
}
free(A);
if(passed == 1){
return true;
}
assert(passed == 1);
return false;
}
示例10: run_sincos
bool run_sincos(){
double *A, *Ad, *B, *C, *Bd, *Cd;
A = new double[N];
B = new double[N];
C = new double[N];
for(int i=0;i<N;i++){
A[i] = 1.0;
}
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Cd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_sincos, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for(int i=0;i<512;i++){
if(B[i] == sin(1.0)){
passed = 1;
}
}
passed = 0;
for(int i=0;i<512;i++){
if(C[i] == cos(1.0)){
passed = 1;
}
}
free(A);
if(passed == 1){
return true;
}
assert(passed == 1);
return false;
}
示例11: run
void run(size_t size, hipStream_t stream1, hipStream_t stream2){
float *Ah, *Bh, *Cd, *Dd, *Eh;
float *Ahh, *Bhh, *Cdd, *Ddd, *Ehh;
HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault));
HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault));
HIPCHECK(hipMalloc(&Cd, size));
HIPCHECK(hipMalloc(&Dd, size));
HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault));
HIPCHECK(hipHostMalloc((void**)&Ahh, size, hipHostMallocDefault));
HIPCHECK(hipHostMalloc((void**)&Bhh, size, hipHostMallocDefault));
HIPCHECK(hipMalloc(&Cdd, size));
HIPCHECK(hipMalloc(&Ddd, size));
HIPCHECK(hipHostMalloc((void**)&Ehh, size, hipHostMallocDefault));
HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream1));
HIPCHECK(hipMemcpyAsync(Bhh, Ahh, size, hipMemcpyHostToHost, stream2));
HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1));
HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2));
hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream1, Cd);
hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream2, Cdd);
HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1));
HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2));
HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1));
HIPCHECK(hipMemcpyAsync(Ehh, Ddd, size, hipMemcpyDeviceToHost, stream2));
HIPCHECK(hipDeviceSynchronize());
HIPASSERT(Eh[10] = Ah[10] + 1.0f);
HIPASSERT(Ehh[10] = Ahh[10] + 1.0f);
}
示例12: main
int main(int argc, char *argv[])
{ int warpSize, pshift;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
if(strncmp(devProp.name,"Fiji",1)==0)
{ warpSize =64;
pshift =6;
}
else {warpSize =32; pshift=5;}
int anycount =0;
int allcount =0;
int Num_Threads_per_Block = 1024;
int Num_Blocks_per_Grid = 1;
int Num_Warps_per_Block = Num_Threads_per_Block/warpSize;
int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/warpSize;
int * host_any = ( int*)malloc(Num_Warps_per_Grid*sizeof(int));
int * host_all = ( int*)malloc(Num_Warps_per_Grid*sizeof(int));
int *device_any;
int *device_all;
HIP_ASSERT(hipMalloc((void**)&device_any,Num_Warps_per_Grid*sizeof( int)));
HIP_ASSERT(hipMalloc((void**)&device_all,Num_Warps_per_Grid*sizeof(int)));
for (int i=0; i<Num_Warps_per_Grid; i++)
{
host_any[i] = 0;
host_all[i] = 0;
}
HIP_ASSERT(hipMemcpy(device_any, host_any,sizeof(int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(device_all, host_all,sizeof(int), hipMemcpyHostToDevice));
hipLaunchKernel(warpvote, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_any, device_all ,Num_Warps_per_Block,pshift);
HIP_ASSERT(hipMemcpy(host_any, device_any, Num_Warps_per_Grid*sizeof(int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(host_all, device_all, Num_Warps_per_Grid*sizeof(int), hipMemcpyDeviceToHost));
for (int i=0; i<Num_Warps_per_Grid; i++) {
printf("warp no. %d __any = %d \n",i,host_any[i]);
printf("warp no. %d __all = %d \n",i,host_all[i]);
if (host_all[i]!=1) ++allcount;
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
if (host_any[i]!=64) ++anycount;
#else
if (host_any[i]!=1) ++anycount;
#endif
}
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
if (anycount == 1 && allcount ==1) printf("PASSED\n"); else printf("FAILED\n");
#else
if (anycount == 0 && allcount ==1) printf("PASSED\n"); else printf("FAILED\n");
#endif
return EXIT_SUCCESS;
}
示例13: main
int main() {
hipLaunchKernelGGL(
compileDoublePrecisionMathOnDevice,
dim3(1, 1, 1),
dim3(1, 1, 1),
0,
0,
1);
passed();
}
示例14: operator
// Chooses a launch configuration for an x * y * z problem.
//
// The block is 128x4x1 when y is at least 4 and 512x1x1 otherwise. Each grid
// extent is then divide_and_round_up of the problem extent by the matching
// block extent. Both output pointers are dereferenced unconditionally.
void operator()(dim3 *grid_dim, dim3 *block_dim, int x, int y, int z)
{
    *block_dim = (y >= 4) ? dim3(128, 4, 1) : dim3(512, 1, 1);

    // Reference rather than copy, so the grid is derived from whatever
    // *block_dim currently holds.
    dim3 &chosen = *block_dim;
    grid_dim->x = divide_and_round_up(x, chosen.x);
    grid_dim->y = divide_and_round_up(y, chosen.y);
    grid_dim->z = divide_and_round_up(z, chosen.z);
}
示例15: ArrangePrefer3dLocality
// Fills *block with a cubic 8x8x8 block and *grid with the number of such
// blocks needed to cover volume_size along each axis (rounded up).
// Does nothing when either output pointer is null.
void BlockArrangement::ArrangePrefer3dLocality(dim3* grid, dim3* block,
                                               const uint3& volume_size)
{
    if (!(grid && block))
        return;

    // Edge length shared by all three block dimensions.
    const int edge = 8;

    *block = dim3(edge, edge, edge);

    // Ceiling division of each volume extent by the block edge.
    *grid = dim3((volume_size.x + edge - 1) / edge,
                 (volume_size.y + edge - 1) / edge,
                 (volume_size.z + edge - 1) / edge);
}