当前位置: 首页>>代码示例>>C++>>正文


C++ CLKernel类代码示例

本文整理汇总了C++中CLKernel的典型用法代码示例。如果您正苦于以下问题:C++ CLKernel类的具体用法?C++ CLKernel怎么用?C++ CLKernel使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CLKernel类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: TEST

TEST( SLOW_testintwrapper_huge, testread ) {
    Timer timer;
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_read");
//    const int N = 4500000;
//    const int N = (4500000/512)*512;
    int N = 100000;
    int *out = new int[N];
    CLWrapper *outwrapper = cl->wrap(N, out);
    kernel->in(3)->in(7);
    kernel->output( outwrapper );
    int globalSize = N;
    int workgroupsize = cl->getMaxWorkgroupSize();
    globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
    cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
    timer.timeCheck("before kernel");
    kernel->run_1d( globalSize, workgroupsize );
    timer.timeCheck("after kernel");
    outwrapper->copyToHost();
    timer.timeCheck("after copy to host");
    for( int i = 0; i < N; i++ ) {
       if( out[i] != 4228 ) {
           cout << "out[" << i << "] != 4228: " << out[i] << endl;
           exit(-1);
       }
    }

    delete outwrapper;
    delete kernel;
    delete cl;
}
开发者ID:tkaplan,项目名称:EasyCL,代码行数:31,代码来源:testintwrapper_huge.cpp

示例2: TEST

TEST(test_scenario_te42kyfo, main) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
    CLArrayFloat *out = cl->arrayFloat(5);
    CLArrayFloat *in = cl->arrayFloat(5);
    for(int i = 0; i < 5; i++) {
       (*out)[i] = 0;
    }
    for(int i = 0; i < 100; i++) {
      for(int n = 0; n < 5; n++) {
        (*in)[n] = i*n;
      }
      kernel->in(in);
      kernel->out(out);
      kernel->run_1d(5, 5);
      assertEquals(i*2 + 5, (*out)[2]);
      assertEquals(i*4 + 5, (*out)[4]);
    }
    cout << "finished" << endl;

    delete in;
    delete out;
    delete kernel;
    delete cl;
}
开发者ID:BenBBear,项目名称:EasyCL,代码行数:25,代码来源:test_scenario_te42kyfo.cpp

示例3: THClStorage_set

// this runs an entire kernel to get one value.  Clearly this is going to be pretty slow, but
// at least it's more or less compatible, and comparable, to how cutorch does it
void THClStorage_set(THClState *state, THClStorage *self, long index, float value)
{
////  cout << "set size=" << self->size << " index=" << index << " value=" << value << endl;
  THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
  THArgCheck(self->wrapper != 0, 1, "storage argument not initialized, is empty");
//  if( self->wrapper->isDeviceDirty() ) { // we have to do this, since we're going to copy it all back again
//                                         // although I suppose we could set via a kernel perhaps
//                                         // either way, this function is pretty inefficient right now :-P
//    if(state->trace) cout << "wrapper->copyToHost() size " << self->size << endl;
//    self->wrapper->copyToHost();
//  }
//  self->data[index] = value;
//  if(state->trace) cout << "wrapper->copyToDevice() size " << self->size << endl;
//  self->wrapper->copyToDevice();

  const char *uniqueName = __FILE__ ":set";
  EasyCL *cl = self->cl; // cant remember if this is a good idea or not :-P
  CLKernel *kernel = 0;
  if(cl->kernelExists(uniqueName)) {
    kernel = cl->getKernel(uniqueName);
  } else {
    TemplatedKernel kernelBuilder(cl);
    kernel = kernelBuilder.buildKernel( uniqueName, __FILE__, getSetKernelSource(), "THClStorageSet" );
  }

  kernel->inout(self->wrapper);
  kernel->in((int64_t)index);
  kernel->in(value);
  kernel->run_1d(1, 1);

  if(state->addFinish) cl->finish();
}
开发者ID:anoidgit,项目名称:cltorch,代码行数:34,代码来源:THClStorage.cpp

示例4: copy

void CopyBuffer::copy( EasyCL *cl, CLWrapper *sourceWrapper, int *target ) {
    // first we will copy it to another buffer, so we can copy it out
    int bufferSize = sourceWrapper->size();
//    float *copiedBuffer = new float[ bufferSize ];
    CLWrapper *targetWrapper = cl->wrap( bufferSize, target );
    targetWrapper->createOnDevice();

    // now copy it, via a kernel
    const string kernelSource = "\n"
        "kernel void copy( int N, global int const *source, global int *dest ) {\n"
          "  #define globalId ( get_global_id(0) )\n"
          "  if( (int)globalId < N ) {\n"
          "      dest[globalId] = source[globalId];\n"
          "  }\n"
       " }\n";
    CLKernel *kernel = cl->buildKernelFromString( kernelSource, "copy", "" );
    kernel->in( bufferSize )->in( sourceWrapper )->out( targetWrapper );
    int workgroupSize = 32;
    int numWorkgroups = ( bufferSize + workgroupSize - 1 ) / workgroupSize;
    kernel->run_1d( numWorkgroups * workgroupSize, workgroupSize );
    cl->finish();
    targetWrapper->copyToHost();

    delete targetWrapper;
    delete kernel;
//    delete[] copiedBuffer;
}
开发者ID:2php,项目名称:DeepCL,代码行数:27,代码来源:CopyBuffer.cpp

示例5: TEST

TEST( testCopyLocal, basic ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

    float a[] = { 1,2,3,4,
                  5,6,7,8,
                  9,10,11,12 };
    float b[16];
    memset(b, 0, sizeof(float)*16);

    CLKernel *kernel = makeKernel( cl );
    kernel->in( 12, a )->out( 16, b )->in( 12 );
    kernel->localFloats( 12 );
    kernel->run_1d(12,12);
    cl->finish();
    for( int i = 0; i < 3; i++ ) {
        for( int j = 0; j < 4; j++ ) {
            cout << b[i*4+j] << " ";
            EXPECT_EQ( i * 4 + j + 1, b[i*4+j] );
        }
        cout << endl;
    }
    cout << endl;
    for( int i = 12; i < 16; i++ ) {
        cout << b[i] << " ";
        EXPECT_EQ( 0, b[i] );
    }
    cout << endl;

    delete kernel;
    delete cl;
}
开发者ID:tkaplan,项目名称:DeepCL,代码行数:31,代码来源:testCopyLocal.cpp

示例6: THClStorage_get

// this runs an entire kernel to get one value.  Clearly this is going to be pretty slow, but
// at least it's more or less compatible, and comparable, to how cutorch does it
// lgfgs expects a working implementation of this method
float THClStorage_get(THClState *state, const THClStorage *self, long index)
{
////  printf("THClStorage_get\n");
  THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
  THArgCheck(self->wrapper != 0, 1, "storage argument not initialized, is empty");

//  if( self->wrapper->isDeviceDirty() ) {
//    if(state->trace) cout << "wrapper->copyToHost()" << endl;
//    self->wrapper->copyToHost();
//  }
//  return self->data[index];

  const char *uniqueName = __FILE__ ":get";
  EasyCL *cl = self->cl; // cant remember if this is a good idea or not :-P
  CLKernel *kernel = 0;
  if(cl->kernelExists(uniqueName)) {
    kernel = cl->getKernel(uniqueName);
  } else {
    TemplatedKernel kernelBuilder(cl);
    kernel = kernelBuilder.buildKernel( uniqueName, __FILE__, getGetKernelSource(), "THClStorageGet" );
  }

  float res;
  kernel->out(1, &res);
  kernel->in(self->wrapper);
  kernel->in((int64_t)index);
  kernel->run_1d(1, 1);

  if(state->addFinish) cl->finish();
  return res;
}
开发者ID:anoidgit,项目名称:cltorch,代码行数:34,代码来源:THClStorage.cpp

示例7: TEST

TEST(testfloatwrapperconst, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
    float in[5];
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3;
    }
    float out[5];
    CLWrapper *inwrapper = cl->wrap(5, (float const *)in);
    CLWrapper *outwrapper = cl->wrap(5, out);
    inwrapper->copyToDevice();
    kernel->input(inwrapper);
    kernel->output(outwrapper);
    kernel->run_1d(5, 5);
    outwrapper->copyToHost();
    assertEquals(out[0] , 7);
    assertEquals(out[1] , 10);
    assertEquals(out[2] , 13);
    assertEquals(out[3] , 16);
    assertEquals(out[4] , 19);
    cout << "tests completed ok" << endl;

    delete inwrapper;
    delete outwrapper;
    delete kernel;
    delete cl;
}
开发者ID:hughperkins,项目名称:EasyCL,代码行数:33,代码来源:testfloatwrapperconst.cpp

示例8: TEST

TEST(testinout, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
    float inout[5];
    for(int i = 0; i < 5; i++) {
        inout[i] = i * 3;
    }
    kernel->inout(5, inout);
    size_t global = 5;
    size_t local = 5;
    kernel->run(1, &global, &local);
    assertEquals(inout[0] , 7);
    assertEquals(inout[1] , 10);
    assertEquals(inout[2] , 13);
    assertEquals(inout[3] , 16);
    assertEquals(inout[4] , 19);
    cout << "tests completed ok" << endl;

    delete kernel;
    delete cl;
}
开发者ID:BenBBear,项目名称:EasyCL,代码行数:27,代码来源:testinout.cpp

示例9: TEST

TEST( testdefines, simple ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testdefines.cl", "testDefines", 
        "-D DOUBLE -D SOME_VALUE=5" );
    float out[32];
    kernel->out( 32, out );
    kernel->run_1d( 32, 32 );
    EXPECT_EQ( 10, out[3] );

    delete kernel;
    delete cl;
}
开发者ID:hksonngan,项目名称:EasyCL,代码行数:12,代码来源:testdefines.cpp

示例10: TEST

TEST(testlocal, reduceviascratch_multipleworkgroups_ints) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch_multipleworkgroups_ints");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int N = workgroupSize * numWorkgroups;
    cout << "numworkgroups " << numWorkgroups << " workgroupsize " << workgroupSize << " N " << N << endl;
    int *myarray = new int[N];
    int sumViaCpu = 0;
    int localSumViaCpu = 0;
    int localSumViaCpu2 = 0;
    int *localSumsViaCpu = new int[numWorkgroups];
    memset(localSumsViaCpu, 0, sizeof(int)*numWorkgroups);
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 50;
        sumViaCpu += myarray[i];
        if(i < workgroupSize) {
            localSumViaCpu += myarray[i];
        }
        if(i >= workgroupSize && i < workgroupSize * 2) {
            localSumViaCpu2 += myarray[i];
        }
        int workgroupId = i / workgroupSize;
        localSumsViaCpu[workgroupId] += myarray[i];
    }
    ASSERT_EQ(localSumViaCpu, localSumsViaCpu[0]);
    ASSERT_EQ(localSumViaCpu2, localSumsViaCpu[1]);
    ASSERT_NE(myarray[0], sumViaCpu);

//    Timer timer;

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
    int *a2 = new int[numWorkgroups];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups, a2);
    kernel->in(a1wrapper);
    kernel->out(a2wrapper);
    kernel->localInts(workgroupSize);
    kernel->run_1d(N, workgroupSize);

    int finalSum;
    kernel->in(a2wrapper);
    kernel->out(1, &finalSum);
    kernel->localInts(workgroupSize);
    kernel->run_1d(numWorkgroups, workgroupSize);
//    timer.timeCheck("finished 2-way reduce");

    EXPECT_EQ(sumViaCpu, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete[] a2;
    delete[]myarray;
    delete kernel;
    delete cl;
}
开发者ID:brunoro,项目名称:EasyCL,代码行数:56,代码来源:testlocal.cpp

示例11: TEST

TEST( testMemset, basic ) {
    EasyCL *cl = DeepCLGtestGlobals_createEasyCL();

    CLKernel *kMemset = 0;
    // [[[cog
    // import stringify
    // stringify.write_kernel2( "kMemset", "cl/memset.cl", "cl_memset", '""' )
    // ]]]
    // generated using cog, from cl/memset.cl:
    const char * kMemsetSource =  
    "// Copyright Hugh Perkins 2015 hughperkins at gmail\n"
    "//\n"
    "// This Source Code Form is subject to the terms of the Mozilla Public License,\n"
    "// v. 2.0. If a copy of the MPL was not distributed with this file, You can\n"
    "// obtain one at http://mozilla.org/MPL/2.0/.\n"
    "\n"
    "kernel void cl_memset(global float *target, const float value, const int N) {\n"
    "    #define globalId get_global_id(0)\n"
    "    if ((int)globalId < N) {\n"
    "        target[globalId] = value;\n"
    "    }\n"
    "}\n"
    "\n"
    "";
    kMemset = cl->buildKernelFromString(kMemsetSource, "cl_memset", "", "cl/memset.cl");
    // [[[end]]]

    int N = 10000;
    float *myArray = new float[N];
    CLWrapper *myArrayWrapper = cl->wrap( N, myArray );
    myArrayWrapper->createOnDevice();
    kMemset->out( myArrayWrapper )->in( 99.0f )->in( N );
    int workgroupSize = 64;
    kMemset->run_1d( ( N + workgroupSize - 1 ) / workgroupSize * workgroupSize, workgroupSize );
    cl->finish();
    myArrayWrapper->copyToHost();
    for( int i = 0; i < 10; i++ ) {
//        cout << "myArray[" << i << "]=" << myArray[i] << endl;
    }
    for( int i = 0; i < N; i++ ) {
        EXPECT_EQ( 99.0f, myArray[i] );
    }

    delete kMemset;

    delete cl;
}
开发者ID:gattia,项目名称:DeepCL,代码行数:47,代码来源:testMemset.cpp

示例12: TEST

TEST(testfloatarray, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
    float in[5];
    float inout[5];
    float out[5];
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3;
        inout[i] = i * 3;
    }
    kernel->in(5, in);
    kernel->out(5, out);
    kernel->inout(5, inout);
    kernel->run_1d(5, 5);

    for(int i = 0; i < 5; i++) {
        cout << out[i] << " ";
    }
    cout << endl;
    for(int i = 0; i < 5; i++) {
        cout << inout[i] << " ";
    }
    cout << endl;

    assertEquals(inout[0], 7);
    assertEquals(inout[1] , 10);
    assertEquals(inout[2] , 34);
    assertEquals(inout[3] , 16);
    assertEquals(inout[4], 19);
    assertEquals(out[0] , 5);
    assertEquals(out[1] , 8);
    assertEquals(out[2] , 26);
    assertEquals(out[3] , 14);
    assertEquals(out[4] , 17);
    cout << "tests completed ok" << endl;

    delete kernel;
    delete cl;
}
开发者ID:BenBBear,项目名称:EasyCL,代码行数:45,代码来源:testfloatarray.cpp

示例13: buildKernel

VIRTUAL void GpuOp::apply1_inplace( int N, CLWrapper*destinationWrapper, Op1 *op ) {
    StatefulTimer::instance()->timeCheck("GpuOp::apply inplace start" );

    string kernelName = "GpuOp::" + op->getName() + "_inplace";
    if( !cl->kernelExists( kernelName ) ) {
        buildKernel( kernelName, op, true );
    }
    CLKernel *kernel = cl->getKernel( kernelName );

    kernel->in( N );
    kernel->inout( destinationWrapper );
    int globalSize = N;
    int workgroupSize = 64;
    int numWorkgroups = ( globalSize + workgroupSize - 1 ) / workgroupSize;
    kernel->run_1d( numWorkgroups * workgroupSize, workgroupSize );
    cl->finish();

    StatefulTimer::instance()->timeCheck("GpuOp::apply inplace end" );
}
开发者ID:neuroidss,项目名称:DeepCL,代码行数:19,代码来源:GpuOp.cpp

示例14: TEST

TEST( testCopyBlock, basic ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

    float a[] = { 1,2,3,4,
                  5,6,7,8,
                  9,10,11,12 };
    float b[10];
    memset(b, 0, sizeof(float)*10);

    CLKernel *kernel = makeBasicKernel( cl );
    kernel->in( 12, a )->out( 6, b )->in( ( 3<<10)|4)->in( (0<<10)|1)->in((2<<10)|3);
    kernel->localFloats( 2 * 3 );
    kernel->run_1d(12,4);
//    kernel->run_1d(12,12);
    cl->finish();
    float expected[] = { 2,3,4,
                         6,7,8 }; 
    for( int i = 0; i < 2; i++ ) {
        for( int j = 0; j < 3; j++ ) {
            cout << b[i*3+j] << " ";
            EXPECT_EQ( expected[i*3+j], b[i*3+j] );
        }
        cout << endl;
    }
    cout << endl;
    for( int i = 6; i < 10; i++ ) {
        cout << b[i] << " ";
        EXPECT_EQ( 0, b[i] );
    }
    cout << endl;
        cout << endl;

    kernel->in( 12, a )->out( 6, b )->in( ( 3<<10)|4)->in( (1<<10)|0)->in((2<<10)|3);
    kernel->localFloats( 2 * 3 );
//    kernel->run_1d(12,4);
    kernel->run_1d(12,4);
    cl->finish();
    float expected2[] = { 5,6,7,
                         9,10,11 }; 
    for( int i = 0; i < 2; i++ ) {
        for( int j = 0; j < 3; j++ ) {
            cout << b[i*3+j] << " ";
            EXPECT_EQ( expected2[i*3+j], b[i*3+j] );
        }
        cout << endl;
    }
    cout << endl;
    for( int i = 6; i < 10; i++ ) {
        cout << b[i] << " ";
        EXPECT_EQ( 0, b[i] );
    }
    cout << endl;
        cout << endl;

    delete kernel;
    delete cl;
}
开发者ID:vzvzx,项目名称:DeepCL,代码行数:57,代码来源:testCopyBlock.cpp

示例15: CLKernel

CLKernel* CLKernel::Clone()
{
	CLKernel* pCLKernel = new CLKernel();

	pCLKernel->SetName(this->m_szName);
	pCLKernel->SetWorksize(this->m_unWorkSize);
	pCLKernel->SetPrefWorkSize(this->m_unPrefWorkSize);
	pCLKernel->SetReqdWorkSizes(this->m_pReqdWorkGroupSizes);
	pCLKernel->SetKernel(this->m_clKernel);
	pCLKernel->SetGlobalWorkGroupSize(this->m_stGlobalWorkGroupSize);
	pCLKernel->SetLocalWorkGroupSize(this->m_stLocalWorkGroupSize);

	return pCLKernel;
}
开发者ID:djm34,项目名称:skminer-amd,代码行数:14,代码来源:CLKernel.cpp


注:本文中的CLKernel类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。