本文整理汇总了C++中CLKernel::run_1d方法的典型用法代码示例。如果您正苦于以下问题:C++ CLKernel::run_1d方法的具体用法?C++ CLKernel::run_1d怎么用?C++ CLKernel::run_1d使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CLKernel
的用法示例。
在下文中一共展示了CLKernel::run_1d方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: memset
TEST( testCopyBlock, basic ) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
float a[] = { 1,2,3,4,
5,6,7,8,
9,10,11,12 };
float b[10];
memset(b, 0, sizeof(float)*10);
CLKernel *kernel = makeBasicKernel( cl );
kernel->in( 12, a )->out( 6, b )->in( ( 3<<10)|4)->in( (0<<10)|1)->in((2<<10)|3);
kernel->localFloats( 2 * 3 );
kernel->run_1d(12,4);
// kernel->run_1d(12,12);
cl->finish();
float expected[] = { 2,3,4,
6,7,8 };
for( int i = 0; i < 2; i++ ) {
for( int j = 0; j < 3; j++ ) {
cout << b[i*3+j] << " ";
EXPECT_EQ( expected[i*3+j], b[i*3+j] );
}
cout << endl;
}
cout << endl;
for( int i = 6; i < 10; i++ ) {
cout << b[i] << " ";
EXPECT_EQ( 0, b[i] );
}
cout << endl;
cout << endl;
kernel->in( 12, a )->out( 6, b )->in( ( 3<<10)|4)->in( (1<<10)|0)->in((2<<10)|3);
kernel->localFloats( 2 * 3 );
// kernel->run_1d(12,4);
kernel->run_1d(12,4);
cl->finish();
float expected2[] = { 5,6,7,
9,10,11 };
for( int i = 0; i < 2; i++ ) {
for( int j = 0; j < 3; j++ ) {
cout << b[i*3+j] << " ";
EXPECT_EQ( expected2[i*3+j], b[i*3+j] );
}
cout << endl;
}
cout << endl;
for( int i = 6; i < 10; i++ ) {
cout << b[i] << " ";
EXPECT_EQ( 0, b[i] );
}
cout << endl;
cout << endl;
delete kernel;
delete cl;
}
示例2: min
TEST(SLOW_testlocal, selfdot_3levels_withoutscratch) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernel("testlocal.cl", "selfdot_ints_withoutscratch");
int workgroupSize = min(512, cl->getMaxWorkgroupSize());
const int numWorkgroups = workgroupSize;
const int level3size = numWorkgroups / 4;
const int N = workgroupSize * numWorkgroups * level3size;
cout << "numworkgroups " << numWorkgroups << " workgroupsize " << workgroupSize << " N " << N << endl;
int *myarray = new int[N];
for(int i = 0; i < N; i++) {
myarray[i] = ((i + 7) * 3) % 5;
}
// Timer timer;
CLWrapper *a1wrapper = cl->wrap(N, myarray);
a1wrapper->copyToDevice();
// timer.timeCheck("copied array to device");
int *second = new int[N];
CLWrapper *secondwrapper = cl->wrap(N, second);
int *a2 = new int[numWorkgroups*level3size];
CLWrapper *a2wrapper = cl->wrap(numWorkgroups * level3size, a2);
kernel->in(a1wrapper);
kernel->out(secondwrapper);
kernel->out(a2wrapper);
kernel->run_1d(N, workgroupSize);
cl->finish();
int *a3 = new int[numWorkgroups];
CLWrapper *a3wrapper = cl->wrap(level3size, a3);
kernel->in(a2wrapper);
kernel->out(secondwrapper);
kernel->out(a3wrapper);
kernel->run_1d(workgroupSize * level3size, workgroupSize);
cl->finish();
int finalSum;
kernel->in(a3wrapper);
kernel->out(secondwrapper);
kernel->out(1, &finalSum);
kernel->run_1d(level3size, level3size);
// timer.timeCheck("finished 3-level reduce");
EXPECT_EQ(-1306309159, finalSum);
delete a1wrapper;
delete a2wrapper;
delete a3wrapper;
delete secondwrapper;
delete[] a3;
delete[] second;
delete[] a2;
delete[]myarray;
delete kernel;
delete cl;
}
示例3: memset
TEST( testCopyLocal, basic ) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
float a[] = { 1,2,3,4,
5,6,7,8,
9,10,11,12 };
float b[16];
memset(b, 0, sizeof(float)*16);
CLKernel *kernel = makeKernel( cl );
kernel->in( 12, a )->out( 16, b )->in( 12 );
kernel->localFloats( 12 );
kernel->run_1d(12,12);
cl->finish();
for( int i = 0; i < 3; i++ ) {
for( int j = 0; j < 4; j++ ) {
cout << b[i*4+j] << " ";
EXPECT_EQ( i * 4 + j + 1, b[i*4+j] );
}
cout << endl;
}
cout << endl;
for( int i = 12; i < 16; i++ ) {
cout << b[i] << " ";
EXPECT_EQ( 0, b[i] );
}
cout << endl;
delete kernel;
delete cl;
}
示例4: copy
void CopyBuffer::copy( EasyCL *cl, CLWrapper *sourceWrapper, int *target ) {
// first we will copy it to another buffer, so we can copy it out
int bufferSize = sourceWrapper->size();
// float *copiedBuffer = new float[ bufferSize ];
CLWrapper *targetWrapper = cl->wrap( bufferSize, target );
targetWrapper->createOnDevice();
// now copy it, via a kernel
const string kernelSource = "\n"
"kernel void copy( int N, global int const *source, global int *dest ) {\n"
" #define globalId ( get_global_id(0) )\n"
" if( (int)globalId < N ) {\n"
" dest[globalId] = source[globalId];\n"
" }\n"
" }\n";
CLKernel *kernel = cl->buildKernelFromString( kernelSource, "copy", "" );
kernel->in( bufferSize )->in( sourceWrapper )->out( targetWrapper );
int workgroupSize = 32;
int numWorkgroups = ( bufferSize + workgroupSize - 1 ) / workgroupSize;
kernel->run_1d( numWorkgroups * workgroupSize, workgroupSize );
cl->finish();
targetWrapper->copyToHost();
delete targetWrapper;
delete kernel;
// delete[] copiedBuffer;
}
示例5: THClStorage_get
// this runs an entire kernel to get one value. Clearly this is going to be pretty slow, but
// at least it's more or less compatible, and comparable, to how cutorch does it
// lgfgs expects a working implementation of this method
float THClStorage_get(THClState *state, const THClStorage *self, long index)
{
//// printf("THClStorage_get\n");
THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
THArgCheck(self->wrapper != 0, 1, "storage argument not initialized, is empty");
// if( self->wrapper->isDeviceDirty() ) {
// if(state->trace) cout << "wrapper->copyToHost()" << endl;
// self->wrapper->copyToHost();
// }
// return self->data[index];
const char *uniqueName = __FILE__ ":get";
EasyCL *cl = self->cl; // cant remember if this is a good idea or not :-P
CLKernel *kernel = 0;
if(cl->kernelExists(uniqueName)) {
kernel = cl->getKernel(uniqueName);
} else {
TemplatedKernel kernelBuilder(cl);
kernel = kernelBuilder.buildKernel( uniqueName, __FILE__, getGetKernelSource(), "THClStorageGet" );
}
float res;
kernel->out(1, &res);
kernel->in(self->wrapper);
kernel->in((int64_t)index);
kernel->run_1d(1, 1);
if(state->addFinish) cl->finish();
return res;
}
示例6: THClStorage_set
// this runs an entire kernel to get one value. Clearly this is going to be pretty slow, but
// at least it's more or less compatible, and comparable, to how cutorch does it
void THClStorage_set(THClState *state, THClStorage *self, long index, float value)
{
//// cout << "set size=" << self->size << " index=" << index << " value=" << value << endl;
THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
THArgCheck(self->wrapper != 0, 1, "storage argument not initialized, is empty");
// if( self->wrapper->isDeviceDirty() ) { // we have to do this, since we're going to copy it all back again
// // although I suppose we could set via a kernel perhaps
// // either way, this function is pretty inefficient right now :-P
// if(state->trace) cout << "wrapper->copyToHost() size " << self->size << endl;
// self->wrapper->copyToHost();
// }
// self->data[index] = value;
// if(state->trace) cout << "wrapper->copyToDevice() size " << self->size << endl;
// self->wrapper->copyToDevice();
const char *uniqueName = __FILE__ ":set";
EasyCL *cl = self->cl; // cant remember if this is a good idea or not :-P
CLKernel *kernel = 0;
if(cl->kernelExists(uniqueName)) {
kernel = cl->getKernel(uniqueName);
} else {
TemplatedKernel kernelBuilder(cl);
kernel = kernelBuilder.buildKernel( uniqueName, __FILE__, getSetKernelSource(), "THClStorageSet" );
}
kernel->inout(self->wrapper);
kernel->in((int64_t)index);
kernel->in(value);
kernel->run_1d(1, 1);
if(state->addFinish) cl->finish();
}
示例7: exit
TEST( SLOW_testintwrapper_huge, testread ) {
Timer timer;
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_read");
// const int N = 4500000;
// const int N = (4500000/512)*512;
int N = 100000;
int *out = new int[N];
CLWrapper *outwrapper = cl->wrap(N, out);
kernel->in(3)->in(7);
kernel->output( outwrapper );
int globalSize = N;
int workgroupsize = cl->getMaxWorkgroupSize();
globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
timer.timeCheck("before kernel");
kernel->run_1d( globalSize, workgroupsize );
timer.timeCheck("after kernel");
outwrapper->copyToHost();
timer.timeCheck("after copy to host");
for( int i = 0; i < N; i++ ) {
if( out[i] != 4228 ) {
cout << "out[" << i << "] != 4228: " << out[i] << endl;
exit(-1);
}
}
delete outwrapper;
delete kernel;
delete cl;
}
示例8: assertEquals
TEST(test_scenario_te42kyfo, main) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
CLArrayFloat *out = cl->arrayFloat(5);
CLArrayFloat *in = cl->arrayFloat(5);
for(int i = 0; i < 5; i++) {
(*out)[i] = 0;
}
for(int i = 0; i < 100; i++) {
for(int n = 0; n < 5; n++) {
(*in)[n] = i*n;
}
kernel->in(in);
kernel->out(out);
kernel->run_1d(5, 5);
assertEquals(i*2 + 5, (*out)[2]);
assertEquals(i*4 + 5, (*out)[4]);
}
cout << "finished" << endl;
delete in;
delete out;
delete kernel;
delete cl;
}
示例9: exit
TEST(testfloatwrapperconst, main) {
if(!EasyCL::isOpenCLAvailable()) {
cout << "opencl library not found" << endl;
exit(-1);
}
cout << "found opencl library" << endl;
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
float in[5];
for(int i = 0; i < 5; i++) {
in[i] = i * 3;
}
float out[5];
CLWrapper *inwrapper = cl->wrap(5, (float const *)in);
CLWrapper *outwrapper = cl->wrap(5, out);
inwrapper->copyToDevice();
kernel->input(inwrapper);
kernel->output(outwrapper);
kernel->run_1d(5, 5);
outwrapper->copyToHost();
assertEquals(out[0] , 7);
assertEquals(out[1] , 10);
assertEquals(out[2] , 13);
assertEquals(out[3] , 16);
assertEquals(out[4] , 19);
cout << "tests completed ok" << endl;
delete inwrapper;
delete outwrapper;
delete kernel;
delete cl;
}
示例10:
TEST( testdefines, simple ) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernel("testdefines.cl", "testDefines",
"-D DOUBLE -D SOME_VALUE=5" );
float out[32];
kernel->out( 32, out );
kernel->run_1d( 32, 32 );
EXPECT_EQ( 10, out[3] );
delete kernel;
delete cl;
}
示例11:
TEST(testlocal, notUselocal) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernel("testlocal.cl", "notUseLocal");
int workgroupSize = 64;
float *myarray = new float[workgroupSize];
kernel->in(workgroupSize);
kernel->inout(workgroupSize, myarray);
kernel->run_1d(workgroupSize, workgroupSize);
delete[]myarray;
delete kernel;
delete cl;
}
示例12: if
TEST( testMemset, basic ) {
EasyCL *cl = DeepCLGtestGlobals_createEasyCL();
CLKernel *kMemset = 0;
// [[[cog
// import stringify
// stringify.write_kernel2( "kMemset", "cl/memset.cl", "cl_memset", '""' )
// ]]]
// generated using cog, from cl/memset.cl:
const char * kMemsetSource =
"// Copyright Hugh Perkins 2015 hughperkins at gmail\n"
"//\n"
"// This Source Code Form is subject to the terms of the Mozilla Public License,\n"
"// v. 2.0. If a copy of the MPL was not distributed with this file, You can\n"
"// obtain one at http://mozilla.org/MPL/2.0/.\n"
"\n"
"kernel void cl_memset(global float *target, const float value, const int N) {\n"
" #define globalId get_global_id(0)\n"
" if ((int)globalId < N) {\n"
" target[globalId] = value;\n"
" }\n"
"}\n"
"\n"
"";
kMemset = cl->buildKernelFromString(kMemsetSource, "cl_memset", "", "cl/memset.cl");
// [[[end]]]
int N = 10000;
float *myArray = new float[N];
CLWrapper *myArrayWrapper = cl->wrap( N, myArray );
myArrayWrapper->createOnDevice();
kMemset->out( myArrayWrapper )->in( 99.0f )->in( N );
int workgroupSize = 64;
kMemset->run_1d( ( N + workgroupSize - 1 ) / workgroupSize * workgroupSize, workgroupSize );
cl->finish();
myArrayWrapper->copyToHost();
for( int i = 0; i < 10; i++ ) {
// cout << "myArray[" << i << "]=" << myArray[i] << endl;
}
for( int i = 0; i < N; i++ ) {
EXPECT_EQ( 99.0f, myArray[i] );
}
delete kMemset;
delete cl;
}
示例13: exit
TEST(testfloatarray, main) {
if(!EasyCL::isOpenCLAvailable()) {
cout << "opencl library not found" << endl;
exit(-1);
}
cout << "found opencl library" << endl;
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
float in[5];
float inout[5];
float out[5];
for(int i = 0; i < 5; i++) {
in[i] = i * 3;
inout[i] = i * 3;
}
kernel->in(5, in);
kernel->out(5, out);
kernel->inout(5, inout);
kernel->run_1d(5, 5);
for(int i = 0; i < 5; i++) {
cout << out[i] << " ";
}
cout << endl;
for(int i = 0; i < 5; i++) {
cout << inout[i] << " ";
}
cout << endl;
assertEquals(inout[0], 7);
assertEquals(inout[1] , 10);
assertEquals(inout[2] , 34);
assertEquals(inout[3] , 16);
assertEquals(inout[4], 19);
assertEquals(out[0] , 5);
assertEquals(out[1] , 8);
assertEquals(out[2] , 26);
assertEquals(out[3] , 14);
assertEquals(out[4] , 17);
cout << "tests completed ok" << endl;
delete kernel;
delete cl;
}
示例14: buildKernel
VIRTUAL void GpuOp::apply1_inplace( int N, CLWrapper*destinationWrapper, Op1 *op ) {
StatefulTimer::instance()->timeCheck("GpuOp::apply inplace start" );
string kernelName = "GpuOp::" + op->getName() + "_inplace";
if( !cl->kernelExists( kernelName ) ) {
buildKernel( kernelName, op, true );
}
CLKernel *kernel = cl->getKernel( kernelName );
kernel->in( N );
kernel->inout( destinationWrapper );
int globalSize = N;
int workgroupSize = 64;
int numWorkgroups = ( globalSize + workgroupSize - 1 ) / workgroupSize;
kernel->run_1d( numWorkgroups * workgroupSize, workgroupSize );
cl->finish();
StatefulTimer::instance()->timeCheck("GpuOp::apply inplace end" );
}
示例15: makeTestPosKernel
TEST( testCopyBlock, testPos ) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
float in[12];
float res[12];
in[0] = ( 3 << 10 ) | 4;
in[1] = 8;
in[2] = 14;
for( int i = 0; i < 3; i++ ) {
cout << "in[" << i << "]=" << in[i] << endl;
}
CLKernel *kernel = makeTestPosKernel( cl );
kernel->in( 12, in )->out( 12, res );
kernel->run_1d(1,1);
cl->finish();
for( int i = 0; i < 5; i++ ) {
cout << "res[" << i << "]=" << res[i] << endl;
}
}