本文整理汇总了C++中CLKernel::out方法的典型用法代码示例。如果您正苦于以下问题：C++ CLKernel::out方法的具体用法？C++ CLKernel::out怎么用？C++ CLKernel::out使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CLKernel的用法示例。
在下文中一共展示了CLKernel::out方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: min
// Sums N ints on the device with two launches of the same kernel and compares
// the final value with a sum computed on the CPU.
//
// Launch 1: in = the full array, out = a2 (numWorkgroups ints).
// Launch 2: in = a2,             out = finalSum (a single int).
// NOTE(review): presumably the kernel writes one partial sum per workgroup;
// the kernel source (testlocal.cl) is not visible here.
TEST(testlocal, reduceviascratch_multipleworkgroups_ints) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch_multipleworkgroups_ints");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    // numWorkgroups is deliberately equal to workgroupSize, so the second launch
    // below is run_1d(workgroupSize, workgroupSize).
    const int numWorkgroups = workgroupSize;
    const int N = workgroupSize * numWorkgroups;
    cout << "numworkgroups " << numWorkgroups << " workgroupsize " << workgroupSize << " N " << N << endl;

    // CPU reference values: the grand total, the sums of the first two
    // workgroup-sized slices, and one sum per workgroup-sized slice.
    int *myarray = new int[N];
    int sumViaCpu = 0;
    int localSumViaCpu = 0;
    int localSumViaCpu2 = 0;
    int *localSumsViaCpu = new int[numWorkgroups];
    memset(localSumsViaCpu, 0, sizeof(int)*numWorkgroups);
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 50;
        sumViaCpu += myarray[i];
        if(i < workgroupSize) {
            localSumViaCpu += myarray[i];
        }
        if(i >= workgroupSize && i < workgroupSize * 2) {
            localSumViaCpu2 += myarray[i];
        }
        int workgroupId = i / workgroupSize;
        localSumsViaCpu[workgroupId] += myarray[i];
    }
    // Sanity checks on the CPU-side bookkeeping itself.
    ASSERT_EQ(localSumViaCpu, localSumsViaCpu[0]);
    ASSERT_EQ(localSumViaCpu2, localSumsViaCpu[1]);
    ASSERT_NE(myarray[0], sumViaCpu);

    // Timer timer;
    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
    int *a2 = new int[numWorkgroups];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups, a2);

    // First launch: full array -> a2.
    kernel->in(a1wrapper);
    kernel->out(a2wrapper);
    kernel->localInts(workgroupSize);
    kernel->run_1d(N, workgroupSize);

    // Second launch: a2 -> finalSum.
    int finalSum;
    kernel->in(a2wrapper);
    kernel->out(1, &finalSum);
    kernel->localInts(workgroupSize);
    kernel->run_1d(numWorkgroups, workgroupSize);
    // timer.timeCheck("finished 2-way reduce");

    EXPECT_EQ(sumViaCpu, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete[] a2;
    delete[] localSumsViaCpu;  // previously leaked: allocated with new[] but never freed
    delete[] myarray;
    delete kernel;
    delete cl;
}
示例2: assertEquals
// Re-runs the same kernel 100 times with fresh input each time, passing
// CLArrayFloat objects to in()/out(), and checks two of the five outputs
// after every run.
// NOTE(review): the expected values imply out[n] == in[n] + 5; the kernel
// source returned by getKernel() is not visible here.
TEST(test_scenario_te42kyfo, main) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");

    const int numElements = 5;
    const int numIterations = 100;

    CLArrayFloat *results = cl->arrayFloat(numElements);
    CLArrayFloat *inputs = cl->arrayFloat(numElements);

    // Start from a zeroed output array.
    for(int idx = 0; idx < numElements; idx++) {
        (*results)[idx] = 0;
    }

    for(int iter = 0; iter < numIterations; iter++) {
        // Input for this iteration: inputs[idx] = iter * idx.
        for(int idx = 0; idx < numElements; idx++) {
            (*inputs)[idx] = iter*idx;
        }

        kernel->in(inputs);
        kernel->out(results);
        kernel->run_1d(numElements, numElements);

        assertEquals(iter*2 + 5, (*results)[2]);
        assertEquals(iter*4 + 5, (*results)[4]);
    }
    cout << "finished" << endl;

    delete inputs;
    delete results;
    delete kernel;
    delete cl;
}
示例3: THClStorage_get
// Reads one float out of a THClStorage by launching a kernel with a single
// work item that writes the requested element into a host-side float.
// Launching a whole kernel for one value is slow, but the original author
// notes it is roughly compatible with, and comparable to, how cutorch does it,
// and that lbfgs (spelled "lgfgs" in the original note) expects this function
// to work.
//
// state: read for the addFinish flag; when set, cl->finish() is called after
//        the kernel is launched.
// self:  storage to read from; its wrapper must be non-null.
// index: element index; must satisfy 0 <= index < self->size.
// Returns the value the kernel wrote for that element.
//
// An earlier host-side path (copyToHost() when the wrapper is device-dirty,
// then reading self->data[index]) is intentionally not used here.
float THClStorage_get(THClState *state, const THClStorage *self, long index)
{
    THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
    THArgCheck(self->wrapper != 0, 1, "storage argument not initialized, is empty");

    // The kernel is looked up under a per-file unique name so it is only
    // built when EasyCL does not already know it.
    const char *uniqueName = __FILE__ ":get";
    EasyCL *cl = self->cl; // original author was unsure whether caching cl here is a good idea
    CLKernel *kernel = 0;
    if(!cl->kernelExists(uniqueName)) {
        TemplatedKernel kernelBuilder(cl);
        kernel = kernelBuilder.buildKernel(uniqueName, __FILE__, getGetKernelSource(), "THClStorageGet");
    } else {
        kernel = cl->getKernel(uniqueName);
    }

    // Argument order matters: output slot, then the storage, then the index.
    float value;
    kernel->out(1, &value);
    kernel->in(self->wrapper);
    kernel->in(static_cast<int64_t>(index));
    kernel->run_1d(1, 1);

    if(state->addFinish) {
        cl->finish();
    }
    return value;
}
示例4:
// Builds the testDefines kernel with two -D compiler options and checks that
// element 3 of its 32-float output equals 10.
// NOTE(review): how the kernel combines DOUBLE and SOME_VALUE is defined in
// testdefines.cl, which is not visible here.
TEST( testdefines, simple ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

    // Adjacent literals concatenate to exactly "-D DOUBLE -D SOME_VALUE=5".
    const char *buildOptions = "-D DOUBLE " "-D SOME_VALUE=5";
    CLKernel *kernel = cl->buildKernel( "testdefines.cl", "testDefines", buildOptions );

    // One output float per work item; global size and workgroup size are both 32.
    const int numValues = 32;
    float out[numValues];
    kernel->out( numValues, out );
    kernel->run_1d( numValues, numValues );

    EXPECT_EQ( 10, out[3] );

    delete kernel;
    delete cl;
}
示例5: if
// Fills a 10000-float device buffer with 99.0f using the cl_memset kernel,
// copies it back to the host and checks every element.
TEST( testMemset, basic ) {
    EasyCL *cl = DeepCLGtestGlobals_createEasyCL();

    CLKernel *kMemset = 0;
    // The section between the cog markers is generated; do not edit it by hand.
    // [[[cog
    // import stringify
    // stringify.write_kernel2( "kMemset", "cl/memset.cl", "cl_memset", '""' )
    // ]]]
    // generated using cog, from cl/memset.cl:
    const char * kMemsetSource =
    "// Copyright Hugh Perkins 2015 hughperkins at gmail\n"
    "//\n"
    "// This Source Code Form is subject to the terms of the Mozilla Public License,\n"
    "// v. 2.0. If a copy of the MPL was not distributed with this file, You can\n"
    "// obtain one at http://mozilla.org/MPL/2.0/.\n"
    "\n"
    "kernel void cl_memset(global float *target, const float value, const int N) {\n"
    " #define globalId get_global_id(0)\n"
    " if ((int)globalId < N) {\n"
    " target[globalId] = value;\n"
    " }\n"
    "}\n"
    "\n"
    "";
    kMemset = cl->buildKernelFromString(kMemsetSource, "cl_memset", "", "cl/memset.cl");
    // [[[end]]]

    int N = 10000;
    float *myArray = new float[N];
    CLWrapper *myArrayWrapper = cl->wrap( N, myArray );
    myArrayWrapper->createOnDevice();

    // Arguments in kernel-signature order: target, value, N.
    kMemset->out( myArrayWrapper )->in( 99.0f )->in( N );
    int workgroupSize = 64;
    // Global size is N rounded up to a multiple of workgroupSize; the kernel's
    // own "globalId < N" guard keeps the extra work items from writing.
    kMemset->run_1d( ( N + workgroupSize - 1 ) / workgroupSize * workgroupSize, workgroupSize );
    cl->finish();
    myArrayWrapper->copyToHost();

    for( int i = 0; i < N; i++ ) {
        EXPECT_EQ( 99.0f, myArray[i] );
    }

    // The wrapper and the host array were previously leaked.
    delete myArrayWrapper;
    delete[] myArray;
    delete kMemset;
    delete cl;
}
示例6: exit
// Exercises the raw-float-array overloads of CLKernel::in, ::out and ::inout
// on five elements, prints the results and compares them with fixed values.
// Exits the process with -1 if no OpenCL library is available.
// NOTE(review): the expected values come from the kernel returned by
// getKernel(), whose source is not visible here.
TEST(testfloatarray, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");

    const int count = 5;
    float input[count];
    float both[count];    // passed as inout: read and written by the kernel
    float result[count];  // passed as out: written by the kernel
    for(int k = 0; k < count; k++) {
        const float seed = k * 3;
        input[k] = seed;
        both[k] = seed;
    }

    // Argument order must match the kernel signature: in, out, inout.
    kernel->in(count, input);
    kernel->out(count, result);
    kernel->inout(count, both);
    kernel->run_1d(count, count);

    // Print the out array, then the inout array, one line each.
    for(int k = 0; k < count; k++) {
        cout << result[k] << " ";
    }
    cout << endl;
    for(int k = 0; k < count; k++) {
        cout << both[k] << " ";
    }
    cout << endl;

    // Same checks, in the same order as before: inout first, then out.
    const int expectedInout[count] = { 7, 10, 34, 16, 19 };
    const int expectedOut[count] = { 5, 8, 26, 14, 17 };
    for(int k = 0; k < count; k++) {
        assertEquals(both[k], expectedInout[k]);
    }
    for(int k = 0; k < count; k++) {
        assertEquals(result[k], expectedOut[k]);
    }
    cout << "tests completed ok" << endl;

    delete kernel;
    delete cl;
}
示例7: main
// Runs the "test" kernel from test.cl on random 32-bit input and compares its
// output with a reference computed on the host.
//
// Returns 0 when every output element matches the reference and 1 otherwise
// (the original always returned 0, hiding mismatches from callers).
int main(int argc, char *argv[])
{
    const int test_size = 128;

    // Mersenne Twister seeded from eight random_device draws.
    std::random_device rd;
    std::seed_seq s{ rd(), rd(), rd(), rd(), rd(), rd(), rd(), rd() };
    std::mt19937 mt(s);

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kern = cl->buildKernel("test.cl", "test");

    // The input holds four times as many words as there are outputs.
    std::vector<uint32_t> inbuf(test_size*4), outbuf(test_size), compare(test_size);
    std::generate(inbuf.begin(), inbuf.end(), mt);

    std::cout << "Running CL implementation" << std::endl;
    kern->in(inbuf.size(), &inbuf[0]);
    kern->out(outbuf.size(), &outbuf[0]);
    size_t global_size[] = { test_size };
    kern->run(1, global_size, nullptr);
    delete kern;
    delete cl;  // previously leaked: the EasyCL context was never released

    std::cout << "Running local implementation" << std::endl;
    // NOTE(review): the reference XORs four consecutive words starting at i
    // (a sliding window), not words 4*i..4*i+3 — confirm this matches test.cl.
    for (size_t i = 0; i < compare.size(); ++i) {
        compare[i] = inbuf[i] ^ inbuf[i + 1] ^ inbuf[i + 2] ^ inbuf[i + 3];
    }

    std::cout << "Comparing CL test with local implementation" << std::endl;
    bool all_match = true;
    for (size_t i = 0; i < compare.size(); ++i) {
        if (outbuf[i] != compare[i]) {
            std::cout << "Error in index " << i << " " << outbuf[i] << " != " << compare[i] << std::endl;
            all_match = false;
        }
    }
    return all_match ? 0 : 1;
}