本文整理汇总了C++中CLWrapper::createOnDevice方法的典型用法代码示例。如果您正苦于以下问题:C++ CLWrapper::createOnDevice方法的具体用法?C++ CLWrapper::createOnDevice怎么用?C++ CLWrapper::createOnDevice使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CLWrapper
的用法示例。
在下文中一共展示了CLWrapper::createOnDevice方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: exit
TEST( SLOW_testintwrapper_huge, testreadwrite ) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_stress");
const int N = 1000000;
int *in = new int[N];
for( int i = 0; i < N; i++ ) {
in[i] = i * 3;
}
int *out = new int[N];
CLWrapper *inwrapper = cl->wrap(N, in);
CLWrapper *outwrapper = cl->wrap(N, out);
inwrapper->copyToDevice();
outwrapper->createOnDevice();
kernel->input( inwrapper );
kernel->output( outwrapper );
int globalSize = N;
int workgroupsize = cl->getMaxWorkgroupSize();
globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
kernel->run_1d( globalSize, workgroupsize );
outwrapper->copyToHost();
for( int i = 0; i < N; i++ ) {
if( out[i] != 689514 ) {
cout << "out[" << i << "] != 689514: " << out[i] << endl;
exit(-1);
}
}
delete outwrapper;
delete inwrapper;
delete kernel;
delete cl;
}
示例2: updateWeights
VIRTUAL void Adagrad::updateWeights(CLWrapper *weightsWrapper, CLWrapper *gradWeightsWrapper,
AdagradState *trainerState) {
int numWeights = trainerState->numWeights;
float *working = new float[ numWeights ];
CLWrapper *workingWrapper = cl->wrap(numWeights, working);
workingWrapper->createOnDevice();
CLMathWrapper clWeights(weightsWrapper);
CLMathWrapper clGradWeights(gradWeightsWrapper);
CLMathWrapper clSumSquares(trainerState->sumSquaresWrapper);
CLMathWrapper clWorking(workingWrapper);
// following all happens on gpu, via clmathwrapper:
clWorking = clGradWeights;
clWorking.squared();
clSumSquares += clWorking;
clWorking = clSumSquares;
clWorking.sqrt();
clWorking.inv();
clWorking *= clGradWeights;
clWorking *= - learningRate;
clWeights += clWorking;
delete workingWrapper;
delete[] working;
}
示例3: copy
void CopyBuffer::copy( EasyCL *cl, CLWrapper *sourceWrapper, int *target ) {
// first we will copy it to another buffer, so we can copy it out
int bufferSize = sourceWrapper->size();
// float *copiedBuffer = new float[ bufferSize ];
CLWrapper *targetWrapper = cl->wrap( bufferSize, target );
targetWrapper->createOnDevice();
// now copy it, via a kernel
const string kernelSource = "\n"
"kernel void copy( int N, global int const *source, global int *dest ) {\n"
" #define globalId ( get_global_id(0) )\n"
" if( (int)globalId < N ) {\n"
" dest[globalId] = source[globalId];\n"
" }\n"
" }\n";
CLKernel *kernel = cl->buildKernelFromString( kernelSource, "copy", "" );
kernel->in( bufferSize )->in( sourceWrapper )->out( targetWrapper );
int workgroupSize = 32;
int numWorkgroups = ( bufferSize + workgroupSize - 1 ) / workgroupSize;
kernel->run_1d( numWorkgroups * workgroupSize, workgroupSize );
cl->finish();
targetWrapper->copyToHost();
delete targetWrapper;
delete kernel;
// delete[] copiedBuffer;
}
示例4: THClStorage_newWithSize
THClStorage* THClStorage_newWithSize(THClState *state, int device, long size)
{
THArgCheck(size >= 0, 2, "invalid size");
if(size > 0)
{
StatefulTimer::timeCheck("THClStorage_newWithSize START");
THClStorage *storage = (THClStorage*)THAlloc(sizeof(THClStorage));
float *data = new float[size];
storage->device = device;
storage->cl = THClState_getClv2(state, storage->device);
CLWrapper *wrapper = storage->cl->wrap( size, data );
if(state->trace) cout << "new wrapper, size " << size << endl;
if(state->trace) cout << "wrapper->createOnDevice()" << endl;
wrapper->createOnDevice();
storage->data = data;
storage->wrapper = wrapper;
storage->size = size;
storage->refcount = 1;
storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
StatefulTimer::timeCheck("THClStorage_newWithSize END");
return storage;
}
else
{
return THClStorage_newv2(state, device);
}
}
示例5: measurePerf
void measurePerf(int instance, int batchSize, LayerDimensions dim) {
EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
int inputNumElements = dim.inputCubeSize * batchSize;
int errorsSize = dim.outputCubeSize * batchSize;
int weightsSize = dim.filtersSize;
int errorsForUpstreamSize = dim.inputCubeSize * batchSize;
float *input = new float[inputNumElements];
float *errors = new float[errorsSize];
float *weights = new float[weightsSize];
WeightRandomizer::randomize(input, inputNumElements, -0.1f, 0.1f);
WeightRandomizer::randomize(errors, errorsSize, -0.1f, 0.1f);
WeightRandomizer::randomize(weights, weightsSize, -0.1f, 0.1f);
float *errorsForUpstream = new float[errorsForUpstreamSize];
CLWrapper *inputWrapper = cl->wrap(inputNumElements, input);
CLWrapper *errorsWrapper = cl->wrap(errorsSize, errors);
CLWrapper *weightsWrapper = cl->wrap(weightsSize, weights);
CLWrapper *errorsForUpstreamWrapper = cl->wrap(errorsForUpstreamSize, errorsForUpstream);
inputWrapper->copyToDevice();
errorsWrapper->copyToDevice();
weightsWrapper->copyToDevice();
errorsForUpstreamWrapper->createOnDevice();
StatefulTimer::timeCheck("after init");
Backward *backwardImpl = Backward::instanceSpecific(instance, cl, dim);
for(int it = 0; it < 40; it++) {
backwardImpl->backward(batchSize,
inputWrapper, errorsWrapper, weightsWrapper,
errorsForUpstreamWrapper);
}
StatefulTimer::timeCheck("after backprop");
StatefulTimer::dump(true);
delete errorsForUpstreamWrapper;
delete weightsWrapper;
delete inputWrapper;
delete errorsWrapper;
delete[] errors;
delete[] weights;
delete[] input;
delete[] errorsForUpstream;
delete backwardImpl;
delete cl;
}
示例6: if
TEST( testMemset, basic ) {
EasyCL *cl = DeepCLGtestGlobals_createEasyCL();
CLKernel *kMemset = 0;
// [[[cog
// import stringify
// stringify.write_kernel2( "kMemset", "cl/memset.cl", "cl_memset", '""' )
// ]]]
// generated using cog, from cl/memset.cl:
const char * kMemsetSource =
"// Copyright Hugh Perkins 2015 hughperkins at gmail\n"
"//\n"
"// This Source Code Form is subject to the terms of the Mozilla Public License,\n"
"// v. 2.0. If a copy of the MPL was not distributed with this file, You can\n"
"// obtain one at http://mozilla.org/MPL/2.0/.\n"
"\n"
"kernel void cl_memset(global float *target, const float value, const int N) {\n"
" #define globalId get_global_id(0)\n"
" if ((int)globalId < N) {\n"
" target[globalId] = value;\n"
" }\n"
"}\n"
"\n"
"";
kMemset = cl->buildKernelFromString(kMemsetSource, "cl_memset", "", "cl/memset.cl");
// [[[end]]]
int N = 10000;
float *myArray = new float[N];
CLWrapper *myArrayWrapper = cl->wrap( N, myArray );
myArrayWrapper->createOnDevice();
kMemset->out( myArrayWrapper )->in( 99.0f )->in( N );
int workgroupSize = 64;
kMemset->run_1d( ( N + workgroupSize - 1 ) / workgroupSize * workgroupSize, workgroupSize );
cl->finish();
myArrayWrapper->copyToHost();
for( int i = 0; i < 10; i++ ) {
// cout << "myArray[" << i << "]=" << myArray[i] << endl;
}
for( int i = 0; i < N; i++ ) {
EXPECT_EQ( 99.0f, myArray[i] );
}
delete kMemset;
delete cl;
}
示例7: clblasTeardown
TEST(testClBlas, basic) {
EasyCL *cl = DeepCLGtestGlobals_createEasyCL();
float A[] = {1, 3,
2, 7,
9, 5};
float B[] = {3,
-1};
float C[3];
ClBlasInstance clblasInstance;
CLWrapper *AWrap = cl->wrap(6, A);
CLWrapper *BWrap = cl->wrap(2, B);
CLWrapper *CWrap = cl->wrap(3, C);
AWrap->copyToDevice();
BWrap->copyToDevice();
CWrap->createOnDevice();
ClBlasHelper::Gemm(
cl,
clblasRowMajor,
clblasNoTrans, clblasNoTrans,
3, 2, 1,
1,
AWrap, 0,
BWrap, 0,
0,
CWrap, 0
);
cl->finish();
CWrap->copyToHost();
EXPECT_EQ(0, C[0]);
EXPECT_EQ(-1, C[1]);
EXPECT_EQ(22, C[2]);
cl->finish();
delete CWrap;
delete BWrap;
delete AWrap;
cl->finish();
delete cl;
clblasTeardown();
}
示例8: updateWeights
VIRTUAL void Adadelta::updateWeights( CLWrapper *weightsWrapper, CLWrapper *gradWeightsWrapper,
AdadeltaState *trainerState ) {
// need to calculate
// sumGradSquared = decay * sumGradSquared + (1 - decay ) * grad.square()
// update = - sumUpdateSquared.sqrt() / sumGradSquared.sqrt() * grad
// sumUpdateSquared = decay * sumUpdateSquared + ( 1 - decay ) * update.squared()
// weights += update
int numWeights = trainerState->numWeights;
float *working = new float[ numWeights ];
CLWrapper *workingWrapper = cl->wrap( numWeights, working );
workingWrapper->createOnDevice();
CLMathWrapper clWeights( weightsWrapper );
CLMathWrapper clGradWeights( gradWeightsWrapper );
CLMathWrapper clSumGradSquared( trainerState->sumGradSquaredWrapper );
CLMathWrapper clSumUpdateSquared( trainerState->sumUpdateSquaredWrapper );
CLMathWrapper clWorking( workingWrapper );
// following all happens on gpu, via clmathwrapper:
clWorking = clGradWeights;
clWorking.squared();
clWorking *= ( 1 - decay );
clSumGradSquared *= decay;
clSumGradSquared += clWorking;
clWorking = clSumGradSquared;
clWorking.inv();
clWorking *= clSumUpdateSquared;
clWorking.sqrt();
clWorking *= clGradWeights;
clWorking *= - 1;
clWeights += clWorking;
clSumUpdateSquared *= decay;
clWorking.squared();
clWorking *= ( 1 - decay );
clSumUpdateSquared += clWorking;
delete workingWrapper;
delete[] working;
}
示例9: THClBlas_dot
float THClBlas_dot(THClState *state, long n,
CLWrapper *xwrapper, long xoffset, long incx,
CLWrapper *ywrapper, long yoffset, long incy)
{
if(n == 1)
{
incx = 1;
incy = 1;
}
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
int i_incy = (int)incy;
float result;
cl_int err;
err = clblasSetup();
if (err != CL_SUCCESS) {
THError("clblasSetup() failed with %d", err);
}
CLWrapper *resultWrapper = state->cl->wrap( 1, &result );
float *scratch = new float[i_n];
CLWrapper *scratchWrapper = state->cl->wrap(i_n, scratch);
scratchWrapper->createOnDevice();
resultWrapper->createOnDevice();
cl_event event = NULL;
err = clblasSdot( i_n, resultWrapper->getBuffer(), 0,
xwrapper->getBuffer(), xoffset, i_incx,
ywrapper->getBuffer(), yoffset, i_incy,
scratchWrapper->getBuffer(),
1, state->cl->queue, 0, NULL, &event);
// THCublasCheck(cublasSdot(*state->blasState->current_handle, i_n, x, i_incx, y, i_incy, &result));
// ClDeviceSynchronize();
if (err != CL_SUCCESS) {
THError("clblasSdot() failed with %d", err);
}
else {
/* Wait for calculations to be finished. */
err = clWaitForEvents(1, &event);
}
resultWrapper->copyToHost();
/* Finalize work with clblas. */
clblasTeardown();
delete resultWrapper;
delete scratchWrapper;
delete[] scratch;
return result;
}
THError("Cublas_dot only supports n, incx and incy "
"upto signed integer limits: %d", INT_MAX);
THError("Not implemented");
return -1;
}
示例10: THClBlas_gemm2
void THClBlas_gemm2(THClState *state, char orderchar, char transa, char transb, long m, long n, long k, float alpha,
THClTensor *a, long lda, THClTensor *b, long ldb, float beta,
THClTensor *c, long ldc)
{
StatefulTimer::timeCheck("THClBlas_gemm START");
CLWrapper *aWrapper = THClTensor_wrapper(state, a);
CLWrapper *bWrapper = THClTensor_wrapper(state, b);
CLWrapper *cWrapper = THClTensor_wrapper(state, c);
long offseta = THClTensor_storageOffset(state, a);
long offsetb = THClTensor_storageOffset(state, b);
long offsetc = THClTensor_storageOffset(state, c);
// adjustLd(transa, transb, m, n, k, &lda, &ldb, &ldc);
clblasTranspose opa = convertTransToClblasOperation(transa);
clblasTranspose opb = convertTransToClblasOperation(transb);
clblasOrder order = orderchar == 'c' ? clblasColumnMajor : clblasRowMajor;
if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) && (lda <= INT_MAX) && (ldb <= INT_MAX) && (ldc <= INT_MAX) )
{
int i_m = (int)m;
int i_n = (int)n;
int i_k = (int)k;
int i_lda = (int)lda;
int i_ldb = (int)ldb;
int i_ldc = (int)ldc;
cl_int err;
if( !aWrapper->isOnDevice() ) {
aWrapper->createOnDevice();
}
if( !bWrapper->isOnDevice() ) {
bWrapper->createOnDevice();
}
if( !cWrapper->isOnDevice() ) {
cWrapper->createOnDevice();
}
EasyCL *cl = cWrapper->getCl();
cl_event *event = 0;
if(state->addFinish) {
event = new cl_event();
}
err = clblasSgemm(order, opa, opb, i_m, i_n, i_k,
alpha, aWrapper->getBuffer(), offseta, i_lda,
bWrapper->getBuffer(), offsetb, i_ldb, beta,
cWrapper->getBuffer(), offsetc, i_ldc,
1, cl->queue, 0, NULL, event);
if (err != CL_SUCCESS) {
THError("clblasSgemm() failed with %d", err);
}
else {
if(state->addFinish) {
err = clWaitForEvents(1, event);
if (err != CL_SUCCESS) {
// throw runtime_error("clblasSger() failed with " + easycl::toString(err));
THError("clblasSger: wait for event failed with %d", err);
}
clReleaseEvent(*event);
delete event;
}
}
cWrapper->markDeviceDirty();
StatefulTimer::timeCheck("THClBlas_gemm END");
return;
}
THError("Clblas_gemm only supports m, n, k, lda, ldb, ldc"
"with the bound [val] <= %d", INT_MAX);
}
示例11: THClBlas_ger
void THClBlas_ger(THClState *state, long m, long n, float alpha,
THClTensor *x, long incx,
THClTensor *y, long incy,
THClTensor *a, long lda)
{
StatefulTimer::timeCheck("THClBlas_ger START");
if(n == 1)
lda = m;
if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_m = (int)m;
int i_n = (int)n;
int i_lda = (int)lda;
int i_incx = (int)incx;
int i_incy = (int)incy;
cl_int err;
CLWrapper *xwrap = THClTensor_wrapper(state, x);
CLWrapper *ywrap = THClTensor_wrapper(state, y);
CLWrapper *awrap = THClTensor_wrapper(state, a);
long x_offset = THClTensor_storageOffset(state, x);
long y_offset = THClTensor_storageOffset(state, y);
long a_offset = THClTensor_storageOffset(state, a);
if(!awrap->isOnDevice()) {
awrap->createOnDevice();
}
EasyCL *cl = ywrap->getCl();
cl_event *event = 0;
if(state->addFinish) {
event = new cl_event();
}
err = clblasSger(clblasColumnMajor, i_m, i_n, alpha,
xwrap->getBuffer(), x_offset, i_incx,
ywrap->getBuffer(), y_offset, i_incy,
awrap->getBuffer(), a_offset, i_lda,
1, (cl->queue), 0, NULL, event);
if (err != CL_SUCCESS) {
// throw runtime_error("clblasSger() failed with " + easycl::toString(err));
THError("clblasSger() failed with %d", err);
}
else {
if(state->addFinish) {
err = clWaitForEvents(1, event);
if (err != CL_SUCCESS) {
// throw runtime_error("clblasSger() failed with " + easycl::toString(err));
THError("clblasSger: wait for event failed with %d", err);
}
clReleaseEvent(*event);
delete event;
}
}
awrap->markDeviceDirty();
StatefulTimer::timeCheck("THClBlas_ger END");
return;
}
THError("Cublas_ger only supports m, n, lda, incx, incy"
"with the bound [val] <= %d", INT_MAX);
}
示例12: THClBlas_dot
float THClBlas_dot(THClState *state, long n,
CLWrapper *xwrapper, long xoffset, long incx,
CLWrapper *ywrapper, long yoffset, long incy)
{
StatefulTimer::timeCheck("THClBlas_dot START");
if(n == 1)
{
incx = 1;
incy = 1;
}
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
int i_incy = (int)incy;
float result;
cl_int err;
EasyCL *cl = ywrapper->getCl();
CLWrapper *resultWrapper = cl->wrap( 1, &result );
float *scratch = new float[i_n];
CLWrapper *scratchWrapper = cl->wrap(i_n, scratch);
scratchWrapper->createOnDevice();
resultWrapper->createOnDevice();
cl_event *event = 0;
// if(state->addFinish) {
event = new cl_event();
// }
err = clblasSdot( i_n, resultWrapper->getBuffer(), 0,
xwrapper->getBuffer(), xoffset, i_incx,
ywrapper->getBuffer(), yoffset, i_incy,
scratchWrapper->getBuffer(),
1, cl->queue, 0, NULL, event);
if (err != CL_SUCCESS) {
THError("clblasSdot() failed with %d", err);
}
else {
// if(state->addFinish) {
err = clWaitForEvents(1, event);
if (err != CL_SUCCESS) {
// throw runtime_error("clblasSger() failed with " + easycl::toString(err));
THError("clblasSger: wait for event failed with %d", err);
}
clReleaseEvent(*event);
delete event;
// }
}
// TODO: NOT copy to host here, use pointtensor
resultWrapper->copyToHost();
//resultWrapper->markDeviceDirty();
delete resultWrapper;
delete scratchWrapper;
delete[] scratch;
StatefulTimer::timeCheck("THClBlas_dot END");
return result;
}
THError("Cublas_dot only supports n, incx and incy "
"upto signed integer limits: %d", INT_MAX);
THError("Not implemented");
return -1;
}