本文整理汇总了C++中cuda_make_array函数的典型用法代码示例。如果您正苦于以下问题：C++ cuda_make_array函数的具体用法？C++ cuda_make_array怎么用？C++ cuda_make_array使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cuda_make_array函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: time_ongpu
/*
 * Time gemm_ongpu on an (m x k) * (k x n) product and print the result.
 *
 * TA, TB  - transpose flags forwarded to gemm_ongpu; they also select the
 *           leading dimensions (lda is k when TA is 0, otherwise m; ldb is n
 *           when TB is 0, otherwise k).
 * m, k, n - matrix dimensions.
 *
 * The multiplication is issued 10 times. The elapsed clock() time and the
 * derived GFLOPS figure are printed to stdout. All buffers created here are
 * released before returning.
 */
void time_ongpu(int TA, int TB, int m, int k, int n)
{
    int iter = 10;

    /* Host-side operands and accumulator produced by random_matrix. */
    float *a = random_matrix(m,k);
    float *b = random_matrix(k,n);
    float *c = random_matrix(m,n);

    int lda = (!TA)?k:m;
    int ldb = (!TB)?n:k;

    /* Counterparts created by cuda_make_array from the host buffers. */
    float *a_cl = cuda_make_array(a, m*k);
    float *b_cl = cuda_make_array(b, k*n);
    float *c_cl = cuda_make_array(c, m*n);

    int i;
    clock_t start = clock(), end;
    for(i = 0; i<iter; ++i){
        gemm_ongpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
        /* cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize()
         * is the documented replacement. */
        cudaDeviceSynchronize();
    }
    /* Stop the clock right after the timed loop, before any bookkeeping. */
    end = clock();

    double flop = ((double)m)*n*(2.*k + 2.)*iter;
    double gflop = flop/pow(10., 9);
    double seconds = sec(end-start);
    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);

    cuda_free(a_cl);
    cuda_free(b_cl);
    cuda_free(c_cl);
    free(a);
    free(b);
    free(c);
}
示例2: make_shortcut_layer
/*
 * Construct a SHORTCUT layer.
 *
 * batch      - number of samples per batch.
 * index      - stored in l.index and echoed in the log line.
 * w, h, c    - stored as the layer's out_w/out_h/out_c; their product is the
 *              per-sample output (and input) count.
 * w2, h2, c2 - stored as the layer's w/h/c.
 *
 * Returns the populated layer by value. The delta and output buffers are
 * zero-initialised with calloc; when GPU is defined, matching buffers are
 * created with cuda_make_array.
 */
layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2)
{
    fprintf(stderr,"Shortcut Layer: %d\n", index);

    layer l = {0};
    l.type = SHORTCUT;
    l.batch = batch;
    l.index = index;

    /* w/h/c come from the second set of dimensions. */
    l.w = w2;
    l.h = h2;
    l.c = c2;

    /* Output geometry comes from the first set. */
    l.out_w = w;
    l.out_h = h;
    l.out_c = c;
    l.outputs = w*h*c;
    l.inputs = l.outputs;

    l.delta = calloc(l.outputs*batch, sizeof(float));
    l.output = calloc(l.outputs*batch, sizeof(float));

    l.forward = forward_shortcut_layer;
    l.backward = backward_shortcut_layer;
#ifdef GPU
    l.forward_gpu = forward_shortcut_layer_gpu;
    l.backward_gpu = backward_shortcut_layer_gpu;

    l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch);
    l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
#endif
    return l;
}
示例3: make_maxpool_layer
/*
 * Construct a MAXPOOL layer.
 *
 * batch   - number of samples per batch.
 * h, w, c - input height, width and channel count.
 * size    - stored in l.size and printed in the summary line.
 * stride  - divisor used to derive the output width/height.
 * padding - stored in l.pad; twice this value is added to w and h before
 *           dividing by stride.
 *
 * Returns the populated layer by value and prints a one-line summary to
 * stderr. indexes/output/delta are zero-initialised host buffers holding
 * out_h * out_w * out_c * batch elements each.
 */
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
{
    maxpool_layer l = {};

    l.type = MAXPOOL;
    l.batch = batch;
    l.size = size;
    l.stride = stride;
    l.pad = padding;

    /* Input geometry. */
    l.h = h;
    l.w = w;
    l.c = c;
    l.inputs = h*w*c;

    /* Output geometry. */
    l.out_w = (w + 2*padding)/stride;
    l.out_h = (h + 2*padding)/stride;
    l.out_c = c;
    l.outputs = l.out_h * l.out_w * l.out_c;

    int output_size = l.out_h * l.out_w * l.out_c * batch;
    l.indexes = (int*)calloc(output_size, sizeof(int));
    l.output = (float*)calloc(output_size, sizeof(float));
    l.delta = (float*)calloc(output_size, sizeof(float));

    l.forward = forward_maxpool_layer;
    l.backward = backward_maxpool_layer;
#ifdef GPU
    l.forward_gpu = forward_maxpool_layer_gpu;
    l.backward_gpu = backward_maxpool_layer_gpu;

    l.indexes_gpu = cuda_make_int_array(output_size);
    l.output_gpu = cuda_make_array(l.output, output_size);
    l.delta_gpu = cuda_make_array(l.delta, output_size);
#endif
    fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
    return l;
}
示例4: make_compact_layer
/*
 * Construct a COMPACT layer.
 *
 * batch   - number of samples per batch.
 * splits  - number of groups; stored in l.index and used as the divisor for
 *           the output channel count when method < 10.
 * method  - stored in l.method; values below 10 shrink the channel count to
 *           c/splits, any other value keeps it at c.
 * w, h, c - input width, height and channel count (width/height are kept
 *           unchanged on output).
 *
 * Returns the populated layer by value. delta/output/indexes are
 * zero-initialised host buffers of l.outputs*batch elements.
 */
layer make_compact_layer(int batch, int splits, int method, int w, int h, int c)
{
    fprintf(stderr,"Compact Layer: Split and merge channels in %d groups. %d %d %d\n", splits,w ,h ,c);

    layer l = {0};
    l.type = COMPACT;
    l.batch = batch;
    l.index = splits;
    l.method = method;

    l.w = w;
    l.h = h;
    l.c = c;
    l.inputs = w*h*c;

    l.out_w = w;
    l.out_h = h;
    /* The division is only evaluated for method < 10. */
    l.out_c = (method<10) ? c/splits : c;
    l.outputs = l.out_w * l.out_h * l.out_c;

    l.delta = calloc(l.outputs*batch, sizeof(float));
    l.output = calloc(l.outputs*batch, sizeof(float));
    l.indexes = calloc(l.outputs*batch, sizeof(int));
#ifdef GPU
    l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch);
    l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
    l.indexes_gpu = cuda_make_int_array(l.outputs*batch);
#endif
    return l;
}
示例5: make_cost_layer
/*
 * Construct a COST layer.
 *
 * batch     - number of samples per batch.
 * inputs    - per-sample input count; the layer's output count is the same.
 * cost_type - stored in l.cost_type.
 * scale     - stored in l.scale.
 *
 * Returns the populated layer by value. delta and output are zero-initialised
 * host buffers of inputs*batch floats; cost is a single zero-initialised
 * float. When GPU is defined, delta_gpu/output_gpu are created from the
 * matching host buffers.
 */
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
{
    fprintf(stderr, "cost %4d\n", inputs);
    cost_layer l = {0};
    l.type = COST;

    l.scale = scale;
    l.batch = batch;
    l.inputs = inputs;
    l.outputs = inputs;
    l.cost_type = cost_type;

    l.delta = calloc(inputs*batch, sizeof(float));
    l.output = calloc(inputs*batch, sizeof(float));
    l.cost = calloc(1, sizeof(float));

    l.forward = forward_cost_layer;
    l.backward = backward_cost_layer;
#ifdef GPU
    l.forward_gpu = forward_cost_layer_gpu;
    l.backward_gpu = backward_cost_layer_gpu;

    /* Pair each GPU buffer with its own host buffer (delta with delta,
     * output with output) rather than crossing them. */
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
    l.output_gpu = cuda_make_array(l.output, inputs*batch);
#endif
    return l;
}
示例6: make_crop_layer
/*
 * Construct a CROP layer (float variant).
 *
 * batch                   - number of samples per batch.
 * h, w, c                 - input height, width and channel count.
 * crop_height, crop_width - output height and width; channels are unchanged.
 * flip, angle, saturation, exposure
 *                         - stored verbatim in the corresponding fields.
 *
 * Returns the populated layer by value. The output buffer is zero-initialised
 * and holds crop_width*crop_height*c*batch floats. When GPU is defined,
 * rand_gpu is created with a null source pointer and batch*8 elements.
 */
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
{
    fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);

    crop_layer l = {0};
    int total = crop_width*crop_height*c*batch;

    l.type = CROP;
    l.batch = batch;

    /* Input geometry. */
    l.h = h;
    l.w = w;
    l.c = c;

    /* Parameters stored as given. */
    l.flip = flip;
    l.angle = angle;
    l.saturation = saturation;
    l.exposure = exposure;

    /* Crop window doubles as the output geometry. */
    l.crop_width = crop_width;
    l.crop_height = crop_height;
    l.out_w = crop_width;
    l.out_h = crop_height;
    l.out_c = c;

    l.inputs = l.w * l.h * l.c;
    l.outputs = l.out_w * l.out_h * l.out_c;

    l.output = calloc(total, sizeof(float));
#ifdef GPU
    l.output_gpu = cuda_make_array(l.output, total);
    l.rand_gpu = cuda_make_array(0, l.batch*8);
#endif
    return l;
}
示例7: make_route_layer
/*
 * Construct a ROUTE layer.
 *
 * batch        - number of samples per batch.
 * n            - number of entries in input_layers / input_sizes.
 * input_layers - array of n values, stored by pointer (not copied) and
 *                printed to stderr.
 * input_sizes  - array of n sizes, stored by pointer (not copied); their sum
 *                becomes both l.outputs and l.inputs.
 *
 * Returns the populated layer by value. delta and output are zero-initialised
 * host buffers of (sum of input_sizes)*batch floats. When GPU is defined, the
 * GPU buffers are created with a null source pointer.
 */
route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes)
{
    fprintf(stderr,"Route Layer:");

    route_layer l = {0};
    l.type = ROUTE;
    l.batch = batch;
    l.n = n;
    l.input_layers = input_layers;
    l.input_sizes = input_sizes;

    /* Log every entry of input_layers while accumulating the combined size. */
    int total = 0;
    int idx = 0;
    while(idx < n){
        fprintf(stderr," %d", input_layers[idx]);
        total += input_sizes[idx];
        ++idx;
    }
    fprintf(stderr, "\n");

    l.outputs = total;
    l.inputs = total;

    l.delta = calloc(total*batch, sizeof(float));
    l.output = calloc(total*batch, sizeof(float));
#ifdef GPU
    l.delta_gpu = cuda_make_array(0, total*batch);
    l.output_gpu = cuda_make_array(0, total*batch);
#endif
    return l;
}
示例8: make_crop_layer
/*
 * Construct a CROP layer (real_t variant).
 *
 * batch                   - number of samples per batch.
 * h, w, c                 - input height, width and channel count.
 * crop_height, crop_width - output height and width; channels are unchanged.
 * flip, angle, saturation, exposure
 *                         - stored verbatim in the corresponding fields.
 *
 * l.scale is set to crop_height / h, computed in real_t. Returns the populated
 * layer by value. The output buffer is zero-initialised and holds
 * l.outputs*batch real_t values. When GPU is defined, rand_gpu is created
 * with a null source pointer and batch*8 elements.
 */
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height,
		int crop_width, int flip, real_t angle, real_t saturation,
		real_t exposure) {
	fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h, w,
			crop_height, crop_width, c);

	crop_layer l = { 0 };
	l.type = CROP;
	l.batch = batch;

	/* Input geometry and derived ratio. */
	l.h = h;
	l.w = w;
	l.c = c;
	l.inputs = l.w * l.h * l.c;
	l.scale = (real_t) crop_height / h;

	/* Parameters stored as given. */
	l.flip = flip;
	l.angle = angle;
	l.saturation = saturation;
	l.exposure = exposure;

	/* Output geometry. */
	l.out_w = crop_width;
	l.out_h = crop_height;
	l.out_c = c;
	l.outputs = l.out_w * l.out_h * l.out_c;

	l.output = calloc(l.outputs * batch, sizeof(real_t));

	l.forward = forward_crop_layer;
	l.backward = backward_crop_layer;
#ifdef GPU
	l.forward_gpu = forward_crop_layer_gpu;
	l.backward_gpu = backward_crop_layer_gpu;

	l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
	l.rand_gpu = cuda_make_array(0, l.batch*8);
#endif
	return l;
}
示例9: resize_reorg_old_layer
/*
 * Recompute a reorg layer's geometry for a new input width/height and resize
 * its output and delta buffers accordingly.
 *
 * l - layer updated in place; its stride, c, reverse and batch fields are read.
 * w - new input width.
 * h - new input height.
 *
 * With l->reverse set, width and height are multiplied by the stride and the
 * channel count is divided by stride*stride; otherwise the opposite is done.
 * When GPU is defined, the GPU buffers are freed and recreated from the
 * resized host buffers.
 *
 * NOTE(review): realloc results are stored without a NULL check.
 */
void resize_reorg_old_layer(layer *l, int w, int h)
{
    int s = l->stride;
    int channels = l->c;

    l->h = h;
    l->w = w;

    if(!l->reverse){
        l->out_w = w/s;
        l->out_h = h/s;
        l->out_c = channels*(s*s);
    }else{
        l->out_w = w*s;
        l->out_h = h*s;
        l->out_c = channels/(s*s);
    }

    l->outputs = l->out_h * l->out_w * l->out_c;
    l->inputs = l->outputs;

    int output_size = l->outputs * l->batch;
    size_t bytes = output_size * sizeof(float);

    l->output = realloc(l->output, bytes);
    l->delta = realloc(l->delta, bytes);
#ifdef GPU
    cuda_free(l->output_gpu);
    cuda_free(l->delta_gpu);

    l->output_gpu = cuda_make_array(l->output, output_size);
    l->delta_gpu = cuda_make_array(l->delta, output_size);
#endif
}
示例10: make_detection_layer
/*
 * Construct a DETECTION layer (side/n variant).
 *
 * batch   - number of samples per batch.
 * inputs  - per-sample input count; must equal
 *           side*side*((1 + coords)*n + classes) (checked with assert).
 * n, side, classes, coords, rescore, b_debug
 *         - stored verbatim in the corresponding fields.
 *
 * Returns the populated layer by value. l.outputs equals l.inputs, and
 * l.truths is side*side*(1 + coords + classes). cost/output/delta are
 * zero-initialised host buffers. The function also logs to stderr and calls
 * srand(0) before returning.
 */
detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore, const bool b_debug)
{
    detection_layer l = {0};

    l.type = DETECTION;
    l.batch = batch;
    l.inputs = inputs;
    l.n = n;
    l.side = side;
    l.classes = classes;
    l.coords = coords;
    l.rescore = rescore;
    l.b_debug = b_debug;

    /* Validate the caller-supplied input count before allocating anything. */
    assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);

    l.cost = calloc(1, sizeof(float));
    l.outputs = l.inputs;
    l.truths = l.side*l.side*(1+l.coords+l.classes);

    l.output = calloc(batch*l.outputs, sizeof(float));
    l.delta = calloc(batch*l.outputs, sizeof(float));
#ifdef GPU
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif

    fprintf(stderr, "Detection Layer\n");
    srand(0);

    return l;
}
示例11: resize_convolutional_layer
/*
 * Update a convolutional layer for a new input width/height and resize its
 * col_image, output and delta buffers.
 *
 * l - layer updated in place; its size, c, n, out_c and batch fields are read.
 * w - new input width.
 * h - new input height.
 *
 * The output width/height come from convolutional_out_width/height, called
 * after w and h have been stored. When GPU is defined, the GPU buffers are
 * freed and recreated from the resized host buffers.
 *
 * NOTE(review): realloc results are stored without a NULL check.
 */
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
{
    l->w = w;
    l->h = h;

    int out_w = convolutional_out_width(*l);
    int out_h = convolutional_out_height(*l);
    l->out_w = out_w;
    l->out_h = out_h;

    l->outputs = l->out_h * l->out_w * l->out_c;
    l->inputs = l->w * l->h * l->c;

    /* Element counts shared by the host reallocations and the GPU buffers. */
    int col_count = out_h*out_w*l->size*l->size*l->c;
    int out_count = l->batch*out_h * out_w * l->n;

    l->col_image = realloc(l->col_image, col_count*sizeof(float));
    l->output = realloc(l->output, out_count*sizeof(float));
    l->delta = realloc(l->delta, out_count*sizeof(float));
#ifdef GPU
    cuda_free(l->col_image_gpu);
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);

    l->col_image_gpu = cuda_make_array(l->col_image, col_count);
    l->delta_gpu = cuda_make_array(l->delta, out_count);
    l->output_gpu = cuda_make_array(l->output, out_count);
#endif
}
示例12: make_avgpool_layer
/*
 * Construct an AVGPOOL layer.
 *
 * batch   - number of samples per batch.
 * w, h, c - input width, height and channel count.
 *
 * The output geometry is fixed at 1 x 1 x c, so l.outputs equals c. Returns
 * the populated layer by value and prints a one-line summary to stderr.
 * output and delta are zero-initialised host buffers of l.outputs*batch
 * real_t values.
 */
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) {
	fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h,
			c, c);

	avgpool_layer l = { 0 };
	l.type = AVGPOOL;
	l.batch = batch;

	/* Input geometry. */
	l.h = h;
	l.w = w;
	l.c = c;
	l.inputs = h * w * c;

	/* Output geometry: a single value per channel. */
	l.out_w = 1;
	l.out_h = 1;
	l.out_c = c;
	l.outputs = l.out_c;

	int output_size = l.outputs * batch;
	l.output = calloc(output_size, sizeof(real_t));
	l.delta = calloc(output_size, sizeof(real_t));

	l.forward = forward_avgpool_layer;
	l.backward = backward_avgpool_layer;
#ifdef GPU
	l.forward_gpu = forward_avgpool_layer_gpu;
	l.backward_gpu = backward_avgpool_layer_gpu;

	l.output_gpu = cuda_make_array(l.output, output_size);
	l.delta_gpu = cuda_make_array(l.delta, output_size);
#endif
	return l;
}
示例13: make_detection_layer
/*
 * Construct a DETECTION layer (joint/background/objectness variant).
 *
 * batch  - number of samples per batch.
 * inputs - per-sample input count.
 * classes, coords, joint, rescore, background, objectness
 *        - stored verbatim in the corresponding fields.
 *
 * The per-sample output count comes from get_detection_layer_output_size,
 * called once the fields above are set. Returns the populated layer by
 * value. cost/output/delta are zero-initialised host buffers. When GPU is
 * defined, the GPU buffers are created with a null source pointer. The
 * function also logs to stderr and calls srand(0) before returning.
 */
detection_layer make_detection_layer(int batch, int inputs, int classes, int coords, int joint, int rescore, int background, int objectness)
{
    detection_layer l = {0};

    l.type = DETECTION;
    l.batch = batch;
    l.inputs = inputs;
    l.classes = classes;
    l.coords = coords;
    l.rescore = rescore;
    l.objectness = objectness;
    l.background = background;
    l.joint = joint;

    l.cost = calloc(1, sizeof(float));
    l.does_cost=1;

    /* Every field assigned above is visible to the size helper. */
    int per_sample = get_detection_layer_output_size(l);
    int total = batch*per_sample;
    l.outputs = per_sample;

    l.output = calloc(total, sizeof(float));
    l.delta = calloc(total, sizeof(float));
#ifdef GPU
    l.output_gpu = cuda_make_array(0, total);
    l.delta_gpu = cuda_make_array(0, total);
#endif

    fprintf(stderr, "Detection Layer\n");
    srand(0);

    return l;
}
示例14: make_region_layer
/*
 * Construct a REGION layer.
 *
 * batch  - number of samples per batch.
 * inputs - per-sample input count; must equal side*side*coords*n (checked
 *          with assert).
 * n, side, classes, coords, rescore
 *        - stored verbatim in the corresponding fields.
 *
 * l.outputs is n*5*side*side. The output buffer holds batch*outputs floats
 * while the delta buffer holds batch*inputs floats; both are
 * zero-initialised. Returns the populated layer by value. The function also
 * logs to stderr and calls srand(0) before returning.
 */
region_layer make_region_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore)
{
    region_layer l = {0};

    l.type = REGION;
    l.batch = batch;
    l.inputs = inputs;
    l.n = n;
    l.side = side;
    l.classes = classes;
    l.coords = coords;
    l.rescore = rescore;

    /* Validate the caller-supplied input count before allocating anything. */
    assert(side*side*l.coords*l.n == inputs);

    l.cost = calloc(1, sizeof(float));

    int per_sample = l.n*5*side*side;
    l.outputs = per_sample;

    /* Output is sized by outputs, delta by inputs. */
    l.output = calloc(batch*per_sample, sizeof(float));
    l.delta = calloc(batch*inputs, sizeof(float));
#ifdef GPU
    l.output_gpu = cuda_make_array(l.output, batch*per_sample);
    l.delta_gpu = cuda_make_array(l.delta, batch*inputs);
#endif

    fprintf(stderr, "Region Layer\n");
    srand(0);

    return l;
}
示例15: resize_maxpool_layer
/*
 * Update a maxpool layer for a new input width/height and resize its
 * indexes, output and delta buffers.
 *
 * l - layer updated in place; its stride, c and batch fields are read.
 * w - new input width.
 * h - new input height.
 *
 * Output width/height are computed as (dim - 1)/stride + 1. When GPU is
 * defined, the GPU buffers are freed and recreated; output_gpu and delta_gpu
 * are built from the resized host buffers.
 *
 * NOTE(review): realloc results are stored without a NULL check.
 */
void resize_maxpool_layer(maxpool_layer *l, int w, int h)
{
    int step = l->stride;

    l->h = h;
    l->w = w;
    l->inputs = h*w*l->c;

    l->out_w = (w-1)/step + 1;
    l->out_h = (h-1)/step + 1;
    l->outputs = l->out_w * l->out_h * l->c;

    int output_size = l->outputs * l->batch;

    l->indexes = realloc(l->indexes, output_size * sizeof(int));
    l->output = realloc(l->output, output_size * sizeof(float));
    l->delta = realloc(l->delta, output_size * sizeof(float));
#ifdef GPU
    /* cuda_free is called with a float pointer, hence the cast on the int buffer. */
    cuda_free((float *)l->indexes_gpu);
    cuda_free(l->output_gpu);
    cuda_free(l->delta_gpu);

    l->indexes_gpu = cuda_make_int_array(output_size);
    l->output_gpu = cuda_make_array(l->output, output_size);
    l->delta_gpu = cuda_make_array(l->delta, output_size);
#endif
}