本文整理汇总了Java中com.nativelibs4java.opencl.CLProgram.createKernel方法的典型用法代码示例。如果您正苦于以下问题：Java CLProgram.createKernel方法的具体用法？Java CLProgram.createKernel怎么用？Java CLProgram.createKernel使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类com.nativelibs4java.opencl.CLProgram的用法示例。
在下文中一共展示了CLProgram.createKernel方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: GPUMiner
import com.nativelibs4java.opencl.CLProgram; //导入方法依赖的package包/类
/**
 * Creates a GPUMiner for a single OpenCL device. This constructor should not
 * be called directly; use
 * {@link me.apemanzilla.krist.turbokrist.miners.MinerFactory} instead.
 *
 * @param dev the OpenCL device on which the context and queue are created
 * @param options miner options supplying the Krist address and the work size
 * @throws MinerInitException if the OpenCL program cannot be built
 */
GPUMiner(CLDevice dev, MinerOptions options) throws MinerInitException {
    this.deviceName = dev.getName().trim();
    this.context = dev.getPlatform().createContext(null, new CLDevice[] { dev });
    this.queue = context.createDefaultQueue();

    // Build the program from the two kernel source names.
    ProgramBuilder builder = new ProgramBuilder("sha256.cl", "krist_miner.cl");
    CLProgram builtProgram;
    try {
        builtProgram = builder.build(context);
    } catch (ProgramBuildException buildFailure) {
        buildFailure.printStackTrace();
        throw new MinerInitException("Failed to build OpenCL program");
    }
    this.kernel = builtProgram.createKernel("krist_miner_basic");

    // Stage the address bytes in a 10-byte native pointer, ordered to match
    // the context, and wrap it in an input buffer.
    Pointer<Byte> nativeAddress = Pointer.allocateBytes(10).order(context.getByteOrder());
    nativeAddress.setArray(MinerUtils.getBytes(options.getKristAddress().getName()));
    this.addressBuffer = context.createByteBuffer(Usage.Input, nativeAddress);

    // The work size is looked up by the signature generated for this device.
    int configuredWorkSize = options.getWorkSize(MinerFactory.generateSignature(dev));
    this.workSize = new int[] { configuredWorkSize };
}
示例2: getKernel
import com.nativelibs4java.opencl.CLProgram; //导入方法依赖的package包/类
/**
 * Returns the kernel for the given two-operand function and primitive types,
 * generating and building its source on first request and reusing the cached
 * kernel afterwards.
 *
 * @param op the two-operand function to look up
 * @param prim1 primitive type of the first operand
 * @param prim2 primitive type of the second operand
 * @param primOut primitive type of the result
 * @param secondOperandIsScalar whether the second operand is a scalar; part of the cache key
 * @return the cached or newly created kernel
 * @throws CLBuildException if building the generated program fails
 */
public synchronized CLKernel getKernel(Fun2 op, Primitive prim1, Primitive prim2, Primitive primOut, boolean secondOperandIsScalar) throws CLBuildException {
    // First level of the cache: one map per function.
    Map<PrimitiveTrio, CLKernel> kernelsForOp = fun2Kernels.get(op);
    if (kernelsForOp == null) {
        kernelsForOp = new HashMap<PrimitiveTrio, CLKernel>();
        fun2Kernels.put(op, kernelsForOp);
    }

    // Second level: keyed by the operand/result types and the scalar flag.
    PrimitiveTrio cacheKey = new PrimitiveTrio(prim1, prim2, primOut, secondOperandIsScalar);
    CLKernel cached = kernelsForOp.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    // Cache miss: generate the source, build it and remember the kernel.
    StringBuilder source = new StringBuilder(300);
    String kernelName = createVectFun2Source(op, prim1, prim2, primOut, source, secondOperandIsScalar);
    CLProgram builtProgram = getContext().createProgram(source.toString()).build();
    CLKernel created = builtProgram.createKernel(kernelName);
    kernelsForOp.put(cacheKey, created);
    return created;
}
示例3: loadKernels
import com.nativelibs4java.opencl.CLProgram; //导入方法依赖的package包/类
/**
 * Creates the eight arithmetic kernels from the given program and stores each
 * one in the variable of the same name (add/subtract/multiply/divide, each in
 * an "Image" and a "Constant" variant).
 *
 * NOTE(review): the assigned variables are declared outside this block,
 * presumably as instance fields - confirm against the enclosing class.
 *
 * @param program the program from which the kernels are created by name
 */
private void loadKernels(CLProgram program) {
addImage = program.createKernel("addImage");
addConstant = program.createKernel("addConstant");
subtractImage = program.createKernel("subtractImage");
subtractConstant = program.createKernel("subtractConstant");
multiplyImage = program.createKernel("multiplyImage");
multiplyConstant = program.createKernel("multiplyConstant");
divideImage = program.createKernel("divideImage");
divideConstant = program.createKernel("divideConstant");
}
示例4: buildAndExecuteKernel
import com.nativelibs4java.opencl.CLProgram; //导入方法依赖的package包/类
/**
 * Builds {@code src}, runs its "mandelbrot" kernel over a
 * {@code realResolution} x {@code imaginaryResolution} range and returns the
 * elapsed wall-clock time in nanoseconds. The measured interval covers the
 * program build, kernel creation, enqueueing and {@code queue.finish()}.
 *
 * @param queue queue used to enqueue the kernel; its context builds the program
 * @param realMin first component of the second float-pair kernel argument
 * @param imaginaryMin second component of the second float-pair kernel argument
 * @param realResolution first dimension of the global range; also a kernel argument
 * @param imaginaryResolution second dimension of the global range
 * @param maxIter passed through to the kernel
 * @param magicNumber passed through to the kernel
 * @param deltaReal first component of the first float-pair kernel argument
 * @param deltaImaginary second component of the first float-pair kernel argument
 * @param results pointer wrapped in the output buffer given to the kernel
 * @param src OpenCL source containing the "mandelbrot" kernel
 * @return elapsed time in nanoseconds
 * @throws CLBuildException if the program fails to build
 * @throws IOException declared by the signature; not thrown by any visible statement
 */
private static long buildAndExecuteKernel(CLQueue queue, float realMin,
        float imaginaryMin, int realResolution, int imaginaryResolution,
        int maxIter, int magicNumber, float deltaReal,
        float deltaImaginary, Pointer<Integer> results, String src)
        throws CLBuildException, IOException {
    CLContext context = queue.getContext();
    long startTime = System.nanoTime();

    CLProgram program = context.createProgram(src).build();

    // Create the kernel with all of its arguments bound up front.
    float[] delta = { deltaReal, deltaImaginary };
    float[] minimum = { realMin, imaginaryMin };
    CLKernel kernel = program.createKernel("mandelbrot", delta, minimum,
            maxIter, magicNumber, realResolution,
            context.createBuffer(CLMem.Usage.Output, results, false));

    // 2D global range sized by the requested resolution, with 1x1 work
    // groups: one work item per pixel.
    int[] globalSize = { realResolution, imaginaryResolution };
    int[] localSize = { 1, 1 };
    kernel.enqueueNDRange(queue, globalSize, localSize);

    queue.finish();
    return System.nanoTime() - startTime;
}
示例5: main
import com.nativelibs4java.opencl.CLProgram; //导入方法依赖的package包/类
/**
 * Fills a 1024-element float buffer with {@code cos(i)}, runs the inline
 * {@code add_floats} kernel on it (which doubles each element in place), and
 * prints the first ten results.
 *
 * @param args unused
 * @throws IOException declared by the signature; not thrown by any visible statement
 */
public static void main(String[] args) throws IOException {
    CLContext context = JavaCL.createBestContext();
    CLQueue queue = context.createDefaultQueue();
    ByteOrder order = context.getByteOrder();

    int n = 1024;
    Pointer<Float> input = allocateFloats(n).order(order);
    for (int index = 0; index < n; index++) {
        input.set(index, (float)cos(index));
    }

    // Wrap the native memory in an OpenCL buffer the kernel reads and writes.
    CLBuffer<Float> a = context.createBuffer(Usage.InputOutput, input);

    // Kernel source, compiled at runtime.
    String src =
            "__kernel void add_floats(global float* a, int n) {\n" +
            " int i = get_global_id(0);\n" +
            " if(i < n){\n" +
            " a[i] = 2.f*a[i];\n" +
            " }\n" +
            "}";
    CLProgram program = context.createProgram(src).build();

    // Bind the arguments and launch one work item per element.
    CLKernel addFloatsKernel = program.createKernel("add_floats");
    addFloatsKernel.setArgs(a, n);
    CLEvent kernelDone = addFloatsKernel.enqueueNDRange(queue, new int[] { n });

    // The read waits on the kernel's completion event.
    Pointer<Float> output = a.read(queue, kernelDone);

    // Print at most the first 10 values.
    int shown = Math.min(10, n);
    for (int index = 0; index < shown; index++) {
        System.out.println("out[" + index + "] = " + output.get(index));
    }
}
示例6: buildAndExecuteKernel
import com.nativelibs4java.opencl.CLProgram; //导入方法依赖的package包/类
/**
 * Runs the mandelbrot kernel over a {@code realResolution} x
 * {@code imaginaryResolution} range and returns the elapsed wall-clock time
 * in nanoseconds. Depending on {@code useAutoGenWrapper}, the kernel is
 * launched either through the {@code Mandelbrot} wrapper class or by building
 * {@code src} and creating the "mandelbrot" kernel by hand.
 *
 * @param queue queue used to launch the kernel; its context creates the buffer
 * @param realMin first component of the second float-pair kernel argument
 * @param imaginaryMin second component of the second float-pair kernel argument
 * @param realResolution first dimension of the global range; also a kernel argument
 * @param imaginaryResolution second dimension of the global range
 * @param maxIter passed through to the kernel
 * @param magicNumber passed through to the kernel
 * @param deltaReal first component of the first float-pair kernel argument
 * @param deltaImaginary second component of the first float-pair kernel argument
 * @param results pointer wrapped in the output buffer given to the kernel
 * @param src OpenCL source; only used when the wrapper is not used
 * @return elapsed time in nanoseconds, including any program build
 * @throws CLBuildException if the program fails to build
 * @throws IOException declared by the signature; NOTE(review): presumably raised by the wrapper - confirm
 */
private static long buildAndExecuteKernel(CLQueue queue, float realMin, float imaginaryMin, int realResolution,
        int imaginaryResolution, int maxIter, int magicNumber, float deltaReal,
        float deltaImaginary, Pointer<Integer> results, String src) throws CLBuildException, IOException {
    CLContext context = queue.getContext();
    long startTime = System.nanoTime();

    // Arguments shared by both launch paths.
    float[] delta = { deltaReal, deltaImaginary };
    float[] minimum = { realMin, imaginaryMin };
    // 2D global range with 1x1 work groups: one work item per pixel.
    int[] globalSize = { realResolution, imaginaryResolution };
    int[] localSize = { 1, 1 };

    if (!useAutoGenWrapper) {
        // Manual path: build the source and bind the arguments at kernel creation.
        CLProgram program = context.createProgram(src).build();
        CLKernel kernel = program.createKernel(
                "mandelbrot",
                delta,
                minimum,
                maxIter,
                magicNumber,
                realResolution,
                context.createBuffer(CLMem.Usage.Output, results, false));
        kernel.enqueueNDRange(queue, globalSize, localSize);
    } else {
        // Wrapper path: the wrapper takes the same arguments plus the ranges.
        Mandelbrot mandelbrot = new Mandelbrot(context);
        mandelbrot.mandelbrot(
                queue,
                delta,
                minimum,
                maxIter,
                magicNumber,
                realResolution,
                context.createBuffer(CLMem.Usage.Output, results, false),
                globalSize,
                localSize);
    }

    queue.finish();
    return System.nanoTime() - startTime;
}
示例7: matrixTranspose
import com.nativelibs4java.opencl.CLProgram; //导入方法依赖的package包/类
/**
 * Enqueues a transpose of matrix {@code a} into {@code out}.
 *
 * <p>The two kernels ({@code transposeSelf} and {@code transposeOther}) are
 * created once per primitive type and cached in {@code matrixTransposeKernels}.
 * The kernel source is written for {@code double} and specialised by replacing
 * every occurrence of "double" with the primitive's OpenCL type name. When
 * {@code a.equals(out)} the in-place kernel is used; otherwise the
 * out-of-place kernel is used.
 *
 * <p>The row, column and stride values are narrowed to {@code int} before
 * being passed to the kernel and used as the global range.
 *
 * @param prim primitive type selecting (and specialising) the kernels
 * @param a input matrix buffer
 * @param aRows number of rows of {@code a}
 * @param aColumns number of columns of {@code a}
 * @param aStride row stride used to index {@code a}
 * @param out output matrix buffer; must not be {@code null}
 * @param eventsToWaitFor events the enqueued kernel waits for
 * @return the event of the enqueued kernel
 * @throws IllegalArgumentException if {@code out} is {@code null}
 * @throws CLBuildException if kernel creation fails to build the program
 */
public <T> CLEvent matrixTranspose(Primitive prim, CLBuffer<T> a, long aRows, long aColumns, long aStride, CLBuffer<T> out, CLEvent... eventsToWaitFor) throws CLBuildException {
    if (out == null)
        throw new IllegalArgumentException("Null output matrix !");
    CLKernel[] kernels;
    // Guard the per-primitive kernel cache while looking up or populating it.
    synchronized (matrixTransposeKernels) {
        kernels = matrixTransposeKernels.get(prim);
        if (kernels == null) {
            String src =
                prim.getRequiredPragmas() +
                "__kernel void transposeSelf( \n" +
                " __global double* a, int aRows, int aColumns, int aStride \n" +
                ") { \n" +
                " int i = get_global_id(0); \n" +
                " int j = get_global_id(1); \n" +
                " \n" +
                " if (i >= aRows || j >= aColumns || j >= i) return; \n" +
                " \n" +
                " size_t aIndex = i * aStride + j; \n" +
                " size_t outIndex = j * aRows + i; \n" +
                " double temp = a[outIndex]; \n" +
                " a[outIndex] = a[aIndex]; \n" +
                " a[aIndex] = temp; \n" +
                "} \n" +
                "__kernel void transposeOther( \n" +
                " __global const double* a, int aRows, int aColumns, int aStride, \n" +
                " __global double* out \n" +
                ") { \n" +
                " int i = get_global_id(0); \n" +
                " int j = get_global_id(1); \n" +
                " \n" +
                " if (i >= aRows || j >= aColumns) return; \n" +
                " \n" +
                " size_t aIndex = i * aStride + j; \n" +
                " size_t outIndex = j * aRows + i; \n" +
                " out[outIndex] = a[aIndex]; \n" +
                "} \n"
            ;
            String clTypeName = prim.clTypeName();
            // Literal substitution: String.replace does not interpret the
            // replacement as a regex replacement pattern, so a '$' or '\' in
            // the type name cannot be misread as a group reference or escape.
            src = src.replace("double", clTypeName);
            CLProgram program = context.createProgram(src);
            // Index 0 is the in-place kernel, index 1 the out-of-place kernel.
            kernels = new CLKernel[] { program.createKernel("transposeSelf"), program.createKernel("transposeOther") };
            matrixTransposeKernels.put(prim, kernels);
        }
    }
    boolean self = a.equals(out);
    CLKernel kernel = kernels[self ? 0 : 1];
    // The cached kernel is shared, so binding arguments and enqueueing are
    // done under its monitor to keep the pair together.
    synchronized (kernel) {
        if (self)
            kernel.setArgs(a, (int)aRows, (int)aColumns, (int)aStride);
        else
            kernel.setArgs(a, (int)aRows, (int)aColumns, (int)aStride, out);
        return kernel.enqueueNDRange(queue, new int [] { (int)aRows, (int)aColumns }, eventsToWaitFor);
    }
}