本文整理汇总了C#中GASS.CUDA.Types.CUfunction类的典型用法代码示例。如果您正苦于以下问题：C# CUfunction类的具体用法？C# CUfunction怎么用？C# CUfunction使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
CUfunction类属于GASS.CUDA.Types命名空间，在下文中一共展示了CUfunction类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: cuParamSetv
// Extern declaration for cuParamSetv; this overload takes the parameter data as a
// Short1[] marshalled as an [In] array.
// NOTE(review): the DllImport attribute is not visible in this excerpt; presumably this
// binds the CUDA driver API function of the same name, which copies `numbytes` bytes from
// `ptr` into the parameter space of `hfunc` at byte offset `offset` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuParamSetv(CUfunction hfunc, int offset, [In] Short1[] ptr, uint numbytes);
示例2: cuFuncGetAttribute
// Extern declaration for cuFuncGetAttribute; the queried value is written through the
// by-ref int `pi`, selected by `attrib`, for the function handle `hfunc`.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name - confirm.
// Returns a CUResult status code.
public static extern CUResult cuFuncGetAttribute(ref int pi, CUFunctionAttribute attrib, CUfunction hfunc);
示例3: cuParamSetTexRef
// Extern declaration for cuParamSetTexRef; takes a function handle, an int `texunit`
// and a texture reference handle.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which makes `hTexRef` available to `hfunc` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
示例4: Main
// Entry point of the FFT-convolution sample.
//
// What actually runs: a random signal and filter kernel are generated on the host,
// padded with PadData, uploaded to the device, and a 1-D complex-to-complex cuFFT plan
// is created through CUFFTDriver.cufftPlan1d. The method then returns.
//
// Everything after the early `return` (forward FFTs, point-wise multiply kernel,
// inverse FFT, host reference check) is intentionally left in place but is unreachable.
// NOTE(review): as written that pipeline could not work if re-enabled without further
// changes: the module load / GetModuleFunction calls and fft.Plan1D are commented out,
// so `func` is a default handle and `fft` presumably has no plan of its own - confirm
// before removing the `return`.
//
// Device buffers and the cuFFT plan are released in a finally block so that the early
// exit (and any exception) does not leave them allocated.
//
// args: command-line arguments; not used.
static void Main(string[] args)
{
    // Init and select 1st device.
    CUDA cuda = new CUDA(0, true);

    // load module
    //cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "simpleCUFFT.ptx"));
    CUfunction func = new CUfunction();// cuda.GetModuleFunction("ComplexPointwiseMulAndScale");

    // The filter size is assumed to be a number smaller than the signal size
    const int SIGNAL_SIZE = 50;
    const int FILTER_KERNEL_SIZE = 11;

    // Allocate host memory for the signal
    Float2[] h_signal = new Float2[SIGNAL_SIZE];
    // Initialize the memory for the signal
    Random r = new Random();
    for (int i = 0; i < SIGNAL_SIZE; ++i)
    {
        h_signal[i].x = r.Next() / (float)int.MaxValue;
        h_signal[i].y = 0;
    }

    // Allocate host memory for the filter
    Float2[] h_filter_kernel = new Float2[FILTER_KERNEL_SIZE];
    // Initialize the memory for the filter
    for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
    {
        h_filter_kernel[i].x = r.Next() / (float)int.MaxValue;
        h_filter_kernel[i].y = 0;
    }

    // Pad signal and filter kernel
    Float2[] h_padded_signal;
    Float2[] h_padded_filter_kernel;
    int new_size = PadData(h_signal, out h_padded_signal, SIGNAL_SIZE,
        h_filter_kernel, out h_padded_filter_kernel, FILTER_KERNEL_SIZE);

    // Allocate device memory for signal and copy host memory to device
    CUdeviceptr d_signal = cuda.CopyHostToDevice<Float2>(h_padded_signal);
    // Allocate device memory for filter kernel and copy host memory to device
    CUdeviceptr d_filter_kernel = cuda.CopyHostToDevice<Float2>(h_padded_filter_kernel);

    // CUFFT plan
    CUFFT fft = new CUFFT(cuda);
    cufftHandle handle = new cufftHandle();
    bool planCreated = false;
    try
    {
        CUFFTResult fftres = CUFFTDriver.cufftPlan1d(ref handle, new_size, CUFFTType.C2C, 1);
        //fft.Plan1D(new_size, CUFFTType.C2C, 1);
        planCreated = (fftres == CUFFTResult.Success);
        if (!planCreated)
        {
            // Previously the status was stored and silently ignored.
            Console.WriteLine("cufftPlan1d failed: {0}", fftres);
            return;
        }

        // Deliberate early exit kept from the original sample: the remainder of the
        // pipeline is disabled (see the NOTE(review) in the header comment).
        return;

        // Transform signal and kernel
        fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Forward);
        fft.ExecuteComplexToComplex(d_filter_kernel, d_filter_kernel, CUFFTDirection.Forward);

        // Multiply the coefficients together and normalize the result
        // ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);
        cuda.SetFunctionBlockShape(func, 256, 1, 1);
        cuda.SetParameter(func, 0, (uint)d_signal.Pointer);
        cuda.SetParameter(func, IntPtr.Size, (uint)d_filter_kernel.Pointer);
        cuda.SetParameter(func, IntPtr.Size * 2, (uint)new_size);
        cuda.SetParameter(func, IntPtr.Size * 2 + 4, 1.0f / new_size);
        cuda.SetParameterSize(func, (uint)(IntPtr.Size * 2 + 8));
        cuda.Launch(func, 32, 1);

        // Transform signal back
        fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Inverse);

        // Copy device memory to host (reuses the padded host buffer as the destination)
        Float2[] h_convolved_signal = h_padded_signal;
        cuda.CopyDeviceToHost<Float2>(d_signal, h_convolved_signal);

        // Allocate host memory for the convolution result
        Float2[] h_convolved_signal_ref = new Float2[SIGNAL_SIZE];

        // Convolve on the host
        Convolve(h_signal, SIGNAL_SIZE,
            h_filter_kernel, FILTER_KERNEL_SIZE,
            h_convolved_signal_ref);

        // check result
        bool res = cutCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 2 * SIGNAL_SIZE, 1e-5f);
        Console.WriteLine("Test {0}", (true == res) ? "PASSED" : "FAILED");

        //Destroy CUFFT context
        fft.Destroy();
    }
    finally
    {
        // Release the plan created directly through CUFFTDriver, if any.
        if (planCreated)
        {
            CUFFTDriver.cufftDestroy(handle);
        }

        // cleanup memory
        cuda.Free(d_signal);
        cuda.Free(d_filter_kernel);
    }
}
示例5: cuLaunchGridAsync
// Extern declaration for cuLaunchGridAsync; takes a function handle, a grid width and
// height, and a stream handle.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which launches `f` on a grid_width x grid_height
// grid of blocks in stream `hStream` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
示例6: cuModuleGetFunction
// Extern declaration for cuModuleGetFunction; the function handle is written through the
// by-ref `hfunc`, looked up by `name` in module `hmod`.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name. `name` would then have to match the kernel's
// exported name exactly - confirm.
// Returns a CUResult status code.
public static extern CUResult cuModuleGetFunction(ref CUfunction hfunc, CUmodule hmod, string name);
示例7: cuLaunch
// Extern declaration for cuLaunch; takes only the function handle to launch.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name (a single-block launch using previously set block
// shape and parameters) - confirm.
// Returns a CUResult status code.
public static extern CUResult cuLaunch(CUfunction f);
示例8: cuLaunchGrid
// Extern declaration for cuLaunchGrid; takes a function handle plus a grid width and height.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which launches `f` on a grid_width x grid_height
// grid of blocks - confirm.
// Returns a CUResult status code.
public static extern CUResult cuLaunchGrid(CUfunction f, int grid_width, int grid_height);
示例9: cuFuncSetSharedSize
// Extern declaration for cuFuncSetSharedSize; takes a function handle and a byte count.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which sets the dynamic shared-memory size used
// when `hfunc` is launched - confirm.
// Returns a CUResult status code.
public static extern CUResult cuFuncSetSharedSize(CUfunction hfunc, uint bytes);
示例10: cuFuncSetCacheConfig
// Extern declaration for cuFuncSetCacheConfig; takes a function handle and a
// CUFunctionCache value.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which sets the preferred cache configuration
// for `hfunc` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuFuncSetCacheConfig(CUfunction hfunc, CUFunctionCache config);
示例11: cuFuncSetBlockShape
// Extern declaration for cuFuncSetBlockShape; takes a function handle and three ints x, y, z.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, where x, y, z are the thread-block dimensions
// used by later launches of `hfunc` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
示例12: cuParamSetf
// Extern declaration for cuParamSetf; takes a function handle, an int offset and a float value.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which stores `value` in the parameter space of
// `hfunc` at byte offset `offset` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuParamSetf(CUfunction hfunc, int offset, float value);
示例13: cuParamSeti
// Extern declaration for cuParamSeti; takes a function handle, an int offset and a
// 32-bit unsigned value.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which stores `value` in the parameter space of
// `hfunc` at byte offset `offset` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuParamSeti(CUfunction hfunc, int offset, uint value);
示例14: DoLayout
// Runs the layout kernels from module `mod` in a fixed order: "dummy" (with one 4-byte
// parameter), then InitializationKernel, BoundingBoxKernel, TreeBuildingKernel,
// SummarizationKernel, ForceCalculationKernel and IntegrationKernel.
//
// Every driver call is checked; failures are reported through Logger as warnings and the
// sequence continues with the next kernel (best effort, no exception is thrown here).
// A kernel whose lookup fails is skipped rather than launched with a stale handle.
//
// NOTE(review): launches go through cuLaunch with no block shape or grid set in this
// method, although the kernel comments mention <<<blocks * FACTORn, THREADSn>>>
// configurations - confirm whether the launch configuration is established elsewhere.
public override void DoLayout()
{
    CUResult res;

    // 1 KiB scratch device buffer, filled from a zeroed host array. It is not passed to
    // any kernel in this method; it is released in the finally block below.
    CUdeviceptr p1 = new CUdeviceptr();
    res = CUDADriver.cuMemAlloc(ref p1, 1 <<10);
    bool scratchAllocated = (res == CUResult.Success);
    if (scratchAllocated)
    {
        byte[] b = new byte[1<<10];
        res = CUDADriver.cuMemcpyHtoD(p1, b, (uint) b.Length);
        if (res != CUResult.Success)
            Logger.AddMessage(LogEntryType.Warning, "CUDA Error in cuMemcpyHtoD: " + res.ToString());
    }
    else
    {
        Logger.AddMessage(LogEntryType.Warning, "CUDA Error in cuMemAlloc: " + res.ToString());
    }

    try
    {
        // Node count: twice the vertex count, at least 1024 per block, rounded up to a
        // multiple of prop.SIMDWidth (assumed to be a power of two), minus one.
        // NOTE(review): the value is computed but not handed to any kernel here.
        int nnodes = (int) Network.VertexCount*2;
        int blocks = 32;
        if (nnodes < 1024*blocks) nnodes = 1024*blocks;
        while ((nnodes & (prop.SIMDWidth-1)) != 0) nnodes++;
        nnodes--;

        //float dtime = 0.025f; float dthf = dtime * 0.5f;
        //float epssq = 0.05f * 0.05f;
        //float itolsq = 1.0f / (0.5f * 0.5f);

        CUfunction func = new CUfunction();
        if (TryGetKernel("dummy", ref func))
        {
            // Float4[] data = new Float4[100];
            // The device pointer is never allocated (allocation is commented out), so the
            // kernel receives the default pointer value as its single 4-byte argument.
            CUdeviceptr ptr = new CUdeviceptr();
            //CUDADriver.cuMemAlloc(ref ptr, (uint) 100 * System.Runtime.InteropServices.Marshal.SizeOf(Float4));
            res = CUDADriver.cuParamSeti(func, 0, (uint) ptr.Pointer);
            if (res == CUResult.Success)
                res = CUDADriver.cuParamSetSize(func, 4);
            if (res != CUResult.Success)
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error setting parameters of dummy function: " + res.ToString());
            LaunchAndReport(func, "dummy function");
        }

        // InitializationKernel<<<1, 1>>>();
        LaunchKernelByName("InitializationKernel");
        // BoundingBoxKernel<<<blocks * FACTOR1, THREADS1>>>();
        LaunchKernelByName("BoundingBoxKernel");
        // TreeBuildingKernel<<<blocks * FACTOR2, THREADS2>>>();
        LaunchKernelByName("TreeBuildingKernel");
        // SummarizationKernel<<<blocks * FACTOR3, THREADS3>>>();
        LaunchKernelByName("SummarizationKernel");
        // ForceCalculationKernel<<<blocks * FACTOR5, THREADS5>>>();
        LaunchKernelByName("ForceCalculationKernel");
        // IntegrationKernel<<<blocks * FACTOR6, THREADS6>>>();
        LaunchKernelByName("IntegrationKernel");
    }
    finally
    {
        if (scratchAllocated)
            CUDADriver.cuMemFree(p1);
    }
}

// Looks up `kernelName` in `mod`; logs a warning and returns false when the lookup fails.
private bool TryGetKernel(string kernelName, ref CUfunction func)
{
    CUResult res = CUDADriver.cuModuleGetFunction(ref func, mod, kernelName);
    if (res == CUResult.Success)
        return true;
    Logger.AddMessage(LogEntryType.Warning, "CUDA Error looking up " + kernelName + ": " + res.ToString());
    return false;
}

// Launches `func` with cuLaunch and logs a warning tagged with `label` if the launch fails.
private void LaunchAndReport(CUfunction func, string label)
{
    CUResult res = CUDADriver.cuLaunch(func);
    if (res != CUResult.Success)
        Logger.AddMessage(LogEntryType.Warning, "CUDA Error in " + label + ": " + res.ToString());
}

// Looks up a kernel by its exact name and launches it, logging any failure.
private void LaunchKernelByName(string kernelName)
{
    CUfunction func = new CUfunction();
    if (TryGetKernel(kernelName, ref func))
        LaunchAndReport(func, kernelName);
}
示例15: cuParamSetSize
// Extern declaration for cuParamSetSize; takes a function handle and a byte count.
// NOTE(review): the DllImport attribute is not visible here; presumably binds the CUDA
// driver API function of the same name, which sets the total size in bytes of the
// parameter space of `hfunc` - confirm.
// Returns a CUResult status code.
public static extern CUResult cuParamSetSize(CUfunction hfunc, uint numbytes);