Python compiler.Program方法代码示例

本文整理汇总了Python中pynvrtc.compiler.Program方法的典型用法代码示例。如果您正苦于以下问题：Python compiler.Program方法的具体用法？Python compiler.Program怎么用？Python compiler.Program使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pynvrtc.compiler的用法示例。

在下文中一共展示了compiler.Program方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load_sru_mod

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def load_sru_mod():
    global SRU_FWD_FUNC, SRU_BWD_FUNC, SRU_BiFWD_FUNC, SRU_BiBWD_FUNC
    global SRU_STREAM
    if check_sru_requirement():
        from cupy.cuda import function
        from pynvrtc.compiler import Program

        # This sets up device to use.
        device = torch.device("cuda")
        tmp_ = torch.rand(1, 1).to(device)

        sru_prog = Program(SRU_CODE.encode('utf-8'),
                           'sru_prog.cu'.encode('utf-8'))
        sru_ptx = sru_prog.compile()
        sru_mod = function.Module()
        sru_mod.load(bytes(sru_ptx.encode()))

        SRU_FWD_FUNC = sru_mod.get_function('sru_fwd')
        SRU_BWD_FUNC = sru_mod.get_function('sru_bwd')
        SRU_BiFWD_FUNC = sru_mod.get_function('sru_bi_fwd')
        SRU_BiBWD_FUNC = sru_mod.get_function('sru_bi_bwd')

        stream = namedtuple('Stream', ['ptr'])
        SRU_STREAM = stream(ptr=torch.cuda.current_stream().cuda_stream)

开发者ID:lizekang，项目名称:ITDD，代码行数:26，代码来源:sru.py

示例2: compile

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def compile(self):
        if self.ptx is None:
            program = Program(kernel, 'relu.cu')
            GPUReLUF.ptx = program.compile()

        if torch.cuda.current_device() not in GPUReLUF.configured_gpus:
            m = function.Module()
            m.load(bytes(self.ptx))

            self.relu_forward = m.get_function('relu_forward')
            self.relu_backward = m.get_function('relu_backward')

            Stream = namedtuple('Stream', ['ptr'])
            self.stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)

            GPUReLUF.configured_gpus[torch.cuda.current_device()] = (self.relu_forward, self.relu_backward, self.stream)

        self.relu_forward, self.relu_backward, self.stream = GPUReLUF.configured_gpus[torch.cuda.current_device()]

开发者ID:DingKe，项目名称:pytorch_workplace，代码行数:20，代码来源:relu.py

示例3: compile

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def compile(self):
        if self.ptx is None:
            program = Program(kernel.encode(), 'recurrent_forget_mult.cu'.encode())
            GPUForgetMult.ptx = program.compile()

        if torch.cuda.current_device() not in GPUForgetMult.configured_gpus:
            m = function.Module()
            m.load(bytes(self.ptx.encode()))

            self.forget_mult = m.get_function('recurrent_forget_mult')
            self.bwd_forget_mult = m.get_function('bwd_recurrent_forget_mult')

            Stream = namedtuple('Stream', ['ptr'])
            self.stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)

            GPUForgetMult.configured_gpus[torch.cuda.current_device()] = (self.forget_mult, self.bwd_forget_mult, self.stream)

        self.forget_mult, self.bwd_forget_mult, self.stream = GPUForgetMult.configured_gpus[torch.cuda.current_device()]

开发者ID:hendrycks，项目名称:outlier-exposure，代码行数:20，代码来源:forget_mult.py

示例4: compile

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def compile(self):
        if self.ptx is None:
            program = Program(kernel.encode(),
                              'recurrent_forget_mult.cu'.encode())
            GPUForgetMult.ptx = program.compile()

        if torch.cuda.current_device() not in GPUForgetMult.configured_gpus:
            m = function.Module()
            m.load(bytes(self.ptx.encode()))

            self.forget_mult = m.get_function('recurrent_forget_mult')
            self.bwd_forget_mult = m.get_function('bwd_recurrent_forget_mult')

            Stream = namedtuple('Stream', ['ptr'])
            self.stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)

            GPUForgetMult.configured_gpus[torch.cuda.current_device()] = (
                self.forget_mult, self.bwd_forget_mult, self.stream)

        self.forget_mult, self.bwd_forget_mult, self.stream = (
            GPUForgetMult.configured_gpus[torch.cuda.current_device()])

开发者ID:pytorch，项目名称:benchmark，代码行数:23，代码来源:forget_mult.py

示例5: get_ptx

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def get_ptx(self, code, options):
        options = [o.encode('utf-8') for o in options]
        return Program(code.encode('utf-8'),
                       name="default".encode('utf-8')).compile(options).encode('utf-8')

开发者ID:NervanaSystems，项目名称:ngraph-python，代码行数:6，代码来源:float_ew2.py

示例6: compile

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def compile(self):
        # Create program
        program = Program(self.kernel, self.title)

        # Compile program
        arch = "-arch={0}".format(cupyKernel.get_compute_arch())
        ptx = program.compile([arch])

        # Load Program
        m = function.Module()
        m.load(bytes(ptx.encode()))

        # Get Function Pointer
        self.func = m.get_function(self.func_name)
        self.compiled = True

开发者ID:rdspring1，项目名称:Count-Sketch-Optimizers，代码行数:17，代码来源:cupy_kernel.py

示例7: call

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def __call__(self, input):
        if not self.jit or not isinstance(input, torch.cuda.FloatTensor):
            norm = input.norm(2, input.dim() - 1)
            return torch.cat([norm, norm.new(norm.size()).zero_()], input.dim() - 1)

        out = input.new(input.size())
        input = input.contiguous()

        if not iscomplex(input):
            raise TypeError('The input and outputs should be complex')

        if (self.modulus_cache[input.get_device()] is None):
            kernel = b"""
            extern "C"
            __global__ void abs_complex_value(const float * x, float2 * z, int n)
            {
                int i = blockIdx.x * blockDim.x + threadIdx.x;
            if (i >= n)
                return;
            z[i] = make_float2(normf(2, x + 2*i), 0);

            }
            """
            print('modulus.cu')
            prog = Program(kernel, b'modulus.cu')
            ptx = prog.compile(['-arch='+get_compute_arch(input)])
            module = Module()
            module.load(bytes(ptx.encode()))
            self.modulus_cache[input.get_device()] = module
        fabs = self.modulus_cache[input.get_device()].get_function('abs_complex_value')
        fabs(grid=(self.GET_BLOCKS(int(out.nelement())//2), 1, 1),
             block=(self.CUDA_NUM_THREADS, 1, 1),
             args=[input.data_ptr(), out.data_ptr(), out.numel() // 2],
             stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        return out

开发者ID:tdeboissiere，项目名称:DeepLearningImplementations，代码行数:37，代码来源:utils_pytorch.py

示例8: smooth_local_affine

# 需要导入模块: from pynvrtc import compiler [as 别名]
# 或者: from pynvrtc.compiler import Program [as 别名]
def smooth_local_affine(output_cpu, input_cpu, epsilon, patch, h, w, f_r, f_e):
    # program = Program(src.encode('utf-8'), 'best_local_affine_kernel.cu'.encode('utf-8'))
    # ptx = program.compile(['-I/usr/local/cuda/include'.encode('utf-8')])
    program = Program(src, 'best_local_affine_kernel.cu')
    ptx = program.compile(['-I/usr/local/cuda/include'])
    m = function.Module()
    m.load(bytes(ptx.encode()))

    _reconstruction_best_kernel = m.get_function('reconstruction_best_kernel')
    _bilateral_smooth_kernel = m.get_function('bilateral_smooth_kernel')
    _best_local_affine_kernel = m.get_function('best_local_affine_kernel')
    Stream = namedtuple('Stream', ['ptr'])
    s = Stream(ptr=torch.cuda.current_stream().cuda_stream)

    filter_radius = f_r
    sigma1 = filter_radius / 3
    sigma2 = f_e
    radius = (patch - 1) / 2

    filtered_best_output = torch.zeros(np.shape(input_cpu)).cuda()
    affine_model =    torch.zeros((h * w, 12)).cuda()
    filtered_affine_model =torch.zeros((h * w, 12)).cuda()

    input_ = torch.from_numpy(input_cpu).cuda()
    output_ = torch.from_numpy(output_cpu).cuda()
    _best_local_affine_kernel(
        grid=(int((h * w) / 256 + 1), 1),
        block=(256, 1, 1),
        args=[output_.data_ptr(), input_.data_ptr(), affine_model.data_ptr(),
             np.int32(h), np.int32(w), np.float32(epsilon), np.int32(radius)], stream=s
     )

    _bilateral_smooth_kernel(
        grid=(int((h * w) / 256 + 1), 1),
        block=(256, 1, 1),
        args=[affine_model.data_ptr(), filtered_affine_model.data_ptr(), input_.data_ptr(), np.int32(h), np.int32(w), np.int32(f_r), np.float32(sigma1), np.float32(sigma2)], stream=s
    )

    _reconstruction_best_kernel(
        grid=(int((h * w) / 256 + 1), 1),
        block=(256, 1, 1),
        args=[input_.data_ptr(), filtered_affine_model.data_ptr(), filtered_best_output.data_ptr(),
        np.int32(h), np.int32(w)], stream=s
    )
    numpy_filtered_best_output = filtered_best_output.cpu().numpy()
    return numpy_filtered_best_output

开发者ID:sunshineatnoon，项目名称:LinearStyleTransfer，代码行数:48，代码来源:smooth_filter.py

注：本文中的pynvrtc.compiler.Program方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。