当前位置: 首页>>代码示例>>Python>>正文


Python cuda.host_from_gpu函数代码示例

本文整理汇总了Python中theano.sandbox.cuda.host_from_gpu函数的典型用法代码示例。如果您正苦于以下问题:Python host_from_gpu函数的具体用法?Python host_from_gpu怎么用?Python host_from_gpu使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了host_from_gpu函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: cpu_var_to_gpu_var

def cpu_var_to_gpu_var(x):
    """Create a new GPU variable shaped like ``x`` plus its host-side view.

    A ``CudaNdarrayVariable`` is built whose type has the same
    ``broadcastable`` pattern as ``x``.  The variable is left unnamed.

    Parameters
    ----------
    x
        Object exposing a ``broadcastable`` attribute.

    Returns
    -------
    tuple
        ``(gpu_var, cpu_var)`` where ``cpu_var`` is
        ``cuda.host_from_gpu(gpu_var)``.
    """
    from theano.sandbox import cuda

    # Named ``gpu_type`` so the ``type`` builtin is not shadowed.
    gpu_type = cuda.CudaNdarrayType(broadcastable=x.broadcastable)
    # The earlier version computed 'gpu_%s' % x.name and then immediately
    # replaced it with None, so the effective name has always been None.
    gpu_var = cuda.CudaNdarrayVariable(type=gpu_type, name=None)
    cpu_var = cuda.host_from_gpu(gpu_var)
    return gpu_var, cpu_var
开发者ID:mrocklin,项目名称:ape,代码行数:9,代码来源:theano_computation.py

示例2: test_weight_acts_strided

def test_weight_acts_strided():

    # Tests that WeightActs with all possible strides 

    rng = np.random.RandomState([2012,10,9])

    #Each list in shape_list : 
    #[img_shape,filter_shape]
    #[(channels, rows, cols, batch_size),(channels, filter_rows, filter_cols, num_filters)]
    shape_list = [[(1, 7, 8, 5),     (1, 2, 2, 16)],
                  [(3, 7, 8, 5),     (3, 3, 3, 16)],
                  [(16, 11, 11, 4),  (16, 4, 4, 16)], 
                  [(3, 20, 20, 3),   (3, 5, 5, 16)],
                  [(3, 21, 21, 3),   (3, 6, 6, 16)],
                  ]
    for partial_sum in [0, 1, 4]:
        print "partial_sum: %d"%(partial_sum)
        for test_idx in xrange(len(shape_list)):
            images = rng.uniform(-1., 1., shape_list[test_idx][0]).astype('float32')
            filters = rng.uniform(-1., 1., shape_list[test_idx][1]).astype('float32')
            gpu_images = float32_shared_constructor(images,name='images')
            print "test case %d..."%(test_idx+1) 
              
            for ii in xrange(filters.shape[1]):
                stride = ii + 1                            
                output_python = FilterActs_python(images,filters,stride)   
                _, h_rows, h_cols, _ = output_python.shape
                if partial_sum == 4:
                    if (h_rows*h_cols)%partial_sum != 0:
                        print "skip test case %d, stride %d when partial_sum is equal to %d"%(test_idx+1,stride,partial_sum)
                        break
                hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32')
                gpu_hidacts = float32_shared_constructor(hidacts,name='hidacts')
                    
                weights_grad_python = WeightActs_python(images,hidacts,filters.shape[1],filters.shape[2],stride)
                
                weights_grad = WeightActs(partial_sum=partial_sum,stride=stride)(
                                                    gpu_images,
                                                    gpu_hidacts,
                                                    as_tensor_variable((filters.shape[1], filters.shape[2]))
                                                   )[0]
                weights_grad = host_from_gpu(weights_grad)
                f = function([], weights_grad)
                weights_grad_val = f()   
                
                warnings.warn("""test_weight_acts_strided success criterion is not very strict.""")
                
                if np.abs(weights_grad_val - weights_grad_python).max() > 3.4e-5:
                    assert type(weights_grad_val) == type(weights_grad_python)
                    assert weights_grad_val.dtype == weights_grad_python.dtype
                    if weights_grad_val.shape != weights_grad_python.shape:
                        print 'cuda-convnet shape: ',weights_grad_val.shape
                        print 'python conv shape: ',weights_grad_python.shape
                        assert False
                    err = np.abs(weights_grad_val - weights_grad_python)
                    print 'stride %d'%stride
                    print 'absolute error range: ', (err.min(), err.max())
                    print 'mean absolute error: ', err.mean()
                    print 'cuda-convnet value range: ', (weights_grad_val.min(), weights_grad_val.max())
                    print 'python conv value range: ', (weights_grad_python.min(), weights_grad_python.max())
开发者ID:AlexArgus,项目名称:pylearn2,代码行数:60,代码来源:test_weight_acts_strided.py

示例3: lmul

    def lmul(self, x):
        """
        dot(x, A)
        aka, do convolution with input image x

        ``x`` must be 5-dimensional with its axes ordered as
        ``self.input_axes``.  It is shuffled to ("c", 0, 1, "t", "b") when
        needed, reshaped to 4 dimensions by merging the last two entries of
        ``self.signal_shape``, made contiguous and passed through
        ``FilterActs`` together with ``self._filters``.

        If the type name of ``x`` does not contain "Cuda", ``x`` is moved to
        the GPU first and the ``FilterActs`` result is moved back to the host.

        Returns the result with its axes arranged as ``self.output_axes``.
        """

        check_cuda(str(type(self)) + ".lmul")
        # TODO Why is it CPU??
        # The input counts as a host variable when its type name lacks "Cuda".
        cpu = "Cuda" not in str(type(x))
        if cpu:
            x = gpu_from_host(x)

        assert x.ndim == 5
        x_axes = self.input_axes
        assert len(x_axes) == 5

        op_axes = ("c", 0, 1, "t", "b")
        if tuple(x_axes) != op_axes:
            x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

        # Merge the last two signal dimensions so the input becomes 4D.
        _x_4d_shape = (
            self.signal_shape[0],
            self.signal_shape[1],
            self.signal_shape[2],
            self.signal_shape[3] * self.signal_shape[4],
        )

        x = x.reshape(_x_4d_shape)

        x = gpu_contiguous(x)

        rval = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])(x, self._filters)

        # Hand back a host variable when a host variable was passed in.
        if cpu:
            rval = host_from_gpu(rval)

        rval = rval.reshape(
            (
                self.filter_shape[3],
                self.filter_shape[4],
                rval.shape[1],
                rval.shape[2],
                self.signal_shape[3],
                self.signal_shape[4],
            )
        )

        rval = diagonal_subtensor(rval, 4, 0).sum(axis=0)

        # Format the output based on the output space
        rval_axes = self.output_axes
        assert len(rval_axes) == 5

        if tuple(rval_axes) != op_axes:
            rval = rval.dimshuffle(*[op_axes.index(axis) for axis in rval_axes])

        return rval
开发者ID:YangXS,项目名称:lisa_emotiw,代码行数:60,代码来源:conv3d_c01tb.py

示例4: optimize

        def optimize(node):
            """Replace a ``CpuOpCls`` node by its ``GpuOpCls`` counterpart.

            Two patterns are rewritten:

            * ``gpu_from_host(cpu_op(...))`` becomes ``gpu_op(...)``
            * ``cpu_op(..., host_from_gpu(...), ...)`` becomes
              ``host_from_gpu(gpu_op(...))``

            Returns a one-element list holding the replacement output, or
            ``False`` when neither pattern matches.
            """
            def _gpu_twin(cpu_op):
                # Build the GPU op from the CPU op's property names and values.
                return GpuOpCls(**dict(zip(cpu_op.__props__, cpu_op._props())))

            def _comes_from_gpu(var):
                return var.owner and isinstance(var.owner.op, cuda.HostFromGpu)

            if isinstance(node.op, cuda.GpuFromHost):
                # gpu_from_host(cpu_op) -> gpu_op(gpu_from_host)
                producer = node.inputs[0].owner
                if producer and isinstance(producer.op, CpuOpCls):
                    gpu_op = _gpu_twin(producer.op)
                    return [gpu_op(*producer.inputs)]

            if isinstance(node.op, CpuOpCls):
                # cpu_op(host_from_gpu) -> host_from_gpu(gpu_op)
                if any(_comes_from_gpu(var) for var in node.inputs):
                    gpu_op = _gpu_twin(node.op)
                    gpu_out = gpu_op(*node.inputs)
                    return [cuda.host_from_gpu(gpu_out)]

            return False
开发者ID:julienr,项目名称:agro_esann16,代码行数:29,代码来源:my_ops.py

示例5: local_gpu_conv_transp3d

def local_gpu_conv_transp3d(node):
    """Rewrite a ``ConvTransp3D`` node to use ``gpu_conv_transpd``.

    The rewrite applies only when at least one input is produced by a
    ``HostFromGpu`` op and every output has dtype ``'float32'``.

    Returns a one-element list holding ``host_from_gpu`` of the GPU result,
    or ``None`` when the node is left alone.
    """
    if not isinstance(node.op, ConvTransp3D):
        return None

    fed_from_gpu = numpy.any([inp.owner and isinstance(inp.owner.op, HostFromGpu)
                              for inp in node.inputs])
    if not fed_from_gpu:
        return None

    all_float32 = numpy.all([out.type.dtype == 'float32'
                             for out in node.outputs])
    if not all_float32:
        return None

    W, b, d, H, RShape = node.inputs
    gpu_result = gpu_conv_transpd(W, b, d, H, RShape)
    return [host_from_gpu(gpu_result)]
开发者ID:317070,项目名称:Theano,代码行数:7,代码来源:GpuConvTransp3D.py

示例6: local_to_gpu

 def local_to_gpu(node):
     """
     op(host_from_gpu()) -> host_from_gpu(op)
     gpu_from_host(op) -> op(gpu_from_host)

     Returns a one-element list with the replacement output, or ``False``
     when neither pattern applies to ``node``.
     """
     def _moved_to_gpu(inputs):
         # Copy the inputs and wrap those at the ``to_gpu`` positions.
         moved = list(inputs)
         for pos in to_gpu:
             moved[pos] = cuda.gpu_from_host(moved[pos])
         return moved

     if isinstance(node.op, op):
         # op(host_from_gpu()) -> host_from_gpu(op)
         # Move the op to the GPU as soon as one of the inputs that should
         # live on the GPU already comes from there.
         fed_from_gpu = any(
             node.inputs[pos].owner and
             isinstance(node.inputs[pos].owner.op, cuda.HostFromGpu)
             for pos in to_gpu)
         if fed_from_gpu:
             gpu_inputs = _moved_to_gpu(node.inputs)
             return [cuda.host_from_gpu(op()(*gpu_inputs))]

     if node.op == cuda.gpu_from_host:
         # gpu_from_host(op) -> op(gpu_from_host)
         producer = node.inputs[0].owner
         if producer and isinstance(producer.op, op):
             gpu_inputs = _moved_to_gpu(producer.inputs)
             return [op()(*gpu_inputs)]

     return False
开发者ID:sordonia,项目名称:Theano,代码行数:27,代码来源:conv3d2d.py

示例7: local_gpu_Contiguous

def local_gpu_Contiguous(node):
  """Rewrite ``Contiguous(host_from_gpu(v))`` to use ``gpu_contiguous``.

  Returns ``[host_from_gpu(gpu_contiguous(v))]`` when the single input of a
  ``Contiguous`` node is produced by ``host_from_gpu``; otherwise ``None``.
  """
  if not isinstance(node.op, Contiguous):
    return None
  # see also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
  from theano.sandbox.cuda import host_from_gpu
  (inp,) = node.inputs
  producer = inp.owner
  if not (producer and producer.op == host_from_gpu):
    return None
  from theano.sandbox.cuda.basic_ops import gpu_contiguous
  on_gpu = gpu_contiguous(producer.inputs[0])
  return [host_from_gpu(on_gpu)]
开发者ID:atuxhe,项目名称:returnn,代码行数:8,代码来源:TheanoUtil.py

示例8: local_gpu_TorchWrapper

def local_gpu_TorchWrapper(node):
  """Rewrite a ``TorchWrapperOp`` node to use ``GpuTorchWrapperOp``.

  Applies when at least one input is produced by ``host_from_gpu``.  The GPU
  op is created with the same ``__props__`` values as the original op, and
  each input coming from ``host_from_gpu`` is replaced by the variable it was
  transferred from.

  Returns a one-element list with ``host_from_gpu`` of the GPU op's result,
  or ``None`` when the node is left alone.
  """
  if not isinstance(node.op, TorchWrapperOp):
    return None
  from theano.sandbox.cuda import host_from_gpu, gpu_from_host

  def _from_gpu(var):
    return var.owner and var.owner.op == host_from_gpu

  host_args = node.inputs
  if not any([_from_gpu(var) for var in host_args]):
    return None

  props = {}
  for key in node.op.__props__:
    props[key] = getattr(node.op, key)
  gpu_op = GpuTorchWrapperOp(**props)

  gpu_args = [var.owner.inputs[0] if _from_gpu(var) else var
              for var in host_args]
  return [host_from_gpu(gpu_op(*gpu_args))]
开发者ID:atuxhe,项目名称:returnn,代码行数:9,代码来源:TorchWrapper.py

示例9: test_grad

def test_grad():

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(-1.0, 1.0, (channels, rows, cols, batch_size)).astype("float32"), name="images")
    filters = shared(
        rng.uniform(-1.0, 1.0, (channels, filter_rows, filter_cols, num_filters)).astype("float32"), name="filters"
    )

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)
    # XXX: use verify_grad
    output_grad = grad(output.sum(), images)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode="valid")

    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)
    # XXX: use verify_grad
    output_conv2d_grad = grad(output_conv2d.sum(), images)
    f = function([], [output_grad, output_conv2d_grad])

    output_grad, output_conv2d_grad = f()

    warnings.warn(
        """test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?
                     One possibility is that theano is numerically unstable and Alex's code is better.
                     Probably theano CPU 64 bit is OK but it's worth checking the others."""
    )
    if np.abs(output_grad - output_conv2d_grad).max() > 7.7e-6:
        assert type(output_grad) == type(output_conv2d_grad)
        assert output_grad.dtype == output_conv2d_grad.dtype
        if output_grad.shape != output_conv2d_grad.shape:
            print "cuda-convnet shape: ", output_grad.shape
            print "theano shape: ", output_conv2d_grad.shape
            assert False
        err = np.abs(output_grad - output_conv2d_grad)
        print "absolute error range: ", (err.min(), err.max())
        print "mean absolute error: ", err.mean()
        print "cuda-convnet value range: ", (output_grad.min(), output_grad.max())
        print "theano value range: ", (output_conv2d_grad.min(), output_conv2d_grad.max())
        assert False
开发者ID:gbcolborne,项目名称:pylearn2,代码行数:56,代码来源:test_filter_acts.py

示例10: test_match_valid_conv_strided

def test_match_valid_conv_strided():

    # Tests that running FilterActs with stride is the same as running
    # theano's conv2D in valid mode and then downsampling

    rng = np.random.RandomState([2012,10,9])

    batch_size = 5
    rows = 9
    cols = 9
    channels = 3
    filter_rows = 3
    filter_cols = filter_rows
    stride = 3
    num_filters = 16

    images = shared(rng.uniform(-1., 1., (channels, rows, cols,
        batch_size)).astype('float32'), name='images')
    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
        filter_cols, num_filters)).astype('float32'), name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs(stride=stride)(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    images_bc01 = images.dimshuffle(3,0,1,2)
    filters_bc01 = filters.dimshuffle(3,0,1,2)
    filters_bc01 = filters_bc01[:,:,::-1,::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
            border_mode='valid', subsample=(stride, stride))

    output_conv2d_orig = output_conv2d.dimshuffle(1,2,3,0)
    output_conv2d = output_conv2d_orig  # [:, ::stride, ::stride, :]
    f = function([], [output, output_conv2d, output_conv2d_orig])

    output, output_conv2d, output_conv2d_orig = f()

    warnings.warn("""test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?
                     One possibility is that theano is numerically unstable and Alex's code is better.
                     Probably theano CPU 64 bit is OK but it's worth checking the others.""")
    if np.abs(output - output_conv2d).max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print 'cuda-convnet shape: ',output.shape
            print 'theano shape: ',output_conv2d.shape
            assert False
        err = np.abs(output - output_conv2d)
        print 'absolute error range: ', (err.min(), err.max())
        print 'mean absolute error: ', err.mean()
        print 'cuda-convnet value range: ', (output.min(), output.max())
        print 'theano value range: ', (output_conv2d.min(), output_conv2d.max())
        assert False
开发者ID:Alienfeel,项目名称:pylearn2,代码行数:56,代码来源:test_filter_acts.py

示例11: test_match_valid_conv

def test_match_valid_conv():

    # Tests that running FilterActs with no padding is the same as running
    # theano's conv2D in valid mode

    rng = np.random.RandomState([2012,10,9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(-1., 1., (channels, rows, cols,
        batch_size)).astype('float32'), name='images')
    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
        filter_cols, num_filters)).astype('float32'), name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    images_bc01 = images.dimshuffle(3,0,1,2)
    filters_bc01 = filters.dimshuffle(3,0,1,2)
    filters_bc01 = filters_bc01[:,:,::-1,::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
            border_mode='valid')

    output_conv2d = output_conv2d.dimshuffle(1,2,3,0)

    try:
        f = function([], [output, output_conv2d])
    except:
        raise KnownFailureTest("cuda-convnet code depends on an unmerged theano feature.")

    output, output_conv2d = f()

    warnings.warn("test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?")
    if np.abs(output - output_conv2d).max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print 'cuda-convnet shape: ',output.shape
            print 'theano shape: ',output_conv2d.shape
            assert False
        err = np.abs(output - output_conv2d)
        print 'absolute error range: ', (err.min(), err.max())
        print 'mean absolute error: ', err.mean()
        print 'cuda-convnet value range: ', (output.min(), output.max())
        print 'theano value range: ', (output_conv2d.min(), output_conv2d.max())
        assert False
开发者ID:deigen,项目名称:pylearn,代码行数:56,代码来源:test_filter_acts.py

示例12: local_gpu_togpu_breakpoint

 def local_gpu_togpu_breakpoint(node):
     """Rewrite a ``Breakpoint`` node whose first input comes from the GPU.

     Each input produced by ``host_from_gpu`` is replaced by the variable it
     was transferred from, and the node is rebuilt through
     ``node.op.make_gpu_node``.

     Returns a one-element list with ``host_from_gpu`` of the rebuilt node,
     or ``False`` when the node is left alone.
     """
     def _from_gpu(var):
         return var.owner and var.owner.op == host_from_gpu

     if isinstance(node.op, Breakpoint) and _from_gpu(node.inputs[0]):
         gpu_inputs = [var.owner.inputs[0] if _from_gpu(var) else var
                       for var in node.inputs]
         gpu_node = node.op.make_gpu_node(*gpu_inputs)
         return [host_from_gpu(gpu_node)]
     return False
开发者ID:SamLiao-github,项目名称:Depth-Map-Prediction,代码行数:10,代码来源:thutil.py

示例13: insert_gpu_filter_acts

def insert_gpu_filter_acts(node):
    """Rewrite a ``FilterActs`` node to use ``GpuFilterActs``.

    Applies when ``any_from_gpu(images, filters)`` or
    ``any_gpu_client(*node.outputs)`` is true.  The GPU op reuses the
    original op's ``module_stride`` and is created with ``partial_sum=1``.

    Returns a one-element list with ``host_from_gpu`` of the GPU result, or
    ``None`` when the node is left alone.
    """
    if not isinstance(node.op, FilterActs):
        return None

    images, filters = node.inputs
    if not (any_from_gpu(images, filters) or any_gpu_client(*node.outputs)):
        return None

    gpu_filter_acts = GpuFilterActs(module_stride=node.op.module_stride,
                                    partial_sum=1)
    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)
    return [host_from_gpu(gpu_filter_acts(gpu_images, gpu_filters))]
开发者ID:Alienfeel,项目名称:pylearn2,代码行数:10,代码来源:gpu_unshared_conv.py

示例14: test_attention_time_gauss

def test_attention_time_gauss():
  """Run the AttentionTimeGauss custom op on random inputs and print the result.

  The op obtained from ``get_attention`` is called on symbolic inputs, its
  outputs are wrapped in ``cuda.host_from_gpu``, and the compiled function is
  evaluated once.  The function ends with an unconditional ``assert False``,
  so it always fails after printing.
  """
  n_T, n_batch, n_inp_dim = 4, 2, 3
  n_cells = 5
  n_B = 5

  custom_op = get_attention(RecurrentTransform.AttentionTimeGauss,
                            n_out=n_cells, n_batches=n_batch, n_input_t=n_B, n_input_dim=n_inp_dim)
  att = custom_op.recurrent_transform

  def _rand32(shape):
    # Uniform random values cast to float32.
    return numpy.random.ranf(shape).astype('float32')

  # Concrete values.  Several of them are never passed to the compiled
  # function, but the draws are kept in their original order.
  Z_val = _rand32((n_T, n_batch, 4 * n_cells))
  W_re_val = _rand32((n_cells, 4 * n_cells))
  W_att_quadr_val = numpy.eye(n_B).astype('float32')
  W_att_in_val = _rand32((n_cells, 4 * n_cells))
  B_val = _rand32((n_B, n_batch, n_cells))
  c_val = _rand32((n_batch, n_cells))
  y0_val = _rand32((n_batch, n_cells))
  i_val = numpy.ones((n_T, n_batch), dtype='int8')

  # Symbolic variables.
  Z = T.ftensor3('Z')
  B = T.ftensor3('B')  # base
  W_re = T.fmatrix('W_re')
  W_att_quadr = T.fmatrix("W_att_quadr")
  W_att_in = T.fmatrix('W_att_in')
  c = T.fmatrix('c')  # initial state
  y0 = T.fmatrix('y0')  # initial activation
  i = T.matrix('i', dtype='int8')
  t0 = T.fvector('t0')

  explicit_inputs = [Z, c, y0, i, W_re]
  custom_vars = att.get_sorted_custom_vars()
  initial_state_vars = att.get_sorted_state_vars_initial()
  custom_op_inputs = explicit_inputs + custom_vars + initial_state_vars
  print("input args num:", len(custom_op_inputs))
  print("input args:", custom_op_inputs)

  custom_op_outputs = custom_op(*custom_op_inputs)
  print("output args num:", len(custom_op_outputs))
  host_outputs = [cuda.host_from_gpu(v) for v in custom_op_outputs]
  f = theano.function(inputs=[Z, c, y0, i, W_re], outputs=host_outputs)

  res = f(Z_val, c_val, y0_val, i_val, W_re_val)

  # res: (output) Y, (gates and cell state) H, (final cell state) d, state vars sequences
  Y, H, d = res[:3]
  state_var_seqs = res[3:]

  print(res)

  # NOTE(review): unconditional failure, presumably so that the test runner
  # shows the printed output -- confirm before relying on this test.
  assert False
开发者ID:rwth-i6,项目名称:returnn,代码行数:55,代码来源:test_OpLSTMCustom.py

示例15: local_gpu_conv_grad3d

def local_gpu_conv_grad3d(node):
    """Rewrite a ``ConvGrad3D`` node to use ``gpu_conv_grad3d``.

    The rewrite applies only when at least one input is produced by a
    ``HostFromGpu`` op and every output has dtype ``'float32'``.  The first
    and last inputs are passed through ``as_cuda_ndarray_variable``; the two
    middle inputs are forwarded unchanged.

    Returns a one-element list holding ``host_from_gpu`` of the GPU result,
    or ``None`` when the node is left alone.
    """
    if not isinstance(node.op, ConvGrad3D):
        return None

    fed_from_gpu = numpy.any([inp.owner and isinstance(inp.owner.op, HostFromGpu)
                              for inp in node.inputs])
    if not fed_from_gpu:
        return None

    all_float32 = numpy.all([out.type.dtype == 'float32'
                             for out in node.outputs])
    if not all_float32:
        return None

    V, d, WShape, dCdH = node.inputs
    gpu_V = as_cuda_ndarray_variable(V)
    gpu_dCdH = as_cuda_ndarray_variable(dCdH)
    gpu_result = gpu_conv_grad3d(gpu_V, d, WShape, gpu_dCdH)
    return [host_from_gpu(gpu_result)]
开发者ID:5730279821-TA,项目名称:Theano,代码行数:11,代码来源:GpuConvGrad3D.py


注:本文中的theano.sandbox.cuda.host_from_gpu函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。