This article collects typical usage examples of the host_from_gpu function from theano.sandbox.cuda in Python. If you are unsure what host_from_gpu does, how to call it, or what real-world usage looks like, the curated code examples below should help.
Fifteen code examples of host_from_gpu are shown below, sorted by popularity by default.
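Before the examples, a minimal sketch of the basic pattern may be useful. host_from_gpu copies a GPU-resident CudaNdarray variable back into an ordinary host tensor and is the counterpart of gpu_from_host. The snippet below is illustrative only: the variable names are made up, and it assumes the legacy theano.sandbox.cuda backend is active (device=gpu, floatX=float32); it simply round-trips a matrix through the GPU.

import theano
import theano.tensor as T
from theano.sandbox.cuda import gpu_from_host, host_from_gpu

x = T.fmatrix('x')            # float32 matrix on the host
x_gpu = gpu_from_host(x)      # transfer to a CudaNdarray variable on the GPU
y = host_from_gpu(x_gpu)      # transfer back to a host tensor
f = theano.function([x], y)

In real graphs you rarely insert these transfers by hand; Theano's optimizer moves supported ops to the GPU and wraps them with gpu_from_host/host_from_gpu automatically, which is exactly the pattern most of the optimizer examples below implement.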
Example 1: cpu_var_to_gpu_var
def cpu_var_to_gpu_var(x):
    from theano.sandbox import cuda
    type = cuda.CudaNdarrayType(broadcastable=x.broadcastable)
    name = 'gpu_%s' % x.name
    name = None  # the name built above is immediately discarded in the original source
    gpu_var = cuda.CudaNdarrayVariable(type=type, name=name)
    cpu_var = cuda.host_from_gpu(gpu_var)
    return gpu_var, cpu_var
    return cuda.host_from_gpu(cuda.CudaNdarrayVariable(type=type, name=name))  # unreachable
Example 2: test_weight_acts_strided
def test_weight_acts_strided():
    # Tests WeightActs with all possible strides

    rng = np.random.RandomState([2012, 10, 9])

    # Each list in shape_list:
    # [img_shape, filter_shape]
    # [(channels, rows, cols, batch_size), (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [[(1, 7, 8, 5),    (1, 2, 2, 16)],
                  [(3, 7, 8, 5),    (3, 3, 3, 16)],
                  [(16, 11, 11, 4), (16, 4, 4, 16)],
                  [(3, 20, 20, 3),  (3, 5, 5, 16)],
                  [(3, 21, 21, 3),  (3, 6, 6, 16)],
                  ]

    for partial_sum in [0, 1, 4]:
        print "partial_sum: %d"%(partial_sum)
        for test_idx in xrange(len(shape_list)):
            images = rng.uniform(-1., 1., shape_list[test_idx][0]).astype('float32')
            filters = rng.uniform(-1., 1., shape_list[test_idx][1]).astype('float32')
            gpu_images = float32_shared_constructor(images, name='images')
            print "test case %d..."%(test_idx+1)

            for ii in xrange(filters.shape[1]):
                stride = ii + 1

                output_python = FilterActs_python(images, filters, stride)
                _, h_rows, h_cols, _ = output_python.shape
                if partial_sum == 4:
                    if (h_rows*h_cols)%partial_sum != 0:
                        print "skip test case %d, stride %d when partial_sum is equal to %d"%(test_idx+1, stride, partial_sum)
                        break

                hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32')
                gpu_hidacts = float32_shared_constructor(hidacts, name='hidacts')

                weights_grad_python = WeightActs_python(images, hidacts, filters.shape[1], filters.shape[2], stride)

                weights_grad = WeightActs(partial_sum=partial_sum, stride=stride)(
                    gpu_images,
                    gpu_hidacts,
                    as_tensor_variable((filters.shape[1], filters.shape[2]))
                )[0]
                weights_grad = host_from_gpu(weights_grad)
                f = function([], weights_grad)
                weights_grad_val = f()

                warnings.warn("""test_weight_acts_strided success criterion is not very strict.""")

                if np.abs(weights_grad_val - weights_grad_python).max() > 3.4e-5:
                    assert type(weights_grad_val) == type(weights_grad_python)
                    assert weights_grad_val.dtype == weights_grad_python.dtype
                    if weights_grad_val.shape != weights_grad_python.shape:
                        print 'cuda-convnet shape: ', weights_grad_val.shape
                        print 'python conv shape: ', weights_grad_python.shape
                        assert False
                    err = np.abs(weights_grad_val - weights_grad_python)
                    print 'stride %d'%stride
                    print 'absolute error range: ', (err.min(), err.max())
                    print 'mean absolute error: ', err.mean()
                    print 'cuda-convnet value range: ', (weights_grad_val.min(), weights_grad_val.max())
                    print 'python conv value range: ', (weights_grad_python.min(), weights_grad_python.max())
Example 3: lmul
def lmul(self, x):
    """
    dot(x, A)

    aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    # TODO Why is it CPU??
    print "Por que?!?!", type(x)
    cpu = "Cuda" not in str(type(x))
    if cpu:
        x = gpu_from_host(x)

    assert x.ndim == 5
    x_axes = self.input_axes
    assert len(x_axes) == 5

    op_axes = ("c", 0, 1, "t", "b")

    if tuple(x_axes) != op_axes:
        print "ssssssssssssssss"
        x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

    _x_4d_shape = (
        self.signal_shape[0],
        self.signal_shape[1],
        self.signal_shape[2],
        self.signal_shape[3] * self.signal_shape[4],
    )

    x = x.reshape(_x_4d_shape)

    x = gpu_contiguous(x)

    rval = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])(x, self._filters)

    if cpu:
        rval = host_from_gpu(rval)

    rval = rval.reshape(
        (
            self.filter_shape[3],
            self.filter_shape[4],
            rval.shape[1],
            rval.shape[2],
            self.signal_shape[3],
            self.signal_shape[4],
        )
    )

    rval = diagonal_subtensor(rval, 4, 0).sum(axis=0)

    # Format the output based on the output space
    rval_axes = self.output_axes
    assert len(rval_axes) == 5

    if tuple(rval_axes) != op_axes:
        rval = rval.dimshuffle(*[op_axes.index(axis) for axis in rval_axes])

    return rval
Example 4: optimize
def optimize(node):
    if isinstance(node.op, cuda.GpuFromHost):
        # gpu_from_host(cpu_op) -> gpu_op(gpu_from_host)
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op, CpuOpCls):
            cpu_op = host_input.owner.op
            args = dict(zip(cpu_op.__props__, cpu_op._props()))
            gpu_op = GpuOpCls(**args)
            inputs = host_input.owner.inputs
            out = gpu_op(*inputs)
            return [out]
    if isinstance(node.op, CpuOpCls):
        # cpu_op(host_from_gpu) -> host_from_gpu(gpu_op)
        def _is_variable_on_gpu(var):
            return var.owner and isinstance(var.owner.op, cuda.HostFromGpu)
        inputs = node.inputs
        inputs_on_gpu = map(_is_variable_on_gpu, inputs)
        if any(inputs_on_gpu):
            cpu_op = node.op
            args = dict(zip(cpu_op.__props__, cpu_op._props()))
            gpu_op = GpuOpCls(**args)
            out = gpu_op(*inputs)
            out = cuda.host_from_gpu(out)
            return [out]
    return False
Example 5: local_gpu_conv_transp3d
def local_gpu_conv_transp3d(node):
    if isinstance(node.op, ConvTransp3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                W, b, d, H, RShape = node.inputs
                return [host_from_gpu(gpu_conv_transpd(W, b, d, H, RShape))]
Example 6: local_to_gpu
def local_to_gpu(node):
    """
    op(host_from_gpu()) -> host_from_gpu(op)
    gpu_from_host(op) -> op(gpu_from_host)
    """
    if isinstance(node.op, op):
        # op(host_from_gpu()) -> host_from_gpu(op)
        # If any of the inputs that go on the GPU are already on the GPU,
        # move the op to the GPU.
        if any(node.inputs[idx].owner and
               isinstance(node.inputs[idx].owner.op, cuda.HostFromGpu)
               for idx in to_gpu):
            new_inp = list(node.inputs)
            for idx in to_gpu:
                new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
            return [cuda.host_from_gpu(op()(*new_inp))]
    if node.op == cuda.gpu_from_host:
        # gpu_from_host(op) -> op(gpu_from_host)
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op,
                                           op):
            op_node = host_input.owner
            new_inp = list(op_node.inputs)
            for idx in to_gpu:
                new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
            return [op()(*new_inp)]
    return False
Example 7: local_gpu_Contiguous
def local_gpu_Contiguous(node):
    if isinstance(node.op, Contiguous):
        # see also: https://github.com/Theano/Theano/blob/master/theano/sandbox/cuda/opt.py
        from theano.sandbox.cuda import host_from_gpu
        x, = node.inputs
        if x.owner and x.owner.op == host_from_gpu:
            from theano.sandbox.cuda.basic_ops import gpu_contiguous
            return [host_from_gpu(gpu_contiguous(x.owner.inputs[0]))]
Example 8: local_gpu_TorchWrapper
def local_gpu_TorchWrapper(node):
    if isinstance(node.op, TorchWrapperOp):
        from theano.sandbox.cuda import host_from_gpu, gpu_from_host
        args = node.inputs
        if any([(x.owner and x.owner.op == host_from_gpu) for x in args]):
            gpu_op = GpuTorchWrapperOp(**{key: getattr(node.op, key) for key in node.op.__props__})
            args = [x.owner.inputs[0] if (x.owner and x.owner.op == host_from_gpu) else x
                    for x in args]
            return [host_from_gpu(gpu_op(*args))]
Example 9: test_grad
def test_grad():
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(-1.0, 1.0, (channels, rows, cols, batch_size)).astype("float32"), name="images")
    filters = shared(
        rng.uniform(-1.0, 1.0, (channels, filter_rows, filter_cols, num_filters)).astype("float32"), name="filters"
    )

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    # XXX: use verify_grad
    output_grad = grad(output.sum(), images)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode="valid")
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    # XXX: use verify_grad
    output_conv2d_grad = grad(output_conv2d.sum(), images)

    f = function([], [output_grad, output_conv2d_grad])

    output_grad, output_conv2d_grad = f()

    warnings.warn(
        """test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?
        One possibility is that theano is numerically unstable and Alex's code is better.
        Probably theano CPU 64 bit is OK but it's worth checking the others."""
    )

    if np.abs(output_grad - output_conv2d_grad).max() > 7.7e-6:
        assert type(output_grad) == type(output_conv2d_grad)
        assert output_grad.dtype == output_conv2d_grad.dtype
        if output_grad.shape != output_conv2d_grad.shape:
            print "cuda-convnet shape: ", output_grad.shape
            print "theano shape: ", output_conv2d_grad.shape
            assert False
        err = np.abs(output_grad - output_conv2d_grad)
        print "absolute error range: ", (err.min(), err.max())
        print "mean absolute error: ", err.mean()
        print "cuda-convnet value range: ", (output_grad.min(), output_grad.max())
        print "theano value range: ", (output_conv2d_grad.min(), output_conv2d_grad.max())
        assert False
Example 10: test_match_valid_conv_strided
def test_match_valid_conv_strided():
    # Tests that running FilterActs with stride is the same as running
    # theano's conv2D in valid mode and then downsampling

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 9
    cols = 9
    channels = 3
    filter_rows = 3
    filter_cols = filter_rows
    stride = 3
    num_filters = 16

    images = shared(rng.uniform(-1., 1., (channels, rows, cols,
                    batch_size)).astype('float32'), name='images')
    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
                     filter_cols, num_filters)).astype('float32'), name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs(stride=stride)(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
                           border_mode='valid', subsample=(stride, stride))
    output_conv2d_orig = output_conv2d.dimshuffle(1, 2, 3, 0)
    output_conv2d = output_conv2d_orig  # [:, ::stride, ::stride, :]

    f = function([], [output, output_conv2d, output_conv2d_orig])

    output, output_conv2d, output_conv2d_orig = f()

    warnings.warn("""test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?
        One possibility is that theano is numerically unstable and Alex's code is better.
        Probably theano CPU 64 bit is OK but it's worth checking the others.""")

    if np.abs(output - output_conv2d).max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print 'cuda-convnet shape: ', output.shape
            print 'theano shape: ', output_conv2d.shape
            assert False
        err = np.abs(output - output_conv2d)
        print 'absolute error range: ', (err.min(), err.max())
        print 'mean absolute error: ', err.mean()
        print 'cuda-convnet value range: ', (output.min(), output.max())
        print 'theano value range: ', (output_conv2d.min(), output_conv2d.max())
        assert False
Example 11: test_match_valid_conv
def test_match_valid_conv():
    # Tests that running FilterActs with no padding is the same as running
    # theano's conv2D in valid mode

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(-1., 1., (channels, rows, cols,
                    batch_size)).astype('float32'), name='images')
    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
                     filter_cols, num_filters)).astype('float32'), name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
                           border_mode='valid')
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    try:
        f = function([], [output, output_conv2d])
    except:
        raise KnownFailureTest("cuda-convnet code depends on an unmerged theano feature.")

    output, output_conv2d = f()

    warnings.warn("test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?")

    if np.abs(output - output_conv2d).max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print 'cuda-convnet shape: ', output.shape
            print 'theano shape: ', output_conv2d.shape
            assert False
        err = np.abs(output - output_conv2d)
        print 'absolute error range: ', (err.min(), err.max())
        print 'mean absolute error: ', err.mean()
        print 'cuda-convnet value range: ', (output.min(), output.max())
        print 'theano value range: ', (output_conv2d.min(), output_conv2d.max())
        assert False
Example 12: local_gpu_togpu_breakpoint
def local_gpu_togpu_breakpoint(node):
    if isinstance(node.op, Breakpoint):
        result_input = node.inputs[0]
        if result_input.owner and result_input.owner.op == host_from_gpu:
            gpu_inputs = [x.owner.inputs[0]
                          if x.owner and x.owner.op == host_from_gpu
                          else x
                          for x in node.inputs]
            return [host_from_gpu(node.op.make_gpu_node(*gpu_inputs))]
    return False
Example 13: insert_gpu_filter_acts
def insert_gpu_filter_acts(node):
    if isinstance(node.op, FilterActs):
        images, filters = node.inputs
        if any_from_gpu(images, filters) or any_gpu_client(*node.outputs):
            gpu_filter_acts = GpuFilterActs(
                module_stride=node.op.module_stride,
                partial_sum=1)
            return [host_from_gpu(gpu_filter_acts(
                gpu_from_host(images),
                gpu_from_host(filters)))]
Example 14: test_attention_time_gauss
def test_attention_time_gauss():
    n_T = 4
    n_batch = 2
    n_inp_dim = 3
    n_cells = 5
    n_B = 5

    custom_op = get_attention(RecurrentTransform.AttentionTimeGauss,
                              n_out=n_cells, n_batches=n_batch, n_input_t=n_B, n_input_dim=n_inp_dim)
    att = custom_op.recurrent_transform

    Z_val = numpy.random.ranf((n_T, n_batch, 4 * n_cells)).astype('float32')
    W_re_val = numpy.random.ranf((n_cells, 4 * n_cells)).astype('float32')
    W_att_quadr_val = numpy.eye(n_B).astype('float32')
    W_att_in_val = numpy.random.ranf((n_cells, 4 * n_cells)).astype('float32')
    B_val = numpy.random.ranf((n_B, n_batch, n_cells)).astype('float32')
    c_val = numpy.random.ranf((n_batch, n_cells)).astype('float32')
    y0_val = numpy.random.ranf((n_batch, n_cells)).astype('float32')
    i_val = numpy.ones((n_T, n_batch), dtype='int8')

    Z = T.ftensor3('Z')
    B = T.ftensor3('B')  # base
    W_re = T.fmatrix('W_re')
    W_att_quadr = T.fmatrix("W_att_quadr")
    W_att_in = T.fmatrix('W_att_in')
    c = T.fmatrix('c')  # initial state
    y0 = T.fmatrix('y0')  # initial activation
    i = T.matrix('i', dtype='int8')
    t0 = T.fvector('t0')

    custom_vars = att.get_sorted_custom_vars()
    initial_state_vars = att.get_sorted_state_vars_initial()

    custom_op_inputs = [Z, c, y0, i, W_re] + custom_vars + initial_state_vars
    print("input args num:", len(custom_op_inputs))
    print("input args:", custom_op_inputs)
    custom_op_outputs = custom_op(*custom_op_inputs)
    print("output args num:", len(custom_op_outputs))
    custom_op_outputs = [cuda.host_from_gpu(v) for v in custom_op_outputs]
    f = theano.function(inputs=[Z, c, y0, i, W_re], outputs=custom_op_outputs)

    res = f(Z_val, c_val, y0_val, i_val, W_re_val)
    #print res

    # res: (output) Y, (gates and cell state) H, (final cell state) d, state vars sequences
    (Y, H, d), state_var_seqs = res[:3], res[3:]

    # print "running custom dumped data"
    # custom_op_inputs = [theano.shared(numpy.load("../op.i.%i" % i)) for i in range(12)]
    # custom_op_outputs = custom_op(*custom_op_inputs)
    # custom_op_outputs = [cuda.host_from_gpu(v) for v in custom_op_outputs]
    # f = theano.function(inputs=[], outputs=custom_op_outputs)
    # res = f()

    print(res)
    assert False
Example 15: local_gpu_conv_grad3d
def local_gpu_conv_grad3d(node):
    if isinstance(node.op, ConvGrad3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                V, d, WShape, dCdH = node.inputs
                return [host_from_gpu(gpu_conv_grad3d(
                    as_cuda_ndarray_variable(V),
                    d,
                    WShape,
                    as_cuda_ndarray_variable(dCdH)))]