本文整理汇总了Python中theano.sandbox.cuda.basic_ops.gpu_from_host函数的典型用法代码示例。如果您正苦于以下问题：Python gpu_from_host函数的具体用法？Python gpu_from_host怎么用？Python gpu_from_host使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了gpu_from_host函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: use_gpu_cumsum
def use_gpu_cumsum(node):
    """Local rewrite that runs an additive float32 ``CumOp`` through ``GpuCumsum``.

    The rewrite applies only when the node's op is exactly ``CumOp``, its
    first input is float32 and that input is produced by a ``HostFromGpu``
    op, the op mode is ``'add'``, and (for an explicit axis) the input rank
    does not exceed ``GpuCumsum.SUPPORTED_NDIMS``.

    Returns a one-element list with the host-side replacement output, or
    ``None`` when the node is left untouched.
    """
    if type(node.op) is not CumOp:
        return None

    x = node.inputs[0]
    if x.dtype != 'float32' or not x.owner:
        return None
    if not isinstance(x.owner.op, HostFromGpu):
        return None

    # Only the additive cumulative op is handled here.
    if node.op.mode != 'add':
        return None

    axis = node.op.axis
    if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS:
        return None

    gpu_x = gpu_from_host(x)

    if axis is None:
        # ``GpuCumsum`` assumes the array has been flattened if needed, so
        # a missing axis becomes "axis 0 of the flattened input".
        if gpu_x.ndim > 1:
            gpu_x = gpu_flatten(gpu_x)
        axis = 0

    result = host_from_gpu(GpuCumsum(axis)(gpu_x))
    result.tag.values_eq_approx = values_eq_approx_high_tol
    return [result]
示例2: use_gpu_images2neibs
def use_gpu_images2neibs(node):
    """Local rewrite that moves an ``Images2Neibs`` node onto the GPU.

    The first input is transferred with ``gpu_from_host``; the second and
    third inputs are passed through unchanged and the op's ``mode`` is
    forwarded to ``gpu_images2neibs``.

    Returns a one-element list with the host-side replacement output, or
    ``None`` when the node is not rewritten.
    """
    # The legacy ``theano.sandbox.cuda`` backend only stores float32 data, so
    # ``gpu_from_host`` cannot be applied to any other dtype. Skip the
    # rewrite instead of raising from inside the optimizer.
    if type(node.op) is Images2Neibs and node.inputs[0].dtype == 'float32':
        return [
            host_from_gpu(
                gpu_images2neibs(
                    gpu_from_host(node.inputs[0]),
                    node.inputs[1],
                    node.inputs[2],
                    mode=node.op.mode,
                )
            )
        ]
    return None
示例3: use_gpu_images2neibs
def use_gpu_images2neibs(node):
    """Local rewrite that moves a float32 ``Images2Neibs`` node onto the GPU.

    Applies only when the op is exactly ``Images2Neibs``, the first input is
    float32 and the op mode is ``'valid'`` or ``'wrap_centered'``.

    Returns a one-element list with the host-side replacement output, or
    ``None`` when the node is not rewritten.
    """
    op = node.op
    if type(op) is not Images2Neibs:
        return None
    if node.inputs[0].dtype != 'float32':
        return None
    if op.mode not in ['valid', 'wrap_centered']:
        return None

    # Only the first input is transferred; the other two are passed as-is.
    gpu_images = gpu_from_host(node.inputs[0])
    gpu_neibs = gpu_images2neibs(gpu_images, node.inputs[1], node.inputs[2],
                                 mode=op.mode)
    return [host_from_gpu(gpu_neibs)]
示例4: local_gpu_minres
def local_gpu_minres(node):
    """Local rewrite that re-applies a ``MinresQLP`` op on GPU inputs.

    When at least one input of the node is produced by ``host_from_gpu``,
    every input that is not already a ``CudaNdarrayType`` is transferred
    with ``gpu_from_host``, the op's ``gpu`` attribute is set to 1 and the op
    is applied again. Outputs of ``CudaNdarrayType`` are brought back with
    ``host_from_gpu``.

    Returns the list of replacement outputs, ``False`` when no input comes
    from the GPU, or ``None`` when the op is not a ``MinresQLP``.
    """
    if not isinstance(node.op, MinresQLP):
        return None

    fed_from_gpu = any(
        inp.owner and inp.owner.op == host_from_gpu for inp in node.inputs
    )
    if not fed_from_gpu:
        return False

    gpu_inputs = [
        inp if isinstance(inp.type, CudaNdarrayType) else gpu_from_host(inp)
        for inp in node.inputs
    ]

    # NOTE: this flags the existing op instance in place (no copy is made).
    node.op.gpu = 1
    raw_outputs = node.op(*gpu_inputs)

    return [
        host_from_gpu(out) if isinstance(out.type, CudaNdarrayType) else out
        for out in raw_outputs
    ]
示例5: __init__
def __init__(self, **kwargs):
    """Build (or load) the network and compile its forward function.

    Recognised keyword arguments:
        num_layers, num_filters, filter_size: architecture parameters; all
            three are required when no ``weights_folder`` is given.
        rng: random state (defaults to ``np.random.RandomState(42)``).
        weights_folder: when given, weights are loaded instead of created.
        activation: activation name (default ``'relu'``).
        cost_func: cost function name (default ``'MSE'``).

    When a new network is requested without the required architecture
    parameters, an error message is printed and the process exits with a
    non-zero status.
    """
    self.num_layers = kwargs.get('num_layers', None)
    self.num_filters = kwargs.get('num_filters', None)
    self.filter_size = kwargs.get('filter_size', None)
    self.rng = kwargs.get('rng', np.random.RandomState(42))
    self.load_folder = kwargs.get('weights_folder', None)
    self.activation = kwargs.get('activation', 'relu')
    self.cost_func = kwargs.get('cost_func', 'MSE')

    # Initialize (or load) the weights for the network
    if self.load_folder is None:
        required = (self.num_layers, self.num_filters, self.filter_size)
        if any(value is None for value in required):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("ERROR: Insufficient parameters for generating new network")
            # Exit with a failure status so callers/scripts can detect it.
            sys.exit(1)
        # Deliberately outside any try/except: a failure while defining the
        # network or initialising weights is a real error and must not be
        # reported as "insufficient parameters".
        self.__define_network()
        self.__init_weights()
    else:
        self.__load_weights()

    # Input variable for the symbolic representation of the network
    self.X = T.tensor4('X')

    # Create the network model
    self.__model()

    # Create a predicter based on this network model. On a non-CPU device the
    # output is wrapped with gpu_from_host and borrow=True.
    if theano.config.device == 'cpu':
        outputs = self.out
    else:
        outputs = Out(gpu_from_host(self.out), borrow=True)
    self.forward = theano.function(inputs=[self.X], outputs=outputs,
                                   allow_input_downcast=True)
示例6: ctc_cost
def ctc_cost(acts, input_lengths, flat_labels, label_lengths):
    """Dispatch to the GPU or CPU CTC cost based on the device configuration.

    The GPU implementation is used when ``theano.config.device`` starts with
    ``"gpu"`` or ``theano.sandbox.cuda.cuda_enabled`` is truthy; otherwise
    the CPU implementation is used. All four arguments are forwarded
    unchanged, except that ``acts`` is transferred with ``gpu_from_host``
    when it is not already a ``CudaNdarrayType`` variable.
    """
    # This should be properly integrated into the theano optimization catalog.
    # Until then, this forces the choice based on device configuration.
    use_gpu = (theano.config.device.startswith("gpu")
               or theano.sandbox.cuda.cuda_enabled)
    if not use_gpu:
        return cpu_ctc_cost(acts, input_lengths, flat_labels, label_lengths)

    already_on_device = isinstance(acts.type, CudaNdarrayType)
    if not already_on_device:
        # This transfer should get optimized away.
        acts = gpu_from_host(acts)
    return gpu_ctc_cost(acts, input_lengths, flat_labels, label_lengths)
示例7: local_gpu_argmax
def local_gpu_argmax(node):
    """Local rewrite that replaces ``KArgmax`` with ``GpuKArgmax``.

    Two patterns are handled:

    * a ``KArgmax`` node with float32 input/values where at least one input
      is produced by ``HostFromGpu``: the values are returned through
      ``host_from_gpu`` and the indices additionally cast to ``"int32"``;
    * a ``GpuFromHost`` node whose input is produced by a float32
      ``KArgmax``.

    Returns the list of replacement outputs, or ``None`` when nothing
    matches.
    """
    op = node.op

    if type(op) is KArgmax:
        (scores,) = node.inputs
        top_vals, _top_idx = node.outputs
        if scores.dtype == top_vals.dtype == 'float32' and any(
                inp.owner and isinstance(inp.owner.op,
                                         theano.sandbox.cuda.HostFromGpu)
                for inp in node.inputs):
            gpu_vals, gpu_idx = GpuKArgmax(op.K)(gpu_from_host(scores))
            return [host_from_gpu(gpu_vals),
                    T.cast(host_from_gpu(gpu_idx), "int32")]

    if isinstance(op, theano.sandbox.cuda.GpuFromHost):
        producer = node.inputs[0].owner
        if producer and type(producer.op) is KArgmax:
            (scores,) = producer.inputs
            top_vals, _top_idx = producer.outputs
            if scores.dtype == top_vals.dtype == 'float32':
                gpu_vals, gpu_idx = GpuKArgmax(producer.op.K)(
                    gpu_from_host(scores))
                # NOTE(review): this returns two replacements for the
                # transfer node and applies gpu_from_host to the GPU op's
                # outputs -- confirm this is what the optimizer expects.
                return [gpu_from_host(gpu_vals), gpu_from_host(gpu_idx)]
示例8: local_assigner
def local_assigner(node):
    """Local rewrite that replaces ``Assigner`` with ``GpuAssigner``.

    Two patterns are handled:

    * an ``Assigner`` node with float32 first input/output where at least
      one input is produced by ``HostFromGpu``;
    * a ``GpuFromHost`` node whose input is produced by a float32
      ``Assigner``.

    In both cases the first and third inputs are transferred with
    ``gpu_from_host`` and the second (index) input is passed unchanged.

    Returns a one-element list with the replacement output, or ``None``
    when nothing matches.
    """
    op = node.op

    if type(op) is Assigner:
        target, indices, grads = node.inputs
        (out_vals,) = node.outputs
        if target.dtype == out_vals.dtype == 'float32' and any(
                inp.owner and isinstance(inp.owner.op,
                                         theano.sandbox.cuda.HostFromGpu)
                for inp in node.inputs):
            assigner = GpuAssigner()
            gpu_result = assigner(gpu_from_host(target), indices,
                                  gpu_from_host(grads))
            return [host_from_gpu(gpu_result)]

    if isinstance(op, theano.sandbox.cuda.GpuFromHost):
        producer = node.inputs[0].owner
        if producer and type(producer.op) is Assigner:
            target, indices, grads = producer.inputs
            (out_vals,) = producer.outputs
            if target.dtype == out_vals.dtype == 'float32':
                assigner = GpuAssigner()
                gpu_result = assigner(gpu_from_host(target), indices,
                                      gpu_from_host(grads))
                # NOTE(review): gpu_from_host is applied to the GPU op's
                # output here -- confirm GpuAssigner's output type.
                return [gpu_from_host(gpu_result)]
示例9: local_gpu_multinomial
def local_gpu_multinomial(node):
    """Local rewrite that replaces ``MultinomialFromUniform`` with its GPU op.

    Two patterns are handled:

    * a ``MultinomialFromUniform`` node whose two inputs and output are all
      float32 and where at least one input is produced by ``HostFromGpu``;
    * a ``GpuFromHost`` node whose input is produced by an all-float32
      ``MultinomialFromUniform``.

    In both cases the result of the GPU op is transposed (``.T``).

    Returns a one-element list with the replacement output, or ``None``
    when nothing matches.
    """
    if type(node.op) is MultinomialFromUniform:
        pvals, unis = node.inputs
        (sample,) = node.outputs
        if pvals.dtype == unis.dtype == sample.dtype == 'float32' and any(
                inp.owner and isinstance(inp.owner.op,
                                         theano.sandbox.cuda.HostFromGpu)
                for inp in node.inputs):
            sampler = GpuMultinomialFromUniform(node.op.odtype)
            gpu_inputs = [gpu_from_host(inp) for inp in node.inputs]
            return [host_from_gpu(sampler(*gpu_inputs)).T]

    if isinstance(node.op, theano.sandbox.cuda.GpuFromHost):
        producer = node.inputs[0].owner
        if producer and type(producer.op) is MultinomialFromUniform:
            pvals, unis = producer.inputs
            (sample,) = producer.outputs
            if pvals.dtype == unis.dtype == sample.dtype == 'float32':
                sampler = GpuMultinomialFromUniform(producer.op.odtype)
                gpu_inputs = [gpu_from_host(inp) for inp in producer.inputs]
                transposed = sampler(*gpu_inputs).T
                # The dimshuffle is on the cpu, but will be moved to the gpu
                # by an opt.
                return [gpu_from_host(transposed)]
示例10: local_gpu_multinomial
def _multinomial_p_u(inputs):
    """Return ``(p, u)`` from ``MultinomialFromUniform`` inputs, or ``None``.

    ``inputs`` holds either ``(p, u)`` or ``(p, u, n_samples)``. ``None`` is
    returned when ``n_samples`` is not the scalar constant 1, because the
    GPU op is only used for single-sample draws here.
    """
    if len(inputs) == 2:
        p, u = inputs
        n_samples = 1
    else:
        p, u, n_samples = inputs
    try:
        if get_scalar_constant_value(n_samples) != 1:
            return None
    except NotScalarConstantError:
        return None
    return p, u


def local_gpu_multinomial(node):
    """Local rewrite that replaces ``MultinomialFromUniform`` with its GPU op.

    Two patterns are handled:

    * a ``MultinomialFromUniform`` node with float32 ``p``, ``u`` and output
      where at least one input is produced by ``HostFromGpu``;
    * a ``GpuFromHost`` node whose input is produced by an all-float32
      ``MultinomialFromUniform``.

    The rewrite is skipped when an ``n_samples`` input is present and is not
    the constant 1.

    Returns a one-element list with the replacement output, or ``None``
    when nothing matches.
    """
    if type(node.op) is MultinomialFromUniform:
        p_u = _multinomial_p_u(node.inputs)
        if p_u is None:
            return None
        p, u = p_u
        m, = node.outputs
        if (p.dtype == u.dtype == m.dtype == 'float32' and
            any(i.owner and isinstance(i.owner.op,
                                       theano.sandbox.cuda.HostFromGpu)
                for i in node.inputs)):
            gpu_op = GpuMultinomialFromUniform(node.op.odtype)
            return [host_from_gpu(gpu_op(*[gpu_from_host(i)
                                           for i in [p, u]])).T]

    if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
            node.inputs[0].owner and
            type(node.inputs[0].owner.op) is MultinomialFromUniform):
        multi = node.inputs[0].owner
        # Unpack the inputs of the multinomial node, not those of the
        # transfer node being rewritten: ``node`` here is the GpuFromHost
        # node, whose inputs are not ``(p, u[, n_samples])``.
        p_u = _multinomial_p_u(multi.inputs)
        if p_u is None:
            return None
        p, u = p_u
        m, = multi.outputs
        if p.dtype == u.dtype == m.dtype == 'float32':
            gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
            ret = gpu_op(*[gpu_from_host(i) for i in [p, u]]).T
            # The dimshuffle is on the cpu, but will be moved to the
            # gpu by an opt.
            return [gpu_from_host(ret)]
示例11: use_gpu_images2neibs
def use_gpu_images2neibs(node):
    """Local rewrite that moves a float32 ``Images2Neibs`` node onto the GPU.

    Applies only when the op is exactly ``Images2Neibs``, the first input is
    float32 and the op mode is one of ``"valid"``, ``"ignore_borders"`` or
    ``"wrap_centered"``.

    Returns a one-element list with the host-side replacement output, or
    ``None`` when the node is not rewritten.
    """
    if type(node.op) is not Images2Neibs:
        return None

    images = node.inputs[0]
    if images.dtype != "float32":
        return None

    mode = node.op.mode
    if mode not in ["valid", "ignore_borders", "wrap_centered"]:
        return None

    # Only the image tensor is transferred; the remaining inputs are
    # forwarded unchanged.
    on_gpu = gpu_images2neibs(gpu_from_host(images), node.inputs[1],
                              node.inputs[2], mode=mode)
    return [host_from_gpu(on_gpu)]
示例12: parse_args
def parse_args(self, bottom, top):
    """Compile the forward and backward Theano functions for this layer.

    ``self.pythonargs[0]`` is an expression string and ``self.pythonargs[1]``
    the output shape. Both are stored on ``self``; the functions are
    recompiled only when the expression string changed or the number of
    output dimensions changed compared to the previously stored values.

    After compilation ``self.f`` is the forward function and ``self.b`` is a
    list with one backward (``T.Lop``) function per entry of ``bottom``.

    Raises:
        ValueError: if an entry of ``bottom`` or the output shape has a
            number of dimensions other than 1 to 4.
    """
    function_str = self.pythonargs[0]
    top_shape = self.pythonargs[1]

    old_function_str = self.function_str
    old_top_shape = self.top_shape

    self.function_str = function_str
    self.top_shape = top_shape

    if (function_str == old_function_str
            and len(top_shape) == len(old_top_shape)):
        return

    if old_function_str != '':
        print('TheanoGPU function string different from cache: recompiling')

    import theano.tensor as T
    import theano
    from theano.sandbox.cuda.basic_ops import gpu_from_host

    symbolic_by_rank = {1: T.vector, 2: T.matrix,
                        3: T.tensor3, 4: T.tensor4}

    # NOTE: the names ``x`` and ``T`` must stay local variables with exactly
    # these names, because the evaluated expression refers to them.
    x = []
    for i, blob in enumerate(bottom):
        rank = len(blob.shape)
        if rank not in symbolic_by_rank:
            # Skipping the input would shift the indices of ``x`` relative
            # to ``bottom``, so fail loudly instead.
            raise ValueError('bottom[%d] has unsupported rank %d '
                             '(expected 1 to 4)' % (i, rank))
        x.append(symbolic_by_rank[rank]('x%d' % i))

    # NOTE(security): the expression string is executed as Python code; it
    # must come from a trusted layer definition.
    y = eval(function_str)
    self.f = theano.function(x, gpu_from_host(y), on_unused_input='ignore')

    top_rank = len(self.top_shape)
    if top_rank not in symbolic_by_rank:
        raise ValueError('top shape has unsupported rank %d '
                         '(expected 1 to 4)' % top_rank)
    v = symbolic_by_rank[top_rank]('v')

    self.b = []
    for x_i in x:
        yg = T.Lop(y, x_i, v)
        self.b.append(theano.function(x + [v], gpu_from_host(yg),
                                      on_unused_input='ignore'))
示例13: compileModel
def compileModel(data, nInputs, nOutputs, hiddenLayersSize = None, dropoutRates = None,
                 activation = 'relu', weightInitMode = 'normal', regularizer = 0.0001):
    """
    Creates a symbolic model given the specified parameters using Theano

    Parameters:
    data: sequence of three (inputs, targets) pairs, in the order training,
        validation, test; each is indexed with a vector of batch indices
        (presumably Theano shared variables -- confirm against the caller)
    nInputs, nOutputs: forwarded to MLP
    hiddenLayersSize: hidden layer sizes, defaults to [1200, 1200]
    dropoutRates: dropout rates, defaults to [0.2, 0.5, 0.5]
    activation, weightInitMode: forwarded to MLP
    regularizer: currently unused by this function; the regularization
        strength is an input of the compiled training function instead

    Output:
    A list containing the training, validation and test compiled functions of Theano
    """
    # Fresh lists per call: the defaults are handed to MLP, so they must not
    # be objects shared between calls.
    if hiddenLayersSize is None:
        hiddenLayersSize = [1200, 1200]
    if dropoutRates is None:
        dropoutRates = [0.2, 0.5, 0.5]

    np.random.seed(815)

    x = T.matrix('x')
    y = T.wvector('y')
    learningRate = T.scalar('learningRate')
    regularization = T.scalar('regularization')

    #Data sets
    train_x, train_y = data[0]
    valid_x, valid_y = data[1]
    test_x, test_y = data[2]

    nnet = MLP(x, nInputs, hiddenLayersSize, nOutputs, dropoutRates = dropoutRates,
               activation = activation, weightInitMode = weightInitMode)

    loss = nnet.loss(y, regularization)
    error = nnet.error(y)

    # Plain gradient-descent update for every parameter
    gParams = T.grad(loss, nnet.params)
    weightUpdates = [(param, param - learningRate * gParam) for param, gParam in zip(nnet.params, gParams)]

    batchIndicesVecctor = T.ivector('batchIndicesVecctor')
    trainF = function([batchIndicesVecctor, learningRate, regularization],
                      Out(sbasic.gpu_from_host(loss), borrow = True),
                      updates = weightUpdates,
                      givens = {x: train_x[batchIndicesVecctor], y: train_y[batchIndicesVecctor]})
    validF = function([batchIndicesVecctor],
                      Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)), borrow = True),
                      givens = {x: valid_x[batchIndicesVecctor], y: valid_y[batchIndicesVecctor]})
    testF = function([batchIndicesVecctor],
                     Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)), borrow = True),
                     givens = {x: test_x[batchIndicesVecctor], y: test_y[batchIndicesVecctor]})

    return [trainF, validF, testF]
示例14: parse_args
def parse_args(self, bottom, top):
    """Compile the forward and backward Theano functions for this layer.

    ``self.pythonargs[0]`` is an expression string and ``self.pythonargs[1]``
    the output shape. The functions are recompiled only when either differs
    from the values currently stored on ``self``; in that case the stored
    values are updated first.

    After compilation ``self.f`` is the forward function and ``self.b`` is a
    list with one backward (``T.Lop``) function per entry of ``bottom``.

    Raises:
        ValueError: if an entry of ``bottom`` or the output shape has a
            number of dimensions other than 1 to 4.
    """
    function_str = self.pythonargs[0]
    top_shape = self.pythonargs[1]

    if self.function_str == function_str and self.top_shape == top_shape:
        return

    self.function_str = function_str
    self.top_shape = top_shape

    import theano.tensor as T
    import theano
    from theano.sandbox.cuda.basic_ops import gpu_from_host

    symbolic_by_rank = {1: T.vector, 2: T.matrix,
                        3: T.tensor3, 4: T.tensor4}

    # NOTE: the names ``x`` and ``T`` must stay local variables with exactly
    # these names, because the evaluated expression refers to them.
    x = []
    for i, blob in enumerate(bottom):
        rank = len(blob.shape)
        if rank not in symbolic_by_rank:
            # Skipping the input would shift the indices of ``x`` relative
            # to ``bottom``, so fail loudly instead.
            raise ValueError('bottom[%d] has unsupported rank %d '
                             '(expected 1 to 4)' % (i, rank))
        x.append(symbolic_by_rank[rank]('x%d' % i))

    # NOTE(security): the expression string is executed as Python code; it
    # must come from a trusted layer definition.
    y = eval(function_str)
    self.f = theano.function(x, gpu_from_host(y), on_unused_input='ignore')

    top_rank = len(self.top_shape)
    if top_rank not in symbolic_by_rank:
        raise ValueError('top shape has unsupported rank %d '
                         '(expected 1 to 4)' % top_rank)
    v = symbolic_by_rank[top_rank]('v')

    self.b = []
    for x_i in x:
        yg = T.Lop(y, x_i, v)
        self.b.append(theano.function(x + [v], gpu_from_host(yg),
                                      on_unused_input='ignore'))
示例15: grad_step
def grad_step(*args):
    """One step of a minibatch loop accumulating cost and gradients.

    ``args[0]`` is the step counter, ``args[1]`` the running cost and
    ``args[2:]`` the running gradient accumulators. The cost is cloned with
    the model inputs replaced by the slice of ``loc_inputs`` selected by the
    counter and ``options['cbs']``.

    Returns ``[counter + 1, running_cost + step_cost]`` followed by the new
    gradient values, each filtered to the type of the matching accumulator.
    """
    step = TT.cast(args[0], 'int32')

    def minibatch(var):
        # Slice ``var`` to rows [step * cbs, (step + 1) * cbs).
        return var[step * options['cbs']:(step + 1) * options['cbs']]

    batch_inputs = [minibatch(var) for var in loc_inputs]
    substitutions = dict(zip(model.inputs, batch_inputs))
    step_cost = safe_clone(model.train_cost, replace=substitutions)
    step_grads = TT.grad(step_cost, model.params)

    accumulators = args[2: 2 + n_params]
    summed = [acc + grad for acc, grad in zip(accumulators, step_grads)]

    plain_branch = list(step_grads)
    summed_branch = [gpu_from_host(g) for g in summed]
    # ``comp_grad`` selects the accumulated gradients, otherwise the raw ones.
    chosen = ifelse(comp_grad, summed_branch, plain_branch, gpu=True)
    chosen = [prev.type.filter_variable(new)
              for prev, new in zip(args[2:], chosen)]

    return [args[0] + const(1), args[1] + step_cost] + chosen