This article collects typical usage examples of the Python function theano.compile.optdb.register. If you are unsure how register is used in practice, the curated examples below may help.
Fifteen code examples of the register function are shown, sorted by popularity by default.
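For orientation before the examples: optdb is Theano's global optimizer database (a SequenceDB), and optdb.register(name, optimizer, position, *tags) takes a unique name, an optimizer or sub-database, a numeric position that orders the pass among the others, and any number of tags that compilation modes use to include or exclude it. A minimal, hedged sketch (the name 'my_empty_seqopt' is purely illustrative):
import theano
from theano.compile import optdb
from theano.gof import SequenceDB

# optdb.register(name, optimizer_or_db, position, *tags)
my_db = SequenceDB()                      # an empty sub-database to hang future passes on
optdb.register('my_empty_seqopt', my_db,  # the name must be unique within optdb
               0.05,                      # position: a small value makes it run very early
               'fast_run')                # tag: active whenever 'fast_run' is included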
Example 1: f
def f(local_opt):
    name = (kwargs and kwargs.pop('name')) or local_opt.__name__
    optdb.register(
        name, TopoOptimizer(
            local_opt, failure_callback=TopoOptimizer.warn_inplace),
        60, 'fast_run', 'inplace', 'gpu', *tags)
    return local_opt
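A closure like this only makes sense inside a decorator factory that supplies kwargs and tags; a hedged reconstruction of that enclosing function (the name register_inplace mirrors the similar snippet in Example 4, the surrounding code is not shown on this page):
def register_inplace(*tags, **kwargs):
    def f(local_opt):
        name = (kwargs and kwargs.pop('name')) or local_opt.__name__
        optdb.register(
            name, TopoOptimizer(
                local_opt, failure_callback=TopoOptimizer.warn_inplace),
            60, 'fast_run', 'inplace', 'gpu', *tags)
        return local_opt
    return f

# usage sketch: decorating a local optimizer registers it in optdb at position 60
# @register_inplace()
# def local_some_inplace_rewrite(node):
#     ...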
Example 2: __init__
def __init__(self, pool_shape, inplace, BCHW_grad_output):
    pool_shape = tuple(pool_shape)
    super(PoolHWBCOpGrad, self).__init__()
    assert len(pool_shape) == 2, len(pool_shape)
    assert pool_shape[0] > 0, pool_shape[0]
    assert pool_shape[1] > 0, pool_shape[1]
    if BCHW_grad_output:
        assert inplace
    self.pool_shape = pool_shape
    self.inplace = inplace
    self.BCHW_grad_output = BCHW_grad_output
    if inplace:
        self.destroy_map = {0: [0]}
    # register the inplace optimization for this pool_shape
    else:
        if not hasattr(optdb, 'PoolHWBCOpGradInplaceOpt_registered'):
            optdb.PoolHWBCOpGradInplaceOpt_registered = []
        if pool_shape not in optdb.PoolHWBCOpGradInplaceOpt_registered:
            PoolHWBCOpGradInplaceOpt = OpSub(self, PoolHWBCOpGrad(self.pool_shape, inplace=True, BCHW_grad_output=False))
            optdb.PoolHWBCOpGradInplaceOpt_registered.append(pool_shape)
            optdb.register('PoolHWBCOpGradInplaceOpt' + str(pool_shape),
                           theano.gof.TopoOptimizer(PoolHWBCOpGradInplaceOpt, failure_callback=gof.TopoOptimizer.warn_inplace),
                           50.0, 'fast_run', 'inplace', 'gpuarray')
Example 3: register_func
def register_func(recurrent_transform):
    """
    :type recurrent_transform: RecurrentTransform.RecurrentTransformBase
    """
    fn = recurrent_transform.name
    key = (fn, id(recurrent_transform))
    if key in function_ops:
        return function_ops[key]
    # register op
    no_inpl = LSTMCustomOp(fun_name=fn, inplace=False, recurrent_transform=recurrent_transform)
    inpl = LSTMCustomOp(fun_name=fn, inplace=True, recurrent_transform=recurrent_transform)
    function_ops[key] = no_inpl
    # hack to avoid being called twice
    attr = 'LSTMCustomMOpInplaceOpt_%s_%i' % (fn, id(recurrent_transform))
    if not hasattr(optdb, attr):
        opt = OpSub(no_inpl, inpl)
        optdb.register(attr, theano.gof.TopoOptimizer(opt),
                       50.0, 'fast_run', 'inplace', 'gpuarray')
        setattr(optdb, attr, True)
    # the same for the gradient op
    no_inpl = LSTMCustomOpGrad(fun_name=fn, inplace=False, recurrent_transform=recurrent_transform)
    inpl = LSTMCustomOpGrad(fun_name=fn, inplace=True, recurrent_transform=recurrent_transform)
    grad_ops[key] = no_inpl
    # hack to avoid being called twice
    attr = 'LSTMCustomMOpGradInplaceOpt_%s_%i' % (fn, id(recurrent_transform))
    if not hasattr(optdb, attr):
        opt = OpSub(no_inpl, inpl)
        optdb.register(attr, theano.gof.TopoOptimizer(opt),
                       50.0, 'fast_run', 'inplace', 'gpuarray')
        setattr(optdb, attr, True)
    return function_ops[key]
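Examples 2 and 3 repeat the same idiom; a hedged, condensed version of it (the helper name and op arguments are illustrative):
def register_inplace_sub_once(attr, noinplace_op, inplace_op):
    # Swap a non-inplace op for its inplace twin via OpSub, but register the pass only once:
    # optdb is process-global, and registering the same name twice is an error.
    if not hasattr(optdb, attr):
        optdb.register(attr,
                       theano.gof.TopoOptimizer(OpSub(noinplace_op, inplace_op)),
                       50.0, 'fast_run', 'inplace', 'gpuarray')
        setattr(optdb, attr, True)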
Example 4: EquilibriumDB
gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB()
# Don't register this right now
conv_groupopt = LocalGroupDB()
conv_groupopt.__name__ = "gpua_conv_opts"
gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
                    'fast_compile', 'fast_run', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
                    'fast_compile', 'fast_run', 'gpuarray')
# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
               optdb.__position__.get('add_destroy_handler', 49.5) - 1,
               'gpuarray')
def register_opt(*tags, **kwargs):
    def f(local_opt):
        name = (kwargs and kwargs.pop('name')) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
        return local_opt
    return f
def register_inplace(*tags, **kwargs):
    def f(local_opt):
        name = (kwargs and kwargs.pop('name')) or local_opt.__name__
        optdb.register(
Example 5: len
if len(nw_inner) != len(op_ins):
    op_outs = scan_utils.clone(op_outs, replace=givens)
    nw_info = op.info.copy()
    nw_info['n_seqs'] = nw_n_seqs
    # DEBUG CHECK
    nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
    nw_outs = nwScan.make_node(*nw_outer).outputs
    return nw_outs
else:
    return False
scan_seqopt = theano.gof.SequenceDB()
# We run before the blas opt at 1.7 and specialize at 2.0,
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register('scan_seqopt', scan_seqopt, 1.6, 'fast_run', 'scan')
scan_seqopt.register('scanOp_remove_constants_and_unused_inputs',
                     opt.in2out(remove_constants_and_unused_inputs_scan,
                                ignore_newtrees=True),
                     5,
                     'fast_run',
                     'scan')
# This is a global opt for historical reasons;
# it should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):
    def __init__(self):
        gof.Optimizer.__init__(self)
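The positions mentioned in the comment come from the global database itself; a hedged way to check them on a standard Theano install before picking a value such as 1.6:
from theano.compile import optdb

optdb.print_summary()                        # lists every registered pass with its position
print(optdb.__position__.get('stabilize'))   # 1.5 -> scan_seqopt at 1.6 runs just after it
print(optdb.__position__.get('specialize'))  # 2.0 -> and before specialization (and the blas opts)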
Example 6: local_abstractconv_check
conv_groupopt.register('local_conv2d_gradinputs_cpu',
                       local_conv2d_gradinputs_cpu, 40,
                       'fast_compile', 'fast_run')
# Verify that no AbstractConv ops are left in the graph
@local_optimizer([AbstractConv2d,
                  AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
def local_abstractconv_check(node):
    if isinstance(node.op, AbstractConv2d):
        raise AssertionError(
            'AbstractConv2d theano optimization failed. '
            'Did you exclude both "conv_dnn" and "conv_gemm" from '
            'the optimizer? Is cudnn available and does the GPU support it?')
    elif isinstance(node.op, AbstractConv2d_gradWeights):
        raise AssertionError(
            'AbstractConv2d_gradWeights theano optimization failed. '
            'Did you exclude both "conv_dnn" and "conv_gemm" from '
            'the optimizer? Is cudnn available and does the GPU support it?')
    elif isinstance(node.op, AbstractConv2d_gradInputs):
        raise AssertionError(
            'AbstractConv2d_gradInputs theano optimization failed. '
            'Did you exclude both "conv_dnn" and "conv_gemm" from '
            'the optimizer? Is cudnn available and does the GPU support it?')
optdb.register('AbstracConvCheck',
               opt.in2out(local_abstractconv_check,
                          name="AbstractConvCheck"),
               48.7, 'fast_compile', 'fast_run')
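The assertion messages refer to optimizer tags being excluded from the compilation mode; a hedged sketch of how that situation arises (x, w and conv_out are hypothetical variables):
import theano
from theano.compile.mode import get_default_mode

# Excluding both tags removes the passes that would replace the AbstractConv2d ops,
# so the check registered at position 48.7 can raise the AssertionError quoted above.
mode = get_default_mode().excluding('conv_dnn', 'conv_gemm')
# f = theano.function([x, w], conv_out, mode=mode)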
Example 7: EquilibriumDB
    GpuCrossentropySoftmaxArgmax1HotWithBias,
    GpuCrossentropySoftmax1HotWithBiasDx,
    GpuSoftmax,
    GpuSoftmaxWithBias,
)
from theano.compile import optdb
from theano.tensor.blas import _is_real_vector, _is_real_matrix
# optdb.print_summary()  # shows what is currently registered
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB()
gpu_seqopt.register("gpu_local_optimizations", gpu_optimizer, 1, "fast_run", "inplace")
gpu_seqopt.register("gpu_cut_transfers", gpu_cut_copies, 2, "fast_run", "gpu")
optdb.register("gpu_opt", gpu_seqopt, optdb.__position__.get("add_destroy_handler", 49.5) - 1, "gpu")
# This second pass is needed because elemwise fusion can put all the non-float32 code
# inside the fused elemwise; this works as long as there is no float64 op.
optdb.register("gpu_after_fusion", ProxyDB(gpu_seqopt), optdb.__position__.get("elemwise_fusion", 71) + 0.1, "gpu")
def register_opt(*tags, **kwargs):
    def f(local_opt):
        name = (kwargs and kwargs.pop("name")) or local_opt.__name__
        gpu_optimizer.register(name, local_opt, "fast_run", "inplace", *tags)
        return local_opt
    return f
# register local_track_shape_i at this level too
Example 8: locals
}
    """ % locals()

    #!!! change this when changing the code!
    def c_code_cache_version(self):
        return 1, 5

LSTMOpGradNoInplaceInstance = LSTMOpGrad(inplace=False)
LSTMOpGradInplaceInstance = LSTMOpGrad(inplace=True)
LSTMOpGradInplaceOpt = OpSub(LSTMOpGradNoInplaceInstance, LSTMOpGradInplaceInstance)
# hack to avoid being called twice
if not hasattr(optdb, 'LSTMOpGradInplaceOpt_registered'):
    optdb.register('LSTMOpGradInplaceOpt', theano.gof.TopoOptimizer(LSTMOpGradInplaceOpt),
                   50.0, 'fast_run', 'inplace', 'gpuarray')
    optdb.LSTMOpGradInplaceOpt_registered = True

#------------------------

class LSTMOp(theano.sandbox.cuda.GpuOp):
    def __init__(self, inplace):
        self.inplace = inplace
        if inplace:
            # all outputs operate in place on input 0 (which is Z),
            # but when an input is marked for destruction multiple times we get an error,
            # so we only mark that output 0 destroys input 0.
            # Theano still knows that input 0 will be destroyed, so it should be OK.
            # TODO
            self.destroy_map = {0: [0]}
GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20)
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
'fast_compile', 'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
'fast_compile', 'fast_run', 'gpuarray')
# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
optdb.__position__.get('add_destroy_handler', 49.5) - 1,
'gpuarray')
def register_opt(*tags, **kwargs):
def f(local_opt):
name = (kwargs and kwargs.pop('name')) or local_opt.__name__
gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
return local_opt
return f
register_opt('fast_compile')(theano.tensor.opt.local_track_shape_i)
gpu_optimizer.register('local_remove_all_assert',
theano.tensor.opt.local_remove_all_assert,
'unsafe')
Example 10: len
if len(nw_inner) != len(op_ins):
    op_outs = scan_utils.clone(op_outs, replace=givens)
    nw_info = op.info.copy()
    nw_info["n_seqs"] = nw_n_seqs
    # DEBUG CHECK
    nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
    nw_outs = nwScan.make_node(*nw_outer).outputs
    return nw_outs
else:
    return False
scan_seqopt = theano.gof.SequenceDB()
# We run before the blas opt at 1.7 and specialize at 2.0,
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register("scan_seqopt", scan_seqopt, 1.6, "fast_run", "scan")
scan_seqopt.register(
    "scanOp_remove_constants_and_unused_inputs",
    opt.in2out(remove_constants_and_unused_inputs_scan, ignore_newtrees=True),
    5,
    "fast_run",
    "scan",
)
# This is a global opt for historical reasons;
# it should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):
    def __init__(self):
        gof.Optimizer.__init__(self)
Example 11: local_gemm16_alpha_merge
@opt.register_opt()
@alpha_merge(Gemm16, alpha_in=1, beta_in=4)
def local_gemm16_alpha_merge(node, *inputs):
    return [Gemm16(relu=node.op.relu)(*inputs)]

@opt.register_opt()
@output_merge(Gemm16, alpha_in=1, beta_in=4, out_in=0)
def local_gemm16_output_merge(node, *inputs):
    return [Gemm16(relu=node.op.relu)(*inputs)]

@local_optimizer([Gemm16], inplace=True)
def local_gemm16_inplace(node):
    if type(node.op) != Gemm16 or node.op.inplace:
        return
    inputs = list(node.inputs)
    C = inputs[0]
    if (C.owner and
            isinstance(C.owner.op, GpuAllocEmpty) and
            len(C.clients) > 1):
        # C is a fresh buffer shared with other clients: allocate a private copy
        # before overwriting it in place
        inputs[0] = C.owner.op(*C.owner.inputs)
    return [Gemm16(relu=node.op.relu, inplace=True)(*inputs)]

optdb.register('local_gemm16_inplace',
               tensor.opt.in2out(local_gemm16_inplace,
                                 name='local_gemm16_inplace'),
               70.0, 'fast_run', 'inplace', 'gpuarray')
Example 12: len
if len(nw_inner) != len(op_ins):
    op_outs = scan_utils.clone(op_outs, replace=givens)
    nw_info = copy.deepcopy(op.info)
    nw_info["n_seqs"] = nw_n_seqs
    # DEBUG CHECK
    nwScan = scan_op.Scan(nw_inner, op_outs, nw_info)
    nw_outs = nwScan.make_node(*nw_outer).outputs
    return nw_outs
else:
    return False
scan_seqopt = theano.gof.SequenceDB()
# We run before the blas opt at 1.7 and specialize at 2.0,
# but after stabilize at 1.5. Should we put it before stabilize?
optdb.register("scan_seqopt", scan_seqopt, 1.6, "fast_run", "scan")
scan_seqopt.register(
    "scanOp_remove_constants_and_unused_inputs",
    opt.in2out(remove_constants_and_unused_inputs_scan, ignore_newtrees=True),
    5,
    "fast_run",
    "scan",
)
# This is a global opt for historical reasons;
# it should be possible to change it to a local opt.
class PushOutNonSeqScan(gof.Optimizer):
    def __init__(self):
        gof.Optimizer.__init__(self)
Example 13: local_dnn_convgi_inplace
@local_optimizer([GpuDnnConvGradI], inplace=True)
def local_dnn_convgi_inplace(node):
    if type(node.op) != GpuDnnConvGradI or node.op.inplace:
        return
    inputs = list(node.inputs)
    dest = inputs[2]
    if (dest.owner and
            isinstance(dest.owner.op, GpuAllocEmpty) and
            len(dest.clients) > 1):
        inputs[2] = GpuAllocEmpty(dest.owner.op.dtype)(*dest.owner.inputs)
    return [GpuDnnConvGradI(algo=node.op.algo, inplace=True)(*inputs)]

optdb.register('local_dnna_conv_inplace',
               tensor.opt.in2out(local_dnn_conv_inplace,
                                 local_dnn_convgw_inplace,
                                 local_dnn_convgi_inplace,
                                 name="local_dnn_conv_inplace"),
               70.0, 'fast_run', 'inplace', 'gpuarray', 'cudnn')

@register_opt('cudnn')
@alpha_merge(GpuDnnConv, alpha_in=4, beta_in=5, nd=4)
def local_dnn_conv_alpha_merge(node, *inputs):
    return [GpuDnnConv(algo=node.op.algo)(*inputs)]

@register_opt('cudnn')
@alpha_merge(GpuDnnConvGradW, alpha_in=4, beta_in=5, nd=4)
def local_dnn_convw_alpha_merge(node, *inputs):
    return [GpuDnnConvGradW(algo=node.op.algo)(*inputs)]
from basic_ops import host_from_gpu, gpu_from_host, gpu_alloc
from elemwise import GpuElemwise, _is_scalar
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1,
'fast_run', 'inplace', 'gpuarray')
gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2,
'fast_run', 'gpuarray')
# do not add 'fast_run' to these two as this would always enable gpuarray mode
optdb.register('gpuarray_opt', gpu_seqopt,
optdb.__position__.get('add_destroy_handler', 49.5) - 1,
'gpuarray')
def register_opt(*tags, **kwargs):
def f(local_opt):
name = (kwargs and kwargs.pop('name')) or local_opt.__name__
gpu_optimizer.register(name, local_opt, 'fast_run', 'gpuarray', *tags)
return local_opt
return f
register_opt()(theano.tensor.opt.local_track_shape_i)
class InputToGpuOptimizer(Optimizer):
"Transfer the input to the gpu to start the rolling wave."
def add_requirements(self, fgraph):
示例15: isinstance
"""
if isinstance(size, tuple):
msg = "size must be a tuple of int or a Theano variable"
assert all([isinstance(i, int) or isinstance(i, Variable) for i in size]), msg
else:
msg = "size must be a tuple of int or a Theano variable"
assert isinstance(size, Variable) and size.ndim == 1, msg
generator = theano.shared(False) # makes a generic
s_size = theano.tensor.as_tensor_variable(size)
u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size, self.next_seed())
self.state_updates.append(u.update)
rval = u * std + avg
if u.type.broadcastable != rval.type.broadcastable:
raise NotImplementedError(
"Increase the size to match the broadcasting pattern of `low`" "and `high` arguments"
)
return rval
@local_optimizer([CURAND_Base])
def local_destructive(node):
op = node.op
if isinstance(op, CURAND_Base) and not op.destructive:
# op might be gpu version
new_op = op.as_destructive()
return new_op.make_node(*node.inputs).outputs
return False
optdb.register("CURAND_destructive", opt.in2out(local_destructive, ignore_newtrees=True), 99, "fast_run", "inplace")