本文整理汇总了 Python 中 veles.memory.Array.unmap 方法的典型用法代码示例。如果您正苦于以下问题：Python Array.unmap 方法的具体用法？Python Array.unmap 怎么用？Python Array.unmap 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 veles.memory.Array 的用法示例。
下文共展示了 Array.unmap 方法的 6 个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于系统推荐出更好的 Python 代码示例。
示例1: MemCpy
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import unmap [as 别名]
class MemCpy(AcceleratedUnit):
    """Unit that copies the contents of ``input`` into ``output``.

    ``input`` is demanded from the workflow; ``output`` is an ``Array``
    created by this unit and (re)allocated in ``initialize()``.
    """

    def __init__(self, workflow, **kwargs):
        super(MemCpy, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.demand("input")

    def initialize(self, device, **kwargs):
        """Make ``output`` mirror ``input`` and initialize both arrays.

        ``output`` is reset and refilled with zeros (same shape and dtype
        as ``input``) when it has no host memory yet or when its element
        count differs from that of ``input``.
        """
        super(MemCpy, self).initialize(device, **kwargs)
        out = self.output
        stale = out.mem is None
        if not stale:
            # Only the number of elements is compared, not the shape.
            stale = out.mem.size != self.input.mem.size
        if stale:
            out.reset()
            out.mem = numpy.zeros(self.input.mem.shape,
                                  dtype=self.input.mem.dtype)
        for arr in (self.input, out):
            arr.initialize(self.device)

    def cuda_init(self):
        """No CUDA-specific preparation is required."""

    def ocl_init(self):
        """No OpenCL-specific preparation is required."""

    def _gpu_run(self):
        """Unmap both arrays before a device-side copy."""
        for arr in (self.input, self.output):
            arr.unmap()

    def ocl_run(self):
        """Copy ``input`` to ``output`` through the OpenCL queue."""
        self._gpu_run()
        queue = self.device.queue_
        queue.copy_buffer(self.input.devmem, self.output.devmem,
                          0, 0, self.input.nbytes)

    def cuda_run(self):
        """Copy ``input`` to ``output`` with an asynchronous CUDA transfer."""
        self._gpu_run()
        destination = self.output.devmem
        destination.from_device_async(self.input.devmem)

    def numpy_run(self):
        """Copy ``input`` to ``output`` on the host with numpy."""
        self.input.map_read()
        self.output.map_invalidate()
        dst = self.output.mem
        src = self.input.mem
        numpy.copyto(dst, src)
示例2: OffsetPooling
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import unmap [as 别名]
class OffsetPooling(Pooling):
    """Forward pooling that records where each output element came from.

    Must be assigned before initialize():
        input

    Updates after run():
        input_offset

    Creates within initialize():
        input_offset

    Attributes:
        input_offset: int32 Array shaped like the output; each entry is
                      the flat index into the input of the element that
                      was passed through.
    """
    MAPPING = set()
    hide_from_registry = True

    def __init__(self, workflow, **kwargs):
        super(OffsetPooling, self).__init__(workflow, **kwargs)
        self.input_offset = Array()
        self.demand("input")

    def initialize(self, device, **kwargs):
        """Create or validate ``input_offset`` against the output shape."""
        super(OffsetPooling, self).initialize(device=device, **kwargs)
        if self._no_output:
            return

        offsets = self.input_offset
        if offsets:
            # An existing buffer must agree on every non-batch dimension.
            assert offsets.shape[1:] == self.output.shape[1:]
            reallocate = offsets.shape[0] != self.output.shape[0]
        else:
            reallocate = True

        if reallocate:
            offsets.reset(numpy.zeros(self.output.shape,
                                      dtype=numpy.int32))
        offsets.initialize(self.device)

    def set_args(self, *args):
        """Bind input, output and input_offset ahead of any extra args."""
        super(OffsetPooling, self).set_args(
            self.input, self.output, self.input_offset, *args)

    def ocl_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).ocl_run()

    def cuda_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).cuda_run()

    def numpy_run(self):
        self.input_offset.map_invalidate()
        super(OffsetPooling, self).numpy_run()

    def numpy_run_cut(self, cut, coords):
        """Pool one window and remember the flat input index of the winner.

        Parameters:
            cut: the window taken from the input; its shape is used to
                 turn the chosen flat position into (row, column).
            coords: (batch, y1, x1, ch, out_y, out_x) locating the window
                    in the input and the target cell in the output.

        Returns:
            The input value at the selected position.
        """
        batch, y1, x1, ch, out_y, out_x = coords
        flat_out = numpy.ravel_multi_index((batch, out_y, out_x, ch),
                                           self.output.shape)
        chosen = self.numpy_run_cut_offset(cut, flat_out)
        row, col = numpy.unravel_index(chosen, cut.shape)
        flat_in = numpy.ravel_multi_index((batch, y1 + row, x1 + col, ch),
                                          self.input.shape)
        picked = numpy.ravel(self.input.mem)[flat_in]
        self.input_offset.mem[batch, out_y, out_x, ch] = flat_in
        return picked
示例3: Deconv
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import unmap [as 别名]
#.........这里部分代码省略.........
self._global_size_hits = (self.output.size,)
self._local_size_hits = None
self.krn_clear_output_ = self.get_kernel("clear_output")
self.krn_clear_output_.set_arg(0, self.output.devmem)
self._clear_output = lambda: (
self.execute_kernel((self.output.size,), None,
self.krn_clear_output_))
self._clear_hits = lambda: (
self.execute_kernel((self.hits.size,), None, self.krn_clear_hits_))
self._process_subblock = self._ocl_process_subblock
self.krn_pack_.set_arg(1, self.output.devmem)
def cuda_init(self):
    """Prepare CUDA launch geometry and the backend-specific callables.

    Sets up:
        _global_size_pack: callable mapping an element count to the
            grid size for the pack kernel.
        _local_size_pack: block size for the pack kernel.
        _global_size_hits / _local_size_hits: launch geometry for the
            apply-hits kernel (only when ``self.hits`` is truthy).
        _clear_output / _clear_hits: callables that zero the
            corresponding device buffers asynchronously.
        _process_subblock: bound to the CUDA implementation.
    """
    self._gpu_init(cublas.CUBLAS)

    # Each kernel gets its own, never-reassigned block size name. The
    # lambda below reads its variable at call time (closures bind late),
    # so reusing a single name for both kernels would make the pack grid
    # silently use the hits kernel's block size whenever hits are enabled.
    pack_block_size = self.device.suggest_block_size(self.krn_pack_)
    self._global_size_pack = (
        lambda size: (int(numpy.ceil(size / pack_block_size)), 1, 1))
    self._local_size_pack = (pack_block_size, 1, 1)

    if self.hits:
        hits_block_size = self.device.suggest_block_size(
            self.krn_apply_hits_)
        self._global_size_hits = (
            int(numpy.ceil(self.output.size / hits_block_size)), 1, 1)
        self._local_size_hits = (hits_block_size, 1, 1)

    # Lambdas keep the devmem lookup deferred until the call.
    self._clear_output = lambda: self.output.devmem.memset32_async()
    self._clear_hits = lambda: self.hits.devmem.memset32_async()
    self._process_subblock = self._cuda_process_subblock
def ocl_run(self):
    """OpenCL entry point; the work is done by the shared gpu_run()."""
    return self.gpu_run()
def cuda_run(self):
    """CUDA entry point; the work is done by the shared gpu_run()."""
    self.gpu_run()
    return None
def gpu_run(self):
    """Run the deconvolution on the device, one sub-batch at a time.

    Unmaps the arrays involved, clears the output (and the hits buffer,
    when present), processes the batch in slices of at most
    ``unpack_size`` images and finally launches the apply-hits kernel
    if hits are in use.
    """
    self.unmap_vectors(self.output, self.input, self.weights)
    scratch = self.device.get_temp_buffer()

    self._clear_output()
    if self.hits:
        self.hits.unmap()
        self._clear_hits()

    total_images = self.output.shape[0]
    for first in range(0, total_images, self.unpack_size):
        # The last slice may be shorter than unpack_size.
        remaining = total_images - first
        self._process_subblock(
            first, min(remaining, self.unpack_size), scratch)

    if self.hits:
        self.execute_kernel(
            self._global_size_hits,
            self._local_size_hits,
            self.krn_apply_hits_,
        )
def _cuda_process_subblock(self, start_image, image_count, unpack_data):
    """Process ``image_count`` images starting at ``start_image`` (CUDA).

    Multiplies the weights by the input slice into ``unpack_data`` via
    ``gemm_`` and then launches the pack kernel to write the output
    slice. Input and output slices are addressed by adding an offset
    (sample_size * itemsize per image) to the integer device pointers.
    """
    input_offs = (start_image * self.input.sample_size *
                  self.input.itemsize)
    unpack_side = self._kernel_app_per_image * image_count
    if self.weights_transposed:
        weights_op = cublas.CUBLAS_OP_T
    else:
        weights_op = cublas.CUBLAS_OP_N

    self.gemm_(
        self.device.blas, weights_op, cublas.CUBLAS_OP_N,
        self._kernel_size, unpack_side, self.weights_shape[0],
        self.np_one, self.weights.devmem,
        int(self.input.devmem) + input_offs,
        self.np_zero, unpack_data)

    pack = self.krn_pack_
    pack.set_arg(0, unpack_data)
    output_offs = (start_image * self.output.sample_size *
                   self.output.itemsize)
    pack.set_arg(1, int(self.output.devmem) + output_offs)

    # The third kernel argument carries the number of elements to pack.
    limit = unpack_side * self._kernel_size
    self._const_i[0] = limit
    pack.set_arg(2, self._const_i)

    self.execute_kernel(self._global_size_pack(limit),
                        self._local_size_pack, pack)
def _ocl_process_subblock(self, start_image, image_count, unpack_data):
    """Process ``image_count`` images starting at ``start_image`` (OpenCL).

    Multiplies the weights by the input slice into ``unpack_data`` via
    ``gemm_`` (the slice is selected with ``offsetB``, counted in
    elements) and then launches the pack kernel. Here the third kernel
    argument carries the output offset of the slice.
    """
    input_offs = start_image * self.input.sample_size
    unpack_side = self._kernel_app_per_image * image_count
    if self.weights_transposed:
        weights_op = cublas.CUBLAS_OP_T
    else:
        weights_op = cublas.CUBLAS_OP_N

    self.gemm_(
        self.device.blas, weights_op, cublas.CUBLAS_OP_N,
        self._kernel_size, unpack_side, self.weights_shape[0],
        self.np_one, self.weights.devmem,
        self.input.devmem,
        self.np_zero, unpack_data, offsetB=input_offs)

    pack = self.krn_pack_
    pack.set_arg(0, unpack_data)
    self._const_i[0] = start_image * self.output.sample_size
    pack.set_arg(2, self._const_i)

    work_items = unpack_side * self._kernel_size
    self.execute_kernel(self._global_size_pack(work_items),
                        self._local_size_pack, pack)
def numpy_run(self):
    """Host (numpy) execution is not provided for this unit.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
示例4: GradientDescentBase
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import unmap [as 别名]
#.........这里部分代码省略.........
dtype = self.err_output.dtype
if self.need_err_input:
if not self.err_input:
self.err_input.reset(numpy.zeros(self.input.shape, dtype))
else:
assert self.err_input.shape == self.input.shape
if self.weights:
side = self.weights_shape[0]
other = self.weights.size // side
if self.factor_ortho:
if not self.col_sums:
self.col_sums.reset(numpy.zeros(other, dtype=dtype))
else:
assert self.col_sums.size == other
self.col_sums.initialize(self.device)
self.reduce_size = roundup(min(self.reduce_size, other), 32)
self.weights.initialize(self.device)
for vec in self.bias, self.input, self.err_input:
if vec:
vec.initialize(self.device)
self.init_vectors(
self.err_output,
self.gradient_weights,
self.gradient_bias,
self.accumulated_gradient_weights,
self.accumulated_gradient_bias,
self.gradient_weights_with_moment,
self.gradient_bias_with_moment,
)
def gpu_weights_update(self):
self.unmap_vectors(
self.input,
self.err_output,
self.weights,
self.gradient_weights,
self.accumulated_gradient_weights,
self.gradient_weights_with_moment,
)
if self.factor_ortho:
self.col_sums.unmap()
self.execute_kernel(self._global_size_ortho, self._local_size_ortho, self.krn_compute_col_sums_)
self._weights_const[12] = self.factor_ortho
self.krn_weights_.set_arg(12, self._weights_const[12:13])
self._weights_const[4:12] = (
self.learning_rate,
self.weights_decay,
self.l1_vs_l2,
self.gradient_moment,
self.acc_alpha,
self.acc_beta,
self.gd_alpha,
self.gd_beta,
)
self.krn_weights_.set_args(
self.device.skip(4),
self._weights_const[4:5],
self._weights_const[5:6],
self._weights_const[6:7],
self._weights_const[7:8],
self._weights_const[8:9],
示例5: KohonenForward
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import unmap [as 别名]
#.........这里部分代码省略.........
if chunk_size < 2:
chunk_size = self.neurons_number // 2 + 1
self.argmin_group_size = \
int(numpy.ceil(self.neurons_number / chunk_size))
block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
kernel="matrix_multiplication", dtype=self.input.dtype)
defines = {
'BLOCK_SIZE': block_size,
'VECTOR_OPT': int(bool(vector_opt)),
'BATCH': batch_size,
'SAMPLE_LENGTH': self.sample_length,
'NEURONS_NUMBER': self.neurons_number,
'CHUNK_SIZE': chunk_size,
'COPY_CHUNK_SIZE': copy_chunk_size,
}
if self.weights_transposed:
defines['WEIGHTS_TRANSPOSED'] = 1
self.build_program(defines, "%s_%d_%d_%d" %
(self.__class__.__name__,
batch_size, self.sample_length,
self.neurons_number),
dtype=self.weights.mem.dtype)
if self.total is not None:
self._set_total_global_size_ = \
[int(numpy.ceil(batch_size / copy_chunk_size))]
self._krn_set_total_ = self.get_kernel("set_total")
self._krn_set_total_.set_args(self.output.devmem, cl.skip,
self.total.devmem)
if self.argmins is not None:
return
self._krn_distances_ = self.get_kernel("calculate_distances")
self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
self._distances.devmem)
self._krn_argmin_ = self.get_kernel("calculate_argmin")
self._krn_argmin_.set_args(self._distances.devmem, self.output.devmem,
None)
self._gs_distance = [
roundup(self.neurons_number, block_size),
roundup(batch_size, block_size)]
self._ls_distance = [block_size, block_size]
def ocl_run(self):
    """Compute the winners on the device (or reuse precomputed argmins).

    Without ``argmins`` the distance and argmin kernels are launched.
    With ``argmins`` its contents are copied into ``output`` on the host
    and both arrays are unmapped again. When ``total`` is set, the
    set-total kernel is launched afterwards with the offset of the
    current minibatch.
    """
    self.output.unmap()
    if self.total is not None:
        self.total.unmap()

    if self.argmins is not None:
        # Winners are already known: plain host-side copy into output.
        self.argmins.unmap()
        self.argmins.map_read()
        self.output.map_write()
        self.output.mem[:] = self.argmins.mem
        self.output.unmap()
        self.argmins.unmap()
    else:
        self.input.unmap()
        self.weights.unmap()
        self.execute_kernel(
            self._gs_distance, self._ls_distance, self._krn_distances_)
        global_size = [self.argmin_group_size]
        local_size = [self.argmin_group_size]
        self.execute_kernel(global_size, local_size, self._krn_argmin_)

    if self.total is None:
        return

    self._minibatch_offset_[0] = \
        self.minibatch_offset - self.minibatch_size
    self._krn_set_total_.set_arg(1, self._minibatch_offset_)
    self.execute_kernel(
        self._set_total_global_size_, None, self._krn_set_total_)
def numpy_run(self):
    """Compute the winners on the host.

    For each sample the winner is either taken from ``argmins`` or
    found as the argmin of ``numpy_linalg_norm(weights - sample)``.
    When ``total`` is set, only ``minibatch_size`` samples are handled
    and each winner is also stored into ``total`` at the sample's
    position within the current minibatch window.
    """
    self.output.map_invalidate()
    if self.argmins is None:
        self.input.map_read()
        self.weights.map_read()
    else:
        self.argmins.map_read()
        self.output.mem[:] = self.argmins.mem

    if self.total is not None:
        self.total.map_invalidate()

    if self.total is not None:
        sample_count = self.minibatch_size
    else:
        sample_count = self.input.mem.shape[0]

    for sample in range(sample_count):
        if self.argmins is not None:
            winner = self.argmins[sample]
        else:
            delta = self.weights.mem - self.input[sample]
            winner = numpy.argmin(self.numpy_linalg_norm(delta))
            self.output[sample] = winner
        if self.total is not None:
            position = (sample + self.minibatch_offset -
                        self.minibatch_size)
            self.total[position] = winner
示例6: ZeroFiller
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import unmap [as 别名]
class ZeroFiller(ForwardBase, TriviallyDistributable):
    """Multiplies the weights of the given unit by a fixed mask on every
    step, zeroing the masked-out entries.

    Attributes:
        mask: Array with the weights' effective 2D shape; an entry is
              zero where ``kernel % grouping == chan % grouping`` and
              one elsewhere.
        grouping: integer >= 2 controlling the mask pattern.
    """
    MAPPING = {"zero_filter"}

    def __init__(self, workflow, **kwargs):
        super(ZeroFiller, self).__init__(workflow, **kwargs)
        self.mask = Array()
        # NOTE(review): the fallback value 1 is rejected by the grouping
        # setter (it requires >= 2), so "grouping" effectively has to be
        # passed explicitly - confirm whether that is intended.
        self.grouping = kwargs.get("grouping", 1)
        self.demand("weights")

    def init_unpickled(self):
        super(ZeroFiller, self).init_unpickled()
        self.sources_["weights_zerofilling"] = {}

    @property
    def effective_shape(self):
        """Weights viewed as 2D: (shape[0], everything else flattened)."""
        rows = self.weights.shape[0]
        return (rows, self.weights.size // rows)

    @property
    def grouping(self):
        return self._grouping

    @grouping.setter
    def grouping(self, value):
        """Validate and store the grouping.

        Raises:
            TypeError: if ``value`` is not an int.
            ValueError: if ``value`` is less than 2.
        """
        if not isinstance(value, int):
            raise TypeError(
                "grouping value must be an integer (got %s)" % type(value))
        if value < 2:
            raise ValueError("grouping value %d is invalid" % value)
        self._grouping = value

    def initialize(self, device=None, **kwargs):
        """Build (or validate) the mask and initialize mask and weights.

        Returns True without doing anything else when the weights are
        not available yet.

        Raises:
            ValueError: if the second effective dimension is not a
                multiple of ``grouping``.
        """
        super(ZeroFiller, self).initialize(device, **kwargs)
        if not self.weights:
            return True

        if self.mask:
            assert self.mask.shape == self.effective_shape
        else:
            if self.effective_shape[1] % self.grouping != 0:
                raise ValueError(
                    "Non-multiple of grouping weights shape detected: "
                    "%s, grouping=%d" %
                    (self.weights.shape, self.grouping))
            self.mask.reset(numpy.zeros(self.effective_shape,
                                        dtype=self.weights.dtype))
            self.mask.map_invalidate()
            # TODO(a.kazantsev): add check for transposed weights.
            n_kernels, n_chans = self.effective_shape
            for kernel in range(n_kernels):
                kernel_group = kernel % self.grouping
                for chan in range(n_chans):
                    self.mask[kernel, chan] = (
                        kernel_group != chan % self.grouping)

        for vec in (self.mask, self.weights):
            vec.initialize(device)

    def _gpu_init(self):
        """Build the program and bind the mask-multiplication kernel."""
        cache_name = "zero_filling_%d" % self.grouping
        self.build_program(cache_file_name=cache_name,
                           dtype=self.weights.dtype)
        self.assign_kernel("multiply_by_mask")
        self.set_args(self.mask, self.weights)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = [self.weights.size]
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        self._global_size = (self.weights.size, 1, 1)
        self._local_size = (1, 1, 1)

    def numpy_run(self):
        """Apply the mask to the weights on the host."""
        self.mask.map_read()
        self.weights.map_write()
        self.weights.mem *= self.mask.mem

    def _gpu_run(self):
        """Apply the mask to the weights on the device."""
        for vec in (self.weights, self.mask):
            vec.unmap()
        self.execute_kernel(self._global_size, self._local_size)

    def ocl_run(self):
        self._gpu_run()

    def cuda_run(self):
        self._gpu_run()