本文整理匯總了 Python 中 pyopencl.mem_flags 的典型用法代碼示例。如果您正苦於以下問題:Python pyopencl.mem_flags 的具體用法?Python pyopencl.mem_flags 怎麼用?Python pyopencl.mem_flags 使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解 mem_flags 所在模塊 pyopencl 的用法示例。
在下文中一共展示了 pyopencl.mem_flags 的 14 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: create_internal_buffer
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def create_internal_buffer(self, ctx):
    """Upload the city coordinates to the device and cache annealing scalars.

    Args:
        ctx: OpenCL context in which the coordinate buffer is created.

    Side effects:
        Stores on ``self`` the float32 host array of (x, y) pairs, the device
        buffer initialised from it, and numpy-typed copies of ``iterations``,
        ``temperature``, ``terminate_temperature`` and ``alpha``.
    """
    # Entries are looked up by position on purpose: ``city_info`` may be a
    # mapping keyed by 0..n-1 rather than a sequence, so iterating it directly
    # is not guaranteed to yield the records.
    cityxy = [(self.city_info[idx][0], self.city_info[idx][1])
              for idx in range(len(self.city_info))]
    self.__np_cityxy = numpy.array(cityxy, dtype=numpy.float32)

    mf = cl.mem_flags
    # A single allocation is enough; creating the buffer a second time would
    # only allocate and copy an identical block and drop the first one.
    self.__dev_cityxy = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                  hostbuf=self.__np_cityxy)

    # Fixed-width numpy scalars for the annealing parameters.
    self.__np_iterations = numpy.int32(self.iterations)
    self.__np_temperature = numpy.float32(self.temperature)
    self.__np_terminate_temperature = numpy.float32(self.terminate_temperature)
    self.__np_alpha = numpy.float32(self.alpha)
示例2: __init_cl_member
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def __init_cl_member(self):
    """Create the device buffers for costs, solutions and random seeds.

    The cost buffer and the solution info are obtained from ``self.sas``;
    afterwards ``self.sas`` is asked to create its own internal buffers in
    the same context.
    """
    self.__np_costs = self.sas.get_cost_buffer()
    solution_count, self.__np_solution = self.sas.get_solution_info()

    copy_in = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR

    # Seeds come from the host because OpenCL has no built-in random number
    # generator (the kernels bundle their own, Noise.cl). One unsigned 32-bit
    # value is drawn per solution.
    seeds = [random.randint(0, 4294967295) for _ in range(solution_count)]
    seed_array = numpy.array(seeds, dtype=numpy.uint32)

    self.__dev_rnum = cl.Buffer(self.__ctx, copy_in, hostbuf=seed_array)
    self.__dev_costs = cl.Buffer(self.__ctx, copy_in, hostbuf=self.__np_costs)
    self.__dev_solution = cl.Buffer(self.__ctx, copy_in,
                                    hostbuf=self.__np_solution)

    self.sas.create_internal_buffer(self.__ctx)
## To save the annealing state
示例3: __prepare_fitness_args
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def __prepare_fitness_args(self):
    """Assemble the list of device arguments for the fitness kernel.

    The list always starts with the chromosome and fitness device buffers.
    When user supplied fitness arguments exist, each one (a mapping with a
    value under ``'v'`` and a type tag under ``'t'``) is converted to a numpy
    array, uploaded to its own device buffer and appended to the list.
    """
    copy_in = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR

    self.__fitness_args_list = [self.__dev_chromosomes, self.__dev_fitnesses]
    extras = self.__extra_fitness_args_list = []

    user_args = self.__fitness_args
    if user_args is not None:
        # One device buffer per user supplied fitness argument.
        for spec in user_args:
            host_values = numpy.array(spec['v'],
                                      dtype=self.__type_to_numpy_type(spec['t']))
            extras.append(cl.Buffer(self.__ctx, copy_in, hostbuf=host_values))

    # Built-in arguments first, user supplied ones after them.
    self.__fitness_args_list = self.__fitness_args_list + extras
示例4: __init__
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def __init__(self, device=0, platform=0, iterations=7, compiler_options=None):
    """Create the OpenCL context and queue and record the device properties.

    :param device: Index of the OpenCL device (within the chosen platform)
        to use for benchmarking
    :type device: int

    :param platform: Index of the OpenCL platform the device is taken from
    :type platform: int

    :param iterations: The number of iterations to run the kernel during
        benchmarking, 7 by default.
    :type iterations: int

    :param compiler_options: Compiler options kept on the instance; an empty
        list is stored when none are given.

    :raises ImportError: if ``cl`` is falsy, i.e. pyopencl is unavailable
    """
    if not cl:
        raise ImportError("Error: pyopencl not installed, please install e.g. using 'pip install pyopencl'.")

    self.iterations = iterations

    # Context on the selected device plus a queue with profiling enabled.
    chosen_platform = cl.get_platforms()[platform]
    self.ctx = cl.Context(devices=[chosen_platform.get_devices()[device]])
    self.queue = cl.CommandQueue(
        self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    self.mf = cl.mem_flags

    # Device limits and build options.
    self.max_threads = self.ctx.devices[0].get_info(
        cl.device_info.MAX_WORK_GROUP_SIZE)
    self.compiler_options = compiler_options or []

    # Description of the environment the benchmark runs in. Note that the
    # compiler options are recorded exactly as passed in (possibly None).
    dev = self.ctx.devices[0]
    self.env = {
        "platform_name": dev.platform.name,
        "platform_version": dev.platform.version,
        "device_name": dev.name,
        "device_version": dev.version,
        "opencl_c_version": dev.opencl_c_version,
        "driver_version": dev.driver_version,
        "iterations": self.iterations,
        "compiler_options": compiler_options,
    }
    self.name = dev.name
示例5: create_bytearray
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def create_bytearray(ctx, size):
    """Create a zero-filled host array and a device buffer initialised from it.

    Despite the name, the elements are 32-bit integers, not bytes.

    Args:
        ctx: OpenCL context that owns the device buffer.
        size: Shape passed to ``numpy.zeros`` (number of int32 elements).

    Returns:
        Tuple ``(host_array, device_buffer)``.
    """
    access = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    host_zeros = numpy.zeros(size, dtype=numpy.int32)
    device_copy = cl.Buffer(ctx, access, hostbuf=host_zeros)
    return host_zeros, device_copy
示例6: preexecute_kernels
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def preexecute_kernels(self, ctx, queue, population):
    """Allocate the device buffers needed before the kernels are executed.

    Args:
        ctx: OpenCL context that owns the buffers.
        queue: Command queue; not used by this method.
        population: Number of chromosomes in the population.
    """
    gene_count = population * self.dna_total_length

    # Zero-filled host arrays: two hold one int32 per gene, the third one
    # float32 per chromosome.
    zero_chromosomes = numpy.zeros(gene_count, dtype=numpy.int32)
    zero_cross_map = numpy.zeros(gene_count, dtype=numpy.int32)
    zero_ratios = numpy.zeros(population, dtype=numpy.float32)

    mf = cl.mem_flags
    copy_in = mf.READ_WRITE | mf.COPY_HOST_PTR

    # The ratios buffer is only sized here; its contents are not uploaded.
    self.__dev_ratios = cl.Buffer(ctx, mf.WRITE_ONLY, zero_ratios.nbytes)
    self.__dev_other_chromosomes = cl.Buffer(ctx, copy_in,
                                             hostbuf=zero_chromosomes)
    self.__dev_cross_map = cl.Buffer(ctx, copy_in, hostbuf=zero_cross_map)
示例7: __restore_state
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def __restore_state(self, data):
    """Rebuild the algorithm state and its device buffers from a snapshot.

    Args:
        data: Mapping holding the saved state. Keys read here are
            ``prob_mutation``, ``prob_crossover``, ``generation_idx``,
            ``statistics``, ``generation_time_diff``, ``population``,
            ``rnum``, ``fitnesses``, ``chromosomes``, ``best``, ``worst``
            and ``avg``; the mapping is also forwarded to the sample
            chromosome's ``restore``.

    The instance is left in the paused state.
    """
    # Algorithm parameters and bookkeeping.
    self.__prob_mutation = data['prob_mutation']
    self.__prob_crossover = data['prob_crossover']
    self.__generation_index = data['generation_idx']
    self.__dictStatistics = data['statistics']
    self.__generation_time_diff = data['generation_time_diff']
    self.__population = data['population']

    # Host-side copies of the data that goes back onto the device.
    saved_seeds = data['rnum']
    self.__fitnesses = data['fitnesses']
    self.__np_chromosomes = data['chromosomes']
    self.__best_fitnesses = data['best']
    self.__worst_fitnesses = data['worst']
    self.__avg = data['avg']

    # Recreate the device buffers from the restored host data.
    flags = cl.mem_flags
    read_write_copy = flags.READ_WRITE | flags.COPY_HOST_PTR
    seed_array = numpy.array(saved_seeds, dtype=numpy.uint32)
    self.__dev_rnum = cl.Buffer(self.__ctx, read_write_copy,
                                hostbuf=seed_array)
    self.__dev_chromosomes = cl.Buffer(self.__ctx, read_write_copy,
                                       hostbuf=self.__np_chromosomes)
    self.__dev_fitnesses = cl.Buffer(self.__ctx,
                                     flags.WRITE_ONLY | flags.COPY_HOST_PTR,
                                     hostbuf=self.__fitnesses)

    self.__prepare_fitness_args()
    self.__sample_chromosome.restore(data, self.__ctx, self.__queue,
                                     self.__population)

    self._paused = True
# public methods
示例8: restore
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def restore(self, data, ctx, queue, population):
    """Recreate the ratios and other-chromosomes device buffers from a snapshot.

    Args:
        data: Mapping with the saved arrays under ``'other_chromosomes'``
            and ``'ratios'``.
        ctx: OpenCL context that owns the new buffers.
        queue: Command queue used to upload the saved arrays.
        population: Not used by this method.
    """
    saved_chromosomes = data['other_chromosomes']
    saved_ratios = data['ratios']

    # Allocate device memory: ratios by size only, chromosomes with an
    # initial copy of the host array.
    flags = cl.mem_flags
    self.__dev_ratios = cl.Buffer(ctx, flags.WRITE_ONLY, saved_ratios.nbytes)
    self.__dev_other_chromosomes = cl.Buffer(
        ctx, flags.READ_WRITE | flags.COPY_HOST_PTR,
        hostbuf=saved_chromosomes)

    # Upload the saved host arrays into the device buffers.
    for device_buffer, host_array in (
            (self.__dev_ratios, saved_ratios),
            (self.__dev_other_chromosomes, saved_chromosomes)):
        cl.enqueue_copy(queue, device_buffer, host_array)
示例9: krak
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def krak(bnum, bdata, cdata):
    """Build the fragment list for a burst, run the ``krak`` kernel, report it.

    For every table in ``mytables``, every 64-long window of ``bdata`` and
    every colour 0..7 one record of twelve uint64 words is generated:

    * first pass (``cdata`` is empty/None): the window value as pivot, eight
      words from ``tables.rft[table - 100]``, the colour, ``0x7`` and ``0x0``;
    * challenge pass (``cdata`` given): only for entries of ``cdata`` that are
      non-zero, the bit-reversed entry as pivot, the same eight table words,
      ``0x0``, the colour/position/burst number packed into one word, and the
      window value as target.

    Args:
        bnum: Burst number; converted with ``int()`` and packed above bit 12.
        bdata: Burst data; presumably a string of '0'/'1' characters, since
            each window is parsed as a binary literal -- TODO confirm.
        cdata: Per-fragment lookup results of the previous pass, or a falsy
            value for the first pass.
    """
    t = []
    # NOTE(review): indexing of cdata starts at 1 here -- confirm that
    # cdata[0] is not a fragment result.
    ctr = 1
    for table in mytables:
        for pos in range(0, len(bdata) - 63):
            # The window depends on the position only, not on the colour.
            sample = bdata[pos:pos + 64]
            for color in range(0, 8):
                if not cdata:
                    t.append(int("0b%s" % sample, 2))  # pivot
                    t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                    t.append(color)
                    t.append(0x7)
                    t.append(0x0)
                else:  # hunting for a challenge
                    if cdata[ctr] != 0:  # block was found in table
                        pivot = revbits(cdata[ctr])
                        target = int("0b%s" % sample, 2)
                        t.append(pivot)
                        t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                        t.append(0x0)
                        scolor = color | (pos << 4) | (int(bnum) << 12)
                        t.append(scolor)
                        t.append(target)
                    # One cdata entry per fragment, whether it was found or not.
                    ctr += 1

    a = np.array(t, dtype=np.uint64)
    # NOTE(review): the buffer is created READ_ONLY but is copied back to the
    # host below, which suggests the kernel writes to it -- confirm the flag
    # against krak.cl.
    a_dev = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                      hostbuf=a)
    s = np.uint32(a.shape)

    # compile the kernel; the context manager closes the file even on error
    FILE_NAME = "krak.cl"
    with open(FILE_NAME, "r") as f:
        SRC = f.read()
    prg = cl.Program(ctx, SRC).build()

    x = time.time()

    # launch the kernel
    # a.shape is a 1-tuple, so it can be applied to the format directly.
    print("Launching kernel, size %i" % a.shape)
    event = prg.krak(queue, a.shape, None, a_dev, s)
    event.wait()

    # copy the output from the context to the Python process
    cl.enqueue_copy(queue, a, a_dev)

    print("lag=%f" % (time.time() - x))

    report(a)
示例10: krak
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def krak(bnum, bdata, cdata):
    """Build the fragment list for a burst, run the ``krak`` kernel, report it.

    For every table in ``mytables``, every 64-long window of ``bdata`` and
    every colour 0..7 one record of twelve uint64 words is generated:

    * first pass (``cdata`` is empty/None): the window value as pivot, eight
      words from ``tables.rft[table - 100]``, the colour, ``0x7`` and ``0x0``;
    * challenge pass (``cdata`` given): only for entries of ``cdata`` that are
      non-zero, the bit-reversed entry as pivot, the same eight table words,
      ``0x0``, the colour/position/burst number packed into one word, and the
      window value as target.

    Args:
        bnum: Burst number; converted with ``int()`` and packed above bit 12.
        bdata: Burst data; presumably a string of '0'/'1' characters, since
            each window is parsed as a binary literal -- TODO confirm.
        cdata: Per-fragment lookup results of the previous pass, or a falsy
            value for the first pass.
    """
    t = []
    ctr = 0
    for table in mytables:
        for pos in range(0, len(bdata) - 63):
            # The window depends on the position only, not on the colour.
            sample = bdata[pos:pos + 64]
            for color in range(0, 8):
                if not cdata:
                    t.append(int("0b%s" % sample, 2))  # pivot
                    t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                    t.append(color)
                    t.append(0x7)
                    t.append(0x0)
                else:  # hunting for a challenge
                    if cdata[ctr] != 0:  # block was found in table
                        pivot = revbits(cdata[ctr])
                        target = int("0b%s" % sample, 2)
                        t.append(pivot)
                        t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                        t.append(0x0)
                        t.append(color | (pos << 4) | (int(bnum) << 12))
                        t.append(target)
                    # One cdata entry per fragment, whether it was found or not.
                    ctr += 1

    a = np.array(t, dtype=np.uint64)
    # NOTE(review): the buffer is created READ_ONLY but is copied back to the
    # host below, which suggests the kernel writes to it -- confirm the flag
    # against krak.cl.
    a_dev = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                      hostbuf=a)
    s = np.uint32(a.shape)

    # compile the kernel; the context manager closes the file even on error
    FILE_NAME = "krak.cl"
    with open(FILE_NAME, "r") as f:
        SRC = f.read()
    prg = cl.Program(ctx, SRC).build()

    x = time.time()

    # launch the kernel
    # a.shape is a 1-tuple, so it can be applied to the format directly.
    print("Launching kernel, size %i" % a.shape)
    event = prg.krak(queue, a.shape, None, a_dev, s)
    event.wait()

    # copy the output from the context to the Python process
    cl.enqueue_copy(queue, a, a_dev)

    print("lag=%f" % (time.time() - x))

    report(a)
示例11: compute
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def compute(self, video, num_frame):
    """Compute nearest-neighbour patch indices for one frame of a video.

    input:
        video: numpy array holding a gray-level video of type input_dtype,
            shaped [num_frames, h, w] or [num_frames, h, w, 1]
        num_frame: index of the frame whose patches are matched; it must
            leave room for past_frames before it and future_frames after it

    output:
        uint32 numpy array of shape (h // step, w // step, num_neighbors).
        Entry [i, j, :] lists the flat indices of the matches found for the
        patch whose top-left pixel is (i, j). Matching is done on patches of
        patch_search_width x patch_search_width; the returned indices are
        shifted so that they designate the top-left pixel of the
        patch_data_width x patch_data_width patch sharing the same centre.
        A flat index converts to (y, x) as (index / width, index % width).
    """
    dims = video.shape
    assert video.dtype.type == self.input_dtype
    assert len(dims) == 3 or (len(dims) == 4 and dims[3] == 1)

    nf, h, w = dims[0], dims[1], dims[2]
    assert self.past_frames <= num_frame < (nf - self.future_frames)
    # Patch indices are stored as uint32; longer videos must be cut into extracts.
    assert nf * h * w <= 4294967295

    rows = h // self.step
    cols = w // self.step

    flags = cl.mem_flags
    video_cl = cl.Buffer(self.ctx, flags.READ_ONLY | flags.COPY_HOST_PTR,
                         hostbuf=video)
    # 4 bytes (one uint32) per neighbour of every evaluated position.
    dst_pos_cl = cl.Buffer(self.ctx, flags.WRITE_ONLY,
                           cols * rows * self.num_neighbors * 4)

    usable_width = self.wksize - (self.patch_search_width - 1)
    global_size = [rows, DIVUP(w, usable_width) * self.wksize]
    local_size = [1, self.wksize]
    # The naive kernel would use global_size = [w, h] and local_size = None.

    self.compute_nn(self.queue, global_size, local_size, dst_pos_cl, video_cl,
                    w, h, w, w * h, 0, 0, num_frame * w * h, 0)

    dst_pos = np.empty([rows, cols, self.num_neighbors], dtype=np.uint32)
    cl.enqueue_copy(self.queue, dst_pos, dst_pos_cl)

    # Here dst_pos[i, j, :] refers to the top-left pixel of the search-sized
    # matches of the patch with top-left (i, j). When the data patch has a
    # different width, move every index to the top-left of the data-sized
    # patch that has the same centre as the search-sized one.
    if self.patch_search_width == self.patch_data_width:
        return dst_pos

    diff_xy = (self.patch_search_width - self.patch_data_width) // 2
    return dst_pos + (diff_xy * w + diff_xy)
示例12: run
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def run(vector):
    """Build, run and time the ``casting_test`` OpenCL kernel.

    Args:
        vector: When truthy, the kernel source is read from
            ``casting_vector.c``, a struct of ``float4`` members and a
            matching ``switch`` body are generated, and one work item is
            launched per four floats. Otherwise ``casting.c`` is used with
            one work item per float.

    The generated source, the result array and the kernel execution time
    taken from the profiling events are printed.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    # The context manager closes the file even if reading fails.
    with open('casting_vector.c' if vector else 'casting.c', 'r') as f:
        fstr = f.read()

    data_size = 100
    global_size = data_size // 4 if vector else data_size
    size_define = '#define GLOBAL_SIZE ' + str(global_size) + '\n'

    if vector:
        # One float4 member and one switch case per work item.
        code_temp = ' case {0}:\n tsR->data{0} = tsA->data{0} + tsB->data{0};\n break;\n'
        members = ''.join(' float4 data' + str(i) + ';\n'
                          for i in range(global_size))
        cases = ''.join(code_temp.format(i) for i in range(global_size))
        struct = 'typedef struct {\n' + members + '} TypedStruct2;\n'
        code = ' switch(id) {\n' + cases + ' }\n'

        fstr = fstr.replace('%code_generation%', code)
        fstr = size_define + struct + fstr
    else:
        fstr = size_define + fstr

    print('=' * 50)
    print(fstr)
    print('-' * 50)

    a_np = np.random.rand(data_size).astype(np.float32)
    b_np = np.random.rand(data_size).astype(np.float32)

    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)

    prg = cl.Program(ctx, fstr).build()
    exec_evt = prg.casting_test(queue, (global_size,), None, a_g, b_g, res_g)
    exec_evt.wait()

    res_np = np.empty_like(a_np)
    cl.enqueue_copy(queue, res_np, res_g).wait()
    print(res_np)

    # The difference of the profiling timestamps is scaled by 1e-9 to seconds.
    elapsed = 1e-9 * (exec_evt.profile.end - exec_evt.profile.start)
    print('Vector: {0} => Execution time of test: {1}s'.format(vector, elapsed))
示例13: __prepare_cl_buffers
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def __prepare_cl_buffers(self):
    """Create every host array and device buffer used by the ant kernels.

    Sets up the node-by-node distance and pheromone matrices, the node
    coordinates, one random seed per ant and the per-ant working buffers.
    """
    flags = cl.mem_flags
    read_write_copy = flags.READ_WRITE | flags.COPY_HOST_PTR
    read_only_copy = flags.READ_ONLY | flags.COPY_HOST_PTR
    node_count = self.__node_count
    matrix_shape = [node_count, node_count]

    # Distance matrix, initially all zero.
    self.__path_distances = numpy.zeros(shape=matrix_shape, dtype=numpy.float32)
    self.__dev_path_distances = cl.Buffer(self.__ctx, read_write_copy,
                                          hostbuf=self.__path_distances)

    # Pheromone matrix, every path starts with a pheromone level of 1.
    self.__path_pheromones = numpy.ones(shape=matrix_shape, dtype=numpy.float32)
    self.__dev_path_pheromones = cl.Buffer(self.__ctx, read_write_copy,
                                           hostbuf=self.__path_pheromones)

    # Node positions, split into one array per coordinate.
    xs = numpy.empty(node_count, dtype=numpy.float32)
    ys = numpy.empty(node_count, dtype=numpy.float32)
    for idx in range(node_count):
        node = self.__nodes[idx]
        xs[idx] = node[0]
        ys[idx] = node[1]
    self.__dev_x = cl.Buffer(self.__ctx, read_only_copy, hostbuf=xs)
    self.__dev_y = cl.Buffer(self.__ctx, read_only_copy, hostbuf=ys)

    # Seeds come from the host because OpenCL has no built-in random number
    # generator (the kernels bundle their own, Noise.cl). One unsigned 32-bit
    # value is drawn per ant.
    seeds = [random.randint(0, 4294967295) for _ in range(self.__ants)]
    self.__dev_rnum = cl.Buffer(self.__ctx, read_write_copy,
                                hostbuf=numpy.array(seeds, dtype=numpy.uint32))

    # Working memory: one 4-byte slot per node for every ant.
    per_ant_per_node = 4 * node_count * self.__ants
    # visited_nodes stores the path taken by each ant.
    self.__dev_visited_nodes = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                         per_ant_per_node)
    # path_probabilities is used when choosing the next node.
    self.__dev_path_probabilities = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                              per_ant_per_node)
    # tmp_pheromones is used when calculating the next-node probabilities.
    self.__dev_tmp_pheromones = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                          per_ant_per_node)
    # ant_fitnesses keeps the fitness of each ant for a single round.
    self.__dev_ant_fitnesses = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                         4 * self.__ants)
示例14: __preexecute_kernels
# 需要導入模塊: import pyopencl [as 別名]
# 或者: from pyopencl import mem_flags [as 別名]
def __preexecute_kernels(self):
    """Allocate host arrays and device buffers before the kernels are run.

    Creates the fitness and chromosome storage, one random seed per
    individual, the optional elitism buffers and the statistics buffers,
    then lets the sample chromosome prepare its own buffers and dumps the
    kernel resource usage.
    """
    population = self.__population
    total_dna_size = population * self.__sample_chromosome.dna_total_length

    self.__fitnesses = numpy.zeros(population, dtype=numpy.float32)
    self.__np_chromosomes = numpy.zeros(total_dna_size, dtype=numpy.int32)

    flags = cl.mem_flags
    read_write_copy = flags.READ_WRITE | flags.COPY_HOST_PTR

    # Seeds come from the host because OpenCL has no built-in random number
    # generator (the kernels bundle their own, Noise.cl). One unsigned 32-bit
    # value is drawn per individual.
    seeds = [random.randint(0, 4294967295) for _ in range(population)]

    self.__dev_rnum = cl.Buffer(self.__ctx, read_write_copy,
                                hostbuf=numpy.array(seeds, dtype=numpy.uint32))
    self.__dev_chromosomes = cl.Buffer(self.__ctx, read_write_copy,
                                       hostbuf=self.__np_chromosomes)
    self.__dev_fitnesses = cl.Buffer(self.__ctx, flags.WRITE_ONLY,
                                     self.__fitnesses.nbytes)
    self.__prepare_fitness_args()

    if self.__is_elitism_mode:
        self.__elites_updated = False

        # Genes of the current elites and of their updated replacements.
        self.__current_elites = numpy.zeros(
            self.__sample_chromosome.dna_total_length * self.__elitism_top,
            dtype=numpy.int32)
        self.__dev_current_elites = cl.Buffer(self.__ctx, read_write_copy,
                                              hostbuf=self.__current_elites)

        self.__updated_elites = numpy.zeros(
            self.__sample_chromosome.dna_total_length * self.__elitism_top,
            dtype=numpy.int32)
        self.__dev_updated_elites = cl.Buffer(self.__ctx, read_write_copy,
                                              hostbuf=self.__updated_elites)

        # One fitness value per elite.
        self.__updated_elite_fitnesses = numpy.zeros(self.__elitism_top,
                                                     dtype=numpy.float32)
        self.__dev_updated_elite_fitnesses = cl.Buffer(
            self.__ctx, read_write_copy,
            hostbuf=self.__updated_elite_fitnesses)

    # For statistics
    self.__dev_best_indices = cl.Buffer(self.__ctx, read_write_copy,
                                        hostbuf=self.__best_indices)
    self.__dev_worst_indices = cl.Buffer(self.__ctx, read_write_copy,
                                         hostbuf=self.__worst_indices)

    # Upload the zeroed fitness values to the device.
    cl.enqueue_copy(self.__queue, self.__dev_fitnesses, self.__fitnesses)

    # Let the chromosome prepare its internal data structures.
    self.__sample_chromosome.preexecute_kernels(self.__ctx, self.__queue,
                                                population)

    # Dump information on kernel resource usage.
    self.__dump_kernel_info(self.__prg, self.__ctx, self.__sample_chromosome)
## Populate the first generation.