当前位置: 首页>>代码示例>>C++>>正文


C++ command_queue::enqueue_task方法代码示例

本文整理汇总了C++中command_queue::enqueue_task方法的典型用法代码示例。如果您正苦于以下问题:C++ command_queue::enqueue_task方法的具体用法?C++ command_queue::enqueue_task怎么用?C++ command_queue::enqueue_task使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在command_queue的用法示例。


在下文中一共展示了command_queue::enqueue_task方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: generate_state

 void generate_state(command_queue &queue)
 {
     kernel generate_state_kernel =
         m_program.create_kernel("generate_state");
     generate_state_kernel.set_arg(0, m_state_buffer);
     queue.enqueue_task(generate_state_kernel);
 }
开发者ID:sajis997,项目名称:compute,代码行数:7,代码来源:mersenne_twister.hpp

示例2: if

inline void sort2(const buffer &buffer, command_queue &queue)
{
    const context &context = queue.get_context();

    boost::shared_ptr<detail::program_cache> cache =
        detail::get_program_cache(context);
    std::string cache_key =
        std::string("fixed_sort2_") + type_name<T>();

    program sort2_program = cache->get(cache_key);
    if(!sort2_program.get()){
        const char source[] =
            "__kernel void sort2(__global T *input)\n"
            "{\n"
            "    const T x = input[0];\n"
            "    const T y = input[1];\n"
            "    if(y < x){\n"
            "        input[0] = y;\n"
            "        input[1] = x;\n"
            "    }\n"
            "}\n";

        sort2_program = program::build_with_source(
                source, context, std::string("-DT=") + type_name<T>()
                );

        cache->insert(cache_key, sort2_program);
    }

    kernel sort2_kernel = sort2_program.create_kernel("sort2");
    sort2_kernel.set_arg(0, buffer);
    queue.enqueue_task(sort2_kernel);
}
开发者ID:BeauJoh,项目名称:compute,代码行数:33,代码来源:fixed_sort.hpp

示例3: serial_reduce

inline void serial_reduce(InputIterator first,
                          InputIterator last,
                          OutputIterator result,
                          BinaryFunction function,
                          command_queue &queue)
{
    typedef typename
        std::iterator_traits<InputIterator>::value_type T;
    typedef typename
        ::boost::compute::result_of<BinaryFunction(T, T)>::type result_type;

    const context &context = queue.get_context();
    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return;
    }

    meta_kernel k("serial_reduce");
    size_t count_arg = k.add_arg<cl_uint>("count");

    k <<
        k.decl<result_type>("result") << " = " << first[0] << ";\n" <<
        "for(uint i = 1; i < count; i++)\n" <<
        "    result = " << function(k.var<T>("result"),
                                    first[k.var<uint_>("i")]) << ";\n" <<
        result[0] << " = result;\n";

    kernel kernel = k.compile(context);

    kernel.set_arg(count_arg, static_cast<uint_>(count));

    queue.enqueue_task(kernel);
}
开发者ID:ASMlover,项目名称:study,代码行数:33,代码来源:serial_reduce.hpp

示例4: generate_multiplicands

    /// \internal_
    /// Generates the multiplicands for each thread
    void generate_multiplicands(command_queue &queue)
    {
        kernel multiplicand_kernel =
            m_program.create_kernel("multiplicand");
        multiplicand_kernel.set_arg(0, m_multiplicands);

        queue.enqueue_task(multiplicand_kernel);
    }
开发者ID:huiyongsun,项目名称:compute,代码行数:10,代码来源:linear_congruential_engine.hpp

示例5: serial_insertion_sort_by_key

inline void serial_insertion_sort_by_key(KeyIterator keys_first,
                                         KeyIterator keys_last,
                                         ValueIterator values_first,
                                         Compare compare,
                                         command_queue &queue)
{
    typedef typename std::iterator_traits<KeyIterator>::value_type key_type;
    typedef typename std::iterator_traits<ValueIterator>::value_type value_type;

    size_t count = iterator_range_size(keys_first, keys_last);
    if(count < 2){
        return;
    }

    meta_kernel k("serial_insertion_sort_by_key");
    size_t local_keys_arg = k.add_arg<key_type *>(memory_object::local_memory, "keys");
    size_t local_data_arg = k.add_arg<value_type *>(memory_object::local_memory, "data");
    size_t count_arg = k.add_arg<uint_>("n");

    k <<
        // copy data to local memory
        "for(uint i = 0; i < n; i++){\n" <<
        "    keys[i] = " << keys_first[k.var<uint_>("i")] << ";\n"
        "    data[i] = " << values_first[k.var<uint_>("i")] << ";\n"
        "}\n"

        // sort data in local memory
        "for(uint i = 1; i < n; i++){\n" <<
        "    " << k.decl<const key_type>("key") << " = keys[i];\n" <<
        "    " << k.decl<const value_type>("value") << " = data[i];\n" <<
        "    uint pos = i;\n" <<
        "    while(pos > 0 && " <<
                   compare(k.var<const key_type>("key"),
                           k.var<const key_type>("keys[pos-1]")) << "){\n" <<
        "        keys[pos] = keys[pos-1];\n" <<
        "        data[pos] = data[pos-1];\n" <<
        "        pos--;\n" <<
        "    }\n" <<
        "    keys[pos] = key;\n" <<
        "    data[pos] = value;\n" <<
        "}\n" <<

        // copy sorted data to output
        "for(uint i = 0; i < n; i++){\n" <<
        "    " << keys_first[k.var<uint_>("i")] << " = keys[i];\n"
        "    " << values_first[k.var<uint_>("i")] << " = data[i];\n"
        "}\n";

    const context &context = queue.get_context();
    ::boost::compute::kernel kernel = k.compile(context);
    kernel.set_arg(local_keys_arg, static_cast<uint_>(count * sizeof(key_type)), 0);
    kernel.set_arg(local_data_arg, static_cast<uint_>(count * sizeof(value_type)), 0);
    kernel.set_arg(count_arg, static_cast<uint_>(count));

    queue.enqueue_task(kernel);
}
开发者ID:junmuz,项目名称:compute,代码行数:56,代码来源:insertion_sort.hpp

示例6: seed

    /// Seeds the random number generator with \p value.
    void seed(command_queue &queue, result_type value = default_seed)
    {
        kernel seed_kernel = m_program.create_kernel("seed");
        seed_kernel.set_arg(0, value);
        seed_kernel.set_arg(1, m_state_buffer);

        queue.enqueue_task(seed_kernel);

        m_state_index = 0;
    }
开发者ID:huiyongsun,项目名称:compute,代码行数:11,代码来源:mersenne_twister_engine.hpp

示例7: serial_insertion_sort

inline void serial_insertion_sort(Iterator first,
                                  Iterator last,
                                  Compare compare,
                                  command_queue &queue)
{
    typedef typename std::iterator_traits<Iterator>::value_type T;

    size_t count = iterator_range_size(first, last);
    if(count < 2){
        return;
    }

    meta_kernel k("serial_insertion_sort");
    size_t local_data_arg = k.add_arg<T *>(memory_object::local_memory, "data");
    size_t count_arg = k.add_arg<uint_>("n");

    k <<
        // copy data to local memory
        "for(uint i = 0; i < n; i++){\n" <<
        "    data[i] = " << first[k.var<uint_>("i")] << ";\n"
        "}\n"

        // sort data in local memory
        "for(uint i = 1; i < n; i++){\n" <<
        "    " << k.decl<const T>("value") << " = data[i];\n" <<
        "    uint pos = i;\n" <<
        "    while(pos > 0 && " <<
                   compare(k.var<const T>("value"),
                           k.var<const T>("data[pos-1]")) << "){\n" <<
        "        data[pos] = data[pos-1];\n" <<
        "        pos--;\n" <<
        "    }\n" <<
        "    data[pos] = value;\n" <<
        "}\n" <<

        // copy sorted data to output
        "for(uint i = 0; i < n; i++){\n" <<
        "    " << first[k.var<uint_>("i")] << " = data[i];\n"
        "}\n";

    const context &context = queue.get_context();
    ::boost::compute::kernel kernel = k.compile(context);
    kernel.set_arg(local_data_arg, local_buffer<T>(count));
    kernel.set_arg(count_arg, static_cast<uint_>(count));

    queue.enqueue_task(kernel);
}
开发者ID:junmuz,项目名称:compute,代码行数:47,代码来源:insertion_sort.hpp

示例8: serial_count_if

inline size_t serial_count_if(InputIterator first,
                              InputIterator last,
                              Predicate predicate,
                              command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    const context &context = queue.get_context();
    size_t size = iterator_range_size(first, last);

    meta_kernel k("serial_count_if");
    k.add_arg<const uint_>("size", size);
    size_t result_arg = k.add_arg<uint_ *>("__global", "result");

    k <<
        "uint count = 0;\n" <<
        "for(uint i = 0; i < size; i++){\n" <<
            k.decl<const value_type>("value") << "="
                << first[k.var<uint_>("i")] << ";\n" <<
            "if(" << predicate(k.var<const value_type>("value")) << "){\n" <<
                "count++;\n" <<
            "}\n"
        "}\n"
        "*result = count;\n";

    kernel kernel = k.compile(context);

    // setup result buffer
    buffer result_buffer(context, sizeof(uint_));
    kernel.set_arg(result_arg, result_buffer);

    // run kernel
    queue.enqueue_task(kernel);

    // read index
    return detail::read_single_value<uint_>(result_buffer, queue);
}
开发者ID:d-meiser,项目名称:compute,代码行数:38,代码来源:serial_count_if.hpp

示例9: serial_reduce

inline T serial_reduce(InputIterator first,
                       InputIterator last,
                       T init,
                       BinaryFunction function,
                       command_queue &queue)
{
    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return init;
    }

    const context &context = queue.get_context();

    meta_kernel k("serial_reduce");
    size_t init_arg = k.add_arg<T>("init");
    size_t count_arg = k.add_arg<cl_uint>("count");
    size_t output_arg = k.add_arg<T *>("__global", "output");

    k <<
        k.decl<T>("result") << " = init;\n" <<
        "for(uint i = 0; i < count; i++)\n" <<
        "    result = " << function(k.var<T>("result"),
                                    first[k.var<cl_uint>("i")]) << ";\n" <<
        "*output = result;\n";

    kernel kernel = k.compile(context);

    scalar<T> output(context);

    kernel.set_arg(init_arg, init);
    kernel.set_arg(count_arg, static_cast<cl_uint>(count));
    kernel.set_arg(output_arg, output.get_buffer());

    queue.enqueue_task(kernel);

    return output.read(queue);
}
开发者ID:jayavanth,项目名称:compute,代码行数:37,代码来源:serial_reduce.hpp

示例10: serial_count_if

inline size_t serial_count_if(InputIterator first,
                              InputIterator last,
                              Predicate predicate,
                              command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    const context &context = queue.get_context();
    size_t size = iterator_range_size(first, last);

    meta_kernel k("serial_count_if");
    k.add_set_arg("size", static_cast<uint_>(size));
    size_t result_arg = k.add_arg<uint_ *>(memory_object::global_memory, "result");

    k <<
        "uint count = 0;\n" <<
        "for(uint i = 0; i < size; i++){\n" <<
            k.decl<const value_type>("value") << "="
                << first[k.var<uint_>("i")] << ";\n" <<
            "if(" << predicate(k.var<const value_type>("value")) << "){\n" <<
                "count++;\n" <<
            "}\n"
        "}\n"
        "*result = count;\n";

    kernel kernel = k.compile(context);

    // setup result buffer
    scalar<uint_> result(context);
    kernel.set_arg(result_arg, result.get_buffer());

    // run kernel
    queue.enqueue_task(kernel);

    // read index
    return result.read(queue);
}
开发者ID:Mageswaran1989,项目名称:compute,代码行数:37,代码来源:serial_count_if.hpp

示例11: reduce

size_t reduce(InputIterator first,
              size_t count,
              OutputIterator result,
              size_t block_size,
              BinaryFunction function,
              command_queue &queue)
{
    typedef typename
        std::iterator_traits<InputIterator>::value_type
        input_type;
    typedef typename
        boost::compute::result_of<BinaryFunction(input_type, input_type)>::type
        result_type;

    const context &context = queue.get_context();
    size_t block_count = count / 2 / block_size;
    size_t total_block_count =
        static_cast<size_t>(std::ceil(float(count) / 2.f / float(block_size)));

    if(block_count != 0){
        meta_kernel k("block_reduce");
        size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output");
        size_t block_arg = k.add_arg<input_type *>(memory_object::local_memory, "block");

        k <<
            "const uint gid = get_global_id(0);\n" <<
            "const uint lid = get_local_id(0);\n" <<

            // copy values to local memory
            "block[lid] = " <<
                function(first[k.make_var<uint_>("gid*2+0")],
                         first[k.make_var<uint_>("gid*2+1")]) << ";\n" <<

            // perform reduction
            "for(uint i = 1; i < " << uint_(block_size) << "; i <<= 1){\n" <<
            "    barrier(CLK_LOCAL_MEM_FENCE);\n" <<
            "    uint mask = (i << 1) - 1;\n" <<
            "    if((lid & mask) == 0){\n" <<
            "        block[lid] = " <<
                         function(k.expr<input_type>("block[lid]"),
                                  k.expr<input_type>("block[lid+i]")) << ";\n" <<
            "    }\n" <<
            "}\n" <<

            // write block result to global output
            "if(lid == 0)\n" <<
            "    output[get_group_id(0)] = block[0];\n";

        kernel kernel = k.compile(context);
        kernel.set_arg(output_arg, result.get_buffer());
        kernel.set_arg(block_arg, local_buffer<input_type>(block_size));

        queue.enqueue_1d_range_kernel(kernel,
                                      0,
                                      block_count * block_size,
                                      block_size);
    }

    // serially reduce any leftovers
    if(block_count * block_size * 2 < count){
        size_t last_block_start = block_count * block_size * 2;

        meta_kernel k("extra_serial_reduce");
        size_t count_arg = k.add_arg<uint_>("count");
        size_t offset_arg = k.add_arg<uint_>("offset");
        size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output");
        size_t output_offset_arg = k.add_arg<uint_>("output_offset");

        k <<
            k.decl<result_type>("result") << " = \n" <<
                first[k.expr<uint_>("offset")] << ";\n" <<
            "for(uint i = offset + 1; i < count; i++)\n" <<
            "    result = " <<
                     function(k.var<result_type>("result"),
                              first[k.var<uint_>("i")]) << ";\n" <<
            "output[output_offset] = result;\n";

        kernel kernel = k.compile(context);
        kernel.set_arg(count_arg, static_cast<uint_>(count));
        kernel.set_arg(offset_arg, static_cast<uint_>(last_block_start));
        kernel.set_arg(output_arg, result.get_buffer());
        kernel.set_arg(output_offset_arg, static_cast<uint_>(block_count));

        queue.enqueue_task(kernel);
    }

    return total_block_count;
}
开发者ID:junmuz,项目名称:compute,代码行数:88,代码来源:reduce.hpp

示例12: serial_reduce_by_key

inline size_t serial_reduce_by_key(InputKeyIterator keys_first,
                                   InputKeyIterator keys_last,
                                   InputValueIterator values_first,
                                   OutputKeyIterator keys_result,
                                   OutputValueIterator values_result,
                                   BinaryFunction function,
                                   BinaryPredicate predicate,
                                   command_queue &queue)
{
    typedef typename
        std::iterator_traits<InputValueIterator>::value_type value_type;
    typedef typename
        std::iterator_traits<InputKeyIterator>::value_type key_type;
    typedef typename
        ::boost::compute::result_of<BinaryFunction(value_type, value_type)>::type result_type;

    const context &context = queue.get_context();
    size_t count = detail::iterator_range_size(keys_first, keys_last);
    if(count < 1){
        return count;
    }

    meta_kernel k("serial_reduce_by_key");
    size_t count_arg = k.add_arg<uint_>("count");
    size_t result_size_arg = k.add_arg<uint_ *>(memory_object::global_memory,
                                                "result_size");

    k <<
        k.decl<result_type>("result") <<
            " = " << values_first[0] << ";\n" <<
        k.decl<key_type>("previous_key") << " = " << keys_first[0] << ";\n" <<
        k.decl<result_type>("value") << ";\n" <<
        k.decl<key_type>("key") << ";\n" <<

        k.decl<uint_>("size") << " = 1;\n" <<

        keys_result[0] << " = previous_key;\n" <<
        values_result[0] << " = result;\n" <<

        "for(ulong i = 1; i < count; i++) {\n" <<
        "    value = " << values_first[k.var<uint_>("i")] << ";\n" <<
        "    key = " << keys_first[k.var<uint_>("i")] << ";\n" <<
        "    if (" << predicate(k.var<key_type>("previous_key"),
                                k.var<key_type>("key")) << ") {\n" <<

        "        result = " << function(k.var<result_type>("result"),
                                        k.var<result_type>("value")) << ";\n" <<
        "    }\n " <<
        "    else { \n" <<
                 keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" <<
                 values_result[k.var<uint_>("size - 1")] << " = result;\n" <<
        "        result = value;\n" <<
        "        size++;\n" <<
        "    } \n" <<
        "    previous_key = key;\n" <<
        "}\n" <<
        keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" <<
        values_result[k.var<uint_>("size - 1")] << " = result;\n" <<
        "*result_size = size;";

    kernel kernel = k.compile(context);

    scalar<uint_> result_size(context);
    kernel.set_arg(result_size_arg, result_size.get_buffer());
    kernel.set_arg(count_arg, static_cast<uint_>(count));

    queue.enqueue_task(kernel);

    return static_cast<size_t>(result_size.read(queue));
}
开发者ID:AbhinavJain13,项目名称:turicreate,代码行数:70,代码来源:serial_reduce_by_key.hpp

示例13: radix_sort_impl


//.........这里部分代码省略.........
    uint_ block_count = static_cast<uint_>(count / block_size);
    if(block_count * block_size != count){
        block_count++;
    }

    // setup temporary buffers
    vector<value_type> output(count, context);
    vector<T2> values_output(sort_by_key ? count : 0, context);
    vector<uint_> offsets(k2, context);
    vector<uint_> counts(block_count * k2, context);

    const buffer *input_buffer = &first.get_buffer();
    uint_ input_offset = static_cast<uint_>(first.get_index());
    const buffer *output_buffer = &output.get_buffer();
    uint_ output_offset = 0;
    const buffer *values_input_buffer = &values_first.get_buffer();
    uint_ values_input_offset = static_cast<uint_>(values_first.get_index());
    const buffer *values_output_buffer = &values_output.get_buffer();
    uint_ values_output_offset = 0;

    for(uint_ i = 0; i < sizeof(sort_type) * CHAR_BIT / k; i++){
        // write counts
        count_kernel.set_arg(0, *input_buffer);
        count_kernel.set_arg(1, input_offset);
        count_kernel.set_arg(2, static_cast<uint_>(count));
        count_kernel.set_arg(3, counts);
        count_kernel.set_arg(4, offsets);
        count_kernel.set_arg(5, block_size * sizeof(uint_), 0);
        count_kernel.set_arg(6, i * k);
        queue.enqueue_1d_range_kernel(count_kernel,
                                      0,
                                      block_count * block_size,
                                      block_size);

        // scan counts
        if(k == 1){
            typedef uint2_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 2),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else if(k == 2){
            typedef uint4_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 4),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else if(k == 4){
            typedef uint16_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 16),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else {
            BOOST_ASSERT(false && "unknown k");
            break;
        }

        // scan global offsets
        scan_kernel.set_arg(0, counts);
        scan_kernel.set_arg(1, offsets);
        scan_kernel.set_arg(2, block_count);
        queue.enqueue_task(scan_kernel);

        // scatter values
        scatter_kernel.set_arg(0, *input_buffer);
        scatter_kernel.set_arg(1, input_offset);
        scatter_kernel.set_arg(2, static_cast<uint_>(count));
        scatter_kernel.set_arg(3, i * k);
        scatter_kernel.set_arg(4, counts);
        scatter_kernel.set_arg(5, offsets);
        scatter_kernel.set_arg(6, *output_buffer);
        scatter_kernel.set_arg(7, output_offset);
        if(sort_by_key){
            scatter_kernel.set_arg(8, *values_input_buffer);
            scatter_kernel.set_arg(9, values_input_offset);
            scatter_kernel.set_arg(10, *values_output_buffer);
            scatter_kernel.set_arg(11, values_output_offset);
        }
        queue.enqueue_1d_range_kernel(scatter_kernel,
                                      0,
                                      block_count * block_size,
                                      block_size);

        // swap buffers
        std::swap(input_buffer, output_buffer);
        std::swap(values_input_buffer, values_output_buffer);
        std::swap(input_offset, output_offset);
        std::swap(values_input_offset, values_output_offset);
    }
}
开发者ID:2bbb,项目名称:compute,代码行数:101,代码来源:radix_sort.hpp

示例14: radix_sort


//.........这里部分代码省略.........
    }

    // setup kernels
    program radix_sort_program =
        program::create_with_source(radix_sort_source, context);
    std::stringstream options;
    options << "-DK=" << k;
    options << " -DT=" << type_name<sort_type>();
    options << " -DBLOCK_SIZE=" << block_size;

    if(boost::is_floating_point<value_type>::value){
        options << " -DIS_FLOATING_POINT";
    }

    if(boost::is_signed<value_type>::value){
        options << " -DIS_SIGNED";
    }

    radix_sort_program.build(options.str());

    kernel count_kernel(radix_sort_program, "count");
    kernel scan_kernel(radix_sort_program, "scan");
    kernel scatter_kernel(radix_sort_program, "scatter");

    // setup temporary buffers
    vector<value_type> output(count, context);
    vector<uint_> offsets(k2, context);
    vector<uint_> counts(block_count * k2, context);

    const buffer *input_buffer = &first.get_buffer();
    const buffer *output_buffer = &output.get_buffer();

    for(uint_ i = 0; i < sizeof(sort_type) * CHAR_BIT / k; i++){
        // write counts
        count_kernel.set_arg(0, *input_buffer);
        count_kernel.set_arg(1, static_cast<uint_>(count));
        count_kernel.set_arg(2, counts.get_buffer());
        count_kernel.set_arg(3, offsets.get_buffer());
        count_kernel.set_arg(4, block_size * sizeof(uint_), 0);
        count_kernel.set_arg(5, i * k);
        queue.enqueue_1d_range_kernel(count_kernel,
                                      0,
                                      block_count * block_size,
                                      block_size);

        // scan counts
        if(k == 1){
            typedef uint2_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 2),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else if(k == 2){
            typedef uint4_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 4),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else if(k == 4){
            typedef uint16_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 16),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else {
            BOOST_ASSERT(false && "unknown k");
            break;
        }

        // scan global offsets
        scan_kernel.set_arg(0, counts.get_buffer());
        scan_kernel.set_arg(1, offsets.get_buffer());
        scan_kernel.set_arg(2, block_count);
        queue.enqueue_task(scan_kernel);

        // scatter values
        scatter_kernel.set_arg(0, *input_buffer);
        scatter_kernel.set_arg(1, static_cast<uint_>(count));
        scatter_kernel.set_arg(2, i * k);
        scatter_kernel.set_arg(3, counts.get_buffer());
        scatter_kernel.set_arg(4, offsets.get_buffer());
        scatter_kernel.set_arg(5, *output_buffer);
        queue.enqueue_1d_range_kernel(scatter_kernel,
                                      0,
                                      block_count * block_size,
                                      block_size);

        // swap buffers
        std::swap(input_buffer, output_buffer);
    }
}
开发者ID:d-meiser,项目名称:compute,代码行数:101,代码来源:radix_sort.hpp


注:本文中的command_queue::enqueue_task方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。