//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
10
#include <iostream>
#include <iterator>

#include <boost/compute/command_queue.hpp>
#include <boost/compute/kernel.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/utility/source.hpp>
20
21namespace compute = boost::compute;
22
23// this example shows how to use the static c++ kernel language
24// extension (currently only supported by AMD) to compile and
25// execute a templated c++ kernel.
26// Using platform vendor info to decide if this is AMD platform
27int main()
28{
29 // get default device and setup context
30 compute::device device = compute::system::default_device();
31 compute::context context(device);
32 compute::command_queue queue(context, device);
33
34 // check the platform vendor string
35 if(device.platform().vendor() != "Advanced Micro Devices, Inc."){
36 std::cerr << "error: static C++ kernel language is only "
37 << "supported on AMD devices."
38 << std::endl;
39 return 0;
40 }
41
42 // create input int values and copy them to the device
43 int int_data[] = { 1, 2, 3, 4};
44 compute::vector<int> int_vector(int_data, int_data + 4, queue);
45
46 // create input float values and copy them to the device
47 float float_data[] = { 2.0f, 4.0f, 6.0f, 8.0f };
48 compute::vector<float> float_vector(float_data, float_data + 4, queue);
49
50 // create kernel source with a templated function and templated kernel
51 const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
52 // define our templated function which returns the square of its input
53 template<typename T>
54 inline T square(const T x)
55 {
56 return x * x;
57 }
58
59 // define our templated kernel which calls square on each value in data
60 template<typename T>
61 __kernel void square_kernel(__global T *data)
62 {
63 const uint i = get_global_id(0);
64 data[i] = square(data[i]);
65 }
66
67 // explicitly instantiate the square kernel for int's. this allows
68 // for it to be called from the host with the given mangled name.
69 template __attribute__((mangled_name(square_kernel_int)))
70 __kernel void square_kernel(__global int *data);
71
72 // also instantiate the square kernel for float's.
73 template __attribute__((mangled_name(square_kernel_float)))
74 __kernel void square_kernel(__global float *data);
75 );
76
77 // build the program. must enable the c++ static kernel language
78 // by passing the "-x clc++" compile option.
79 compute::program square_program =
80 compute::program::build_with_source(source, context, "-x clc++");
81
82 // create the square kernel for int's by using its mangled name declared
83 // in the explicit template instantiation.
84 compute::kernel square_int_kernel(square_program, "square_kernel_int");
85 square_int_kernel.set_arg(0, int_vector);
86
87 // execute the square int kernel
88 queue.enqueue_1d_range_kernel(kernel: square_int_kernel, global_work_offset: 0, global_work_size: int_vector.size(), local_work_size: 4);
89
90 // print out the squared int values
91 std::cout << "int's: ";
92 compute::copy(
93 int_vector.begin(), int_vector.end(),
94 std::ostream_iterator<int>(std::cout, " "),
95 queue
96 );
97 std::cout << std::endl;
98
99 // now create the square kernel for float's
100 compute::kernel square_float_kernel(square_program, "square_kernel_float");
101 square_float_kernel.set_arg(0, float_vector);
102
103 // execute the square int kernel
104 queue.enqueue_1d_range_kernel(kernel: square_float_kernel, global_work_offset: 0, global_work_size: float_vector.size(), local_work_size: 4);
105
106 // print out the squared float values
107 std::cout << "float's: ";
108 compute::copy(
109 float_vector.begin(), float_vector.end(),
110 std::ostream_iterator<float>(std::cout, " "),
111 queue
112 );
113 std::cout << std::endl;
114
115 return 0;
116}
117

// source code of boost/libs/compute/example/amd_cpp_kernel.cpp