amd_cpp_kernel.cpp source code [boost/libs/compute/example/amd_cpp_kernel.cpp]

1	//---------------------------------------------------------------------------//
2	// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
3	//
4	// Distributed under the Boost Software License, Version 1.0
5	// See accompanying file LICENSE_1_0.txt or copy at
6	// http://www.boost.org/LICENSE_1_0.txt
7	//
8	// See http://boostorg.github.com/compute for more information.
9	//---------------------------------------------------------------------------//
10
11	#include <iostream>
12
13	#include <boost/compute/command_queue.hpp>
14	#include <boost/compute/kernel.hpp>
15	#include <boost/compute/program.hpp>
16	#include <boost/compute/system.hpp>
17	#include <boost/compute/algorithm/copy.hpp>
18	#include <boost/compute/container/vector.hpp>
19	#include <boost/compute/utility/source.hpp>
20
21	namespace compute = boost::compute;
22
23	// this example shows how to use the static c++ kernel language
24	// extension (currently only supported by AMD) to compile and
25	// execute a templated c++ kernel.
26	// The platform vendor string is used to decide whether this is an AMD platform.
27	int main()
28	{
29	// get default device and setup context
30	compute::device device = compute::system::default_device();
31	compute::context context(device);
32	compute::command_queue queue(context, device);
33
34	// check the platform vendor string
35	if(device.platform().vendor() != "Advanced Micro Devices, Inc."){
36	std::cerr << "error: static C++ kernel language is only "
37	<< "supported on AMD devices."
38	<< std::endl;
39	return `0`;
40	}
41
42	// create input int values and copy them to the device
43	int int_data[] = { `1`, `2`, `3`, `4`};
44	compute::vector<int> int_vector(int_data, int_data + `4`, queue);
45
46	// create input float values and copy them to the device
47	float float_data[] = { `2.0f`, `4.0f`, `6.0f`, `8.0f` };
48	compute::vector<float> float_vector(float_data, float_data + `4`, queue);
49
50	// create kernel source with a templated function and templated kernel
51	const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
52	// define our templated function which returns the square of its input
53	template<typename T>
54	inline T square(const T x)
55	{
56	return x * x;
57	}
58
59	// define our templated kernel which calls square on each value in data
60	template<typename T>
61	__kernel void square_kernel(__global T *data)
62	{
63	const uint i = get_global_id(`0`);
64	data[i] = square(data[i]);
65	}
66
67	// explicitly instantiate the square kernel for int's. this allows
68	// for it to be called from the host with the given mangled name.
69	template __attribute__((mangled_name(square_kernel_int)))
70	__kernel void square_kernel(__global int *data);
71
72	// also instantiate the square kernel for float's.
73	template __attribute__((mangled_name(square_kernel_float)))
74	__kernel void square_kernel(__global float *data);
75	);
76
77	// build the program. must enable the c++ static kernel language
78	// by passing the "-x clc++" compile option.
79	compute::program square_program =
80	compute::program::build_with_source(source, context, "-x clc++");
81
82	// create the square kernel for int's by using its mangled name declared
83	// in the explicit template instantiation.
84	compute::kernel square_int_kernel(square_program, "square_kernel_int");
85	square_int_kernel.set_arg(`0`, int_vector);
86
87	// execute the square int kernel
88	queue.enqueue_1d_range_kernel(kernel: square_int_kernel, global_work_offset: `0`, global_work_size: int_vector.size(), local_work_size: `4`);
89
90	// print out the squared int values
91	std::cout << "int's: ";
92	compute::copy(
93	int_vector.begin(), int_vector.end(),
94	std::ostream_iterator<int>(std::cout, " "),
95	queue
96	);
97	std::cout << std::endl;
98
99	// now create the square kernel for float's
100	compute::kernel square_float_kernel(square_program, "square_kernel_float");
101	square_float_kernel.set_arg(`0`, float_vector);
102
103	// execute the square int kernel
104	queue.enqueue_1d_range_kernel(kernel: square_float_kernel, global_work_offset: `0`, global_work_size: float_vector.size(), local_work_size: `4`);
105
106	// print out the squared float values
107	std::cout << "float's: ";
108	compute::copy(
109	float_vector.begin(), float_vector.end(),
110	std::ostream_iterator<float>(std::cout, " "),
111	queue
112	);
113	std::cout << std::endl;
114
115	return `0`;
116	}
117

source code of boost/libs/compute/example/amd_cpp_kernel.cpp