1 | //! Direct, unsafe bindings for Linux [`perf_event_open`][man] and friends. |
2 | //! |
3 | //! Linux's `perf_event_open` system call provides access to the processor's |
4 | //! performance measurement counters (things like instructions retired, cache |
5 | //! misses, and so on), kernel counters (context switches, page faults), and |
6 | //! many other sources of performance information. |
7 | //! |
8 | //! You can't get the `perf_event_open` function from the `libc` crate, as you |
9 | //! would any other system call. The Linux standard C library does not provide a |
10 | //! binding for this function or its associated types and constants. |
11 | //! |
12 | //! Rust analogs to the C types and constants from `<linux/perf_event.h>` and |
13 | //! `<linux/hw_breakpoint.h>`, generated with `bindgen`, are available in the |
14 | //! [`bindings`] module. |
15 | //! |
16 | //! There are several ioctls for use with `perf_event_open` file descriptors; |
17 | //! see the [`ioctls`] module for those. |
18 | //! |
19 | //! For a safe and convenient interface to this functionality, see the |
20 | //! [`perf_event`] crate. |
21 | //! |
22 | //! ## Using the raw API |
23 | //! |
24 | //! As the kernel interface evolves, the struct and union types from the |
25 | //! [`bindings`] module may acquire new fields. To ensure that your code will |
26 | //! continue to compile against newer versions of this crate, you should |
27 | //! construct values of these types by calling their `Default` implementations, |
28 | //! which return zero-filled values, and then assigning to the fields you care |
29 | //! about. For example: |
30 | //! |
31 | //! ``` |
32 | //! use perf_event_open_sys as sys; |
33 | //! |
34 | //! // Construct a zero-filled `perf_event_attr`. |
35 | //! let mut attrs = sys::bindings::perf_event_attr::default(); |
36 | //! |
37 | //! // Populate the fields we need. |
38 | //! attrs.size = std::mem::size_of::<sys::bindings::perf_event_attr>() as u32; |
39 | //! attrs.type_ = sys::bindings::perf_type_id_PERF_TYPE_HARDWARE; |
40 | //! attrs.config = sys::bindings::perf_hw_id_PERF_COUNT_HW_INSTRUCTIONS as u64; |
41 | //! attrs.set_disabled(1); |
42 | //! attrs.set_exclude_kernel(1); |
43 | //! attrs.set_exclude_hv(1); |
44 | //! |
45 | //! // Make the system call. |
46 | //! let result = unsafe { |
47 | //! sys::perf_event_open(&mut attrs, 0, -1, -1, 0) |
48 | //! }; |
49 | //! |
50 | //! if result < 0 { |
51 | //! // ... handle error |
52 | //! } |
53 | //! |
54 | //! // ... use `result` as a raw file descriptor |
55 | //! ``` |
56 | //! |
57 | //! It is not necessary to adjust `size` to what the running kernel expects: |
58 | //! older kernels can accept newer `perf_event_attr` structs, and vice versa. As |
59 | //! long as the `size` field was properly initialized, an error result of |
60 | //! `E2BIG` indicates that the `attrs` structure has requested behavior the |
61 | //! kernel is too old to support. |
62 | //! |
63 | //! When `E2BIG` is returned, the kernel writes the size it expected back to the |
64 | //! `size` field of the `attrs` struct. Again, if you want to retry the call, it |
65 | //! is not necessary to adjust the size you pass to match what the kernel passed |
66 | //! back. The size from the kernel just indicates which version of the API the |
67 | //! kernel supports; see the documentation for the `PERF_EVENT_ATTR_SIZE_VER...` |
68 | //! constants for details. |
69 | //! |
70 | //! ## Kernel versions |
71 | //! |
72 | //! The bindings in this crate are generated from the Linux kernel headers |
73 | //! packaged by Fedora as `kernel-headers-5.18.4-200.fc36`, which |
74 | //! corresponds to `PERF_EVENT_ATTR_SIZE_VER7`. |
75 | //! |
76 | //! As explained above, bugs aside, it is not necessary to use the version of |
77 | //! these structures that matches the kernel you want to run under, so it should |
78 | //! always be acceptable to use the latest version of this crate, even if you |
79 | //! want to support older kernels. |
80 | //! |
81 | //! This crate's `README.md` file includes instructions on regenerating the |
82 | //! bindings from newer kernel headers. However, this can be a breaking change |
83 | //! for users that have not followed the advice above, so regeneration should |
84 | //! cause a major version increment. |
85 | //! |
86 | //! If you need features that are available only in a more recent version of the |
87 | //! types than this crate provides, please file an issue. |
88 | //! |
89 | //! ## Linux API Backward/Forward Compatibility Strategy |
90 | //! |
91 | //! (This is more detail than necessary if you just want to use the crate. I |
92 | //! want to write this down somewhere so that I have something to refer to when |
93 | //! I forget the details.) |
94 | //! |
95 | //! It is an important principle of Linux kernel development that new versions |
96 | //! of the kernel should not break userspace. If upgrading your kernel breaks a |
97 | //! user program, then that's a bug in the kernel. (This refers to the run-time |
98 | //! interface. I don't know what the stability rules are for the kernel headers: |
99 | //! can new headers cause old code to fail to compile? Anyway, run time is our |
100 | //! concern here.) |
101 | //! |
102 | //! But when you have an open-ended, complex system call like `perf_event_open`, |
103 | //! it's really important for the interface to be able to evolve. Certainly, old |
104 | //! programs must run properly on new kernels, but ideally, it should work the |
105 | //! other way, too: a program built against a newer version of the kernel |
106 | //! headers should run on an older kernel, as long as it only requests features |
107 | //! the old kernel actually supports. That is, simply compiling against newer |
108 | //! headers should not be disqualifying - only using those new headers to |
109 | //! request new features the running kernel can't provide should cause an error. |
110 | //! |
111 | //! Consider the specific case of passing a struct like `perf_event_attr` to a |
112 | //! system call like `perf_event_open`. In general, there are two versions of |
113 | //! the struct in play: the version the user program was compiled against, and |
114 | //! the version the running kernel was compiled against. How can we let old |
115 | //! programs call `perf_event_open` on new kernels, and vice versa? |
116 | //! |
117 | //! Linux has a neat strategy for making this work. There are four rules: |
118 | //! |
119 | //! - Every system call that passes a struct to the kernel includes some |
120 | //! indication of how large userspace thinks that struct is. For |
121 | //! `perf_event_open`, it's the `size` field of the `perf_event_attr` |
122 | //! struct. For `ioctl`s that pass a struct, it's a bitfield of the |
123 | //! `request` value. |
124 | //! |
125 | //! - Fields are never deleted from structs. At most, newer kernel headers may |
126 | //! rename them to `__reserved_foo` or something like that, but once a field |
127 | //! has been placed, its layout in the struct never changes. |
128 | //! |
129 | //! - New fields are added to the end of structs. |
130 | //! |
131 | //! - New fields' semantics are chosen such that filling them with zeros |
132 | //! preserves the old behavior. That is, turning an old struct into a new |
133 | //! struct by extending it with zero bytes should always give you a new |
134 | //! struct with the same meaning as the old struct. |
135 | //! |
136 | //! Then, the kernel's strategy for receiving structs from userspace is as |
137 | //! follows (according to the comments for `copy_struct_from_user` in |
138 | //! the kernel source `include/linux/uaccess.h`): |
139 | //! |
140 | //! - If the kernel's struct is larger than the one passed from userspace, |
141 | //! then that means the kernel is newer than the userspace program. The |
142 | //! kernel copies the userspace data into the initial bytes of its own |
143 | //! struct, and zeros the remaining bytes. Since zeroed fields have no |
144 | //! effect, the resulting struct properly reflects the user's intent. |
145 | //! |
146 | //! - If the kernel's struct is smaller than the one passed from userspace, |
147 | //! then that means that a userspace program compiled against newer kernel |
148 | //! headers is running on an older kernel. The kernel checks that the excess |
149 | //! bytes in the userspace struct are all zero; if they are not, the system |
150 | //! call returns `E2BIG`, indicating that userspace has requested a feature |
151 | //! the kernel doesn't support. If they are all zero, then the kernel |
152 | //! initializes its own struct with the bytes from the start of the |
153 | //! userspace struct, and drops the rest. Since the dropped bytes were all |
154 | //! zero, they did not affect the requested behavior, and the resulting |
155 | //! struct reflects the user's intent. |
156 | //! |
157 | //! - In either case, the kernel verifies that any `__reserved_foo` fields in |
158 | //! its own version of the struct are zero. |
159 | //! |
160 | //! This covers both the old-on-new and new-on-old cases, and returns an error |
161 | //! only when the call requests functionality the kernel doesn't support. |
162 | //! |
163 | //! You can find one example of using `perf_event_open` in the [`perf_event`] |
164 | //! crate, which provides a safe interface to a subset of `perf_event_open`'s |
165 | //! functionality. |
166 | //! |
167 | //! [`bindings`]: bindings/index.html |
168 | //! [`ioctls`]: ioctls/index.html |
169 | //! [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html |
170 | //! [`perf_event`]: https://crates.io/crates/perf_event |
171 | |
172 | pub mod bindings; |
173 | |
174 | use libc::pid_t; |
175 | use std::os::raw::{c_int, c_ulong}; |
176 | |
177 | /// The `perf_event_open` system call. |
178 | /// |
179 | /// See the [`perf_event_open(2) man page`][man] for details. |
180 | /// |
181 | /// On error, this returns a negated raw OS error value. The C `errno` value is |
182 | /// not changed. |
183 | /// |
184 | /// Note: The `attrs` argument needs to be a `*mut` because if the `size` field |
185 | /// is too small or too large, the kernel writes the size it was expecing back |
186 | /// into that field. It might do other things as well. |
187 | /// |
188 | /// # Safety |
189 | /// |
190 | /// The `attrs` argument must point to a properly initialized |
191 | /// `perf_event_attr` struct. The measurements and other behaviors its |
192 | /// contents request must be safe. |
193 | /// |
194 | /// [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html |
195 | pub unsafe fn perf_event_open( |
196 | attrs: *mut bindings::perf_event_attr, |
197 | pid: pid_t, |
198 | cpu: c_int, |
199 | group_fd: c_int, |
200 | flags: c_ulong, |
201 | ) -> c_int { |
202 | libc::syscall( |
203 | num:bindings::__NR_perf_event_open as libc::c_long, |
204 | attrs as *const bindings::perf_event_attr, |
205 | pid, |
206 | cpu, |
207 | group_fd, |
208 | flags, |
209 | ) as c_int |
210 | } |
211 | |
212 | #[allow (dead_code, non_snake_case)] |
213 | pub mod ioctls { |
214 | //! Ioctls for use with `perf_event_open` file descriptors. |
215 | //! |
216 | //! See the [`perf_event_open(2)`][man] man page for details. |
217 | //! |
218 | //! On error, these return `-1` and set the C `errno` value. |
219 | //! |
220 | //! [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html |
221 | use crate::bindings::{self, perf_event_attr, perf_event_query_bpf}; |
222 | use std::os::raw::{c_char, c_int, c_uint, c_ulong}; |
223 | |
224 | macro_rules! define_ioctls { |
225 | ( $( $args:tt )* ) => { |
226 | $( |
227 | define_ioctl!($args); |
228 | )* |
229 | } |
230 | } |
231 | |
232 | macro_rules! define_ioctl { |
233 | ({ $name:ident, $ioctl:ident, $arg_type:ty }) => { |
234 | #[allow(clippy::missing_safety_doc)] |
235 | pub unsafe fn $name(fd: c_int, arg: $arg_type) -> c_int { |
236 | untyped_ioctl(fd, bindings::$ioctl, arg) |
237 | } |
238 | }; |
239 | } |
240 | |
241 | define_ioctls! { |
242 | { ENABLE, ENABLE, c_uint } |
243 | { DISABLE, DISABLE, c_uint } |
244 | { REFRESH, REFRESH, c_int } |
245 | { RESET, RESET, c_uint } |
246 | { PERIOD, PERIOD, u64 } |
247 | { SET_OUTPUT, SET_OUTPUT, c_int } |
248 | { SET_FILTER, SET_FILTER, *mut c_char } |
249 | { ID, ID, *mut u64 } |
250 | { SET_BPF, SET_BPF, u32 } |
251 | { PAUSE_OUTPUT, PAUSE_OUTPUT, u32 } |
252 | { QUERY_BPF, QUERY_BPF, *mut perf_event_query_bpf } |
253 | { MODIFY_ATTRIBUTES, MODIFY_ATTRIBUTES, *mut perf_event_attr } |
254 | } |
255 | |
256 | unsafe fn untyped_ioctl<A>(fd: c_int, ioctl: bindings::perf_event_ioctls, arg: A) -> c_int { |
257 | #[cfg (any(target_env = "musl" , target_os = "android" ))] |
258 | return libc::ioctl(fd, ioctl as c_int, arg); |
259 | |
260 | #[cfg (not(any(target_env = "musl" , target_os = "android" )))] |
261 | libc::ioctl(fd, ioctl as c_ulong, arg) |
262 | } |
263 | } |
264 | |