#include "cpuid.h"
#include "sanitizer_common/sanitizer_common.h"
#if !SANITIZER_FUCHSIA
#include "sanitizer_common/sanitizer_posix.h"
#endif
#include "xray_defs.h"
#include "xray_interface_internal.h"

#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
#include <sys/types.h>
#include <sys/sysctl.h>
#elif SANITIZER_FUCHSIA
#include <zircon/syscalls.h>
#endif

#include <atomic>
#include <cstdint>
#include <errno.h>
#include <fcntl.h>
#include <iterator>
#include <limits>
#include <tuple>
#include <unistd.h>

namespace __xray {

#if SANITIZER_LINUX
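// Reads up to (End - Begin) bytes from Fd into the buffer at Begin, retrying
// on EINTR. Returns the number of bytes read and whether the read completed
// without error.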
static std::pair<ssize_t, bool>
retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
  auto BytesToRead = std::distance(Begin, End);
  ssize_t BytesRead;
  ssize_t TotalBytesRead = 0;
  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
    if (BytesRead == -1) {
      if (errno == EINTR)
        continue;
      Report("Read error; errno = %d\n", errno);
      return std::make_pair(TotalBytesRead, false);
    }

    TotalBytesRead += BytesRead;
    BytesToRead -= BytesRead;
    Begin += BytesRead;
  }
  return std::make_pair(TotalBytesRead, true);
}

static bool readValueFromFile(const char *Filename,
                              long long *Value) XRAY_NEVER_INSTRUMENT {
  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
  if (Fd == -1)
    return false;
  static constexpr size_t BufSize = 256;
  char Line[BufSize] = {};
  ssize_t BytesRead;
  bool Success;
  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
  close(Fd);
  if (!Success)
    return false;
  const char *End = nullptr;
  long long Tmp = internal_simple_strtoll(Line, &End, 10);
  bool Result = false;
  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
    *Value = Tmp;
    Result = true;
  }
  return Result;
}

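// Both sysfs files read below report the frequency in kHz, hence the scaling
// by 1000 to yield Hz.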
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  long long TSCFrequency = -1;
  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
                        &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else if (readValueFromFile(
                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                 &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
  }
  return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
}
#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
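// On FreeBSD, NetBSD, and Darwin the kernel exports the measured TSC
// frequency directly through sysctl, so the value is used as-is.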
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  long long TSCFrequency = -1;
  size_t tscfreqsz = sizeof(TSCFrequency);
#if SANITIZER_APPLE
  if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
                            &tscfreqsz, NULL, 0) != -1) {
#else
  if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
                            NULL, 0) != -1) {
#endif
    return static_cast<uint64_t>(TSCFrequency);
  } else {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
  }

  return 0;
}
#elif !SANITIZER_FUCHSIA
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  /* Not supported */
  return 0;
}
#endif

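// The two-byte sequences below are stored as little-endian uint16_t values,
// so writing MovR10Seq (0xba41) lays down the bytes 41 ba: the REX.B prefix
// plus the opcode of 'mov r10d, imm32'. Similarly, Jmp9Seq (0x09eb) encodes
// 'jmp +9' (eb 09) and NopwSeq (0x9066) encodes the two-byte 'nopw' (66 90).
// CallOpCode (e8) and JmpOpCode (e9) each take a rel32 operand; RetOpCode
// (c3) is a plain near return.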
static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
static constexpr uint16_t Jmp20Seq = 0x14eb;
static constexpr uint16_t Jmp15Seq = 0x0feb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
static constexpr uint16_t NopwSeq = 0x9066;

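// A call/jmp rel32 displacement is a signed 32-bit immediate; a trampoline
// farther from a sled than this range cannot be reached, and patching fails.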
static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};

bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled,
                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +9
  //   <9 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   call <relative 32bit offset to entry trampoline>
  //
  // We need to do this in the following order:
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 2-byte jmp instruction).
  // 2. Put the call opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  //    opcode and first operand.
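  //
  // Writing the leading two bytes last, with an atomic release store, means a
  // thread racing through the sled observes either the original 'jmp +9' or
  // the completed 'mov r10d' sequence, never a torn, half-patched
  // instruction.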
  //
  // Prerequisite is to compute the relative offset to the trampoline's address.
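  // The rel32 operand of the call is relative to the end of the patched
  // sequence, which is 11 bytes long (a 6-byte 'mov r10d, imm32' followed by
  // a 5-byte 'call rel32'), hence the Address + 11 below.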
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(Trampoline),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   ret
  //   <10 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   jmp <relative 32bit offset to exit trampoline>
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 1-byte ret instruction).
  // 2. Put the jmp opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  //    opcode and first operand.
  //
  // Prerequisite is to compute the relative offset to the
  // __xray_FunctionExit function's address.
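  // Note that the exit sled uses a jmp rather than a call: the original ret
  // has been overwritten, so the trampoline takes over the epilogue and
  // performs the actual return on the function's behalf.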
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(__xray_FunctionExit),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the tail call sled with a similar
  // sequence as the entry sled, but calling the tail exit trampoline instead.
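  // That is: 'mov r10d, <function id>' followed by
  // 'call <rel32 offset to __xray_FunctionTailExit>'.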
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset =
      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
      (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(__xray_FunctionTailExit),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +15          // 2 bytes
  //   ...
  //
  // With the following:
  //
  //   nopw             // 2 bytes
  //   ...
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'.
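  //
  // Only the first two bytes of the sled change: enabling replaces the short
  // jmp over the event-handling code with a nopw, so execution falls through
  // into it; disabling restores the jmp.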
  const uint64_t Address = Sled.address();
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
        std::memory_order_release);
  }
  return false;
}

bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +20          // 2 byte instruction
  //   ...
  //
  // With the following:
  //
  //   nopw             // 2 bytes
  //   ...
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
  // The 20 byte sled stashes three argument registers, calls the trampoline,
  // unstashes the registers and returns. If the arguments are already in
  // the correct registers, the stashing and unstashing become equivalently
  // sized nops.
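  //
  // As with the custom event sled, patching only toggles the leading two
  // bytes between 'jmp +20' and 'nopw'.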
  const uint64_t Address = Sled.address();
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
        std::memory_order_release);
  }
  return false;
}

#if !SANITIZER_FUCHSIA
// We determine whether the CPU we're running on has the correct features we
// need. On x86_64 this means rdtscp support.
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
  unsigned int EAX, EBX, ECX, EDX;

  // We check whether rdtscp support is enabled. According to the x86_64
  // manual, the level should be set to 0x80000001, and we should look at
  // bit 27 in EDX. That's 0x8000000 (or 1u << 27).
  __asm__ __volatile__("cpuid" : "=a" (EAX), "=b" (EBX), "=c" (ECX), "=d" (EDX)
                       : "0" (0x80000001));
  if (!(EDX & (1u << 27))) {
    Report("Missing rdtscp support.\n");
    return false;
  }
  // Also check whether we can determine the CPU frequency, since if we
  // cannot, we should use the emulated TSC instead.
  if (!getTSCFrequency()) {
    Report("Unable to determine CPU frequency.\n");
    return false;
  }
  return true;
}
#endif

} // namespace __xray