1 | // SPDX-License-Identifier: LGPL-2.1 |
2 | /* |
3 | * rseq.c |
4 | * |
5 | * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
6 | * |
7 | * This library is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; only |
10 | * version 2.1 of the License. |
11 | * |
12 | * This library is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | */ |
17 | |
18 | #define _GNU_SOURCE |
19 | #include <errno.h> |
20 | #include <sched.h> |
21 | #include <stdio.h> |
22 | #include <stdlib.h> |
23 | #include <string.h> |
24 | #include <unistd.h> |
25 | #include <syscall.h> |
26 | #include <assert.h> |
27 | #include <signal.h> |
28 | #include <limits.h> |
29 | #include <dlfcn.h> |
30 | #include <stddef.h> |
31 | #include <sys/auxv.h> |
32 | #include <linux/auxvec.h> |
33 | |
34 | #include <linux/compiler.h> |
35 | |
36 | #include "../kselftest.h" |
37 | #include "rseq.h" |
38 | |
39 | /* |
40 | * Define weak versions to play nice with binaries that are statically linked |
41 | * against a libc that doesn't support registering its own rseq. |
42 | */ |
43 | __weak ptrdiff_t __rseq_offset; |
44 | __weak unsigned int __rseq_size; |
45 | __weak unsigned int __rseq_flags; |
46 | |
47 | static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; |
48 | static const unsigned int *libc_rseq_size_p = &__rseq_size; |
49 | static const unsigned int *libc_rseq_flags_p = &__rseq_flags; |
50 | |
/* Distance from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/*
 * Number of bytes in the registered rseq area. Holds -1U until rseq_init()
 * has run; 0 means the registration was unsuccessful.
 */
unsigned int rseq_size = -1U;

/* Flags that were used for the rseq registration. */
unsigned int rseq_flags;

/*
 * rseq feature size supported by the kernel. Holds -1U until rseq_init()
 * has run; 0 means the registration was unsuccessful.
 */
unsigned int rseq_feature_size = -1U;

/* Non-zero when this file, rather than libc, owns the rseq registration. */
static int rseq_ownership;

/* Set once at least one rseq registration has succeeded. */
static int rseq_reg_success;
71 | |
/* Feature size, in bytes, of the original struct rseq. */
#define ORIG_RSEQ_FEATURE_SIZE		20

/* Allocation size, in bytes, of the original struct rseq. */
#define ORIG_RSEQ_ALLOC_SIZE		32

/* Deliberately large size, in bytes, of the area allocated for the TLS. */
#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024
80 | |
81 | static |
82 | __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec" ), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { |
83 | .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, |
84 | }; |
85 | |
/*
 * Issue the rseq system call through syscall(), forwarding all four
 * arguments unchanged and handing back syscall()'s result.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret;

	ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
	return ret;
}
91 | |
/*
 * Issue the getcpu system call through syscall(), passing NULL as its third
 * argument and handing back syscall()'s result.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret;

	ret = syscall(__NR_getcpu, cpu, node, NULL);
	return ret;
}
96 | |
/*
 * Probe whether the rseq system call can be used.
 *
 * Calls rseq with a NULL area and zero length, flags and signature; that
 * call is expected to fail with -1.
 *
 * Returns 0 when errno is ENOSYS and 1 when errno is EINVAL. Aborts the
 * process if the call does not return -1 or errno holds any other value.
 */
int rseq_available(void)
{
	int rc;

	rc = sys_rseq(NULL, 0, 0, 0);
	if (rc != -1)
		abort();
	switch (errno) {
	case ENOSYS:
		return 0;
	case EINVAL:
		return 1;
	default:
		abort();
	}
}
113 | |
114 | int rseq_register_current_thread(void) |
115 | { |
116 | int rc; |
117 | |
118 | if (!rseq_ownership) { |
119 | /* Treat libc's ownership as a successful registration. */ |
120 | return 0; |
121 | } |
122 | rc = sys_rseq(rseq_abi: &__rseq_abi, rseq_len: rseq_size, flags: 0, RSEQ_SIG); |
123 | if (rc) { |
124 | if (RSEQ_READ_ONCE(rseq_reg_success)) { |
125 | /* Incoherent success/failure within process. */ |
126 | abort(); |
127 | } |
128 | return -1; |
129 | } |
130 | assert(rseq_current_cpu_raw() >= 0); |
131 | RSEQ_WRITE_ONCE(rseq_reg_success, 1); |
132 | return 0; |
133 | } |
134 | |
135 | int rseq_unregister_current_thread(void) |
136 | { |
137 | int rc; |
138 | |
139 | if (!rseq_ownership) { |
140 | /* Treat libc's ownership as a successful unregistration. */ |
141 | return 0; |
142 | } |
143 | rc = sys_rseq(rseq_abi: &__rseq_abi, rseq_len: rseq_size, flags: RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); |
144 | if (rc) |
145 | return -1; |
146 | return 0; |
147 | } |
148 | |
149 | static |
150 | unsigned int get_rseq_feature_size(void) |
151 | { |
152 | unsigned long auxv_rseq_feature_size, auxv_rseq_align; |
153 | |
154 | auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); |
155 | assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); |
156 | |
157 | auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); |
158 | assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); |
159 | if (auxv_rseq_feature_size) |
160 | return auxv_rseq_feature_size; |
161 | else |
162 | return ORIG_RSEQ_FEATURE_SIZE; |
163 | } |
164 | |
165 | static __attribute__((constructor)) |
166 | void rseq_init(void) |
167 | { |
168 | /* |
169 | * If the libc's registered rseq size isn't already valid, it may be |
170 | * because the binary is dynamically linked and not necessarily due to |
171 | * libc not having registered a restartable sequence. Try to find the |
172 | * symbols if that's the case. |
173 | */ |
174 | if (!*libc_rseq_size_p) { |
175 | libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset" ); |
176 | libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size" ); |
177 | libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags" ); |
178 | } |
179 | if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && |
180 | *libc_rseq_size_p != 0) { |
181 | /* rseq registration owned by glibc */ |
182 | rseq_offset = *libc_rseq_offset_p; |
183 | rseq_size = *libc_rseq_size_p; |
184 | rseq_flags = *libc_rseq_flags_p; |
185 | rseq_feature_size = get_rseq_feature_size(); |
186 | if (rseq_feature_size > rseq_size) |
187 | rseq_feature_size = rseq_size; |
188 | return; |
189 | } |
190 | rseq_ownership = 1; |
191 | if (!rseq_available()) { |
192 | rseq_size = 0; |
193 | rseq_feature_size = 0; |
194 | return; |
195 | } |
196 | rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); |
197 | rseq_flags = 0; |
198 | rseq_feature_size = get_rseq_feature_size(); |
199 | if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) |
200 | rseq_size = ORIG_RSEQ_ALLOC_SIZE; |
201 | else |
202 | rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; |
203 | } |
204 | |
205 | static __attribute__((destructor)) |
206 | void rseq_exit(void) |
207 | { |
208 | if (!rseq_ownership) |
209 | return; |
210 | rseq_offset = 0; |
211 | rseq_size = -1U; |
212 | rseq_feature_size = -1U; |
213 | rseq_ownership = 0; |
214 | } |
215 | |
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * If sched_getcpu() returns a negative value, the error is printed with
 * perror() and the process is aborted.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
227 | |
/*
 * Return the current node number obtained from the getcpu system call.
 *
 * On failure the error is printed with perror() and sys_getcpu()'s non-zero
 * return value is handed back to the caller instead of a node number.
 */
int32_t rseq_fallback_current_node(void)
{
	/* Typed to match sys_getcpu()'s unsigned * parameters. */
	unsigned int cpu_id, node_id;
	int ret;

	ret = sys_getcpu(&cpu_id, &node_id);
	if (ret) {
		perror("sys_getcpu()");
		return ret;
	}
	return (int32_t) node_id;
}
240 | |