1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2020 Collabora Ltd. |
4 | * |
5 | * Benchmark and test syscall user dispatch |
6 | */ |
7 | |
8 | #define _GNU_SOURCE |
9 | #include <stdio.h> |
10 | #include <string.h> |
11 | #include <stdlib.h> |
12 | #include <signal.h> |
13 | #include <errno.h> |
14 | #include <time.h> |
15 | #include <sys/time.h> |
16 | #include <unistd.h> |
17 | #include <sys/sysinfo.h> |
18 | #include <sys/prctl.h> |
19 | #include <sys/syscall.h> |
20 | |
/*
 * Fallback definitions of the syscall user dispatch prctl() interface,
 * used only when the system headers do not already provide them.
 */
#if !defined(PR_SET_SYSCALL_USER_DISPATCH)
#define PR_SET_SYSCALL_USER_DISPATCH	59
#define PR_SYS_DISPATCH_OFF		0
#define PR_SYS_DISPATCH_ON		1
#define SYSCALL_DISPATCH_FILTER_ALLOW	0
#define SYSCALL_DISPATCH_FILTER_BLOCK	1
#endif

/*
 * MAGIC_SYSCALL_1 is a deliberately bad Linux syscall number.  It is
 * derived from __NR_syscalls when the headers expose that macro, and
 * falls back to a fixed large value otherwise.
 */
#if defined(__NR_syscalls)
#define MAGIC_SYSCALL_1	(__NR_syscalls + 1)
#else
#define MAGIC_SYSCALL_1	(0xff00)
#endif
34 | |
/*
 * To test returning from a SIGSYS handler while the selector is set to
 * block, the test requires some per-architecture support (i.e. knowledge
 * about the signal trampoline address).  On i386, we know it is in the
 * vDSO, and a small trampoline is open-coded for x86_64.  Other
 * architectures that have a trampoline in the vDSO will support
 * TEST_BLOCKED_RETURN out of the box, but don't enable them until they
 * support syscall user dispatch.
 */
/* The blocked-return test is only enabled on the two x86 flavours. */
#if defined(__i386__) || defined(__x86_64__)
# define TEST_BLOCKED_RETURN
#endif

/*
 * Bounds of the dispatcher region handed to prctl() in main().  On
 * x86_64 they are assembler labels emitted inside handle_sigsys() and
 * are declared as functions so that their addresses can be taken; on
 * every other architecture both are plain zero-valued integers.
 */
#if defined(__x86_64__)
void *syscall_dispatcher_start(void);
void *syscall_dispatcher_end(void);
#else
unsigned long syscall_dispatcher_start = 0UL;
unsigned long syscall_dispatcher_end = 0UL;
#endif

/*
 * Counters bumped by the SIGSYS handler: one for MAGIC_SYSCALL_1, one
 * for every other syscall number it sees.
 */
unsigned long trapped_call_count = 0UL;
unsigned long native_call_count = 0UL;

/* Selector byte whose address is registered with prctl() in main(). */
char selector;
#define SYSCALL_BLOCK	(selector = SYSCALL_DISPATCH_FILTER_BLOCK)
#define SYSCALL_UNBLOCK	(selector = SYSCALL_DISPATCH_FILTER_ALLOW)

/* Number of sysinfo() calls issued per timed batch. */
#define CALIBRATION_STEP	100000
/* Target duration, in seconds, of one full measurement run. */
#define CALIBRATE_TO_SECS	5
/* Number of batches per measurement run; set by calibrate_set(). */
int factor;
66 | |
67 | static double one_sysinfo_step(void) |
68 | { |
69 | struct timespec t1, t2; |
70 | int i; |
71 | struct sysinfo info; |
72 | |
73 | clock_gettime(CLOCK_MONOTONIC, &t1); |
74 | for (i = 0; i < CALIBRATION_STEP; i++) |
75 | sysinfo(&info); |
76 | clock_gettime(CLOCK_MONOTONIC, &t2); |
77 | return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec); |
78 | } |
79 | |
80 | static void calibrate_set(void) |
81 | { |
82 | double elapsed = 0; |
83 | |
84 | printf("Calibrating test set to last ~%d seconds...\n" , CALIBRATE_TO_SECS); |
85 | |
86 | while (elapsed < 1) { |
87 | elapsed += one_sysinfo_step(); |
88 | factor += CALIBRATE_TO_SECS; |
89 | } |
90 | |
91 | printf("test iterations = %d\n" , CALIBRATION_STEP * factor); |
92 | } |
93 | |
94 | static double perf_syscall(void) |
95 | { |
96 | unsigned int i; |
97 | double partial = 0; |
98 | |
99 | for (i = 0; i < factor; ++i) |
100 | partial += one_sysinfo_step()/(CALIBRATION_STEP*factor); |
101 | return partial; |
102 | } |
103 | |
104 | static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) |
105 | { |
106 | char buf[1024]; |
107 | int len; |
108 | |
109 | SYSCALL_UNBLOCK; |
110 | |
111 | /* printf and friends are not signal-safe. */ |
112 | len = snprintf(buf, 1024, "Caught sys_%x\n" , info->si_syscall); |
113 | write(1, buf, len); |
114 | |
115 | if (info->si_syscall == MAGIC_SYSCALL_1) |
116 | trapped_call_count++; |
117 | else |
118 | native_call_count++; |
119 | |
120 | #ifdef TEST_BLOCKED_RETURN |
121 | SYSCALL_BLOCK; |
122 | #endif |
123 | |
124 | #ifdef __x86_64__ |
125 | __asm__ volatile("movq $0xf, %rax" ); |
126 | __asm__ volatile("leaveq" ); |
127 | __asm__ volatile("add $0x8, %rsp" ); |
128 | __asm__ volatile("syscall_dispatcher_start:" ); |
129 | __asm__ volatile("syscall" ); |
130 | __asm__ volatile("nop" ); /* Landing pad within dispatcher area */ |
131 | __asm__ volatile("syscall_dispatcher_end:" ); |
132 | #endif |
133 | |
134 | } |
135 | |
136 | int main(void) |
137 | { |
138 | struct sigaction act; |
139 | double time1, time2; |
140 | int ret; |
141 | sigset_t mask; |
142 | |
143 | memset(&act, 0, sizeof(act)); |
144 | sigemptyset(&mask); |
145 | |
146 | act.sa_sigaction = handle_sigsys; |
147 | act.sa_flags = SA_SIGINFO; |
148 | act.sa_mask = mask; |
149 | |
150 | calibrate_set(); |
151 | |
152 | time1 = perf_syscall(); |
153 | printf("Avg syscall time %.0lfns.\n" , time1 * 1.0e9); |
154 | |
155 | ret = sigaction(SIGSYS, &act, NULL); |
156 | if (ret) { |
157 | perror("Error sigaction:" ); |
158 | exit(-1); |
159 | } |
160 | |
161 | fprintf(stderr, "Enabling syscall trapping.\n" ); |
162 | |
163 | if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, |
164 | syscall_dispatcher_start, |
165 | (syscall_dispatcher_end - syscall_dispatcher_start + 1), |
166 | &selector)) { |
167 | perror("prctl failed\n" ); |
168 | exit(-1); |
169 | } |
170 | |
171 | SYSCALL_BLOCK; |
172 | syscall(MAGIC_SYSCALL_1); |
173 | |
174 | #ifdef TEST_BLOCKED_RETURN |
175 | if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) { |
176 | fprintf(stderr, "Failed to return with selector blocked.\n" ); |
177 | exit(-1); |
178 | } |
179 | #endif |
180 | |
181 | SYSCALL_UNBLOCK; |
182 | |
183 | if (!trapped_call_count) { |
184 | fprintf(stderr, "syscall trapping does not work.\n" ); |
185 | exit(-1); |
186 | } |
187 | |
188 | time2 = perf_syscall(); |
189 | |
190 | if (native_call_count) { |
191 | perror("syscall trapping intercepted more syscalls than expected\n" ); |
192 | exit(-1); |
193 | } |
194 | |
195 | printf("trapped_call_count %lu, native_call_count %lu.\n" , |
196 | trapped_call_count, native_call_count); |
197 | printf("Avg syscall time %.0lfns.\n" , time2 * 1.0e9); |
198 | printf("Interception overhead: %.1lf%% (+%.0lfns).\n" , |
199 | 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1)); |
200 | return 0; |
201 | |
202 | } |
203 | |