// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <linux/array_size.h>
#include <linux/minmax.h>
#include <vdso/datapage.h>
#include <vdso/getrandom.h>
#include <vdso/unaligned.h>
#include <asm/vdso/getrandom.h>
#include <uapi/linux/mman.h>
#include <uapi/linux/random.h>

/* Bring in default accessors */
#include <vdso/vsyscall.h>

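/*
 * The usual asm/page.h definitions are not reliably usable from the vDSO build,
 * so PAGE_SIZE and PAGE_MASK are (re)derived here straight from CONFIG_PAGE_SHIFT.
 */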
#undef PAGE_SIZE
#undef PAGE_MASK
#define PAGE_SIZE (1UL << CONFIG_PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))

#define MEMCPY_AND_ZERO_SRC(type, dst, src, len) do {				\
	while (len >= sizeof(type)) {						\
		__put_unaligned_t(type, __get_unaligned_t(type, src), dst);	\
		__put_unaligned_t(type, 0, src);				\
		dst += sizeof(type);						\
		src += sizeof(type);						\
		len -= sizeof(type);						\
	}									\
} while (0)

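/*
 * Copy @len bytes from @src to @dst, zeroing @src behind the copy, using the widest
 * unaligned accesses available. For example, on a 64-bit machine with efficient
 * unaligned access, a 15-byte copy proceeds as one u64, one u32, one u16, and one u8
 * step. Wiping the source as it is consumed is what preserves forward secrecy for
 * batched output.
 */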
static void memcpy_and_zero_src(void *dst, void *src, size_t len)
{
	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
		if (IS_ENABLED(CONFIG_64BIT))
			MEMCPY_AND_ZERO_SRC(u64, dst, src, len);
		MEMCPY_AND_ZERO_SRC(u32, dst, src, len);
		MEMCPY_AND_ZERO_SRC(u16, dst, src, len);
	}
	MEMCPY_AND_ZERO_SRC(u8, dst, src, len);
}

/**
 * __cvdso_getrandom_data - Generic vDSO implementation of getrandom() syscall.
 * @rng_info: Describes state of kernel RNG, memory shared with kernel.
 * @buffer: Destination buffer to fill with random bytes.
 * @len: Size of @buffer in bytes.
 * @flags: Zero or more GRND_* flags.
 * @opaque_state: Pointer to an opaque state area.
 * @opaque_len: Length of opaque state area.
 *
 * This implements a "fast key erasure" RNG using ChaCha20, in the same way that the kernel's
 * getrandom() syscall does. It periodically reseeds its key from the kernel's RNG, at the same
 * schedule that the kernel's RNG is reseeded. If the kernel's RNG is not ready, then this always
 * calls into the syscall.
 *
 * If @buffer, @len, and @flags are 0, and @opaque_len is ~0UL, then @opaque_state is populated
 * with a struct vgetrandom_opaque_params and the function returns 0; if it does not return 0,
 * this function should not be used.
 *
 * @opaque_state *must* be allocated by calling mmap(2) using the mmap_prot and mmap_flags fields
 * from the struct vgetrandom_opaque_params, and states must not straddle pages. Unless external
 * locking is used, one state must be allocated per thread, as it is not safe to call this function
 * concurrently with the same @opaque_state. However, it is safe to call this using the same
 * @opaque_state that is shared between main code and signal handling code, within the same thread.
 * An illustrative allocation and call sequence appears at the end of this file.
 *
 * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
 */
static __always_inline ssize_t
__cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_t len,
		       unsigned int flags, void *opaque_state, size_t opaque_len)
{
	ssize_t ret = min_t(size_t, INT_MAX & PAGE_MASK /* = MAX_RW_COUNT */, len);
	struct vgetrandom_state *state = opaque_state;
	size_t batch_len, nblocks, orig_len = len;
	bool in_use, have_retried = false;
	void *orig_buffer = buffer;
	u64 current_generation;
	u32 counter[2] = { 0 };

	if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) {
		struct vgetrandom_opaque_params *params = opaque_state;
		params->size_of_opaque_state = sizeof(*state);
		params->mmap_prot = PROT_READ | PROT_WRITE;
		params->mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS;
		for (size_t i = 0; i < ARRAY_SIZE(params->reserved); ++i)
			params->reserved[i] = 0;
		return 0;
	}

	/* The state must not straddle a page, since pages can be zeroed at any time. */
	if (unlikely(((unsigned long)opaque_state & ~PAGE_MASK) + sizeof(*state) > PAGE_SIZE))
		return -EFAULT;

	/* Handle unexpected flags by falling back to the kernel. */
	if (unlikely(flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)))
		goto fallback_syscall;

	/* If the caller passes the wrong size, which might happen due to CRIU, fall back. */
	if (unlikely(opaque_len != sizeof(*state)))
		goto fallback_syscall;

	/*
	 * If the kernel's RNG is not yet ready, then it's not possible to provide random bytes from
	 * userspace, because A) the various @flags require this to block, or not, depending on
	 * various factors unavailable to userspace, and B) the kernel's behavior before the RNG is
	 * ready is to reseed from the entropy pool at every invocation.
	 */
	if (unlikely(!READ_ONCE(rng_info->is_ready)))
		goto fallback_syscall;

	/*
	 * This condition is checked after @rng_info->is_ready, because before the kernel's RNG is
	 * initialized, the @flags parameter may require this to block or return an error, even when
	 * len is zero.
	 */
	if (unlikely(!len))
		return 0;

	/*
	 * @state->in_use is basic reentrancy protection against this running in a signal handler
	 * with the same @opaque_state, but obviously not atomic wrt multiple CPUs or more than one
	 * level of reentrancy. If a signal interrupts this after reading @state->in_use, but before
	 * writing @state->in_use, there is still no race, because the signal handler will run to
	 * its completion before returning execution.
	 */
	in_use = READ_ONCE(state->in_use);
	if (unlikely(in_use))
		/* The syscall simply fills the buffer and does not touch @state, so fall back. */
		goto fallback_syscall;
	WRITE_ONCE(state->in_use, true);

retry_generation:
	/*
	 * @rng_info->generation must always be read here, as it serializes @state->key with the
	 * kernel's RNG reseeding schedule.
	 */
	current_generation = READ_ONCE(rng_info->generation);

	/*
	 * If @state->generation doesn't match the kernel RNG's generation, then it means the
	 * kernel's RNG has reseeded, and so @state->key is reseeded as well.
	 */
	if (unlikely(state->generation != current_generation)) {
		/*
		 * Write the generation before filling the key, in case of fork. If there is a fork
		 * just after this line, the parent and child will get different random bytes from
		 * the syscall, which is good. However, were this line to occur after the getrandom
		 * syscall, then both child and parent could have the same bytes and the same
		 * generation counter, so the fork would not be detected. Therefore, write
		 * @state->generation before the call to the getrandom syscall.
		 */
		WRITE_ONCE(state->generation, current_generation);

		/*
		 * Prevent the syscall from being reordered wrt current_generation. Pairs with the
		 * smp_store_release(&vdso_k_rng_data->generation) in random.c.
		 */
		smp_rmb();

		/* Reseed @state->key using fresh bytes from the kernel. */
		if (getrandom_syscall(state->key, sizeof(state->key), 0) != sizeof(state->key)) {
			/*
			 * If the syscall failed to refresh the key, then @state->key is now
			 * invalid, so invalidate the generation so that it is not used again, and
			 * fall back to using the syscall entirely.
			 */
			WRITE_ONCE(state->generation, 0);

			/*
			 * Set @state->in_use to false only after the last write to @state in the
			 * line above.
			 */
			WRITE_ONCE(state->in_use, false);

			goto fallback_syscall;
		}

		/*
		 * Set @state->pos to beyond the end of the batch, so that the batch is refilled
		 * using the new key.
		 */
		state->pos = sizeof(state->batch);
	}

	/* Set @len to the total number of bytes this call is allowed to produce, as clamped in @ret. */
	len = ret;
more_batch:
	/*
	 * First use bytes out of @state->batch, which may have been filled by the last call to this
	 * function.
	 */
	batch_len = min_t(size_t, sizeof(state->batch) - state->pos, len);
	if (batch_len) {
		/* Zeroing at the same time as memcpying helps preserve forward secrecy. */
		memcpy_and_zero_src(buffer, state->batch + state->pos, batch_len);
		state->pos += batch_len;
		buffer += batch_len;
		len -= batch_len;
	}

	if (!len) {
		/* Prevent the loop from being reordered wrt ->generation. */
		barrier();

		/*
		 * Since @rng_info->generation will never be 0, re-read @state->generation, rather
		 * than using the local current_generation variable, to learn whether a fork
		 * occurred or if @state was zeroed due to memory pressure. Primarily, though, this
		 * indicates whether the kernel's RNG has reseeded, in which case generate a new key
		 * and start over.
		 */
		if (unlikely(READ_ONCE(state->generation) != READ_ONCE(rng_info->generation))) {
			/*
			 * Prevent this from looping forever in case of low memory or racing with a
			 * user force-reseeding the kernel's RNG using the ioctl.
			 */
			if (have_retried) {
				WRITE_ONCE(state->in_use, false);
				goto fallback_syscall;
			}

			have_retried = true;
			buffer = orig_buffer;
			goto retry_generation;
		}

		/*
		 * Set @state->in_use to false only when there will be no more reads or writes of
		 * @state.
		 */
		WRITE_ONCE(state->in_use, false);
		return ret;
	}

	/* Generate blocks of RNG output directly into @buffer while there's enough room left. */
	nblocks = len / CHACHA_BLOCK_SIZE;
	if (nblocks) {
		__arch_chacha20_blocks_nostack(buffer, state->key, counter, nblocks);
		buffer += nblocks * CHACHA_BLOCK_SIZE;
		len -= nblocks * CHACHA_BLOCK_SIZE;
	}

	BUILD_BUG_ON(sizeof(state->batch_key) % CHACHA_BLOCK_SIZE != 0);

	/*
	 * Refill the batch and overwrite the key, in order to preserve forward secrecy. In
	 * struct vgetrandom_state, @batch_key is a union aliasing @batch followed by @key, so
	 * this single call both refills the batch and replaces the old key with fresh output
	 * ("fast key erasure"). @counter continues from the direct output above, so no block
	 * is ever generated twice under the same key.
	 */
	__arch_chacha20_blocks_nostack(state->batch_key, state->key, counter,
				       sizeof(state->batch_key) / CHACHA_BLOCK_SIZE);

	/* Since the batch was just refilled, set the position back to 0 to indicate a full batch. */
	state->pos = 0;
	goto more_batch;

fallback_syscall:
	return getrandom_syscall(orig_buffer, orig_len, flags);
}

static __always_inline ssize_t
__cvdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
{
	return __cvdso_getrandom_data(__arch_get_vdso_u_rng_data(), buffer, len, flags,
				      opaque_state, opaque_len);
}
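
/*
 * Illustrative usage from a userspace wrapper, a sketch under stated assumptions rather
 * than a definitive recipe: "vgetrandom" stands for an already-resolved pointer to this
 * vDSO function (the exported name and lookup are architecture- and libc-specific, e.g.
 * __vdso_getrandom on x86_64), and use_getrandom_syscall() is a hypothetical fallback.
 *
 *	struct vgetrandom_opaque_params params = { 0 };
 *	unsigned char buf[256];
 *	void *state;
 *
 *	// Ask the vDSO how the opaque state must be allocated.
 *	if (vgetrandom(NULL, 0, 0, &params, ~0UL) != 0)
 *		return use_getrandom_syscall();
 *
 *	// Allocate one state per thread, exactly as instructed.
 *	state = mmap(NULL, params.size_of_opaque_state, params.mmap_prot,
 *		     params.mmap_flags, -1, 0);
 *	if (state == MAP_FAILED)
 *		return use_getrandom_syscall();
 *
 *	// Draw random bytes; in the common case no syscall is made.
 *	ssize_t ret = vgetrandom(buf, sizeof(buf), 0, state, params.size_of_opaque_state);
 */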