// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"

#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

	char elements[] __aligned(8);
};
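
/*
 * Both map types share one circular buffer of "size" fixed-size slots.
 * One slot is intentionally left unused so that an empty buffer
 * (head == tail) can be distinguished from a full one
 * ((head + 1) % size == tail) without a separate element counter.
 * For example, with max_entries = 4 the buffer holds size = 5 slots
 * and at most 4 live elements.
 */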
24 | |
25 | static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) |
26 | { |
27 | return container_of(map, struct bpf_queue_stack, map); |
28 | } |
29 | |
30 | static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs) |
31 | { |
32 | return qs->head == qs->tail; |
33 | } |
34 | |
35 | static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) |
36 | { |
37 | u32 head = qs->head + 1; |
38 | |
39 | if (unlikely(head >= qs->size)) |
40 | head = 0; |
41 | |
42 | return head == qs->tail; |
43 | } |
44 | |
45 | /* Called from syscall */ |
46 | static int queue_stack_map_alloc_check(union bpf_attr *attr) |
47 | { |
48 | /* check sanity of attributes */ |
49 | if (attr->max_entries == 0 || attr->key_size != 0 || |
50 | attr->value_size == 0 || |
51 | attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK || |
52 | !bpf_map_flags_access_ok(access_flags: attr->map_flags)) |
53 | return -EINVAL; |
54 | |
55 | if (attr->value_size > KMALLOC_MAX_SIZE) |
56 | /* if value_size is bigger, the user space won't be able to |
57 | * access the elements. |
58 | */ |
59 | return -E2BIG; |
60 | |
61 | return 0; |
62 | } |
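
/*
 * Example (illustrative, not part of this file): from user space such a
 * map is created through BPF_MAP_CREATE with key_size == 0, e.g. via
 * libbpf:
 *
 *	int fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 0,
 *				sizeof(__u32), 100, NULL);
 *
 * which queue_stack_map_alloc_check() above accepts: non-zero
 * max_entries and value_size, zero key_size.
 */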

static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_queue_stack *qs;
	u64 size, queue_size;

	size = (u64) attr->max_entries + 1;
	queue_size = sizeof(*qs) + size * attr->value_size;

	qs = bpf_map_area_alloc(queue_size, numa_node);
	if (!qs)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&qs->map, attr);

	qs->size = size;

	raw_spin_lock_init(&qs->lock);

	return &qs->map;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);

	bpf_map_area_free(qs);
}

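/*
 * Consume or peek at the oldest element, i.e. the one at the tail of the
 * ring: queue maps are FIFO. The lock is only trylocked in NMI context,
 * where spinning on a lock already held on this CPU could deadlock.
 */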
static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	ptr = &qs->elements[qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

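/*
 * Consume or peek at the newest element, i.e. the one just below the head
 * of the ring: stack maps are LIFO. Since index is unsigned, the
 * "index >= qs->size" check also catches the wrap-around case where
 * head == 0 and the subtraction underflows.
 */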
static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	ptr = &qs->elements[index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

/* Called from syscall or from eBPF program */
static long queue_map_peek_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static long stack_map_peek_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static long queue_map_pop_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static long stack_map_pop_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, true);
}
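
/*
 * Example (illustrative, not part of this file): an eBPF program reaches
 * these operations through the bpf_map_push_elem(), bpf_map_pop_elem()
 * and bpf_map_peek_elem() helpers, e.g.:
 *
 *	__u32 val = 42;
 *
 *	bpf_map_push_elem(&my_queue, &val, BPF_ANY);
 *	if (bpf_map_pop_elem(&my_queue, &val) == 0)
 *		... val now holds the oldest pushed element ...
 *
 * where "my_queue" is a hypothetical BPF_MAP_TYPE_QUEUE map. From user
 * space, the BPF_MAP_UPDATE_ELEM, BPF_MAP_LOOKUP_AND_DELETE_ELEM and
 * BPF_MAP_LOOKUP_ELEM syscall commands map to push, pop and peek.
 */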

/* Called from syscall or from eBPF program */
static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
				      u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, irq_flags);
	}

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	dst = &qs->elements[qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}
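
/*
 * Example (illustrative): with max_entries = 2, pushing 1 and then 2
 * fills the map; a plain push of 3 fails with -E2BIG, while
 *
 *	bpf_map_push_elem(&my_queue, &three, BPF_EXIST);
 *
 * drops the oldest element (1) by advancing the tail, and succeeds,
 * leaving {2, 3}.
 */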

/* Called from syscall or from eBPF program */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_update_elem(struct bpf_map *map, void *key,
					void *value, u64 flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called from syscall */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}

static u64 queue_stack_map_mem_usage(const struct bpf_map *map)
{
	u64 usage = sizeof(struct bpf_queue_stack);

	usage += ((u64)map->max_entries + 1) * map->value_size;
	return usage;
}
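
/*
 * For example (illustrative), a queue with max_entries = 100 and
 * value_size = 8 reports sizeof(struct bpf_queue_stack) + 101 * 8 bytes,
 * matching the allocation made in queue_stack_map_alloc().
 */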

BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack)
const struct bpf_map_ops queue_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = queue_map_pop_elem,
	.map_peek_elem = queue_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
	.map_mem_usage = queue_stack_map_mem_usage,
	.map_btf_id = &queue_map_btf_ids[0],
};

const struct bpf_map_ops stack_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = stack_map_pop_elem,
	.map_peek_elem = stack_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
	.map_mem_usage = queue_stack_map_mem_usage,
	.map_btf_id = &queue_map_btf_ids[0],
};